# Source code for mne_videobrowser.browsers.audio_browser

"""Contains AudioView and AudioBrowser Qt widgets for visualizing audio data."""

import logging
import os.path

import numpy as np
import numpy.typing as npt
import pyqtgraph as pg
from qtpy.QtCore import Qt, QTimer, Signal, Slot  # type: ignore
from qtpy.QtWidgets import (
    QComboBox,
    QHBoxLayout,
    QLabel,
    QPushButton,
    QSizePolicy,
    QVBoxLayout,
    QWidget,
)

from .. import audio_player
from ..media.audio import AudioFile
from . import gui_utils
from .syncable_browser import SyncableBrowserWidget
from .time_selector import TimeSelector

logger = logging.getLogger(__name__)


class AudioView(QWidget):
    """A widget for displaying audio waveform.

    Includes controls for channel selection and view options.

    Parameters
    ----------
    audio : AudioFile
        The audio file to be displayed.
    plotting_window_size : int | None, optional
        The size of the plotting window in samples. Plotting window size N means that
        N samples are aggregated into a single point in the plot. If None (default),
        the plotting window size is set to 1% of the audio sampling rate (but never
        less than one sample).
    default_view_len : float, optional
        The duration to show in the audio view with default zoom level,
        by default 10.0 seconds. The visible duration never exceeds the duration
        of the audio.
    time_selector_padding : float, optional
        Padding (in seconds) to apply when making sure that the user does not drag the
        selector too close to the edges of the view, by default 0.1 seconds.
    parent : QWidget | None, optional
        The parent widget for this view, by default None.
    """

    # Emits a signal with the sample index when the position changes
    sigSampleIndexChanged = Signal(int)  # sample index
    sigChannelSelectionChanged = Signal(object)  # channel index or None

    def __init__(
        self,
        audio: AudioFile,
        plotting_window_size: int | None = None,
        default_view_len: float = 10.0,
        time_selector_padding: float = 0.1,
        parent: QWidget | None = None,
    ) -> None:
        super().__init__(parent=parent)
        self._audio = audio
        if plotting_window_size is None:
            # int(rate / 100) is zero for sampling rates below 100 Hz, so make sure
            # that at least one sample is aggregated into each plotted point.
            self._plotting_window_size = max(1, int(audio.sampling_rate / 100))
            logger.info(
                f"Using default plotting window size of {self._plotting_window_size} "
                f"for {audio.sampling_rate} Hz audio."
            )
        else:
            self._plotting_window_size = plotting_window_size

        self._time_selector_padding = time_selector_padding
        self._default_view_len = default_view_len
        # The index of the currently highlighted/selected sample
        self._current_sample = 0
        # This is changed by the zoom buttons. It is capped to the audio duration so
        # that the zoom level always corresponds to something that can be shown.
        self._visible_duration_seconds = min(default_view_len, audio.duration)
        self._channel_selection: int | None = None  # None shows mean of all channels

        self._layout = QVBoxLayout()
        self.setLayout(self._layout)

        # Add the plot that visualizes the audio data
        self._setup_plot_widget()
        self._set_clamped_time_range(0, self._visible_duration_seconds)

        # Add a vertical line that indicates the current sample and is movable
        self._time_selector = TimeSelector(parent=self)
        self._time_selector.sigSelectedTimeChanged.connect(self._on_time_selector_moved)

        # Add controls and information about the audio.
        self._setup_toolbar()

        # Initial visualization
        self._add_initial_plot()  # assumes time selector has been created
        self.display_at_sample(0, signal=False)

    @property
    def current_sample(self) -> int:
        """Get the index of the current sample position."""
        return self._current_sample

    @property
    def current_time(self) -> float:
        """Get the current position in seconds."""
        return self._current_sample / self._audio.sampling_rate

    @property
    def channel_selection(self) -> int | None:
        """Get the currently selected channel index.

        Returns
        -------
        int | None
            The index of the selected channel, or None if mean of all the channels
            is shown.
        """
        return self._channel_selection

    def display_at_sample(self, sample_idx: int, signal: bool = True) -> bool:
        """Set the currently highlighted sample index and update the view if necessary.

        Parameters
        ----------
        sample_idx : int
            The sample index to highlight in the audio view.
        signal : bool, optional
            Whether to emit the position changed signal, by default True.

        Returns
        -------
        bool
            True if the visualization was updated, False if the index is out of bounds.
        """
        if sample_idx < 0 or sample_idx >= self._audio.n_samples:
            logger.warning(
                f"Cannot display audio at sample index {sample_idx} (out of bounds)"
            )
            return False

        self._current_sample = sample_idx
        # Current time gets updated based on the sample index.

        self._time_selector.set_selected_time_no_signal(self.current_time)
        self._move_view_to_current_time()
        self._time_label.set_current_time(self.current_time)

        if signal:
            self.sigSampleIndexChanged.emit(self._current_sample)

        return True

    def display_at_time(self, time_seconds: float, signal: bool = True) -> bool:
        """Set the currently highlighted time and update the view if necessary.

        This is just a wrapper around `display_at_sample` that converts time to
        sample index. The conversion truncates towards zero.

        Parameters
        ----------
        time_seconds : float
            The time in seconds to highlight in the audio view.
        signal : bool, optional
            Whether to emit the position changed signal, by default True.

        Returns
        -------
        bool
            True if the visualization was updated, False if the time is out of bounds.
        """
        sample_idx = int(time_seconds * self._audio.sampling_rate)
        return self.display_at_sample(sample_idx, signal=signal)

    def _setup_plot_widget(self) -> None:
        """Set up the plot widget for audio visualization."""
        self._plot_widget = pg.PlotWidget()
        self._plot_widget.setBackground("w")
        self._plot_widget.setAntialiasing(False)  # might increase performance
        self._plot_widget.setLabel("bottom", "Time", "s")
        self._plot_widget.setLabel("left", "Amplitude")
        self._plot_widget.setMouseEnabled(x=True, y=False)
        # Restrict x-axis range to audio duration
        self._plot_widget.setLimits(
            xMin=0,
            xMax=self._audio.duration,
            minXRange=0.1,
            maxXRange=self._audio.duration,
        )
        self._layout.addWidget(self._plot_widget)

    def _add_initial_plot(self) -> None:
        """Plot the selected channel for the first time, setting up the plot.

        NOTE: This is meant to be called only once during initialization,
        later updates should be done with _plot_selected_channel.
        """
        # Ensure that curves have not been created.
        assert not hasattr(self, "_envelope_plot"), (
            "Audio plot already exists, this method should only be called once."
        )
        times, audio_envelope = self._get_interleaved_audio_envelope()
        logger.debug("Reading done, plotting initial audio waveform.")

        self._envelope_plot = self._plot_widget.plot(
            times, audio_envelope, pen=pg.mkPen("b", width=1), skipFiniteCheck=True
        )
        # Use downsampling to speed up rendering when zoomed out.
        self._envelope_plot.setDownsampling(auto=True, method="peak")
        # Enable clipping to improve performance when zoomed in.
        self._envelope_plot.setClipToView(True)

        logger.debug("Finished plotting initial audio waveform.")

        # Add time selector last so that it is on top of the audio plot.
        self._time_selector.add_to_plot(self._plot_widget)

    def _get_interleaved_audio_envelope(
        self,
    ) -> tuple[npt.NDArray[np.float64], npt.NDArray[np.float32]]:
        """Get audio envelope with interleaved min and max values for plotting.

        Every time point is repeated twice and paired first with the window minimum
        and then with the window maximum.

        NOTE: Reads the entire audio data for the selected channel. This takes some time
        and memory for large audio files.

        Returns
        -------
        interleaved_times : np.ndarray
            The envelope time points, each repeated twice.
        interleaved_audio : np.ndarray
            The envelope values in the order min, max, min, max, ...
        """
        times, audio_min, audio_max = self._audio.get_min_max_envelope(
            window_size=self._plotting_window_size,
            channel_idx=self._channel_selection,
            sample_range=None,  # get all samples
        )
        n_points = len(times)
        interleaved_times = times.repeat(2)

        interleaved_audio = np.empty(n_points * 2, dtype=audio_min.dtype)
        interleaved_audio[0::2] = audio_min
        interleaved_audio[1::2] = audio_max

        return interleaved_times, interleaved_audio

    def _setup_toolbar(self) -> None:
        """Set up toolbar that contains controls and information about the audio."""
        toolbar_layout = QHBoxLayout()
        self._layout.addLayout(toolbar_layout)

        # Add name of the audio file as a label
        audio_name = os.path.basename(self._audio.fname)
        audio_label = QLabel(f"{audio_name}")
        audio_label.setSizePolicy(
            QSizePolicy.Policy.Minimum, QSizePolicy.Policy.Minimum
        )
        toolbar_layout.addWidget(audio_label)

        # Add info label that shows audio stats when hovered over
        info_icon = QLabel()
        info_pixmap = gui_utils.load_icon_pixmap("info.png")
        if info_pixmap is not None:
            info_icon.setPixmap(
                info_pixmap.scaled(
                    16,
                    16,
                    Qt.AspectRatioMode.KeepAspectRatio,
                    Qt.TransformationMode.SmoothTransformation,
                )
            )
        else:
            logger.warning("Info icon not found, using text-based icon")
            info_icon.setText("ℹ️")

        info_icon.setToolTip(
            f"File: {self._audio.fname}\n"
            f"Sampling rate: {self._audio.sampling_rate} Hz\n"
            f"Channels: {self._audio.n_channels}\n"
            f"Bit depth: {self._audio.bit_depth} bits\n"
            f"Duration: {self._audio.duration:.2f} s\n"
            f"Samples: {self._audio.n_samples}"
        )
        toolbar_layout.addWidget(info_icon)
        # Add the same hover info to the audio label
        audio_label.setToolTip(info_icon.toolTip())

        # Add zoom controls
        zoom_label = QLabel("Zoom:")
        toolbar_layout.addWidget(zoom_label)

        self._zoom_in_button = QPushButton("+")
        self._zoom_in_button.clicked.connect(self._zoom_in)
        toolbar_layout.addWidget(self._zoom_in_button)

        self._zoom_out_button = QPushButton("-")
        self._zoom_out_button.clicked.connect(self._zoom_out)
        toolbar_layout.addWidget(self._zoom_out_button)

        self._zoom_reset_button = QPushButton("Reset")
        self._zoom_reset_button.clicked.connect(self._reset_zoom)
        toolbar_layout.addWidget(self._zoom_reset_button)

        toolbar_layout.addStretch()

        # Add channel selector
        channel_label = QLabel("Channel:")
        toolbar_layout.addWidget(channel_label)

        self._channel_selector = QComboBox()
        self._channel_selector.addItem("All (show mean)")
        for i in range(self._audio.n_channels):
            self._channel_selector.addItem(f"Channel {i + 1}")
        self._channel_selector.currentIndexChanged.connect(
            self._update_selected_channel
        )
        toolbar_layout.addWidget(self._channel_selector)

        # Add label that shows the current time and max time.
        self._time_label = gui_utils.ElapsedTimeLabel(
            current_time_seconds=self.current_time,
            max_time_seconds=self._audio.duration,
            parent=self,
        )
        toolbar_layout.addWidget(self._time_label)

    def _move_view_to_current_time(self) -> None:
        """Ensure that the view contains the currently highlighted/selected sample.

        If the current sample is outside the current view range, move the view
        as many window lengths as needed to bring the current sample into view.
        """
        current_time = self.current_time
        window_min, window_max = self._plot_widget.viewRange()[0]
        window_len = window_max - window_min

        if window_min <= current_time <= window_max:
            # The time selector is already in the view range, no need to change it.
            logger.debug(
                f"Selected time {current_time:.3f} s is already in the audio view "
                f"range [{window_min:.3f}, {window_max:.3f}] seconds. No change needed."
            )
            return

        if current_time < window_min:
            moves_needed = int(np.ceil((window_min - current_time) / window_len))
            new_window_min = window_min - moves_needed * window_len
            new_window_max = window_max - moves_needed * window_len
        else:  # current_time > window_max
            moves_needed = int(np.ceil((current_time - window_max) / window_len))
            new_window_min = window_min + moves_needed * window_len
            new_window_max = window_max + moves_needed * window_len

        logger.debug(
            f"Moving audio view to include selected time {current_time:.3f} seconds."
        )
        self._set_clamped_time_range(new_window_min, new_window_max)

    def _set_clamped_time_range(self, new_min: float, new_max: float) -> None:
        """Set x-axis range of the plot ensuring it does not exceed audio duration.

        If the requested range starts before zero, the view is anchored to the start
        of the audio; if it ends after the audio, the view is anchored to the end.
        In both of these cases the width of the view is the current visible duration
        (capped to the audio duration). Otherwise the requested range is used as is.

        Parameters
        ----------
        new_min : float
            Requested start of the visible range in seconds.
        new_max : float
            Requested end of the visible range in seconds.
        """
        min_time = 0.0
        max_time = self._audio.duration
        # The window can never be longer than the audio itself. Without this cap the
        # "end" branch below would compute a negative start time for audio that is
        # shorter than the visible duration.
        visible_duration = min(self._visible_duration_seconds, max_time)

        # NOTE: My IDE does not know the correct signature of setXRange, so I have
        # ignored the warnings.

        if new_min < min_time:
            logger.debug(
                "Setting audio time range to start: "
                f"[{min_time}, {visible_duration}] with visible duration "
                f"{visible_duration} seconds."
            )
            self._plot_widget.setXRange(
                min_time,
                visible_duration,
                padding=0,  # type: ignore
            )

        elif new_max > max_time:
            logger.debug(
                "Setting audio time range to end: "
                f"[{max_time - visible_duration}, {max_time}] with "
                f"visible duration {visible_duration} seconds."
            )
            self._plot_widget.setXRange(
                max_time - visible_duration,
                max_time,
                padding=0,  # type: ignore
            )
        else:
            logger.debug(
                f"Setting audio time range to: [{new_min}, {new_max}] seconds "
                f"with visible duration {visible_duration} seconds."
            )
            self._plot_widget.setXRange(new_min, new_max, padding=0)  # type: ignore

    def _plot_selected_channel(self) -> None:
        """Re-plot the audio waveform for the currently selected channel."""
        logger.debug(
            f"Getting audio waveform for current channel: {self._channel_selection}"
        )
        times, audio_envelope = self._get_interleaved_audio_envelope()
        logger.debug("Reading done, updating audio waveform plot.")
        self._envelope_plot.setData(
            times, audio_envelope, pen=pg.mkPen("b", width=1), skipFiniteCheck=True
        )
        logger.debug("Audio waveform plot updated.")

    @Slot()
    def _on_time_selector_moved(self) -> None:
        """Handle when the selector line is moved by the user.

        Updates the current sample based on the new position of the selector
        and emits a signal for the position change. Does not change the visible window.
        """
        # Clamp the new time both to the current view range to make it impossible to
        # move the selector outside the visible range and to audio duration.
        view_min, view_max = self._plot_widget.viewRange()[0]
        clamp_range = (max(0.0, view_min), min(self._audio.duration, view_max))
        self._time_selector.clamp_selected_time_to_range(
            clamp_range, padding=self._time_selector_padding
        )
        clamped_time = self._time_selector.selected_time
        new_sample = int(clamped_time * self._audio.sampling_rate)
        # A selector time equal to the audio duration (possible e.g. with zero
        # padding) would map to index n_samples, which display_at_sample rejects as
        # out of bounds. Keep the stored and emitted index valid.
        new_sample = max(0, min(new_sample, self._audio.n_samples - 1))

        # Update currently selected sample and time label.
        self._current_sample = new_sample  # Updates self.current_time automatically
        self._time_label.set_current_time(self.current_time)

        # Emit signal for position change
        self.sigSampleIndexChanged.emit(new_sample)

    @Slot(int)
    def _update_selected_channel(self, index: int) -> None:
        """Handle when the user changes the selected channel.

        Parameters
        ----------
        index : int
            Index of the selected item in the channel selector. Index 0 is
            "All (show mean)", index i > 0 corresponds to channel i - 1.
        """
        if index == 0:
            # If "All (show mean)" is selected, set channel selection to None
            self._channel_selection = None
        else:
            self._channel_selection = index - 1  # Adjust for "All" being index 0

        self._plot_selected_channel()
        self.sigChannelSelectionChanged.emit(self._channel_selection)

    @Slot()
    def _zoom_in(self) -> None:
        """Zoom in on the waveform."""
        # Halve the visible duration but do not go below one second. The outer min
        # guarantees that zooming in never widens the view when the visible duration
        # is already shorter than one second.
        self._visible_duration_seconds = min(
            self._visible_duration_seconds,
            max(1.0, self._visible_duration_seconds / 2),
        )
        self._center_view_on_current_sample()

    @Slot()
    def _zoom_out(self) -> None:
        """Zoom out on the waveform."""
        # Double the visible duration
        self._visible_duration_seconds = min(
            self._audio.duration, self._visible_duration_seconds * 2
        )
        self._center_view_on_current_sample()

    @Slot()
    def _reset_zoom(self) -> None:
        """Reset the view to center around the current sample with default zoom."""
        # Reset to default zoom level (capped to the audio duration, as in __init__)
        self._visible_duration_seconds = min(
            self._default_view_len, self._audio.duration
        )
        self._center_view_on_current_sample()

    def _center_view_on_current_sample(self) -> None:
        """Try to center the view around the current sample with correct window size.

        Centering will not be perfect if the current sample is too close to the edges
        of the audio data.
        """
        half_window = self._visible_duration_seconds / 2
        self._set_clamped_time_range(
            self.current_time - half_window, self.current_time + half_window
        )


class AudioBrowser(SyncableBrowserWidget):
    """Qt widget for browsing audio with playback controls.

    This browser allows interactive visualization of audio data from AudioFile
    objects.

    NOTE: This browser currently only supports a single audio file. Methods that
    receive `media_idx` parameter ignore it and signals emit zero as the media index.

    Parameters
    ----------
    audio : AudioFile
        The audio file to visualize.
    playback_update_interval_ms : int, optional
        Determines how often the visualization is updated during audio playback.
        By default 50 ms, which corresponds to 20 updates per second.
    parent : QWidget | None, optional
        The parent widget, by default None.
    """

    def __init__(
        self,
        audio: AudioFile,
        playback_update_interval_ms: int = 50,
        parent: QWidget | None = None,
    ) -> None:
        super().__init__(parent=parent)
        self._audio = audio

        # Get sample rate that is suitable for playing audio on the hardware we are
        # running on. Audio will be resampled if necessary.
        self._playing_rate = audio_player.find_sample_rate_for_playing(
            original_rate=audio.sampling_rate
        )
        # For normalizing the audio during playback
        self._audio_max = audio.get_global_max_amplitude()

        # Create a timer that will be used to update the visualization during playback.
        self._playback_timer = QTimer(self)
        self._playback_timer.setInterval(playback_update_interval_ms)
        self._playback_timer.timeout.connect(self._on_playback_timeout)
        self._is_playing = False
        # How many samples to advance in the visualization when the playback timer
        # timeouts (how many samples there is in the playback update interval).
        self._playback_elapsed_samples = int(
            playback_update_interval_ms / 1000 * audio.sampling_rate
        )

        self.setWindowTitle("Audio Browser")

        # Create the main layout
        self._layout = QVBoxLayout()
        self.setLayout(self._layout)

        # Create an audio view that handles the visualization.
        self._audio_view = AudioView(audio, parent=self)
        self._audio_view.sigSampleIndexChanged.connect(
            self._on_audio_view_sample_change
        )
        # Keep playback audio data in sync with the visualized data.
        self._audio_view.sigChannelSelectionChanged.connect(
            self._set_playback_audio_data
        )
        self._layout.addWidget(self._audio_view)

        # Create controls.
        self._slider = gui_utils.IndexSlider(
            min_value=0, max_value=audio.n_samples - 1, value=0, parent=self
        )
        self._slider.sigIndexChanged.connect(
            lambda idx: self.set_position(idx, 0, signal=True)
        )
        self._layout.addWidget(self._slider)

        self._navigation_bar = gui_utils.NavigationBar(
            prev_button_text="Backwards",
            next_button_text="Forward",
            parent=self,
        )
        self._layout.addWidget(self._navigation_bar)
        self._navigation_bar.sigPlayPauseClicked.connect(self._toggle_play_pause)
        self._navigation_bar.sigNextClicked.connect(self._jump_forward)
        self._navigation_bar.sigPreviousClicked.connect(self._jump_backwards)

        self._update_browser_to_current_sample()

        # Sets self._playback_audio_data
        self._set_playback_audio_data(channel_idx=self._audio_view.channel_selection)

    @property
    def current_sample(self) -> int:
        """Get the index of the current sample position."""
        return self._audio_view.current_sample

    @property
    def current_time(self) -> float:
        """Get the current position in seconds."""
        return self._audio_view.current_time

    @property
    def is_playing(self) -> bool:
        """Return whether the audio is currently playing."""
        return self._is_playing

    def set_position(
        self, position_idx: int, media_idx: int, signal: bool = True
    ) -> bool:
        """Set the current position to the given sample index.

        NOTE: Does not do anything with media_idx, as this browser currently only
        supports a single audio file.

        Parameters
        ----------
        position_idx : int
            The sample index to move to.
        media_idx : int
            Ignored.
        signal : bool, optional
            Whether to emit `sigPositionChanged`, by default True.

        Returns
        -------
        bool
            True if the position was changed, False if the sample index was rejected
            by the audio view (out of bounds).
        """
        success = self._audio_view.display_at_sample(position_idx, signal=False)
        if not success:
            logger.debug(
                f"Cannot set position to sample index {position_idx} (out of bounds). "
                "Keeping current position."
            )
            return False

        self._update_browser_to_current_sample()
        if signal:
            # Emit zero as media_idx.
            self.sigPositionChanged.emit(0, position_idx)

        return True

    def get_current_position(self, media_idx: int) -> int:
        """Get the index of the current sample position.

        Parameter media_idx is ignored as this browser only supports a single audio
        file. A warning is logged if it is not zero.
        """
        if media_idx != 0:
            logger.warning(
                f"AudioBrowser only supports a single audio file, but was asked for "
                f"current position of media on index {media_idx}. Returning position "
                f"of the only audio file (index 0)."
            )
        return self.current_sample

    def jump_to_end(self, media_idx: int, signal: bool = True) -> None:
        """Display the last sample of the audio.

        NOTE: Does not do anything with media_idx, as this browser currently only
        supports a single audio file.
        """
        last_sample = self._audio.n_samples - 1
        self.set_position(last_sample, media_idx, signal=signal)

    def jump_to_start(self, media_idx: int, signal: bool = True) -> None:
        """Display the first sample of the audio.

        NOTE: Does not do anything with media_idx, as this browser currently only
        supports a single audio file.
        """
        self.set_position(0, media_idx, signal=signal)

    def start_playback(self, media_idx: int) -> None:
        """Start the audio playback from the current position."""
        # Audio browser currently only supports a single audio file,
        # so media_idx is not used.
        logger.debug("Starting audio playback.")
        visualized_time = self.current_time
        # Starting sample for playback depends on the sample rate used for playing.
        playback_start_sample = int(visualized_time * self._playing_rate)
        audio_player.play(
            self._playback_audio_data[playback_start_sample:],
            sampling_rate=self._playing_rate,
        )
        self._navigation_bar.set_playing()
        self._playback_timer.start()
        self._is_playing = True
        self.sigPlaybackStateChanged.emit(0, True)  # emit zero as media_idx

    def pause_playback(self) -> None:
        """Pause the audio playback."""
        logger.debug("Pausing audio playback.")
        audio_player.stop()
        self._navigation_bar.set_paused()
        self._playback_timer.stop()
        self._is_playing = False
        self.sigPlaybackStateChanged.emit(0, False)  # emit zero as media_idx

    def _update_browser_to_current_sample(self) -> None:
        """Update the audio browser UI to reflect the currently selected sample."""
        self._slider.set_value(self.current_sample, signal=False)
        self._update_buttons_enabled()

    @Slot(int)
    def _on_audio_view_sample_change(self, sample_idx: int) -> None:
        """Handle when the user dragged the time selector in the audio view."""
        # The updated sample index is fetched from the audio view using
        # self.current_sample.
        self._update_browser_to_current_sample()
        # Emit the position changed signal with zero as media index.
        self.sigPositionChanged.emit(0, sample_idx)

    @Slot(object)
    def _set_playback_audio_data(self, channel_idx: int | None) -> None:
        """Set the audio data to play when user changes selected channel.

        Parameters
        ----------
        channel_idx : int | None
            Index of the channel to play, or None to play the mean of all channels.
        """
        if self._playing_rate == self._audio.sampling_rate:
            if channel_idx is None:
                self._playback_audio_data = self._audio.get_audio_mean()
            else:
                # Get data for the selected channel.
                self._playback_audio_data = self._audio.get_audio_all_channels()[
                    channel_idx, :
                ]
        else:
            logger.info(
                f"Resampling audio from {self._audio.sampling_rate} Hz to "
                f"{self._playing_rate} Hz for playback."
            )
            self._playback_audio_data = self._audio.resample_poly(
                self._playing_rate, channel_idx=channel_idx
            )

        # Normalize the audio to play it properly.
        # NOTE(review): this divides in place. It presumably relies on AudioFile
        # returning a fresh floating point array on every call - confirm, because
        # otherwise the data held by AudioFile would be modified here.
        if self._audio_max > 0:
            self._playback_audio_data /= self._audio_max

    @Slot()
    def _on_playback_timeout(self) -> None:
        """Update the visualization during playback when playback timer timeouts."""
        if not self._is_playing:
            logger.warning(
                "Playback timer timed out when self._is_playing is False. "
                "Skipping updating position."
            )
            return

        # Calculate at which sample index the playback is currently at.
        playback_sample_idx = self.current_sample + self._playback_elapsed_samples
        success = self.set_position(playback_sample_idx, media_idx=0, signal=True)
        if not success:
            # Pause playback if the sample index is out of bounds.
            self.pause_playback()

    @Slot()
    def _toggle_play_pause(self) -> None:
        """Start or stop the audio playback."""
        if self._is_playing:
            self.pause_playback()
        else:
            self.start_playback(media_idx=0)  # media_idx is not used in audio browser

    @Slot()
    def _jump_forward(self) -> None:
        """Advance one second in the audio."""
        samples_to_advance = int(self._audio.sampling_rate)
        new_sample = self.current_sample + samples_to_advance
        self.set_position(new_sample, 0, signal=True)

    @Slot()
    def _jump_backwards(self) -> None:
        """Go back one second in the audio."""
        samples_to_rewind = int(self._audio.sampling_rate)
        new_sample = self.current_sample - samples_to_rewind
        self.set_position(new_sample, 0, signal=True)

    def _update_buttons_enabled(self) -> None:
        """Enable or disable buttons based on the current position."""
        # Buttons advance or rewind one second, so check whether the target sample
        # of such a jump would be accepted. The same sample arithmetic as in
        # _jump_forward/_jump_backwards is used so that a button is enabled exactly
        # when the jump can succeed (a jump landing on index n_samples is rejected).
        jump_samples = int(self._audio.sampling_rate)
        self._navigation_bar.set_prev_enabled(self.current_sample - jump_samples >= 0)
        self._navigation_bar.set_next_enabled(
            self.current_sample + jump_samples < self._audio.n_samples
        )
        self._navigation_bar.set_play_pause_enabled(
            self.current_sample < self._audio.n_samples - 1
        )