Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a button to record new samples #77

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@ Contents
tsv_format
import_export
api
recording
6 changes: 6 additions & 0 deletions docs/recording.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
Recording New Samples
=====================

New samples can be recorded directly from the application.

Select the audio device you want to use and then press the ``Record Sample`` button to start recording. Press the button again to stop recording.
55 changes: 53 additions & 2 deletions src/voice_annotation_tool/opened_project_frame.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,14 @@
from PySide6.QtCore import QModelIndex, Slot
import os
import random
import string
from PySide6.QtCore import QModelIndex, QUrl, Slot
from PySide6.QtMultimedia import (
QAudioInput,
QMediaCaptureSession,
QMediaDevices,
QMediaFormat,
QMediaRecorder,
)
from PySide6.QtWidgets import QFrame, QFileDialog, QPushButton, QWidget
from voice_annotation_tool.annotation_list_model import AnnotationListModel
from voice_annotation_tool.opened_project_frame_ui import Ui_OpenedProjectFrame
Expand Down Expand Up @@ -50,10 +60,26 @@ def __init__(self):
self.audioPlaybackWidget.previous_pressed.connect(self.previous_pressed)
self.project: Project
self.annotationList.installEventFilter(self)

for age in AGE_STRINGS:
self.ageInput.addItem(age)
self.ageInput.addItem(self.tr("[Multiple]"))

self.recorder = QMediaRecorder()
self.input = QAudioInput()
self.session = QMediaCaptureSession()
self.session.setAudioInput(self.input)
self.recorder = QMediaRecorder()
self.session.setRecorder(self.recorder)
self.recorder.setMediaFormat(QMediaFormat.Wave)
self.recorder.setEncodingMode(QMediaRecorder.ConstantBitRateEncoding)
self.recorder.setAudioSampleRate(16000)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suggest recording and saving in full quality, e.g. 48000 Hz (48 kHz), and then reducing the sample rate later for training and inference.

self.recorder.setAudioBitRate(32)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think 16 Bit is the standard here, but I am unsure.

self.recorder.setQuality(QMediaRecorder.HighQuality)

for device in QMediaDevices.audioInputs():
self.deviceComboBox.addItem(device.description())

def get_playback_buttons(self) -> list[QPushButton]:
    """Return the buttons that control audio playback.

    The list is taken unchanged from the embedded audio playback widget.
    """
    playback_widget = self.audioPlaybackWidget
    return playback_widget.playback_buttons
Expand Down Expand Up @@ -164,7 +190,7 @@ def delete_selected(self):
"""Delete the selected annotations and audio files."""
for selected in self.get_selected_annotations():
self.project.delete_annotation(selected)
self.annotationList.model().layoutChanged.emit()
self.update_sample_list()
self.update_metadata_widgets()

def get_selected_annotations(self) -> list[Annotation]:
Expand All @@ -176,6 +202,11 @@ def get_selected_annotations(self) -> list[Annotation]:
annotations.append(selected_index.data(AnnotationListModel.ANNOTATION_ROLE))
return annotations

def update_sample_list(self):
    """Refresh the annotation list view after its model has changed.

    Emits the model's ``layoutChanged`` signal so that attached views
    re-read the data.
    """
    list_model = self.annotationList.model()
    list_model.layoutChanged.emit()

@Slot()
def previous_pressed(self):
current = self.annotationList.currentIndex().row()
Expand Down Expand Up @@ -271,3 +302,23 @@ def import_profile_pressed(self):
def mark_unchanged_pressed(self):
    """Mark every currently selected annotation as unchanged in the project."""
    selected = self.get_selected_annotations()
    for entry in selected:
        self.project.mark_unchanged(entry)

@Slot()
def record_pressed(self):
if not self.project.audio_folder or not self.project.audio_folder.is_dir():
return
if self.recorder.recorderState() == QMediaRecorder.StoppedState:
name = "000" + "".join(random.choices("0123456789abcdef", k=125))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using a random name is risky: the same value might be chosen twice by chance, which could lead to bugs.

path = QUrl.fromLocalFile(os.fspath(self.project.audio_folder / name))
self.recorder.setOutputLocation(path)
self.recorder.record()
self.recordButton.setText(self.tr("Stop Recording"))
else:
self.recorder.stop()
self.recordButton.setText(self.tr("Record Sample"))
self.project.load_audio_files(self.project.audio_folder)
self.update_sample_list()

@Slot(int)
def device_selected(self, device: int):
    """Switch the recording input to the audio device at index ``device``.

    ``device`` is the row chosen in ``deviceComboBox``, interpreted as a
    position in ``QMediaDevices.audioInputs()``. Indices that do not match
    a currently available input are ignored.
    """
    available_inputs = QMediaDevices.audioInputs()
    # The combo box reports -1 when it has no current item, and the set of
    # devices may differ from when the list was filled. Without this check
    # -1 would silently select the last device and a stale index would
    # raise IndexError.
    if not 0 <= device < len(available_inputs):
        return
    self.input.setDevice(available_inputs[device])
59 changes: 55 additions & 4 deletions ui/opened_project_frame.ui
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,23 @@
</property>
</widget>
</item>
<item>
<widget class="QPushButton" name="recordButton">
<property name="toolTip">
<string>Start recording a new sample and save it to the audio folder selected in the project settings. Click the button again to stop recording.</string>
</property>
<property name="text">
<string>&amp;Record Sample</string>
</property>
</widget>
</item>
<item>
<widget class="QComboBox" name="deviceComboBox">
<property name="toolTip">
<string>Select the audio capture device used for recording.</string>
</property>
</widget>
</item>
</layout>
</item>
</layout>
Expand Down Expand Up @@ -270,8 +287,8 @@
<slot>gender_selected(int)</slot>
<hints>
<hint type="sourcelabel">
<x>542</x>
<y>34</y>
<x>540</x>
<y>43</y>
</hint>
<hint type="destinationlabel">
<x>1062</x>
Expand All @@ -286,8 +303,8 @@
<slot>accent_changed(QString)</slot>
<hints>
<hint type="sourcelabel">
<x>542</x>
<y>86</y>
<x>540</x>
<y>90</y>
</hint>
<hint type="destinationlabel">
<x>1047</x>
Expand Down Expand Up @@ -327,6 +344,38 @@
</hint>
</hints>
</connection>
<connection>
<sender>recordButton</sender>
<signal>pressed()</signal>
<receiver>OpenedProjectFrame</receiver>
<slot>record_pressed()</slot>
<hints>
<hint type="sourcelabel">
<x>978</x>
<y>689</y>
</hint>
<hint type="destinationlabel">
<x>1234</x>
<y>392</y>
</hint>
</hints>
</connection>
<connection>
<sender>deviceComboBox</sender>
<signal>currentIndexChanged(int)</signal>
<receiver>OpenedProjectFrame</receiver>
<slot>device_selected(int)</slot>
<hints>
<hint type="sourcelabel">
<x>823</x>
<y>707</y>
</hint>
<hint type="destinationlabel">
<x>1233</x>
<y>637</y>
</hint>
</hints>
</connection>
</connections>
<slots>
<slot>previous_pressed()</slot>
Expand All @@ -342,5 +391,7 @@
<slot>play_pause_pressed()</slot>
<slot>mark_unchanged_pressed()</slot>
<slot>metadata_changed(QString)</slot>
<slot>record_pressed()</slot>
<slot>device_selected(int)</slot>
</slots>
</ui>