From 56fefc3fdcf82fe94361452eeaa6eb2371576b55 Mon Sep 17 00:00:00 2001 From: ruohoruotsi Date: Mon, 10 Jun 2024 13:29:00 -0700 Subject: [PATCH 1/3] [CHECKPOINT] on misc fixes, improve layout, colors and stability --- requirements.txt | 3 +- scripts/start_macos_x86_64.sh | 2 +- src/yoruba_voice_speech_recorder/__main__.py | 250 +---------------- src/yoruba_voice_speech_recorder/audio.py | 51 +--- .../yv_recorder.py | 261 ++++++++++++++++++ .../{__main__.qml => yv_recorder.qml} | 44 ++- 6 files changed, 304 insertions(+), 307 deletions(-) create mode 100644 src/yoruba_voice_speech_recorder/yv_recorder.py rename src/yoruba_voice_speech_recorder/{__main__.qml => yv_recorder.qml} (83%) diff --git a/requirements.txt b/requirements.txt index 8cf1f35..96a3335 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,4 @@ PySide6 pyaudio shortuuid -sounddevice -webrtcvad \ No newline at end of file +sounddevice \ No newline at end of file diff --git a/scripts/start_macos_x86_64.sh b/scripts/start_macos_x86_64.sh index 8fc6d96..31c4bf3 100755 --- a/scripts/start_macos_x86_64.sh +++ b/scripts/start_macos_x86_64.sh @@ -4,4 +4,4 @@ mkdir -p ~/Desktop/audio-data # launch app -python3 -m yoruba_voice_speech_recorder -p src/yoruba_voice_speech_recorder/prompts/yovo_3501.txt -d ~/Desktop/audio-data +python3 -m yoruba_voice_speech_recorder diff --git a/src/yoruba_voice_speech_recorder/__main__.py b/src/yoruba_voice_speech_recorder/__main__.py index 6c86bdc..2154907 100644 --- a/src/yoruba_voice_speech_recorder/__main__.py +++ b/src/yoruba_voice_speech_recorder/__main__.py @@ -1,253 +1,7 @@ -#!/usr/bin/env python3 - -import argparse -import datetime import logging -import math -import os -import os.path -import random -import re -import sys -import threading - -from PySide6.QtCore import QObject, Slot, QUrl -from PySide6.QtWidgets import QApplication -from PySide6.QtQml import QQmlApplicationEngine -from PySide6.QtWidgets import QMessageBox - -local_src_module_path = os.path.join(os.path.dirname(__file__), "../../") -sys.path.append(local_src_module_path) - -import src.yoruba_voice_speech_recorder.audio as audio -import shortuuid - -event = threading.Event() -current_frame = 0 - - -class Recorder(QObject): - """docstring for Recorder""" - - def __init__(self, save_dir, prompts_filename, ordered=True, prompts_count=250, prompt_len_soft_max=None): - super(Recorder, self).__init__() - self.scriptModel = None - self.speaker_id = None - self.speaker_name = None - if not os.path.isdir(save_dir): raise Exception("save_dir '%s' is not a directory" % save_dir) - self.save_dir = save_dir - if not os.path.isfile(prompts_filename): - # raise Exception("prompts_filename '%s' is not a file" % prompts_filename) - self.msgWarning = QMessageBox() - self.msgWarning.setIcon(QMessageBox.Warning) - self.msgWarning.setText(" Prompts file ńkọ́ ?\n Please load a prompts file") - self.msgWarning.setStandardButtons(QMessageBox.Ok) - self.msgWarning.setWindowTitle("Prompt file needed Message") - self.msgWarning.show() - - self.prompts_filename = prompts_filename - print(self.count_prompts_file_prompts_count()) - self.prompts_count = prompts_count - self.prompt_len_soft_max = prompt_len_soft_max - self.ordered = ordered - self.audio = audio.Audio() - - def count_prompts_file_prompts_count(self): - try: - with open(self.prompts_filename, 'r') as fp: - num_lines = 0 - for count, line in enumerate(fp): - line = line.strip() - - if line == "" \ - or line.startswith(";") \ - or line.startswith("#"): - continue - else: - num_lines += 1 - return num_lines - except FileNotFoundError as not_found: - print(not_found.filename) - - @Slot(QUrl) - def reinit_with_url(self, url): - filename = url.toLocalFile() - logging.debug('reinit_with_url: new prompt filename: %s', filename) - self.prompts_filename = filename # set new prompt filename - self.scriptModel.clear() # empty out list view - self.populate_listview() # re-init - - @Slot(QObject) - def init(self, scriptModel): - logging.debug("init: %s", scriptModel) - self.window.setProperty('saveDir', self.save_dir) - self.scriptModel = scriptModel - self.populate_listview() - - def populate_listview(self): - self.prompts_count = self.count_prompts_file_prompts_count() - logging.info("prompts_count >>>>> {}".format(self.prompts_count)) - self.window.setProperty('promptsName', os.path.splitext(os.path.basename(self.prompts_filename))[0]) - for script in self.get_scripts_from_file(self.prompts_count, self.prompts_filename, self.ordered, - split_len=self.prompt_len_soft_max): - self.window.appendScript({'script': script, 'filename': ''}) - - @Slot(bool) - def toggleRecording(self, recording): - logging.debug('toggleRecording: recording is now %s', recording) - - @Slot() - def startRecording(self): - size = self.flush() - logging.debug('flushed %s', size) - self.audio.stream.start_stream() - - @Slot() - def finishRecording(self): - self.audio.stream.stop_stream() - data = self.read_audio(drop_last=3) - if self.window.property('scriptFilename'): - self.deleteFile(self.window.property('scriptFilename')) - filename = os.path.normpath(os.path.join(self.window.property('saveDir'), - "recorder_" + datetime.datetime.now().strftime( - "%Y-%m-%d_%H-%M-%S_%f") + ".wav")) - self.window.setProperty('scriptFilename', filename) - self.audio.write_wav(filename, data) - scriptText = self.window.property('scriptText') - - # Double check speaker name and - if self.speaker_id is None or self.speaker_id.isspace() or self.speaker_id == "": - self.speaker_id = "UNNAMED_SPEAKER" - print(self.speaker_id) - with open(os.path.join(self.window.property('saveDir'), "recorder.tsv"), "a") as xsvfile: - xsvfile.write('\t'.join( - [filename, self.speaker_id, self.window.property('promptsName'), '', - self.sanitize_script(scriptText)]) + '\n') - logging.debug("wrote %s to %s", len(data), filename) - - @Slot(str) - def deleteFile(self, filename): - os.remove(filename) - xsvfile_in_path = os.path.join(self.window.property('saveDir'), "recorder.tsv") - xsvfile_out_path = os.path.join(self.window.property('saveDir'), "recorder_delete_temp.tsv") - with open(xsvfile_in_path, "r") as xsvfile_in: - with open(xsvfile_out_path, "w") as xsvfile_out: - for line in xsvfile_in: - if filename not in line: - xsvfile_out.write(line) - os.replace(xsvfile_out_path, xsvfile_in_path) - self.window.setProperty('scriptFilename', '') - - @Slot(str) - def acceptSpeakerNameText(self, speakerName): - print("acceptSpeakerNameText Slot") - self.speaker_name = speakerName - if self.speaker_name is None or self.speaker_name.isspace() or self.speaker_name == "": - self.speaker_name = "UNNAMED_SPEAKER" - self.speaker_id = self.speaker_name + "_" + str(shortuuid.uuid()[:16]) - - def read_audio(self, drop_last=None): - blocks = [] - while not self.audio.buffer_queue.empty(): - block = self.audio.buffer_queue.get_nowait() - # logging.debug('read %s', len(block) if block else None) - if block: - blocks.append(block) - # logging.debug('read total %s', len(b''.join(blocks))) - if drop_last: - blocks = blocks[:-drop_last] - return b''.join(blocks) - - def flush(self): - size = self.audio.buffer_queue.qsize() - while not self.audio.buffer_queue.empty(): - self.audio.buffer_queue.get_nowait() - return size - - def get_scripts_from_file(self, n, filename, ordered=False, split_len=None): - def filter(script): - # match = re.fullmatch(r'\w+ "(.*)"', script) - patterns = [ - r'^\w+ "(.*)"$', # arctic - r'^(.*) \(s.\d+\)$', # timit - ] - for pat in patterns: - script = re.sub(pat, r'\1', script, count=1) - return script - - with open(filename, 'r') as file: - scripts = [line.strip() for line in file if not line.startswith(';')] - if n is None: n = len(scripts) - if not ordered: - # random.shuffle(scripts) - scripts = [random.choice(scripts) for _ in range(n)] - scripts = scripts[:n] - scripts = [filter(script) for script in scripts] - if split_len is not None: - scripts = [self.split_script(script, split_len) for script in scripts] - scripts = sum(scripts, []) - return scripts[:n] - - # TODO - IO do we need to sanitize scripts? - @classmethod - def sanitize_script(cls, script): - script = re.sub(r'[\-]', ' ', script) - # script = re.sub(r'[,.?!:;"]', '', script) - return script.strip() - - - @classmethod - def split_script(cls, script, split_len): - scripts = [] - n = math.ceil(len(script) / split_len) - startpos = 0 - # print(script) - regex = re.compile(r'\s+') - for i in range(n): - match = regex.search(script, pos=startpos + split_len) - endpos = match.start() if match else None - scripts.append(script[startpos:endpos].strip()) - # print(startpos, endpos, scripts) - if endpos is None: break - startpos = endpos - return scripts - - -def main(): - current_path = os.path.abspath(os.path.dirname(__file__)) - qml_file = os.path.join(current_path, os.path.splitext(__file__)[0] + '.qml') - - parser = argparse.ArgumentParser(description=''' - Given a text file containing prompts, this app will choose a random selection - and ordering of them, display them to be dictated by the user, and record the - dictation audio and metadata to a `.wav` file and `recorder.tsv` file - respectively. - ''') - parser.add_argument('-p', '--prompts_filename', help='file containing prompts to choose from') - parser.add_argument('-d', '--save_dir', default='./audio_data', - help='where to save .wav & recorder.tsv files (default: %(default)s)') - parser.add_argument('-c', '--prompts_count', type=int, default=250, - help='number of prompts to select and display (default: %(default)s)') - parser.add_argument('-l', '--prompt_len_soft_max', type=int) - parser.add_argument('-o', '--ordered', action='store_true', default=True, - help='present prompts in order, as opposed to random (default: %(default)s)') - args = parser.parse_args() - assert args.prompts_filename - - os.environ["QT_AUTO_SCREEN_SCALE_FACTOR"] = "1" - app = QApplication(sys.argv) - engine = QQmlApplicationEngine() - engine.addImportPath(current_path) - kwargs = {k: v for k, v in vars(args).items() if v is not None and k in 'prompts_count prompt_len_soft_max'.split()} - recorder = Recorder(args.save_dir, args.prompts_filename, args.ordered, **kwargs) - engine.rootContext().setContextProperty('recorder', recorder) - engine.load(qml_file) - recorder.window = engine.rootObjects()[0] - - res = app.exec() - sys.exit(res) +import yoruba_voice_speech_recorder.yv_recorder as yv_recorder if __name__ == '__main__': logging.basicConfig(level=10) - main() + yv_recorder.main() diff --git a/src/yoruba_voice_speech_recorder/audio.py b/src/yoruba_voice_speech_recorder/audio.py index ff649f4..c22340f 100644 --- a/src/yoruba_voice_speech_recorder/audio.py +++ b/src/yoruba_voice_speech_recorder/audio.py @@ -1,6 +1,5 @@ import collections, wave, logging, os, datetime import pyaudio -import webrtcvad import queue @@ -75,48 +74,10 @@ def write_wav(self, filename, data): wf.close() -class VADAudio(Audio): - """Filter & segment audio with voice activity detection.""" - - def __init__(self, aggressiveness=3): - super(VADAudio, self).__init__() - self.vad = webrtcvad.Vad(aggressiveness) - - def vad_collector(self, padding_ms=300, ratio=0.75, blocks=None): - """Generator that yields series of consecutive audio blocks comprising each utterence, separated by yielding a single None. - Determines voice activity by ratio of blocks in padding_ms. Uses a buffer to include padding_ms prior to being triggered. - Example: (block, ..., block, None, block, ..., block, None, ...) - |---utterence---| |---utterence---| - """ - if blocks is None: blocks = iter(self) - num_padding_blocks = padding_ms // self.block_duration_ms - ring_buffer = collections.deque(maxlen=num_padding_blocks) - triggered = False - - for block in blocks: - is_speech = self.vad.is_speech(block, self.sample_rate) - - if not triggered: - ring_buffer.append((block, is_speech)) - num_voiced = len([f for f, speech in ring_buffer if speech]) - if num_voiced > ratio * ring_buffer.maxlen: - triggered = True - for f, s in ring_buffer: - yield f - ring_buffer.clear() - - else: - yield block - ring_buffer.append((block, is_speech)) - num_unvoiced = len([f for f, speech in ring_buffer if not speech]) - if num_unvoiced > ratio * ring_buffer.maxlen: - triggered = False - yield None - ring_buffer.clear() - - class AudioStore(object): - """Stores last `maxlen` recognitions as tuples (audio, text, grammar_name, rule_name), indexed in reverse order (0 most recent)""" + """Stores last `maxlen` recognitions as tuples (audio, text, grammar_name, rule_name), indexed in + reverse order (0 most recent) + """ def __init__(self, audio_obj, maxlen=0, save_dir=None, auto_save_func=None): self.audio_obj = audio_obj @@ -140,7 +101,8 @@ def finalize(self, text, grammar_name, rule_name): def save(self, index): if self.save_dir: - filename = os.path.join(self.save_dir, "retain_" + datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S_%f") + ".wav") + filename = os.path.join(self.save_dir, + "retain_" + datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S_%f") + ".wav") audio, text, grammar_name, rule_name = self.deque[index] self.audio_obj.write_wav(filename, audio) with open(os.path.join(self.save_dir, "retain.csv"), "a") as csvfile: @@ -148,9 +110,12 @@ def save(self, index): def __getitem__(self, key): return self.deque[key] + def __len__(self): return len(self.deque) + def __bool__(self): return True + def __nonzero__(self): return True diff --git a/src/yoruba_voice_speech_recorder/yv_recorder.py b/src/yoruba_voice_speech_recorder/yv_recorder.py new file mode 100644 index 0000000..b669416 --- /dev/null +++ b/src/yoruba_voice_speech_recorder/yv_recorder.py @@ -0,0 +1,261 @@ +#!/usr/bin/env python3 + +import argparse +import datetime +import logging +import math +import os +import os.path +import random +import re +import sys +import threading + +from PySide6.QtCore import QObject, Slot, QUrl +from PySide6.QtWidgets import QApplication +from PySide6.QtQml import QQmlApplicationEngine +from PySide6.QtWidgets import QMessageBox + +# local_src_module_path = os.path.join(os.path.dirname(__file__), "../../") +# sys.path.append(local_src_module_path) + +import yoruba_voice_speech_recorder.audio as audio +import shortuuid +import time + +event = threading.Event() +current_frame = 0 +app = None + + +class Recorder(QObject): + """docstring for Recorder""" + + def __init__(self, save_dir, prompts_filename, ordered=True, prompts_count=250, prompt_len_soft_max=None): + super(Recorder, self).__init__() + self.scriptModel = None + self.speaker_id = None + self.speaker_name = None + if not os.path.isdir(save_dir): + raise Exception("save_dir '%s' is not a directory" % save_dir) + self.save_dir = save_dir + if not os.path.isfile(prompts_filename): + # raise Exception("prompts_filename '%s' is not a file" % prompts_filename) + self.msgWarning = QMessageBox() + self.msgWarning.setIcon(QMessageBox.Warning) + self.msgWarning.setText(" Prompts file ńkọ́ ?\n Please load a prompts file") + self.msgWarning.setStandardButtons(QMessageBox.Ok) + self.msgWarning.setWindowTitle("Prompt file needed Message") + self.msgWarning.show() + + self.prompts_filename = prompts_filename + print(self.count_prompts_file_prompts_count()) + self.prompts_count = prompts_count + self.prompt_len_soft_max = prompt_len_soft_max + self.ordered = ordered + self.audio = audio.Audio() + + def count_prompts_file_prompts_count(self): + try: + with open(self.prompts_filename, 'r') as fp: + num_lines = 0 + for count, line in enumerate(fp): + line = line.strip() + + if line == "" \ + or line.startswith(";") \ + or line.startswith("#"): + continue + else: + num_lines += 1 + return num_lines + except FileNotFoundError as not_found: + print(not_found.filename) + + @Slot(QUrl) + def reinit_with_url(self, url): + filename = url.toLocalFile() + logging.debug('reinit_with_url: new prompt filename: %s', filename) + self.prompts_filename = filename # set new prompt filename + self.scriptModel.clear() # empty out list view + self.populate_listview() # re-init + + @Slot(QObject) + def init(self, scriptModel): + logging.debug("init: %s", scriptModel) + self.window.setProperty('saveDir', self.save_dir) + self.scriptModel = scriptModel + self.populate_listview() + + def populate_listview(self): + self.prompts_count = self.count_prompts_file_prompts_count() + logging.info("prompts_count >>>>> {}".format(self.prompts_count)) + self.window.setProperty('promptsName', os.path.splitext(os.path.basename(self.prompts_filename))[0]) + for script in self.get_scripts_from_file(self.prompts_count, self.prompts_filename, self.ordered, + split_len=self.prompt_len_soft_max): + self.window.appendScript({'script': script, 'filename': ''}) + + @Slot(bool) + def toggleRecording(self, recording): + logging.debug('toggleRecording: recording is now %s', recording) + + @Slot() + def startRecording(self): + size = self.flush() + logging.debug('flushed %s', size) + self.audio.stream.start_stream() + + @Slot() + def finishRecording(self): + self.audio.stream.stop_stream() + data = self.read_audio(drop_last=3) + if self.window.property('scriptFilename'): + self.deleteFile(self.window.property('scriptFilename')) + filename = os.path.normpath(os.path.join(self.window.property('saveDir'), + "recorder_" + datetime.datetime.now().strftime( + "%Y-%m-%d_%H-%M-%S_%f") + ".wav")) + self.window.setProperty('scriptFilename', filename) + self.audio.write_wav(filename, data) + scriptText = self.window.property('scriptText') + + # Double check speaker name and + if self.speaker_id is None or self.speaker_id.isspace() or self.speaker_id == "": + self.speaker_id = "UNNAMED_SPEAKER" + print(self.speaker_id) + with open(os.path.join(self.window.property('saveDir'), "recorder.tsv"), "a") as xsvfile: + xsvfile.write('\t'.join( + [filename, self.speaker_id, self.window.property('promptsName'), '', + self.sanitize_script(scriptText)]) + '\n') + logging.debug("wrote %s to %s", len(data), filename) + + @Slot(str) + def deleteFile(self, filename): + os.remove(filename) + xsvfile_in_path = os.path.join(self.window.property('saveDir'), "recorder.tsv") + xsvfile_out_path = os.path.join(self.window.property('saveDir'), "recorder_delete_temp.tsv") + with open(xsvfile_in_path, "r") as xsvfile_in: + with open(xsvfile_out_path, "w") as xsvfile_out: + for line in xsvfile_in: + if filename not in line: + xsvfile_out.write(line) + os.replace(xsvfile_out_path, xsvfile_in_path) + self.window.setProperty('scriptFilename', '') + + @Slot(str) + def acceptSpeakerNameText(self, speakerName): + print("acceptSpeakerNameText Slot") + self.speaker_name = speakerName + if self.speaker_name is None or self.speaker_name.isspace() or self.speaker_name == "": + self.speaker_name = "UNNAMED_SPEAKER" + self.speaker_id = self.speaker_name + "_" + str(shortuuid.uuid()[:16]) + + def read_audio(self, drop_last=None): + blocks = [] + while not self.audio.buffer_queue.empty(): + block = self.audio.buffer_queue.get_nowait() + # logging.debug('read %s', len(block) if block else None) + if block: + blocks.append(block) + # logging.debug('read total %s', len(b''.join(blocks))) + if drop_last: + blocks = blocks[:-drop_last] + return b''.join(blocks) + + def flush(self): + size = self.audio.buffer_queue.qsize() + while not self.audio.buffer_queue.empty(): + self.audio.buffer_queue.get_nowait() + return size + + def get_scripts_from_file(self, n, filename, ordered=False, split_len=None): + def filter(script): + # match = re.fullmatch(r'\w+ "(.*)"', script) + patterns = [ + r'^\w+ "(.*)"$', # arctic + r'^(.*) \(s.\d+\)$', # timit + ] + for pat in patterns: + script = re.sub(pat, r'\1', script, count=1) + return script + + with open(filename, 'r') as file: + scripts = [line.strip() for line in file if not line.startswith(';')] + if n is None: n = len(scripts) + if not ordered: + # random.shuffle(scripts) + scripts = [random.choice(scripts) for _ in range(n)] + scripts = scripts[:n] + scripts = [filter(script) for script in scripts] + if split_len is not None: + scripts = [self.split_script(script, split_len) for script in scripts] + scripts = sum(scripts, []) + return scripts[:n] + + # TODO - IO do we need to sanitize scripts? + @classmethod + def sanitize_script(cls, script): + script = re.sub(r'[\-]', ' ', script) + # script = re.sub(r'[,.?!:;"]', '', script) + return script.strip() + + @classmethod + def split_script(cls, script, split_len): + scripts = [] + n = math.ceil(len(script) / split_len) + startpos = 0 + # print(script) + regex = re.compile(r'\s+') + for i in range(n): + match = regex.search(script, pos=startpos + split_len) + endpos = match.start() if match else None + scripts.append(script[startpos:endpos].strip()) + # print(startpos, endpos, scripts) + if endpos is None: break + startpos = endpos + return scripts + + +def main(): + global app + current_path = os.path.abspath(os.path.dirname(__file__)) + qml_file = os.path.join(current_path, os.path.splitext(__file__)[0] + '.qml') + + parser = argparse.ArgumentParser(description=''' + Given a text file containing prompts, this app will choose a random selection + and ordering of them, display them to be dictated by the user, and record the + dictation audio and metadata to a `.wav` file and `recorder.tsv` file + respectively. + ''') + parser.add_argument('-p', '--prompts_filename', default='./prompts/yovo_3501.txt', + help='file containing prompts to choose from') + parser.add_argument('-d', '--save_dir', default='./audio_data', + help='where to save .wav & recorder.tsv files (default: %(default)s)') + parser.add_argument('-c', '--prompts_count', type=int, default=250, + help='number of prompts to select and display (default: %(default)s)') + parser.add_argument('-l', '--prompt_len_soft_max', type=int) + parser.add_argument('-o', '--ordered', action='store_true', default=True, + help='present prompts in order, as opposed to random (default: %(default)s)') + args = parser.parse_args() + assert args.prompts_filename + + os.environ["QT_AUTO_SCREEN_SCALE_FACTOR"] = "1" + app = QApplication(sys.argv) + engine = QQmlApplicationEngine() + engine.addImportPath(current_path) + kwargs = {k: v for k, v in vars(args).items() if v is not None and k in 'prompts_count prompt_len_soft_max'.split()} + recorder = Recorder(args.save_dir, args.prompts_filename, args.ordered, **kwargs) + engine.rootContext().setContextProperty('recorder', recorder) + engine.load(qml_file) + recorder.window = engine.rootObjects()[0] + + # This launches the main window + res = app.exec() + + # ensure correct deletion order + # del engine, app + sys.exit(res) + + +if __name__ == '__main__': + logging.basicConfig(level=10) + main() diff --git a/src/yoruba_voice_speech_recorder/__main__.qml b/src/yoruba_voice_speech_recorder/yv_recorder.qml similarity index 83% rename from src/yoruba_voice_speech_recorder/__main__.qml rename to src/yoruba_voice_speech_recorder/yv_recorder.qml index 3c7cfdf..9a8c1fe 100644 --- a/src/yoruba_voice_speech_recorder/__main__.qml +++ b/src/yoruba_voice_speech_recorder/yv_recorder.qml @@ -1,6 +1,6 @@ import QtQuick import QtQuick.Window -import QtQuick.Controls +import QtQuick.Controls.Basic import QtQuick.Layouts import QtQuick.Dialogs import QtMultimedia @@ -10,7 +10,7 @@ Window { visible: true width: 1440; height: 1080 color: "#f5f5f6" - title: qsTr("Yorùbá Voice Recorder") + title: qsTr("Yorùbá Voice Speech Recorder") property bool recording: false property string promptsName: '' @@ -74,13 +74,13 @@ Window { Text { text: script // Item .script font.pointSize: 22 - color: filename == '' ? "black" : "green" + color: filename == "" ? "black" : "green" // anchors.verticalCenter: parent.verticalCenter // TODO IO this is broken } Text { text: 'Filename: ' + filename // Item .filename font.pointSize: 18 - color: filename == '' ? "red" : "black" + color: filename == "" ? "red" : "black" // font.bold: filename == '' ? false : true // color: "#ffffff" } @@ -114,11 +114,25 @@ Window { } Button { + id: prompt_button_control Layout.preferredHeight: 45 font.pointSize: 22 - text: "Load Prompts file" - highlighted: promptsName != '' ? true : false + text: qsTr("Load Prompts file") onClicked: { fileDialog.visible = true } + contentItem: Text { + text: prompt_button_control.text + font: prompt_button_control.font + opacity: enabled ? 1.0 : 0.3 + color: prompt_button_control.down ? "#16bb1a" : "#0f570f" + horizontalAlignment: Text.AlignHCenter + verticalAlignment: Text.AlignVCenter + elide: Text.ElideRight + } + background: Rectangle { + color: prompt_button_control.down ? "#cae3ca" : "#b0bbb0" + border.width: 1 + radius: 2 + } } TextArea { @@ -127,7 +141,10 @@ Window { readOnly: true text: promptsName verticalAlignment: TextField.AlignVCenter - + background: Rectangle { + border.width: 1 + border.color: promptsName != "" ? true : false + } } // Separator between Prompt file && Speakername @@ -135,11 +152,12 @@ Window { width: 15 } Text { - text: 'Speaker Name:' + text: qsTr("Speaker Name:") font.pointSize: 18 verticalAlignment: TextField.AlignVCenter } TextField { + id: control Layout.preferredHeight: 30 font.pointSize: 18 placeholderText: "Olúwadáminí" @@ -171,7 +189,7 @@ Window { Layout.preferredHeight: 60 font.pointSize: 22 highlighted: recording - text: recording ? "Stop" : "Start" + text: recording ? qsTr("Stop") : qsTr("Start") onClicked: { recording = !recording; if (recording) { @@ -188,7 +206,7 @@ Window { Layout.fillWidth: true Layout.preferredHeight: 40 font.pointSize: 22 - text: "Play" + text: qsTr("Play") enabled: scriptFilename highlighted: playFile.playbackState == playFile.PlayingState onClicked: { @@ -206,7 +224,7 @@ Window { Layout.fillWidth: true Layout.preferredHeight: 40 font.pointSize: 22 - text: "Delete" + text: qsTr("Delete") enabled: scriptFilename onClicked: recorder.deleteFile(scriptFilename) // @Slot def deleteFile(self, filename) } @@ -215,7 +233,7 @@ Window { Layout.fillWidth: true Layout.preferredHeight: 40 font.pointSize: 22 - text: recording ? "Cancel" : "Next" + text: recording ? qsTr("Cancel") : qsTr("Next") onClicked: { if (recording) { recording = !recording; @@ -229,7 +247,7 @@ Window { FileDialog { id: fileDialog - title: "Please choose a file" + title: qsTr("Please choose a file") selectedNameFilter.index: 0 nameFilters: ["Prompt files (*.txt)", "Text files (*.txt)"] From 5bc76656526b31b3b4df436af1929605eb6b382a Mon Sep 17 00:00:00 2001 From: ruohoruotsi Date: Mon, 17 Jun 2024 18:43:39 -0700 Subject: [PATCH 2/3] [ADD] another checkpoint --- scripts/start_macos_arm64.sh | 3 +- .../yv_recorder.py | 34 +++++++++++-------- .../yv_recorder.qml | 7 ++-- 3 files changed, 25 insertions(+), 19 deletions(-) diff --git a/scripts/start_macos_arm64.sh b/scripts/start_macos_arm64.sh index 87f2c51..a786967 100755 --- a/scripts/start_macos_arm64.sh +++ b/scripts/start_macos_arm64.sh @@ -3,4 +3,5 @@ # setup dirs mkdir -p ~/Desktop/audio-data -env DYLD_LIBRARY_PATH="/opt/homebrew/lib:$DYLD_LIBRARY_PATH" python3 -m yoruba_voice_speech_recorder -p src/yoruba_voice_speech_recorder/prompts/yovo_3501.txt -d ~/Desktop/audio-data +# launch app with specific homebrew environment +env DYLD_LIBRARY_PATH="/opt/homebrew/lib:$DYLD_LIBRARY_PATH" python3 -m yoruba_voice_speech_recorder diff --git a/src/yoruba_voice_speech_recorder/yv_recorder.py b/src/yoruba_voice_speech_recorder/yv_recorder.py index b669416..906c1c1 100644 --- a/src/yoruba_voice_speech_recorder/yv_recorder.py +++ b/src/yoruba_voice_speech_recorder/yv_recorder.py @@ -15,6 +15,7 @@ from PySide6.QtWidgets import QApplication from PySide6.QtQml import QQmlApplicationEngine from PySide6.QtWidgets import QMessageBox +from PySide6 import QtGui # local_src_module_path = os.path.join(os.path.dirname(__file__), "../../") # sys.path.append(local_src_module_path) @@ -27,6 +28,8 @@ current_frame = 0 app = None +# QtGui.QGuiApplication.setSt(QtGui.QStyleFactory.create('Cleanlooks')) + class Recorder(QObject): """docstring for Recorder""" @@ -49,7 +52,7 @@ def __init__(self, save_dir, prompts_filename, ordered=True, prompts_count=250, self.msgWarning.show() self.prompts_filename = prompts_filename - print(self.count_prompts_file_prompts_count()) + logging.debug("Prompt file count: {}".format(self.count_prompts_file_prompts_count())) self.prompts_count = prompts_count self.prompt_len_soft_max = prompt_len_soft_max self.ordered = ordered @@ -70,27 +73,27 @@ def count_prompts_file_prompts_count(self): num_lines += 1 return num_lines except FileNotFoundError as not_found: - print(not_found.filename) + logging.error(not_found.filename) @Slot(QUrl) def reinit_with_url(self, url): filename = url.toLocalFile() logging.debug('reinit_with_url: new prompt filename: %s', filename) - self.prompts_filename = filename # set new prompt filename - self.scriptModel.clear() # empty out list view - self.populate_listview() # re-init + self.prompts_filename = filename # set new prompt filename + self.scriptModel.clear() # empty out list view + self.populate_listview() # re-init @Slot(QObject) def init(self, scriptModel): - logging.debug("init: %s", scriptModel) + # logging.debug("init: %s", scriptModel) self.window.setProperty('saveDir', self.save_dir) self.scriptModel = scriptModel self.populate_listview() def populate_listview(self): self.prompts_count = self.count_prompts_file_prompts_count() - logging.info("prompts_count >>>>> {}".format(self.prompts_count)) - self.window.setProperty('promptsName', os.path.splitext(os.path.basename(self.prompts_filename))[0]) + logging.debug("prompts_count >>>>> {}".format(self.prompts_count)) + self.window.setProperty('promptsName', os.path.basename(self.prompts_filename)) for script in self.get_scripts_from_file(self.prompts_count, self.prompts_filename, self.ordered, split_len=self.prompt_len_soft_max): self.window.appendScript({'script': script, 'filename': ''}) @@ -121,7 +124,7 @@ def finishRecording(self): # Double check speaker name and if self.speaker_id is None or self.speaker_id.isspace() or self.speaker_id == "": self.speaker_id = "UNNAMED_SPEAKER" - print(self.speaker_id) + logging.debug("Speaker ID: {}".format(self.speaker_id)) with open(os.path.join(self.window.property('saveDir'), "recorder.tsv"), "a") as xsvfile: xsvfile.write('\t'.join( [filename, self.speaker_id, self.window.property('promptsName'), '', @@ -143,7 +146,7 @@ def deleteFile(self, filename): @Slot(str) def acceptSpeakerNameText(self, speakerName): - print("acceptSpeakerNameText Slot") + logging.debug("acceptSpeakerNameText Slot") self.speaker_name = speakerName if self.speaker_name is None or self.speaker_name.isspace() or self.speaker_name == "": self.speaker_name = "UNNAMED_SPEAKER" @@ -203,13 +206,13 @@ def split_script(cls, script, split_len): scripts = [] n = math.ceil(len(script) / split_len) startpos = 0 - # print(script) + # logging.debug(script) regex = re.compile(r'\s+') for i in range(n): match = regex.search(script, pos=startpos + split_len) endpos = match.start() if match else None scripts.append(script[startpos:endpos].strip()) - # print(startpos, endpos, scripts) + # logging.debug(startpos, endpos, scripts) if endpos is None: break startpos = endpos return scripts @@ -226,9 +229,10 @@ def main(): dictation audio and metadata to a `.wav` file and `recorder.tsv` file respectively. ''') - parser.add_argument('-p', '--prompts_filename', default='./prompts/yovo_3501.txt', + parser.add_argument('-p', '--prompts_filename', + default=os.path.dirname(os.path.realpath(__file__)) + '/prompts/yovo_3501.txt', help='file containing prompts to choose from') - parser.add_argument('-d', '--save_dir', default='./audio_data', + parser.add_argument('-d', '--save_dir', default=os.path.expanduser("~") + '/Desktop/audio-data', help='where to save .wav & recorder.tsv files (default: %(default)s)') parser.add_argument('-c', '--prompts_count', type=int, default=250, help='number of prompts to select and display (default: %(default)s)') @@ -257,5 +261,5 @@ def main(): if __name__ == '__main__': - logging.basicConfig(level=10) + logging.basicConfig(level=logging.DEBUG) main() diff --git a/src/yoruba_voice_speech_recorder/yv_recorder.qml b/src/yoruba_voice_speech_recorder/yv_recorder.qml index 9a8c1fe..4942fac 100644 --- a/src/yoruba_voice_speech_recorder/yv_recorder.qml +++ b/src/yoruba_voice_speech_recorder/yv_recorder.qml @@ -1,6 +1,6 @@ import QtQuick import QtQuick.Window -import QtQuick.Controls.Basic +import QtQuick.Controls.macOS // Basic // Fusion // Universal // Imagine import QtQuick.Layouts import QtQuick.Dialogs import QtMultimedia @@ -136,12 +136,13 @@ Window { } TextArea { - width: 100 font.pointSize: 18 readOnly: true text: promptsName verticalAlignment: TextField.AlignVCenter background: Rectangle { + implicitWidth: 200 + implicitHeight: 40 border.width: 1 border.color: promptsName != "" ? true : false } @@ -166,7 +167,7 @@ Window { border.color: control.enabled ? "#21be2b" : "transparent" } onAccepted: { - console.log("Speaker Name is: " + text) + console.log("Speaker Name: " + text) recorder.acceptSpeakerNameText(text) } } From 08a940ab196f9b2fbd3cc9f06f2fc60104e53a31 Mon Sep 17 00:00:00 2001 From: ruohoruotsi Date: Wed, 24 Jul 2024 00:14:31 -0700 Subject: [PATCH 3/3] [FIX] PySide6==6.7.1 bug <> which breaks playback of utterances --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 96a3335..a19750a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -PySide6 +PySide6==6.7.0 pyaudio shortuuid sounddevice \ No newline at end of file