Skip to content

Commit

Permalink
Merge pull request #2 from coqui-ai/dev
Browse files Browse the repository at this point in the history
Bug fix in MP3 and FLAC compute length on TTSDataset (coqui-ai#3092)
  • Loading branch information
Pranjalya committed Dec 29, 2023
2 parents 73977a7 + 5dcc16d commit 3257ee6
Show file tree
Hide file tree
Showing 71 changed files with 164 additions and 110 deletions.
15 changes: 13 additions & 2 deletions TTS/tts/datasets/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
from TTS.utils.audio import AudioProcessor
from TTS.utils.audio.numpy_transforms import compute_energy as calculate_energy

import mutagen

# to prevent too many open files error as suggested here
# https://github.com/pytorch/pytorch/issues/11201#issuecomment-421146936
torch.multiprocessing.set_sharing_strategy("file_system")
Expand Down Expand Up @@ -42,6 +44,15 @@ def string2filename(string):
return filename


def get_audio_size(audiopath):
    """Return the length of an audio file in samples, read from its metadata.

    The value is computed from the stream info that mutagen parses out of the
    file, so the audio data itself is not decoded.

    Args:
        audiopath (str): Path to an audio file whose extension is ``mp3``,
            ``wav`` or ``flac`` (matched case-insensitively).

    Returns:
        int: ``info.length * info.sample_rate`` truncated to an integer.

    Raises:
        RuntimeError: If the extension is not one of the supported formats, or
            if mutagen cannot identify the contents of the file.
    """
    extension = audiopath.rpartition(".")[-1].lower()
    if extension not in {"mp3", "wav", "flac"}:
        raise RuntimeError(
            f"The audio format {extension} is not supported, please convert the audio files to mp3, flac, or wav format!"
        )

    # mutagen.File returns None (instead of raising) when it cannot detect the
    # file type, e.g. for a corrupted or mislabeled file. Fail with a clear
    # message rather than an AttributeError on `.info`.
    audio_file = mutagen.File(audiopath)
    if audio_file is None:
        raise RuntimeError(
            f"Could not read audio metadata from {audiopath}, the file may be corrupted or not a valid {extension} file!"
        )

    audio_info = audio_file.info
    return int(audio_info.length * audio_info.sample_rate)


class TTSDataset(Dataset):
def __init__(
self,
Expand Down Expand Up @@ -176,7 +187,7 @@ def lengths(self):
lens = []
for item in self.samples:
_, wav_file, *_ = _parse_sample(item)
audio_len = os.path.getsize(wav_file) / 16 * 8 # assuming 16bit audio
audio_len = get_audio_size(wav_file)
lens.append(audio_len)
return lens

Expand Down Expand Up @@ -295,7 +306,7 @@ def load_data(self, idx):
def _compute_lengths(samples):
new_samples = []
for item in samples:
audio_length = os.path.getsize(item["audio_file"]) / 16 * 8 # assuming 16bit audio
audio_length = get_audio_size(item["audio_file"])
text_lenght = len(item["text"])
item["audio_length"] = audio_length
item["text_length"] = text_lenght
Expand Down
6 changes: 4 additions & 2 deletions TTS/tts/models/xtts.py
Original file line number Diff line number Diff line change
Expand Up @@ -757,11 +757,13 @@ def load_checkpoint(
checkpoint_dir = "."
model_path = checkpoint_path or os.path.join(checkpoint_dir, "model.pth")
vocab_path = vocab_path or os.path.join(checkpoint_dir, "vocab.json")
speaker_file_path = speaker_file_path or os.path.join(checkpoint_dir, "speakers_xtts.pth")

if speaker_file_path is None and checkpoint_dir is not None:
speaker_file_path = os.path.join(checkpoint_dir, "speakers_xtts.pth")

self.language_manager = LanguageManager(config)
self.speaker_manager = None
if os.path.exists(speaker_file_path):
if speaker_file_path is not None and os.path.exists(speaker_file_path):
self.speaker_manager = SpeakerManager(speaker_file_path)

if os.path.exists(vocab_path):
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ pyyaml>=6.0
fsspec>=2023.6.0 # <= 2023.9.1 makes aux tests fail
aiohttp>=3.8.1
packaging>=23.1
mutagen==1.47.0
# deps for examples
flask>=2.0.1
# deps for inference
Expand Down
9 changes: 9 additions & 0 deletions tests/data/ljspeech/metadata_flac.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
audio_file|text|transcription|speaker_name
wavs/LJ001-0001.flac|Printing, in the only sense with which we are at present concerned, differs from most if not from all the arts and crafts represented in the Exhibition|Printing, in the only sense with which we are at present concerned, differs from most if not from all the arts and crafts represented in the Exhibition|ljspeech-0
wavs/LJ001-0002.flac|in being comparatively modern.|in being comparatively modern.|ljspeech-0
wavs/LJ001-0003.flac|For although the Chinese took impressions from wood blocks engraved in relief for centuries before the woodcutters of the Netherlands, by a similar process|For although the Chinese took impressions from wood blocks engraved in relief for centuries before the woodcutters of the Netherlands, by a similar process|ljspeech-1
wavs/LJ001-0004.flac|produced the block books, which were the immediate predecessors of the true printed book,|produced the block books, which were the immediate predecessors of the true printed book,|ljspeech-1
wavs/LJ001-0005.flac|the invention of movable metal letters in the middle of the fifteenth century may justly be considered as the invention of the art of printing.|the invention of movable metal letters in the middle of the fifteenth century may justly be considered as the invention of the art of printing.|ljspeech-2
wavs/LJ001-0006.flac|And it is worth mention in passing that, as an example of fine typography,|And it is worth mention in passing that, as an example of fine typography,|ljspeech-2
wavs/LJ001-0007.flac|the earliest book printed with movable types, the Gutenberg, or "forty-two line Bible" of about 1455,|the earliest book printed with movable types, the Gutenberg, or "forty-two line Bible" of about fourteen fifty-five,|ljspeech-3
wavs/LJ001-0008.flac|has never been surpassed.|has never been surpassed.|ljspeech-3
9 changes: 9 additions & 0 deletions tests/data/ljspeech/metadata_mp3.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
audio_file|text|transcription|speaker_name
wavs/LJ001-0001.mp3|Printing, in the only sense with which we are at present concerned, differs from most if not from all the arts and crafts represented in the Exhibition|Printing, in the only sense with which we are at present concerned, differs from most if not from all the arts and crafts represented in the Exhibition|ljspeech-0
wavs/LJ001-0002.mp3|in being comparatively modern.|in being comparatively modern.|ljspeech-0
wavs/LJ001-0003.mp3|For although the Chinese took impressions from wood blocks engraved in relief for centuries before the woodcutters of the Netherlands, by a similar process|For although the Chinese took impressions from wood blocks engraved in relief for centuries before the woodcutters of the Netherlands, by a similar process|ljspeech-1
wavs/LJ001-0004.mp3|produced the block books, which were the immediate predecessors of the true printed book,|produced the block books, which were the immediate predecessors of the true printed book,|ljspeech-1
wavs/LJ001-0005.mp3|the invention of movable metal letters in the middle of the fifteenth century may justly be considered as the invention of the art of printing.|the invention of movable metal letters in the middle of the fifteenth century may justly be considered as the invention of the art of printing.|ljspeech-2
wavs/LJ001-0006.mp3|And it is worth mention in passing that, as an example of fine typography,|And it is worth mention in passing that, as an example of fine typography,|ljspeech-2
wavs/LJ001-0007.mp3|the earliest book printed with movable types, the Gutenberg, or "forty-two line Bible" of about 1455,|the earliest book printed with movable types, the Gutenberg, or "forty-two line Bible" of about fourteen fifty-five,|ljspeech-3
wavs/LJ001-0008.mp3|has never been surpassed.|has never been surpassed.|ljspeech-3
9 changes: 9 additions & 0 deletions tests/data/ljspeech/metadata_wav.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
audio_file|text|transcription|speaker_name
wavs/LJ001-0001.wav|Printing, in the only sense with which we are at present concerned, differs from most if not from all the arts and crafts represented in the Exhibition|Printing, in the only sense with which we are at present concerned, differs from most if not from all the arts and crafts represented in the Exhibition|ljspeech-0
wavs/LJ001-0002.wav|in being comparatively modern.|in being comparatively modern.|ljspeech-0
wavs/LJ001-0003.wav|For although the Chinese took impressions from wood blocks engraved in relief for centuries before the woodcutters of the Netherlands, by a similar process|For although the Chinese took impressions from wood blocks engraved in relief for centuries before the woodcutters of the Netherlands, by a similar process|ljspeech-1
wavs/LJ001-0004.wav|produced the block books, which were the immediate predecessors of the true printed book,|produced the block books, which were the immediate predecessors of the true printed book,|ljspeech-1
wavs/LJ001-0005.wav|the invention of movable metal letters in the middle of the fifteenth century may justly be considered as the invention of the art of printing.|the invention of movable metal letters in the middle of the fifteenth century may justly be considered as the invention of the art of printing.|ljspeech-2
wavs/LJ001-0006.wav|And it is worth mention in passing that, as an example of fine typography,|And it is worth mention in passing that, as an example of fine typography,|ljspeech-2
wavs/LJ001-0007.wav|the earliest book printed with movable types, the Gutenberg, or "forty-two line Bible" of about 1455,|the earliest book printed with movable types, the Gutenberg, or "forty-two line Bible" of about fourteen fifty-five,|ljspeech-3
wavs/LJ001-0008.wav|has never been surpassed.|has never been surpassed.|ljspeech-3
Binary file added tests/data/ljspeech/wavs/LJ001-0001.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0001.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0002.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0002.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0003.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0003.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0004.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0004.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0005.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0005.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0006.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0006.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0007.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0007.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0008.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0008.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0009.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0009.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0010.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0010.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0011.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0011.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0012.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0012.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0013.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0013.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0014.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0014.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0015.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0015.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0016.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0016.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0017.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0017.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0018.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0018.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0019.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0019.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0020.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0020.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0021.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0021.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0022.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0022.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0023.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0023.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0024.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0024.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0025.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0025.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0026.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0026.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0027.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0027.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0028.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0028.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0029.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0029.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0030.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0030.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0031.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0031.mp3
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0032.flac
Binary file not shown.
Binary file added tests/data/ljspeech/wavs/LJ001-0032.mp3
Binary file not shown.
Loading

0 comments on commit 3257ee6

Please sign in to comment.