From 994be163e1ce916b9594e5227fc6551e015b152f Mon Sep 17 00:00:00 2001 From: Martin Weinelt Date: Sun, 29 Jan 2023 22:47:00 +0000 Subject: [PATCH] Use packaging.version for version comparisons (#2310) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Use packaging.version for version comparisons The distutils package is deprecated¹ and relies on PEP 386² version comparisons, which have been superseded by PEP 440³ which is implemented through the packaging module. With more recent distutils versions, provided through setuptools vendoring, we are seeing the following exception during version comparisons: > TypeError: '<' not supported between instances of 'str' and 'int' This is fixed by this migration. [1] https://docs.python.org/3/library/distutils.html [2] https://peps.python.org/pep-0386/ [3] https://peps.python.org/pep-0440/ * Improve espeak version detection robustness On many modern systems espeak is just a symlink to espeak-ng. In that case looking for the 3rd word in the version output will break the version comparison, when it finds `text-to-speech:`, instead of a proper version. This will not break during runtime, where espeak-ng would be prioritized, but the phonemizer and tokenizer tests force the backend to `espeak`, which exhibits this breakage. This improves the version detection by simply looking for the version after the "text-to-speech:" token. * Replace distutils.copy_tree with shutil.copytree The distutils module is deprecated and slated for removal in Python 3.12. Its usage should be replaced, in this case by a compatible method from shutil. 
--- TTS/bin/resample.py | 4 ++-- TTS/tts/layers/glow_tts/glow.py | 5 ++--- TTS/tts/utils/text/phonemizers/espeak_wrapper.py | 14 +++++++++++--- pyproject.toml | 4 ++-- requirements.txt | 1 + setup.py | 5 +++-- tests/text_tests/test_phonemizer.py | 11 ++++++----- 7 files changed, 27 insertions(+), 17 deletions(-) diff --git a/TTS/bin/resample.py b/TTS/bin/resample.py index ec96dcc00b..eb4ee58e32 100644 --- a/TTS/bin/resample.py +++ b/TTS/bin/resample.py @@ -2,8 +2,8 @@ import glob import os from argparse import RawTextHelpFormatter -from distutils.dir_util import copy_tree from multiprocessing import Pool +from shutil import copytree import librosa import soundfile as sf @@ -19,7 +19,7 @@ def resample_file(func_args): def resample_files(input_dir, output_sr, output_dir=None, file_ext="wav", n_jobs=10): if output_dir: print("Recursively copying the input folder...") - copy_tree(input_dir, output_dir) + copytree(input_dir, output_dir) input_dir = output_dir print("Resampling the audio files...") diff --git a/TTS/tts/layers/glow_tts/glow.py b/TTS/tts/layers/glow_tts/glow.py index 3b745018a2..273c62a5c0 100644 --- a/TTS/tts/layers/glow_tts/glow.py +++ b/TTS/tts/layers/glow_tts/glow.py @@ -1,6 +1,5 @@ -from distutils.version import LooseVersion - import torch +from packaging.version import Version from torch import nn from torch.nn import functional as F @@ -91,7 +90,7 @@ def __init__(self, channels, num_splits=4, no_jacobian=False, **kwargs): # pyli self.no_jacobian = no_jacobian self.weight_inv = None - if LooseVersion(torch.__version__) < LooseVersion("1.9"): + if Version(torch.__version__) < Version("1.9"): w_init = torch.qr(torch.FloatTensor(self.num_splits, self.num_splits).normal_())[0] else: w_init = torch.linalg.qr(torch.FloatTensor(self.num_splits, self.num_splits).normal_(), "complete")[0] diff --git a/TTS/tts/utils/text/phonemizers/espeak_wrapper.py b/TTS/tts/utils/text/phonemizers/espeak_wrapper.py index 5c0865bc45..8982a89377 100644 --- 
a/TTS/tts/utils/text/phonemizers/espeak_wrapper.py +++ b/TTS/tts/utils/text/phonemizers/espeak_wrapper.py @@ -1,9 +1,10 @@ import logging import re import subprocess -from distutils.version import LooseVersion from typing import Dict, List +from packaging.version import Version + from TTS.tts.utils.text.phonemizers.base import BasePhonemizer from TTS.tts.utils.text.punctuation import Punctuation @@ -14,9 +15,16 @@ def is_tool(name): return which(name) is not None +# Use a regex pattern to match the espeak version, because it may be +# symlinked to espeak-ng, which moves the version bits to another spot. +espeak_version_pattern = re.compile(r"text-to-speech:\s(?P<version>\d+\.\d+(\.\d+)?)") + + def get_espeak_version(): output = subprocess.getoutput("espeak --version") - return output.split()[2] + match = espeak_version_pattern.search(output) + + return match.group("version") def get_espeakng_version(): @@ -168,7 +176,7 @@ def phonemize_espeak(self, text: str, separator: str = "|", tie=False) -> str: else: # split with '_' if self.backend == "espeak": - if LooseVersion(self.backend_version) >= LooseVersion("1.48.15"): + if Version(self.backend_version) >= Version("1.48.15"): args.append("--ipa=1") else: args.append("--ipa=3") diff --git a/pyproject.toml b/pyproject.toml index b790159d5f..8bc91b45fd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [build-system] -requires = ["setuptools", "wheel", "cython==0.29.28", "numpy==1.21.6"] +requires = ["setuptools", "wheel", "cython==0.29.28", "numpy==1.21.6", "packaging"] [flake8] max-line-length=120 @@ -30,4 +30,4 @@ exclude = ''' [tool.isort] line_length = 120 profile = "black" -multi_line_output = 3 \ No newline at end of file +multi_line_output = 3 diff --git a/requirements.txt b/requirements.txt index 8464d7cb2f..7ee23dab5a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,6 +14,7 @@ tqdm anyascii pyyaml fsspec>=2021.04.0 +packaging # deps for examples flask # deps for inference diff --git a/setup.py 
b/setup.py index f95d79f14c..259c3cd15a 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,7 @@ import os import subprocess import sys -from distutils.version import LooseVersion +from packaging.version import Version import numpy import setuptools.command.build_py @@ -31,7 +31,8 @@ from Cython.Build import cythonize from setuptools import Extension, find_packages, setup -if LooseVersion(sys.version) < LooseVersion("3.7") or LooseVersion(sys.version) >= LooseVersion("3.11"): +python_version = sys.version.split()[0] +if Version(python_version) < Version("3.7") or Version(python_version) >= Version("3.11"): raise RuntimeError("TTS requires python >= 3.7 and < 3.11 " "but your Python version is {}".format(sys.version)) diff --git a/tests/text_tests/test_phonemizer.py b/tests/text_tests/test_phonemizer.py index 794a8fd75a..4ca6238428 100644 --- a/tests/text_tests/test_phonemizer.py +++ b/tests/text_tests/test_phonemizer.py @@ -1,5 +1,6 @@ import unittest -from distutils.version import LooseVersion + +from packaging.version import Version from TTS.tts.utils.text.phonemizers import ESpeak, Gruut, JA_JP_Phonemizer, ZH_CN_Phonemizer from TTS.tts.utils.text.phonemizers.multi_phonemizer import MultiPhonemizer @@ -40,7 +41,7 @@ class TestEspeakPhonemizer(unittest.TestCase): def setUp(self): self.phonemizer = ESpeak(language="en-us", backend="espeak") - if LooseVersion(self.phonemizer.backend_version) >= LooseVersion("1.48.15"): + if Version(self.phonemizer.backend_version) >= Version("1.48.15"): target_phonemes = EXPECTED_ESPEAK_v1_48_15_PHONEMES else: target_phonemes = EXPECTED_ESPEAK_PHONEMES @@ -52,7 +53,7 @@ def setUp(self): # multiple punctuations text = "Be a voice, not an! echo?" gt = "biː ɐ vˈɔɪs, nˈɑːt ɐn! ˈɛkoʊ?" - if LooseVersion(self.phonemizer.backend_version) >= LooseVersion("1.48.15"): + if Version(self.phonemizer.backend_version) >= Version("1.48.15"): gt = "biː ɐ vˈɔɪs, nˈɑːt æn! ˈɛkoʊ?" 
output = self.phonemizer.phonemize(text, separator="|") output = output.replace("|", "") @@ -61,7 +62,7 @@ def setUp(self): # not ending with punctuation text = "Be a voice, not an! echo" gt = "biː ɐ vˈɔɪs, nˈɑːt ɐn! ˈɛkoʊ" - if LooseVersion(self.phonemizer.backend_version) >= LooseVersion("1.48.15"): + if Version(self.phonemizer.backend_version) >= Version("1.48.15"): gt = "biː ɐ vˈɔɪs, nˈɑːt æn! ˈɛkoʊ" output = self.phonemizer.phonemize(text, separator="") self.assertEqual(output, gt) @@ -69,7 +70,7 @@ def setUp(self): # extra space after the sentence text = "Be a voice, not an! echo. " gt = "biː ɐ vˈɔɪs, nˈɑːt ɐn! ˈɛkoʊ." - if LooseVersion(self.phonemizer.backend_version) >= LooseVersion("1.48.15"): + if Version(self.phonemizer.backend_version) >= Version("1.48.15"): gt = "biː ɐ vˈɔɪs, nˈɑːt æn! ˈɛkoʊ." output = self.phonemizer.phonemize(text, separator="") self.assertEqual(output, gt)