Merge pull request #2485 from coqui-ai/dev

🐬 v0.13.0
coqui-ai · Apr 5, 2023 · a01ca65 · a01ca65
2 parents 12f3365 + 1233365
commit a01ca65
Show file tree

Hide file tree

Showing 41 changed files with 4,910 additions and 80 deletions.
diff --git a/.github/workflows/inference_tests.yml b/.github/workflows/inference_tests.yml
@@ -32,7 +32,8 @@ jobs:
       - name: check OS
         run: cat /etc/os-release
       - name: set ENV
-        run: export TRAINER_TELEMETRY=0
+        run: |
+          export TRAINER_TELEMETRY=0
       - name: Install dependencies
         run: |
           sudo apt-get update
@@ -49,4 +50,6 @@ jobs:
           python3 -m pip install .[all]
           python3 setup.py egg_info
       - name: Unit tests
-        run: make inference_tests
+        run: |
+          export COQUI_STUDIO_TOKEN=${{ secrets.COQUI_STUDIO_TOKEN }}
+          make inference_tests
diff --git a/.gitignore b/.gitignore
@@ -137,7 +137,7 @@ VCTK-Corpus-removed-silence/*
 # ignore training logs
 trainer_*_log.txt
 
-# files used internally fro dev, test etc.
+# files used internally for dev, test etc.
 tests/outputs/*
 tests/train_outputs/*
 TODO.txt
@@ -168,3 +168,4 @@ internal/*
 wandb
 depot/*
 coqui_recipes/*
+local_scripts/*
diff --git a/README.md b/README.md
@@ -195,8 +195,38 @@ tts.tts_to_file(text="Ich bin eine Testnachricht.", file_path=OUTPUT_PATH)
 # Example voice cloning with YourTTS in English, French and Portuguese:
 tts = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts", progress_bar=False, gpu=True)
 tts.tts_to_file("This is voice cloning.", speaker_wav="my/cloning/audio.wav", language="en", file_path="output.wav")
-tts.tts_to_file("C'est le clonage de la voix.", speaker_wav="my/cloning/audio.wav", language="fr", file_path="output.wav")
-tts.tts_to_file("Isso é clonagem de voz.", speaker_wav="my/cloning/audio.wav", language="pt", file_path="output.wav")
+tts.tts_to_file("C'est le clonage de la voix.", speaker_wav="my/cloning/audio.wav", language="fr-fr", file_path="output.wav")
+tts.tts_to_file("Isso é clonagem de voz.", speaker_wav="my/cloning/audio.wav", language="pt-br", file_path="output.wav")
+
+
+# Example voice conversion: converting the speaker of `source_wav` to the speaker of `target_wav`
+
+tts = TTS(model_name="voice_conversion_models/multilingual/vctk/freevc24", progress_bar=False, gpu=True)
+tts.voice_conversion_to_file(source_wav="my/source.wav", target_wav="my/target.wav", file_path="output.wav")
+
+# Example voice cloning with a single-speaker TTS model combined with the voice conversion model. This way, you can
+# clone voices by using any model in 🐸TTS.
+
+tts = TTS("tts_models/de/thorsten/tacotron2-DDC")
+tts.tts_with_vc_to_file(
+    "Wie sage ich auf Italienisch, dass ich dich liebe?",
+    speaker_wav="target/speaker.wav",
+    file_path="output.wav"
+)
+
+# Example text to speech using [🐸Coqui Studio](https://coqui.ai) models. You can use all of your available speakers in the studio.
+# [🐸Coqui Studio](https://coqui.ai) API token is required. You can get it from the [account page](https://coqui.ai/account).
+# You should set the `COQUI_STUDIO_TOKEN` environment variable to use the API token.
+
+# If you have a valid API token set, you will see the studio speakers as separate models in the list.
+# The name format is coqui_studio/en/<studio_speaker_name>/coqui_studio
+models = TTS().list_models()
+# Init TTS with the target studio speaker
+tts = TTS(model_name="coqui_studio/en/Torcull Diarmuid/coqui_studio", progress_bar=False, gpu=False)
+# Run TTS
+tts.tts_to_file(text="This is a test.", file_path=OUTPUT_PATH)
+# Run TTS with emotion and speed control
+tts.tts_to_file(text="This is a test.", file_path=OUTPUT_PATH, emotion="Happy", speed=1.5)
 ```
 
 ### Command line `tts`

diff --git a/TTS/.models.json b/TTS/.models.json
@@ -802,5 +802,18 @@
                 }
             }
         }
+    },
+    "voice_conversion_models":{
+        "multilingual":{
+            "vctk":{
+                "freevc24":{
+                    "github_rls_url": "https://coqui.gateway.scarf.sh/v0.13.0_models/voice_conversion_models--multilingual--vctk--freevc24.zip",
+                    "description": "FreeVC model trained on VCTK dataset from https://github.com/OlaWod/FreeVC",
+                    "author": "Jing-Yi Li @OlaWod",
+                    "license": "MIT",
+                    "commit": null
+                }
+            }
+        }
     }
 }
diff --git a/TTS/VERSION b/TTS/VERSION
@@ -1 +1 @@
-0.12.0
+0.13.0