Skip to content

Commit

Permalink
Merge pull request #147 from lissyx/ds-0.9
Browse files Browse the repository at this point in the history
Ds 0.9
  • Loading branch information
lissyx committed Dec 3, 2020
2 parents 4be6d3e + e9b5a96 commit 5a0f61b
Show file tree
Hide file tree
Showing 6 changed files with 61 additions and 6 deletions.
13 changes: 8 additions & 5 deletions DeepSpeech/Dockerfile.train
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
FROM nvidia/cuda:10.0-cudnn7-runtime-ubuntu18.04

ARG ds_repo=mozilla/DeepSpeech
ARG ds_branch=3ebcbdea91f1d60fc9e92c35da7b040f312246df
ARG ds_sha1=3ebcbdea91f1d60fc9e92c35da7b040f312246df
ARG ds_branch=4270e22fe02f4fa7430a721ac917f6353c36f455
ARG ds_sha1=4270e22fe02f4fa7430a721ac917f6353c36f455
ARG cc_repo=mozilla/CorporaCreator
ARG cc_sha1=73622cf8399f8e634aee2f0e76dacc879226e3ac
ARG kenlm_repo=kpu/kenlm
Expand Down Expand Up @@ -90,6 +90,7 @@ RUN apt-get -qq update && apt-get -qq install -y --no-install-recommends \
curl \
wget \
git \
ffmpeg \
python3 \
python3-pip \
ca-certificates \
Expand Down Expand Up @@ -170,14 +171,16 @@ COPY --chown=trainer:trainer corpora.patch $CC_DIR

RUN patch -p1 < corpora.patch

# Avoid "error: pandas 1.1.0 is installed but pandas==1.0.5 is required by {'modin'}"
RUN pip install pandas==1.0.5

# Avoid "error: parso 0.8.0 is installed but parso<0.8.0,>=0.7.0 is required by {'jedi'}"
RUN pip install parso==0.7.0

RUN pip install modin[all]

RUN python setup.py install

# For CC PMF importer
RUN pip install num2words

WORKDIR $HOMEDIR

ENV PATH="$HOMEDIR/kenlm/build/bin/:$PATH"
Expand Down
2 changes: 2 additions & 0 deletions DeepSpeech/checks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ done;

mkdir /mnt/extracted/data/ || true

python -c "import tensorflow as tf; tf.test.is_gpu_available()"

# Checking with basic LDC93S1 before running into heavy-load
pushd $HOME/ds/
./bin/run-tc-ldc93s1_new.sh 2 16000
Expand Down
14 changes: 14 additions & 0 deletions DeepSpeech/fr/import_ccpmf.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/bin/bash

# Run the CCPMF importer (bin/import_ccpmf.py) from $HOME/ds/, unless the
# training CSV it is expected to produce is already present under
# /mnt/extracted/data/ccpmf/.

# -x: trace every command; -e: abort on the first failing command.
set -xe

# Quote the path so a $HOME containing whitespace or glob characters is not
# word-split or expanded before pushd sees it.
pushd "$HOME/ds/"
    # The train CSV is used as the "already imported" marker: when it exists,
    # both the hot patch and the import are skipped.
    if [ ! -f "/mnt/extracted/data/ccpmf/transcriptionsXML_audioMP3_MEFR_CCPMF_2012-2020/ccpmf_train.csv" ]; then
        # Hot patch: rewrite the importer's MAX_SECS assignment in place so it
        # reads "MAX_SECS = 4.5" before the importer is run.
        # NOTE(review): presumably MAX_SECS caps the duration of kept audio
        # clips -- confirm against bin/import_ccpmf.py.
        sed -ri 's/MAX_SECS = .*/MAX_SECS = 4.5/g' bin/import_ccpmf.py

        # IMPORTERS_VALIDATE_LOCALE is deliberately left unquoted so that it
        # can expand to zero or several arguments.
        # NOTE(review): presumably it holds an option plus its value, set by
        # the calling environment -- confirm before quoting it.
        python bin/import_ccpmf.py \
            ${IMPORTERS_VALIDATE_LOCALE} \
            /mnt/extracted/data/ccpmf/
    fi;
popd
2 changes: 2 additions & 0 deletions DeepSpeech/fr/importers.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,5 @@ import_trainingspeech.sh
import_slr57.sh

../import_m-ailabs.sh

import_ccpmf.sh
2 changes: 1 addition & 1 deletion DeepSpeech/fr/metadata.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ set -xe

export METADATA_AUTHOR="DeepSpeech-FR-Team"
export METADATA_MODEL_NAME="deepspeech-fr"
export METADATA_MODEL_VERSION="0.5"
export METADATA_MODEL_VERSION="0.6"
export METADATA_CONTACT_INFO="https://discourse.mozilla.org/c/voice/fr"
export METADATA_LICENSE="MIT-0"
export METADATA_LANGUAGE="fr-FR"
Expand Down
34 changes: 34 additions & 0 deletions DeepSpeech/fr/validate_label.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,34 @@ def validate_label(label):
return None

skip_foreign_chars = [
'ʔ',
'ε',
'ι',
'ο',
'ό',
'ρ',
'ς',
'ψ',
'գ',
'զ',
'ا',
'ب',
'د',
'ر',
'ل',
'ن',
'و',
'ي',
'ቀ',
'ወ',
'う',
'ゔ',
'へ',
'ま',
'め',
'や',
'貴',
'青',
'い',
'た',
'つ',
Expand All @@ -25,6 +53,12 @@ def validate_label(label):
'杜',
'美',
'馆',
'삼',
'고',
'생',
'기',
'집',
'먹',
]

for skip in skip_foreign_chars:
Expand Down

0 comments on commit 5a0f61b

Please sign in to comment.