Saving 5th tutorial

pythonlessons · pythonlessons · commit d13fbfd1902c · 2023-02-20T09:27:39.000+02:00
diff --git a/README.md b/README.md
@@ -16,4 +16,5 @@ Each tutorial has its own requirements.txt file for a specific mltu version. As
 1. [Text Recognition With TensorFlow and CTC network](https://pylessons.com/ctc-text-recognition), code in ```Tutorials\01_image_to_word``` folder;
 2. [TensorFlow OCR model for reading Captchas](https://pylessons.com/tensorflow-ocr-captcha), code in ```Tutorials\02_captcha_to_text``` folder;
 3. [Handwriting words recognition with TensorFlow](https://pylessons.com/handwriting-recognition), code in ```Tutorials\03_handwriting_recognition``` folder;
-4. [Handwritten sentence recognition with TensorFlow](https://pylessons.com/handwritten-sentence-recognition), code in ```Tutorials\04_sentence_recognition``` folder;
+4. [Handwritten sentence recognition with TensorFlow](https://pylessons.com/handwritten-sentence-recognition), code in ```Tutorials\04_sentence_recognition``` folder;
+5. [Introduction to speech recognition with TensorFlow](https://pylessons.com/speech-recognition), code in ```Tutorials\05_sound_to_text``` folder;
diff --git a/Tutorials/05_sound_to_text/inferenceModel.py b/Tutorials/05_sound_to_text/inferenceModel.py
@@ -23,30 +23,29 @@ def predict(self, data: np.ndarray):
     import pandas as pd
     from tqdm import tqdm
     from mltu.configs import BaseModelConfigs
-    import matplotlib.pyplot as plt
-    import matplotlib
-    matplotlib.interactive(False)
 
-    configs = BaseModelConfigs.load("Models/05_sound_to_text/202301221900/configs.yaml")
+    configs = BaseModelConfigs.load("Models/05_sound_to_text/202302051936/configs.yaml")
 
-    model = WavToTextModel(model_path=configs.model_path, char_list=configs.vocab, force_cpu=True)
+    model = WavToTextModel(model_path=configs.model_path, char_list=configs.vocab, force_cpu=False)
 
-    df = pd.read_csv("Models/05_sound_to_text/202301221900/val.csv").values.tolist()
+    df = pd.read_csv("Models/05_sound_to_text/202302051936/val.csv").values.tolist()
 
     accum_cer, accum_wer = [], []
     for wav_path, label in tqdm(df):
         
         spectrogram = WavReader.get_spectrogram(wav_path, frame_length=configs.frame_length, frame_step=configs.frame_step, fft_length=configs.fft_length)
-        WavReader.plot_raw_audio(wav_path, label)
+        # WavReader.plot_raw_audio(wav_path, label)
 
         padded_spectrogram = np.pad(spectrogram, ((configs.max_spectrogram_length - spectrogram.shape[0], 0),(0,0)), mode='constant', constant_values=0)
 
-        WavReader.plot_spectrogram(spectrogram, label)
+        # WavReader.plot_spectrogram(spectrogram, label)
 
         text = model.predict(padded_spectrogram)
 
-        cer = get_cer(text, label.lower())
-        wer = get_wer(text, label.lower())
+        true_label = "".join([l for l in label.lower() if l in configs.vocab])
+
+        cer = get_cer(text, true_label)
+        wer = get_wer(text, true_label)
 
         accum_cer.append(cer)
         accum_wer.append(wer)
diff --git a/Tutorials/05_sound_to_text/train.py b/Tutorials/05_sound_to_text/train.py
@@ -2,8 +2,14 @@
 try: [tf.config.experimental.set_memory_growth(gpu, True) for gpu in tf.config.experimental.list_physical_devices('GPU')]
 except: pass
 
-from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, TensorBoard
+import stow
+import tarfile
+import pandas as pd
+from tqdm import tqdm
+from urllib.request import urlopen
+from io import BytesIO
 
+from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, TensorBoard
 from mltu.dataProvider import DataProvider
 from mltu.preprocessors import WavReader
 from mltu.transformers import LabelIndexer, LabelPadding, SpectrogramPadding
@@ -14,16 +20,6 @@
 from model import train_model
 from configs import ModelConfigs
 
-import stow
-import pandas as pd
-from tqdm import tqdm
-
-import stow
-import tarfile
-from tqdm import tqdm
-from urllib.request import urlopen
-from io import BytesIO
-
 def download_and_unzip(url, extract_to='Datasets', chunk_size=1024*1024):
     http_response = urlopen(url)
 
diff --git a/Tutorials/README.md b/Tutorials/README.md
@@ -1,3 +1,6 @@
 # Tutorials and Examples made with MLTU library:
 1. [Text Recognition With TensorFlow and CTC network](https://pylessons.com/ctc-text-recognition), code in ```Tutorials\01_image_to_word``` folder;
-2. [TensorFlow OCR model for reading Captchas](https://pylessons.com/tensorflow-ocr-captcha), code in ```Tutorials\02_captcha_to_text``` folder;
+2. [TensorFlow OCR model for reading Captchas](https://pylessons.com/tensorflow-ocr-captcha), code in ```Tutorials\02_captcha_to_text``` folder;
+3. [Handwriting words recognition with TensorFlow](https://pylessons.com/handwriting-recognition), code in ```Tutorials\03_handwriting_recognition``` folder;
+4. [Handwritten sentence recognition with TensorFlow](https://pylessons.com/handwritten-sentence-recognition), code in ```Tutorials\04_sentence_recognition``` folder;
+5. [Introduction to speech recognition with TensorFlow](https://pylessons.com/speech-recognition), code in ```Tutorials\05_sound_to_text``` folder;
diff --git a/mltu/__init__.py b/mltu/__init__.py
@@ -1 +1 @@
-__version__ = "0.1.5"
+__version__ = "0.1.6"

Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-__version__ = "0.1.5"`
	`1`	`+__version__ = "0.1.6"`