spaces

Softcatala · Dec 29, 2024 · 1f38244 · 1f38244
1 parent a3a7668
commit 1f38244
Show file tree

Hide file tree

Showing 3 changed files with 27 additions and 4 deletions.
diff --git a/open_dubbing/speech_to_text.py b/open_dubbing/speech_to_text.py
@@ -14,6 +14,7 @@
 
 import array
 import logging
+import re
 
 from abc import ABC, abstractmethod
 from typing import Mapping, Sequence
@@ -71,6 +72,16 @@ def _transcribe(
     ) -> str:
         pass
 
+    # Whisper sometimes includes spaces at the beginning of sentences or multiple spaces between words
+    def _make_sure_single_space(self, sentence: str) -> str:
+        fixed = re.sub(r"\s{2,}", " ", sentence)
+        if sentence != fixed:
+            logging.info(f" _make_sure_single_space: {sentence} - original")
+            logging.info(f" _make_sure_single_space: {fixed} - fixed")
+
+        fixed = fixed.strip()
+        return fixed
+
     def transcribe_audio_chunks(
         self,
         *,
@@ -99,9 +110,7 @@ def transcribe_audio_chunks(
                         vocals_filepath=path,
                         source_language_iso_639_1=iso_639_1,
                     )
-                    transcribed_text = (
-                        transcribed_text.strip()
-                    )  # Whisper sometimes includes spaces at the begining of sentences
+                    transcribed_text = self._make_sure_single_space(transcribed_text)
             except Exception as e:
                 logging.error(
                     f"speech_to_text.transcribe_audio_chunks. file '{path}', error: '{e}'"

diff --git a/sc.sh b/sc.sh
@@ -6,7 +6,7 @@ branch_name=$(git rev-parse --abbrev-ref HEAD)
 
 declare -a target_languages=("cat")  # Catalan (cat) and French (fra)
 declare -a inputs=($(find ../dubbing/od-videos/ -type f -name "*.mp4"))
-declare -a inputs=("videos/jordi.mp4" )
+declare -a inputs=("videos/jobinterview.mp4" )
 
 for input_file in "${inputs[@]}"; do
   output_directory="output/$(basename "${input_file%.*}").${branch_name}/"

diff --git a/tests/speech_to_text_test.py b/tests/speech_to_text_test.py
@@ -186,3 +186,17 @@ def test_get_unique_speakers_largest_audio(self):
         )
 
         assert [("SPEAKER_01", "chunk_114.mp3")] == result
+
+    # Assuming the SpeechToTextFasterWhisper class is already imported
+    @pytest.mark.parametrize(
+        "input_text, expected_output",
+        [
+            ("Hello  my friends", "Hello my friends"),  # Case with two spaces
+            ("Hello    my    friends", "Hello my friends"),
+            ("Hello my friends", "Hello my friends"),
+            ("  Hello   my friends  ", "Hello my friends"),
+        ],
+    )
+    def test_make_sure_single_space(self, input_text, expected_output):
+        result = SpeechToTextFasterWhisper()._make_sure_single_space(input_text)
+        assert result == expected_output