Skip to content

Commit

Permalink
spaces
Browse files Browse the repository at this point in the history
  • Loading branch information
jordimas committed Dec 29, 2024
1 parent a3a7668 commit 1f38244
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 4 deletions.
15 changes: 12 additions & 3 deletions open_dubbing/speech_to_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

import array
import logging
import re

from abc import ABC, abstractmethod
from typing import Mapping, Sequence
Expand Down Expand Up @@ -71,6 +72,16 @@ def _transcribe(
) -> str:
pass

# Whisper sometimes includes spaces at the beginning of sentences or multiple spaces between words
def _make_sure_single_space(self, sentence: str) -> str:
    """Return ``sentence`` with words separated by exactly one space.

    Whisper sometimes emits leading spaces or several spaces between
    words. Every run of whitespace (spaces, tabs, newlines, non-breaking
    spaces, ...) is replaced by one plain space and the result is then
    stripped at both ends.

    When the whitespace replacement changes the text, the text before
    and after the replacement is logged at INFO level.

    Args:
        sentence: Raw transcribed text.

    Returns:
        The normalized text; an empty string if ``sentence`` contains
        only whitespace.
    """
    # ``\s+`` rather than ``\s{2,}``: a lone tab or newline between two
    # words must also end up as a single plain space.
    fixed = re.sub(r"\s+", " ", sentence)
    if sentence != fixed:
        logging.info(f" _make_sure_single_space: {sentence} - original")
        logging.info(f" _make_sure_single_space: {fixed} - fixed")

    return fixed.strip()

def transcribe_audio_chunks(
self,
*,
Expand Down Expand Up @@ -99,9 +110,7 @@ def transcribe_audio_chunks(
vocals_filepath=path,
source_language_iso_639_1=iso_639_1,
)
transcribed_text = (
transcribed_text.strip()
) # Whisper sometimes includes spaces at the begining of sentences
transcribed_text = self._make_sure_single_space(transcribed_text)
except Exception as e:
logging.error(
f"speech_to_text.transcribe_audio_chunks. file '{path}', error: '{e}'"
Expand Down
2 changes: 1 addition & 1 deletion sc.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ branch_name=$(git rev-parse --abbrev-ref HEAD)

declare -a target_languages=("cat") # Catalan (cat) and French (fra)
declare -a inputs=($(find ../dubbing/od-videos/ -type f -name "*.mp4"))
declare -a inputs=("videos/jordi.mp4" )
declare -a inputs=("videos/jobinterview.mp4" )

for input_file in "${inputs[@]}"; do
output_directory="output/$(basename "${input_file%.*}").${branch_name}/"
Expand Down
14 changes: 14 additions & 0 deletions tests/speech_to_text_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,3 +186,17 @@ def test_get_unique_speakers_largest_audio(self):
)

assert [("SPEAKER_01", "chunk_114.mp3")] == result

# Assuming the SpeechToTextFasterWhisper class is already imported
@pytest.mark.parametrize(
    "input_text, expected_output",
    [
        # Already normalized text is returned unchanged.
        ("Hello my friends", "Hello my friends"),
        # Two spaces between a pair of words.
        ("Hello  my friends", "Hello my friends"),
        # Three spaces between a pair of words.
        ("Hello my   friends", "Hello my friends"),
        # Several runs of spaces in the same sentence.
        ("Hello  my    friends", "Hello my friends"),
        # Leading and trailing spaces are stripped.
        ("  Hello my friends  ", "Hello my friends"),
    ],
)
def test_make_sure_single_space(self, input_text, expected_output):
    """Runs of spaces collapse to one space and the ends are trimmed."""
    stt = SpeechToTextFasterWhisper()
    assert stt._make_sure_single_space(input_text) == expected_output

0 comments on commit 1f38244

Please sign in to comment.