
Commit

Removed language selection and modularized the code structure (issue #4)
kumarpanzade committed Jul 3, 2024
1 parent 19a8174 commit af2b19a
Showing 9 changed files with 195 additions and 181 deletions.
132 changes: 26 additions & 106 deletions app.py
@@ -349,147 +349,67 @@
import os
import numpy as np
import streamlit as st
import torch
import whisper
import librosa
import io
import openai
from pytube import YouTube

# Import configurations and functions from modules
from config import openai_api_key, model_id, model_path
from load_model import load_model
from transcribe_audio import transcribe_audio
from extract_entities import extract_entities
from translate_text import translate_text

# Load environment variables
load_dotenv()

# Set your OpenAI API key
openai.api_key = os.getenv("OPENAI_KEY")

# Function to extract entities using OpenAI API
def extract_entities(text):
prompt = f"""
The following entities are present in Indian Languages.
Please extract the following entities from the text.
Provide entities for both in English and the original language in a structured format:
Text: "{text}"
- Name:
- Phone Numbers:
- Addresses:
- Email:
"""
response = openai.chat.completions.create(
model="gpt-3.5-turbo",
messages=[
{"role": "system", "content": "You are a helpful assistant that extracts information from Indian multilingual text."},
{"role": "user", "content": prompt}
],
max_tokens=200
)
entities_text = response.choices[0].message.content

return entities_text

# Function to translate text from Indian languages to English using OpenAI GPT-3.5-turbo
def translate_text(text, source_language):
prompt = f"Translate the following text from {source_language} to English:\n\n{text}"
response = openai.chat.completions.create(
model="gpt-3.5-turbo",
messages=[
{"role": "system", "content": "You are a helpful assistant that translates text from Indian languages to English."},
{"role": "user", "content": prompt}
],
max_tokens=150
)
translated_text = response.choices[0].message.content

return translated_text

# Use the cache decorator from Streamlit
@st.cache(allow_output_mutation=True)
def load_model(model_id, model_path):
# Define available device (CPU/GPU)
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load model on available device
model = whisper.load_model(model_id, device=device, download_root=model_path)

# Display model's parameters in the app's logs
print(
f"Model will be run on {device}\n"
f"Model is {'multilingual' if model.is_multilingual else 'English-only'} "
f"and has {sum(np.prod(p.shape) for p in model.parameters()):,} parameters."
)

return model

def download_audio_from_youtube(url):
yt = YouTube(url)
audio_stream = yt.streams.filter(only_audio=True).first()
audio_file = audio_stream.download(filename='audio.mp4')
return audio_file
# Set OpenAI API key
openai.api_key = openai_api_key

# Main function to run the Streamlit app
def main():
# Display title
st.title("Whisper - Speech to Text App")

# Set up environment variables for model ID and path
model_id = os.environ.get('MODEL_ID', 'small') # Use a smaller model for quicker transcription by default
model_path = os.environ.get('MODEL_PATH', 'whisper_model') # Default path if not set
st.title("Speech to Text App")

# Load the Whisper model
model = load_model(model_id, model_path)

# Add a selectbox for language selection
languages = ['hi', 'bn', 'te', 'mr', 'ta', 'ur', 'gu', 'kn', 'ml', 'pa'] # List of Indian language codes
language = st.selectbox("Select the language of the audio file", languages, index=0)
# Language selection dropdown
#languages = ['hi', 'bn', 'te', 'mr', 'ta', 'ur', 'gu', 'kn', 'ml', 'pa']
#language = st.selectbox("Select the language of the audio file", languages, index=0)

# Option to upload audio file or provide YouTube link
st.write("Upload an audio file or provide a YouTube link:")
audio_file = st.file_uploader("Upload an audio file", type=["mp3", "wav"])
youtube_link = st.text_input("Or enter a YouTube link")

transcript = {"text": "The audio file could not be transcribed :("}
options = dict(beam_size=5, best_of=5, language=language)
transcribe_options = dict(task="transcribe", **options)
# File uploader for audio files
st.write("Upload an audio file:")
audio_file = st.file_uploader("Select an audio file", type=["mp3", "wav"])

audio_data = None

if audio_file:
# Read file content
# Process uploaded audio file
st.write("We are extracting these entities:\n- Name:\n- Phone Numbers:\n- Addresses:\n- Email:\n- PIN Code:\n- Occupation:\n- Gender:")
audio_bytes = audio_file.read()
st.audio(audio_bytes)

# Convert bytes to a file-like object using io.BytesIO
audio_file = io.BytesIO(audio_bytes)

# Convert to numpy array
audio_data, _ = librosa.load(audio_file, sr=16000) # Load with target sample rate of 16000 for Whisper

elif youtube_link:
try:
audio_file = download_audio_from_youtube(youtube_link)
st.audio(audio_file)

# Load audio file using librosa
audio_data, _ = librosa.load(audio_file, sr=16000)
except Exception as e:
st.error(f"Error downloading audio from YouTube: {e}")
st.error(f"Error loading audio file: {e}")

# Transcribe audio on button click
# Perform transcription and other tasks on button click
if audio_data is not None and st.button("Transcribe"):
with st.spinner("Transcribing audio..."):
transcript = model.transcribe(audio_data, **transcribe_options)
transcription_text = transcript["text"]
transcription_text = transcribe_audio(model, audio_data)
st.write(transcription_text)

# Extract entities from the transcription text
with st.spinner("Extracting entities..."):
entities = extract_entities(transcription_text)
st.write("Extracted Entities:")
st.write(entities)

# Translate transcription to English
with st.spinner("Translating to English..."):
translated_text = translate_text(transcription_text, language)
translated_text = translate_text(transcription_text)
st.write("Translated Text:")
st.write(translated_text)

# Entry point of the script
if __name__ == "__main__":
main()
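
The transcribe_audio.py and translate_text.py modules that app.py now imports are not shown in this excerpt of the diff. Based on the inline code removed above, a rough sketch of what they might contain follows; the function bodies, option values, and prompt wording are assumptions, not the committed code.

# transcribe_audio.py -- hypothetical sketch, inferred from the removed inline transcription code
def transcribe_audio(model, audio_data):
    # mirrors the old `options` dict (beam_size=5, best_of=5); language is no longer forced
    options = dict(task="transcribe", beam_size=5, best_of=5)
    transcript = model.transcribe(audio_data, **options)
    return transcript["text"]

# translate_text.py -- hypothetical sketch; the new call site passes only the text,
# so the source language is presumably left to the model to infer
import openai
from config import openai_api_key

openai.api_key = openai_api_key

def translate_text(text):
    prompt = f"Translate the following text from its original Indian language to English:\n\n{text}"
    response = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are a helpful assistant that translates text from Indian languages to English."},
            {"role": "user", "content": prompt},
        ],
        max_tokens=150,
    )
    return response.choices[0].message.content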
8 changes: 8 additions & 0 deletions config.py
@@ -0,0 +1,8 @@
import os
from dotenv import load_dotenv

load_dotenv()

openai_api_key = os.getenv("OPENAI_KEY")
model_id = os.getenv('MODEL_ID', 'large-v3')
model_path = os.getenv('MODEL_PATH', 'whisper_model')
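
config.py reads OPENAI_KEY, MODEL_ID, and MODEL_PATH from the environment via python-dotenv, so the app expects a .env file alongside the code (neither python-dotenv nor openai appears in the visible portion of requirements.txt below). A minimal .env might look like the following; the key is a placeholder, not a real credential.

# .env -- example only, values are placeholders
OPENAI_KEY=sk-your-openai-key-here
MODEL_ID=large-v3
MODEL_PATH=whisper_model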
3 changes: 1 addition & 2 deletions download_whisper.py
@@ -3,8 +3,7 @@

model_path = "whisper_model"
model_id = 'large-v3'

# Ensure the directory exists
s
os.makedirs(model_path, exist_ok=True)

# Download model
32 changes: 32 additions & 0 deletions extract_entities.py
@@ -0,0 +1,32 @@
import openai
from config import openai_api_key

openai.api_key = openai_api_key

def extract_entities(text):
prompt = f"""
The following entities are present in Indian Languages.
Please extract the following entities from the text.
Provide entities in both English and the original language of the audio in a well-structured format:
Text: "{text}"
- Name:
- Phone Numbers:
- Addresses:
- Email:
- PIN Code:
- Occupation:
- Gender:
"""
response = openai.chat.completions.create(
model="gpt-3.5-turbo",
messages=[
{"role": "system", "content": "You are a helpful assistant that extracts information from Indian multilingual text."},
{"role": "user", "content": prompt}
],
max_tokens=200
)
entities_text = response.choices[0].message.content

return entities_text
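
For illustration, the new module could be exercised like this; the sample sentence is invented, and the model's reply is free-form text that follows the bullet template in the prompt above.

# illustrative usage only; assumes OPENAI_KEY is set in .env
from extract_entities import extract_entities

# Hindi sample: "My name is Ravi Kumar, my phone number is 98765 43210 and I live in Pune."
sample = "मेरा नाम Ravi Kumar है, मेरा फ़ोन नंबर 98765 43210 है और मैं पुणे में रहता हूँ।"
print(extract_entities(sample))
# expected shape of the reply: Name, Phone Numbers, Addresses, Email, PIN Code,
# Occupation and Gender, listed in English and in the original language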
15 changes: 15 additions & 0 deletions load_model.py
@@ -0,0 +1,15 @@
import torch
import whisper
import numpy as np
import streamlit as st

@st.cache(allow_output_mutation=True)
def load_model(model_id, model_path):
device = "cuda" if torch.cuda.is_available() else "cpu"
model = whisper.load_model(model_id, device=device, download_root=model_path)
print(
f"Model will be run on {device}\n"
f"Model is {'multilingual' if model.is_multilingual else 'English-only'} "
f"and has {sum(np.prod(p.shape) for p in model.parameters()):,} parameters."
)
return model
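
A side note on the caching decorator: st.cache(allow_output_mutation=True) is deprecated in recent Streamlit releases. If the project later targets Streamlit 1.18 or newer, a sketch of the equivalent using st.cache_resource (not part of this commit) would be:

# sketch only -- same behaviour on Streamlit >= 1.18, where st.cache is deprecated
import torch
import whisper
import streamlit as st

@st.cache_resource
def load_model(model_id: str, model_path: str):
    device = "cuda" if torch.cuda.is_available() else "cpu"
    # download_root controls where the Whisper checkpoint is stored and looked up
    return whisper.load_model(model_id, device=device, download_root=model_path)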
1 change: 1 addition & 0 deletions requirements.txt
@@ -2,3 +2,4 @@ openai-whisper
streamlit
librosa
pytest
pytube