-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
#60 implemented whisper_timestamped model and llama 3.2 for action items
- Loading branch information
Showing
16 changed files
with
378 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
from dotenv import load_dotenv | ||
import whisper | ||
import ollama | ||
import logging | ||
from logger import logger | ||
import openai | ||
import whisper_timestamped as whisper_ts | ||
import json | ||
import datetime | ||
|
||
# Load environment variables | ||
load_dotenv() | ||
|
||
# Import configurations and functions from modules | ||
from config import openai_api_key, model_id, model_path | ||
from load_model import load_model | ||
#from extract_entities import extract_entities | ||
|
||
# Configure the OpenAI client with the key loaded from the environment (see config.py).
openai.api_key = openai_api_key
# Load the Whisper model once at import time; True selects the
# whisper_timestamped variant (see load_model.py).
model = load_model(model_id, model_path, True)
|
||
|
||
#transcripe the audio to its original language | ||
def process_all_steps(audio):
    """Run the full pipeline on one audio file.

    Translates the audio straight to English with Whisper, then summarizes
    the English text with OpenAI. Returns [translation, summary].
    """
    english_text = translate_with_whisper(audio)
    summary = summarize_using_openai(english_text)
    return [english_text, summary]
|
||
def transcribe(audio):
    """Transcribe *audio* in its original language with the module-level Whisper model."""
    logger.info("Started transciption")
    # fp16=False keeps decoding in fp32, which is required on CPU.
    return model.transcribe(audio, fp16=False)["text"]
|
||
def transcribe_with_whisper_ts(audio_file):
    """Translate *audio_file* to English with word-level timestamps.

    Uses whisper_timestamped with the decoding options recommended in its
    documentation (beam search plus a temperature fallback schedule).
    Returns the full result dict ("text", "segments", ...).
    """
    audio = whisper_ts.load_audio(audio_file)
    logger.info("Started transciption through whishper")
    # Decoding options suggested by the whisper_timestamped README:
    # beam search with temperature fallback for robustness.
    options = dict(beam_size=5, best_of=5, temperature=(0.0, 0.2, 0.4, 0.6, 0.8, 1.0))
    translate_options = dict(task="translate", **options)
    # Timing printouts bracket the (slow) transcription call.
    print(datetime.datetime.now())
    # vad=True and trust_whisper_timestamps=False improve timestamp accuracy;
    # condition_on_previous_text=False reduces hallucinated repetition.
    result = whisper_ts.transcribe_timestamped(model,audio,condition_on_previous_text=False,vad=True,trust_whisper_timestamps=False,**translate_options)
    print(datetime.datetime.now())
    #result = whisper_ts.transcribe(model, audio)
    return result
|
||
|
||
#translate the audio file to English language using whisper model | ||
def translate_with_whisper(audio): | ||
logger.info("Started transciption through whishper") | ||
options = dict(beam_size=5, best_of=5) | ||
translate_options = dict(task="translate", **options) | ||
result = model.transcribe(audio,**translate_options) | ||
return result["text"] | ||
|
||
#translate the text from transciption to English language | ||
def translate_with_ollama(text): | ||
logger.info("Started transciption through llama") | ||
response = ollama.generate(model= "llama3.2", prompt = "Translate the following text to English:"+text+"\n SUMMARY:\n") | ||
translation = response["response"] | ||
return translation | ||
|
||
#Using Ollama and llama3.1 modle, summarize the English translation | ||
def summarize_using_llama(text):
    """Summarize English *text* as bullet points using the local llama3.2 model via Ollama.

    Returns the raw model response string.
    """
    # Fixed prompt typos ("conversion inbullet" -> "conversation in bullet"):
    # the garbled wording weakened the instruction sent to the model.
    response = ollama.generate(
        model="llama3.2",
        prompt="Provide highlights of conversation in bullet points without pretext:" + text + "\n \n",
    )
    summary = response["response"]
    return summary
|
||
|
||
#Using openaie, summarize the English translation | ||
def summarize_using_openai(text): | ||
logger.info("Started summarization") | ||
prompt = "Summarize the following text: " +text | ||
try: | ||
response = openai.chat.completions.create( | ||
model="gpt-4o", | ||
messages=[ | ||
{"role": "system", "content": "You are a helpful assistant that extracts information from Indian multilingual text."}, | ||
{"role": "user", "content": prompt} | ||
], | ||
max_tokens=500 | ||
) | ||
summary = response.choices[0].message.content | ||
except Exception as e: | ||
logger.error(e) | ||
summary = "Unable to exract summary" | ||
return summary | ||
|
||
# Sample English text (an interview with an athlete) used for offline testing
# of the summarization step without running transcription first.
text="It's like a dialogue in a movie. They don't believe if you say you are going to win. They believe only if you say you have won. It's very difficult to get a name in India. If you win in sports, everyone will be able to say the name you have won. How is this situation for you? We have been training for 4 years. In 4 years, I have been to many national meet like this. But at that time, I have only won bronze, silver and gold. In this meet, I have won my first gold. For this, We worked very hard for a year and achieved this success. Superb! How did your journey start? Tell us about your family. I don't have a father in my family. I have only my mother. My mother is a farmer. I have two sisters. When I was in 8th or 9th grade, I ran a school sports relay. At that time, my school PD sir took me to the district division. I won medals in that. But I didn't win medals even at the state level. At that time, I was not doing any training. I went to Koko training after coming to college. I was in Koko training for 3 years. After that, I came to Athletics school. My coach's name is Manikandan Arumugam. I trained with her for 4 years and now I am fully involved in Athletics. Superb! Superb! They say one important thing. No matter what sport you play, if you get angry, you can't win. You were talking about your coach, Manikandan Arumugam, correct? You tell about him. He is also an Athlete Sir. He is working in Southern Railway. He has been medalist for 10 years in National level. He has kept his rank for 10 years."

# Earlier end-to-end runs on hosted audio samples, kept for reference.
#Marathi audio
#trasnslation = transcribe_with_whisper_ts("https://utfs.io/f/9ed82ee5-4dd9-4eeb-8f77-9a1dfbf35bc2-gfje9d.mp3")
#Tamil audio
#trasnslation = transcribe_with_whisper_ts("https://utfs.io/f/3c714bc6-f728-48b6-813c-a77a8d281a7e-gfje9d.mp3")
#trasnslation = transcribe_with_whisper_ts("https://utfs.io/f/d3c3c169-02b7-4b70-a3e2-8f62514f5433-gfje9d.mp3")
#out = summarize_using_llama(trasnslation["text"])
# Summarize the canned sample above and print the result.
out = summarize_using_llama(text)
# NOTE(review): the triple-quoted block below is dead code (a bare string
# expression) that reshaped the timestamped result; left as-is.
'''segs = []
seg = {}
segments = trasnslation["segments"]
for segment in segments:
    seg = {"start":segment["start"],"end":segment["end"],"text":segment["text"]}
    segs.append(seg)
result = {"text":trasnslation["text"], "segments": segs, "summary":out}
print(result)'''
print(out)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
"""Central configuration: reads API key and Whisper model settings from .env."""
import os
from dotenv import load_dotenv

# Populate os.environ from a local .env file, if present.
load_dotenv()

# OpenAI API key; None if OPENAI_KEY is not set.
openai_api_key = os.getenv("OPENAI_KEY")
#model_id = os.getenv('MODEL_ID', 'large-v3')
# Whisper model name (e.g. 'large-v3') and local download directory.
model_id = os.getenv('MODEL_ID')
model_path = os.getenv('MODEL_PATH')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
"""CLI helper: download a Whisper model to a local directory.

Usage: python download_model.py <whisper_model_id> <whisper_model_output_path>
"""
import sys

# Check if two command-line arguments are provided
if len(sys.argv) !=3:
    print("Usage: python download_model.py <whisper_model_id> <whisper_model_output_path>")
    print("Example: python download_model.py large-v3 /workspace/whisper-model/")
    sys.exit(1)

# Check if the model path ends with '/'
model_path = sys.argv[2]
if not model_path.endswith('/'):
    model_path += '/'

# Download the model in a local directory - the first CLI argument selects the version
import whisper
model_id = sys.argv[1]
model_path = f'{model_path}{model_id}'
# Available models = ['tiny.en', 'tiny', 'base.en', 'base', 'small.en', 'small', 'medium.en', 'medium', 'large-v1', 'large-v2', 'large-v3', 'large']

# whisper.load_model() fetches the named model, caching it under download_root.
try:
    model = whisper.load_model(model_id, download_root=model_path)
    print("Model has successfully been downloaded")
except Exception as e:
    print(f"Error downloading the model: {e}")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
"""Download the large-v3 Whisper model into ./whisper_model (runs on import)."""
import whisper
import os

# Hard-coded target: large-v3 under a local 'whisper_model' directory.
model_path = "whisper_model"
model_id = 'large-v3'
os.makedirs(model_path, exist_ok=True)

# Download model (network side effect at module import time).
model = whisper.load_model(model_id, download_root=model_path)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
#import openai | ||
from config import openai_api_key | ||
|
||
#openai.api_key = openai_api_key | ||
|
||
def extract_entities(text):
    """Extract personal entities (name, pin code, phone, gender, occupation,
    address) from Indian multilingual *text* using GPT-4o.

    Returns a tuple (formatted_summary, formatted_detailed_transcription);
    on API failure returns an error message in place of the summary.
    """
    # Fix: the module-level `import openai` and API-key assignment are
    # commented out, so this function previously raised NameError when called.
    # Import and configure the client locally instead.
    import openai
    openai.api_key = openai_api_key

    # NOTE(review): the Text/Summary/Detailed-view section is duplicated in
    # this prompt — looks like a copy-paste slip, but left unchanged here
    # since altering it changes the request sent to the model; confirm intent.
    prompt = f"""
    The following entities are present in Indian Languages.
    Please extract the following entities from the text:
    Name, pin code, phone number, gender, occupation, and address.
    Provide the summary of the text in exact below format:
    Name is ......., pin code is ........, phone number is ........, gender is ........, occupation is ........, Address is ............ .
    Text: "{text}"
    Summary:
    Detailed view:
    Original language: {text}
    Text: "{text}"
    Summary:
    Detailed view:
    Original language: {text}
    """

    try:
        response = openai.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": "You are a helpful assistant that extracts information from Indian multilingual text."},
                {"role": "user", "content": prompt}
            ],
            max_tokens=500
        )
        response_text = response.choices[0].message.content
    except Exception as e:
        return f"Error during OpenAI API call: {e}", "Detailed view not available."

    # Process the response to extract summary and detailed transcription
    if "Detailed view:" in response_text:
        parts = response_text.split("Detailed view:")
        summary_part = parts[0].strip()
        detailed_transcription_part = parts[1].strip()
    else:
        summary_part = response_text.strip()
        detailed_transcription_part = "Detailed view not provided."

    # Format the summary and detailed transcription
    formatted_summary = format_summary(summary_part)
    formatted_detailed_transcription = format_detailed_transcription(detailed_transcription_part)

    return formatted_summary, formatted_detailed_transcription
|
||
def format_summary(summary):
    """Return the text that follows a "Summary:" header line, flattened.

    Lines before the first "Summary:" line are discarded; lines after it are
    stripped and joined with single spaces. Returns "" if no header is found.
    """
    collected = []
    past_header = False
    for raw_line in summary.split('\n'):
        stripped = raw_line.strip()
        if stripped.startswith("Summary:"):
            # Header lines themselves are never included in the output.
            past_header = True
        elif past_header:
            collected.append(stripped)
    return ' '.join(collected)
|
||
def format_detailed_transcription(detailed_transcription):
    """Normalize a detailed transcription: strip each line, drop blanks,
    and rejoin the remainder with single newlines."""
    stripped_lines = (ln.strip() for ln in detailed_transcription.split('\n'))
    return '\n'.join(filter(None, stripped_lines))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
import torch | ||
import whisper | ||
import whisper_timestamped | ||
|
||
#load the whisper model from net if it isn't stored locally | ||
def load_model(model_id, model_path, is_ts):
    """Load a Whisper model from the local cache (downloading if absent).

    is_ts selects the whisper_timestamped loader instead of plain whisper.
    Prefers CUDA when available, otherwise falls back to CPU.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    loader = whisper_timestamped.load_model if is_ts else whisper.load_model
    model = loader(model_id, device=device, download_root=model_path)
    print(
        f"Model will be run on {device}\n"
        f"Model is {'multilingual' if model.is_multilingual else 'English-only'} "
    )
    return model
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
"""Shared logger setup; other modules do `from logger import logger`."""
from os import path
import logging
import logging.config

# NOTE(review): the fileConfig lines below are commented out, so the
# 'simpleExample' logger has no handlers attached here — records fall through
# to the root logger's defaults. Confirm whether log.config should be loaded.
#log_file_path = path.join(path.dirname(path.abspath(__file__)), 'log.config')
#logging.config.fileConfig(log_file_path)

# create logger
logger = logging.getLogger('simpleExample')

# Demo messages at each level, emitted once at import time.
logger.debug('debug message')
logger.info('info message')
logger.warning('warn message')
logger.error('error message')
logger.critical('critical message')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
# Logging configuration in logging.config.fileConfig format.
# Declares a root logger and the 'simpleExample' logger used by logger.py,
# both writing DEBUG-and-above records to stdout.
[loggers]
keys=root,simpleExample

[handlers]
keys=consoleHandler

[formatters]
keys=simpleFormatter

[logger_root]
level=DEBUG
handlers=consoleHandler

# propagate=0 stops records from also reaching the root logger's handlers.
[logger_simpleExample]
level=DEBUG
handlers=consoleHandler
qualname=simpleExample
propagate=0

[handler_consoleHandler]
class=StreamHandler
level=DEBUG
formatter=simpleFormatter
args=(sys.stdout,)

[formatter_simpleFormatter]
format=%(asctime)s - %(name)s - %(levelname)s - %(message)s
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.