-
Notifications
You must be signed in to change notification settings - Fork 0
/
sP2Tx.py
69 lines (50 loc) · 1.91 KB
/
sP2Tx.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import requests
import time
from api_secrets import API_KEY_ASSEMBLYAI
# AssemblyAI v2 REST endpoints.
upload_endpoint = 'https://api.assemblyai.com/v2/upload'
transcript_endpoint = 'https://api.assemblyai.com/v2/transcript'

# Credential-only headers, for requests whose body is not JSON.
headers_auth_only = dict(authorization=API_KEY_ASSEMBLYAI)
# Same credential plus an explicit JSON content type.
headers = {**headers_auth_only, "content-type": "application/json"}

# Number of bytes read from disk per upload chunk (5 MiB = 5_242_880).
CHUNK_SIZE = 5 * 1024 * 1024
def upload(filename):
    """Upload a local file to the upload endpoint and return its hosted URL.

    The file is read in ``CHUNK_SIZE``-byte pieces through a generator, so
    the whole file is never held in memory at once.

    Args:
        filename: Path of the file to upload; it is opened in binary mode.

    Returns:
        The ``upload_url`` value from the JSON response body.

    Raises:
        requests.HTTPError: If the server responds with a 4xx/5xx status.
        OSError: If the file cannot be opened or read.
    """

    def read_file(path):
        # Passing a generator as ``data`` lets requests stream the body
        # piece by piece instead of buffering it.
        with open(path, 'rb') as f:
            # iter(callable, sentinel) stops at the empty bytes object that
            # read() returns at end of file.
            yield from iter(lambda: f.read(CHUNK_SIZE), b'')

    upload_response = requests.post(
        upload_endpoint,
        headers=headers_auth_only,
        data=read_file(filename),
    )
    # Surface a failed upload as an HTTP error rather than as a KeyError on
    # the missing 'upload_url' key below.
    upload_response.raise_for_status()
    return upload_response.json()['upload_url']
def transcribe(audio_url):
    """Submit an audio URL for transcription and return the transcript id.

    Args:
        audio_url: URL of the audio to transcribe, sent as the ``audio_url``
            field of the JSON request body.

    Returns:
        The ``id`` value from the JSON response body.

    Raises:
        requests.HTTPError: If the server responds with a 4xx/5xx status.
    """
    transcript_response = requests.post(
        transcript_endpoint,
        json={'audio_url': audio_url},
        headers=headers,
    )
    # A rejected request has no 'id' in its body; fail with the HTTP status
    # instead of an opaque KeyError.
    transcript_response.raise_for_status()
    return transcript_response.json()['id']
def poll(transcript_id):
    """Fetch the current JSON state of a transcript.

    Args:
        transcript_id: Identifier appended to ``transcript_endpoint`` to
            build the request URL.

    Returns:
        The decoded JSON response body.

    Raises:
        requests.HTTPError: If the server responds with a 4xx/5xx status.
    """
    polling_endpoint = f'{transcript_endpoint}/{transcript_id}'
    polling_response = requests.get(polling_endpoint, headers=headers)
    # Do not hand an error body back to the caller as if it were a transcript.
    polling_response.raise_for_status()
    return polling_response.json()
def get_transcription_result_url(url, poll_interval=30):
    """Start a transcription of ``url`` and block until it finishes.

    The transcript is polled repeatedly. The loop only ends when the
    reported status is ``'completed'`` or ``'error'``; there is no upper
    bound on the total wait.

    Args:
        url: Audio URL passed to ``transcribe``.
        poll_interval: Seconds to sleep between status checks. Defaults to
            30, the previously hard-coded value.

    Returns:
        A ``(data, error)`` tuple. ``data`` is the last polled JSON payload
        in both cases. ``error`` is ``None`` when the status is
        ``'completed'`` and ``data['error']`` when the status is ``'error'``.
    """
    transcribe_id = transcribe(url)
    while True:
        data = poll(transcribe_id)
        status = data['status']
        if status == 'completed':
            return data, None
        if status == 'error':
            return data, data['error']

        # Any other status means the job is still running: wait and retry.
        print(f"waiting for {poll_interval} seconds")
        time.sleep(poll_interval)
def save_transcript(url, title):
    """Transcribe the audio at ``url`` and save the text to ``<title>.txt``.

    On success the transcript text is written to the file and a confirmation
    is printed. On failure the error is printed and no file is written.

    Args:
        url: Audio URL passed to ``get_transcription_result_url``.
        title: Output file name without extension; ``'.txt'`` is appended.
    """
    data, error = get_transcription_result_url(url)

    # The payload is returned even when the job failed, so a truthy ``data``
    # does not mean success. Check the error first; otherwise the error
    # branch can never run and a failed job reaches the file write.
    if error:
        print("Error!!!", error)
    elif data:
        filename = title + '.txt'
        # Explicit UTF-8 so non-ASCII transcript text does not depend on the
        # platform's default encoding.
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(data['text'])
        print('Transcript saved')