40 lines
1.1 KiB
Python
40 lines
1.1 KiB
Python
|
#!/usr/bin/python
|
||
|
|
||
|
import sys
|
||
|
import base64
|
||
|
import googleapiclient.discovery
|
||
|
import os
|
||
|
from natsort import natsorted
|
||
|
|
||
|
# Recognition settings sent with every request.
AUDIO_ENCODING = "LINEAR16"  # raw 16-bit signed LE samples
SAMPLE_RATE_HERTZ = 44100  # 44.1 kHz; should match the input audio
LANGUAGE_CODE = "pl-PL"  # a BCP-47 language tag


def encode_audio(speech_file):
    """Return the bytes of *speech_file* as base64 text.

    The audio is base64 encoded so it can be embedded in the JSON request.
    """
    with open(speech_file, 'rb') as speech:
        # Base64 output is pure ASCII, so decoding it to text is lossless.
        return base64.b64encode(speech.read()).decode('ascii')


def build_request_body(speech_content):
    """Return the ``recognize`` request body for base64 *speech_content*."""
    return {
        "config": {
            "encoding": AUDIO_ENCODING,
            "sampleRateHertz": SAMPLE_RATE_HERTZ,
            "languageCode": LANGUAGE_CODE,
        },
        "audio": {
            "content": speech_content,
        },
    }


def extract_transcript(response):
    """Return the top transcripts of all results in *response*, concatenated.

    A response without a ``results`` key (for example when nothing was
    recognized) and results without alternatives contribute nothing, so
    the function returns ``''`` instead of raising.
    """
    parts = []
    for result in response.get('results', []):
        alternatives = result.get('alternatives')
        if alternatives:
            # Only the first alternative of each result is used.
            parts.append(alternatives[0].get('transcript', ''))
    return ''.join(parts)


def transcribe_tree(root):
    """Print one line of recognized text per file found under *root*.

    Directories are visited in ``os.walk`` order; the files inside each
    directory are processed in natural sort order.
    """
    # The client does not depend on the file, so build it once up front.
    service = googleapiclient.discovery.build('speech', 'v1')
    for dirname, _dirnames, filenames in os.walk(root):
        for filename in natsorted(filenames):
            speech_file = os.path.join(dirname, filename)
            service_request = service.speech().recognize(
                body=build_request_body(encode_audio(speech_file)))
            response = service_request.execute()
            print(extract_transcript(response))


def main(argv):
    """Run the script with *argv* (``sys.argv`` style); return the exit code."""
    if len(argv) < 2:
        sys.stderr.write("usage: %s AUDIO_DIRECTORY\n" % argv[0])
        return 2
    transcribe_tree(argv[1])
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
|