40 lines
1.1 KiB
Python
40 lines
1.1 KiB
Python
#!/usr/bin/python
|
|
|
|
import sys
|
|
import base64
|
|
import googleapiclient.discovery
|
|
import os
|
|
from natsort import natsorted
|
|
|
|
# Transcribe every file found under the directory given as the first
# command-line argument and print one line of recognized text per file.
if len(sys.argv) < 2:
    # Fail with a readable usage message instead of a bare IndexError.
    sys.exit('usage: {} AUDIO_DIR'.format(sys.argv[0]))

# The API client does not depend on the file being processed, so it is built
# once here rather than once per audio file.
service = googleapiclient.discovery.build('speech', 'v1')

for dirname, _dirnames, filenames in os.walk(sys.argv[1]):
    # natsorted() decides the order in which the files of one directory are
    # submitted (and therefore the order of the printed transcripts).
    for filename in natsorted(filenames):
        speech_file = os.path.join(dirname, filename)
        with open(speech_file, 'rb') as speech:
            # Base64 encode the binary audio file for inclusion in the JSON
            # request.
            speech_content = base64.b64encode(speech.read()).decode('ascii')

        # Construct the request
        service_request = service.speech().recognize(
            body={
                "config": {
                    "encoding": "LINEAR16",  # raw 16-bit signed LE samples
                    "sampleRateHertz": 44100,  # 44.1 kHz
                    "languageCode": "pl-PL",  # a BCP-47 language tag
                },
                "audio": {
                    "content": speech_content
                }
            })

        response = service_request.execute()

        # The response is indexed like a dict. Use .get() so that a response
        # without a 'results' key (empty, or carrying only other fields)
        # yields an empty transcript instead of raising KeyError.
        recognized_text = ''.join(
            result['alternatives'][0]['transcript']
            for result in response.get('results', [])
        )
        print(recognized_text)
|