#!/usr/bin/python
"""Transcribe every audio file under a directory with Google Cloud Speech.

Usage: pass the directory to scan as the first command-line argument.
The directory is walked recursively; within each directory the files are
processed in natural sort order and one transcript line is printed per file.
"""

import base64
import os
import sys


def encode_audio(path):
    """Return the contents of the file at *path* as base64-encoded text.

    The recognize request carries the audio inline in its JSON body, so the
    raw bytes are base64 encoded and then decoded to a str.
    """
    with open(path, 'rb') as speech:
        return base64.b64encode(speech.read()).decode()


def build_recognize_body(speech_content):
    """Return the JSON body of a ``speech.recognize`` request.

    *speech_content* is the base64-encoded audio, as produced by
    ``encode_audio``.
    """
    return {
        "config": {
            "encoding": "LINEAR16",  # raw 16-bit signed LE samples
            # 44.1 kHz; this has to match the sample rate of the audio files.
            "sampleRateHertz": 44100,
            "languageCode": "pl-PL",  # a BCP-47 language tag
        },
        "audio": {
            "content": speech_content,
        },
    }


def extract_transcript(response):
    """Concatenate the first alternative's transcript of every result.

    Returns an empty string when *response* has no ``results`` key. A result
    without alternatives, or whose first alternative has no ``transcript``,
    contributes nothing instead of raising.
    """
    parts = []
    for result in response.get('results', []):
        alternatives = result.get('alternatives') or [{}]
        parts.append(alternatives[0].get('transcript', ''))
    return ''.join(parts)


def main(argv=None):
    """Transcribe all files under ``argv[1]`` and print one line per file.

    *argv* defaults to ``sys.argv``. Returns the process exit status:
    0 on success, 2 when the directory argument is missing.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        prog = os.path.basename(argv[0]) if argv else 'transcribe'
        sys.stderr.write('usage: %s AUDIO_DIRECTORY\n' % prog)
        return 2

    # Third-party modules are imported here rather than at module level so
    # the helpers above stay importable when these packages are absent.
    import googleapiclient.discovery
    from natsort import natsorted

    # The service object does not depend on the file being processed, so it
    # is built once instead of once per audio file.
    service = googleapiclient.discovery.build('speech', 'v1')

    for dirname, _dirnames, filenames in os.walk(argv[1]):
        for filename in natsorted(filenames):
            speech_file = os.path.join(dirname, filename)
            service_request = service.speech().recognize(
                body=build_recognize_body(encode_audio(speech_file)))
            response = service_request.execute()
            print(extract_transcript(response))
    return 0


if __name__ == '__main__':
    sys.exit(main())