some tests, looks good
This commit is contained in:
parent
bed44957c1
commit
fc4d521933
95
src/reco.py
Normal file
95
src/reco.py
Normal file
@ -0,0 +1,95 @@
|
||||
#from google.cloud import speech_v1
|
||||
from google.cloud import speech_v1p1beta1
|
||||
from google.cloud.speech_v1p1beta1 import enums
|
||||
from google.cloud.speech_v1p1beta1 import types
|
||||
from pymongo import MongoClient
|
||||
import json
|
||||
import argparse
|
||||
from google.protobuf.json_format import MessageToJson,MessageToDict
|
||||
from storageUpload import getMongoCollection
|
||||
from bson.objectid import ObjectId
|
||||
import datetime
|
||||
|
||||
|
||||
def main(args):
    """Transcribe one hard-coded recording and store the result in MongoDB.

    Runs speech recognition on a WAV file in Cloud Storage, converts the
    response to a plain dict, and writes the transcript, the word list and
    a timestamp into the ``gcTextReco`` sub-document of the matching
    ``moviesMeta`` document.

    Args:
        args: Parsed command-line arguments. Currently unused.
    """
    # The same id names both the WAV object in the bucket and the MongoDB
    # document that receives the transcript, so keep it in one place.
    doc_id = "5df3e63d4c0402698d7837f3"
    uri = "gs://archspeechreco/wave/" + doc_id + ".wav"

    reco = recognize(uri)
    recoDict = MessageToDict(reco)

    # MessageToDict leaves out empty repeated fields by default, so a
    # recording with no recognized speech has no "results" key at all.
    results = recoDict.get("results")
    if not results:
        print("no recognition results for " + uri + ", mongo update skipped")
        return

    # Words come from the final result only, the transcript from every
    # result before it.
    # NOTE(review): presumably relies on diarization placing the full
    # speaker-tagged word list in the last result - confirm against the API.
    words = results[-1]["alternatives"][0]["words"]
    transcript = "".join(
        trans["alternatives"][0]["transcript"] for trans in results[:-1]
    )

    # NOTE(review): credentials are embedded in source control here; they
    # should be moved to configuration or the environment.
    mongoUri = "mongodb://speechRecoUser:speech!reco@localhost/archSpeechReco"
    dbName = "archSpeechReco"
    colName = "moviesMeta"
    col = getMongoCollection(colName, dbName, mongoUri)

    now = datetime.datetime.now()
    try:
        col.update_one(
            {"_id": ObjectId(doc_id)},
            {
                "$set": {
                    "gcTextReco.transcript": transcript,
                    "gcTextReco.words": words,
                    "gcTextReco.transcripted": now.strftime("%Y-%m-%d %H:%M:%S"),
                }
            },
        )
    except Exception as e:
        # Best effort: report the failure and carry on instead of aborting.
        print(e)
    else:
        print("mongo update OK")
|
||||
|
||||
def recognize(storage_uri):
    """Transcribe a long audio file from Cloud Storage asynchronously.

    Starts a long-running recognition request with speaker diarization
    enabled and waits for it to finish.

    Args:
        storage_uri: URI of the audio file in Cloud Storage,
            e.g. gs://[BUCKET]/[FILE]

    Returns:
        The value returned by the long-running operation's ``result()``.
    """
    client = speech_v1p1beta1.SpeechClient()

    # Sample rate in Hertz of the audio data sent
    sample_rate_hertz = 44100

    # The language of the supplied audio
    language_code = "pl-PL"

    # Encoding of audio data sent. This sample sets this explicitly.
    # This field is optional for FLAC and WAV audio formats.
    encoding = enums.RecognitionConfig.AudioEncoding.LINEAR16

    enable_speaker_diarization = True

    d_config = types.SpeakerDiarizationConfig(
        enable_speaker_diarization=enable_speaker_diarization
    )
    # Build the request from the named settings above so that each value
    # is defined in exactly one place.
    config = types.RecognitionConfig(
        encoding=encoding,
        sample_rate_hertz=sample_rate_hertz,
        language_code=language_code,
        diarization_config=d_config,
    )

    audio = {"uri": storage_uri}

    operation = client.long_running_recognize(config, audio)

    print(u"Waiting for operation to complete...")
    # No timeout is passed to result().
    response = operation.result()

    return response
|
||||
|
||||
if __name__ == '__main__':
    # Build the command-line interface, then pass the parsed options to main().
    parser = argparse.ArgumentParser(
        description='Google Cloud speech2text API client',
    )
    parser.add_argument(
        "--format",
        default='mp4',
        help="format to fetch and upload, [mp4, wav]",
    )
    args = parser.parse_args()
    main(args)
|
Loading…
Reference in New Issue
Block a user