spacje
This commit is contained in:
parent
2e6bd33357
commit
fc5486561a
@ -5,7 +5,6 @@ from pymongo import MongoClient
|
|||||||
import argparse
|
import argparse
|
||||||
from google.protobuf.json_format import MessageToDict
|
from google.protobuf.json_format import MessageToDict
|
||||||
from mongo.helpers import get_mongo_collection
|
from mongo.helpers import get_mongo_collection
|
||||||
from bson.objectid import ObjectId
|
|
||||||
import datetime
|
import datetime
|
||||||
import time
|
import time
|
||||||
import concurrent.futures
|
import concurrent.futures
|
||||||
@ -36,7 +35,7 @@ def run_reco(uri):
|
|||||||
|
|
||||||
if len(recoDict) != 0:
|
if len(recoDict) != 0:
|
||||||
words = recoDict["results"][-1]["alternatives"][0]["words"]
|
words = recoDict["results"][-1]["alternatives"][0]["words"]
|
||||||
transcript = "".join([trans["alternatives"][0]["transcript"] for trans in recoDict["results"][:-1]])
|
transcript = " ".join([trans["alternatives"][0]["transcript"] for trans in recoDict["results"][:-1]])
|
||||||
elif len(recoDict) == 0:
|
elif len(recoDict) == 0:
|
||||||
words = {}
|
words = {}
|
||||||
transcript = "film niemy"
|
transcript = "film niemy"
|
||||||
|
6
src/temp
6
src/temp
@ -6,7 +6,6 @@ def get_mongo_collection(colName,dbName,uri):
|
|||||||
client = MongoClient(uri,maxPoolSize=512)
|
client = MongoClient(uri,maxPoolSize=512)
|
||||||
db = client[dbName]
|
db = client[dbName]
|
||||||
col = db[colName]
|
col = db[colName]
|
||||||
|
|
||||||
return col
|
return col
|
||||||
|
|
||||||
col = get_mongo_collection(colName,dbName,mongoUri)
|
col = get_mongo_collection(colName,dbName,mongoUri)
|
||||||
@ -15,9 +14,11 @@ col.aggregate(pipeline)
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
pipeline = [{'$match': {'gcTextReco': {'$exists': True}}}, {'$project': {'gcTextReco.words': 1}}, {'$limit': 10}]
|
pipeline = [{'$match': {'$and': [{'source':'kronikiprl'},{'gcTextReco': {'$exists': True}}]}}, {'$project': {'gcTextReco.words': 1}}]
|
||||||
|
|
||||||
words = col.aggregate(pipeline)
|
words = col.aggregate(pipeline)
|
||||||
|
words_dict={}
|
||||||
|
|
||||||
for w in words:
|
for w in words:
|
||||||
ww = w['gcTextReco']['words']
|
ww = w['gcTextReco']['words']
|
||||||
words_dict[w['_id']] = " ".join([ e['word'] for e in ww ])
|
words_dict[w['_id']] = " ".join([ e['word'] for e in ww ])
|
||||||
@ -25,7 +26,6 @@ for w in words:
|
|||||||
|
|
||||||
for key, value in words_dict.items():
|
for key, value in words_dict.items():
|
||||||
try:
|
try:
|
||||||
col.update_one(
|
|
||||||
col.update_one({"_id": key},{"$set":{"gcTextReco.transcript_fix":value}})
|
col.update_one({"_id": key},{"$set":{"gcTextReco.transcript_fix":value}})
|
||||||
|
|
||||||
except Exception as e: print(e)
|
except Exception as e: print(e)
|
||||||
|
Loading…
Reference in New Issue
Block a user