spacje

2020-08-20 22:25:37 +02:00 · 2020-08-20 22:25:37 +02:00 · fc5486561a
commit fc5486561a
parent 2e6bd33357
2 changed files with 5 additions and 6 deletions
--- a/src/reco.py
+++ b/src/reco.py
@@ -5,7 +5,6 @@ from pymongo import MongoClient
 import argparse
 from google.protobuf.json_format import MessageToDict
 from mongo.helpers import get_mongo_collection
 from bson.objectid import ObjectId
 import datetime
 import time
 import concurrent.futures
@@ -36,7 +35,7 @@ def run_reco(uri):
    if len(recoDict) != 0:
        words = recoDict["results"][-1]["alternatives"][0]["words"]
-        transcript = "".join([trans["alternatives"][0]["transcript"] for trans in recoDict["results"][:-1]])
+        transcript = " ".join([trans["alternatives"][0]["transcript"] for trans in recoDict["results"][:-1]])
    elif len(recoDict) == 0:
        words = {}
        transcript = "film niemy"
--- a/src/temp
+++ b/src/temp
@@ -6,7 +6,6 @@ def get_mongo_collection(colName,dbName,uri):
    client = MongoClient(uri,maxPoolSize=512)
    db = client[dbName]
    col = db[colName]
    return col
 col = get_mongo_collection(colName,dbName,mongoUri)
@@ -15,9 +14,11 @@ col.aggregate(pipeline)
-pipeline = [{'$match': {'gcTextReco': {'$exists': True}}}, {'$project': {'gcTextReco.words': 1}}, {'$limit': 10}]
+pipeline = [{'$match': {'$and': [{'source':'kronikiprl'},{'gcTextReco': {'$exists': True}}]}}, {'$project': {'gcTextReco.words': 1}}]
 words = col.aggregate(pipeline)
 words_dict={}
 for w in words:
 	ww = w['gcTextReco']['words']
 	words_dict[w['_id']] = " ".join([ e['word'] for e in ww ])
@@ -25,7 +26,6 @@ for w in words:
 for key, value in words_dict.items():
 	try:
        	col.update_one(
 		col.update_one({"_id": key},{"$set":{"gcTextReco.transcript_fix":value}})
 except Exception as e: print(e)