spacje
This commit is contained in:
parent
2e6bd33357
commit
fc5486561a
@ -5,7 +5,6 @@ from pymongo import MongoClient
|
||||
import argparse
|
||||
from google.protobuf.json_format import MessageToDict
|
||||
from mongo.helpers import get_mongo_collection
|
||||
from bson.objectid import ObjectId
|
||||
import datetime
|
||||
import time
|
||||
import concurrent.futures
|
||||
|
6
src/temp
6
src/temp
@ -6,7 +6,6 @@ def get_mongo_collection(colName,dbName,uri):
|
||||
client = MongoClient(uri,maxPoolSize=512)
|
||||
db = client[dbName]
|
||||
col = db[colName]
|
||||
|
||||
return col
|
||||
|
||||
col = get_mongo_collection(colName,dbName,mongoUri)
|
||||
@ -15,9 +14,11 @@ col.aggregate(pipeline)
|
||||
|
||||
|
||||
|
||||
pipeline = [{'$match': {'gcTextReco': {'$exists': True}}}, {'$project': {'gcTextReco.words': 1}}, {'$limit': 10}]
|
||||
pipeline = [{'$match': {'$and': [{'source':'kronikiprl'},{'gcTextReco': {'$exists': True}}]}}, {'$project': {'gcTextReco.words': 1}}]
|
||||
|
||||
words = col.aggregate(pipeline)
|
||||
words_dict={}
|
||||
|
||||
for w in words:
|
||||
ww = w['gcTextReco']['words']
|
||||
words_dict[w['_id']] = " ".join([ e['word'] for e in ww ])
|
||||
@ -25,7 +26,6 @@ for w in words:
|
||||
|
||||
for key, value in words_dict.items():
|
||||
try:
|
||||
col.update_one(
|
||||
col.update_one({"_id": key},{"$set":{"gcTextReco.transcript_fix":value}})
|
||||
|
||||
except Exception as e: print(e)
|
||||
|
Loading…
Reference in New Issue
Block a user