archSpeechReco/src/temp

114 lines
9.3 KiB
Plaintext

import os

# Connection settings for the MongoDB instance that stores the movie metadata.
# The URI embeds credentials, so it can be overridden through the
# ARCHSPEECHRECO_MONGO_URI environment variable instead of editing this file;
# the literal below is only the fallback used when the variable is not set.
mongoUri = os.environ.get(
    "ARCHSPEECHRECO_MONGO_URI",
    "mongodb://speechRecoUser:speech!reco@localhost/archSpeechReco",
)
dbName = "archSpeechReco"  # database name
colName = "moviesMeta"  # collection name
def get_mongo_collection(colName, dbName, uri):
    """Return a handle to collection ``colName`` in database ``dbName``.

    A new ``MongoClient`` is created for ``uri`` on every call, with
    ``maxPoolSize=512``.

    Args:
        colName: Name of the collection to open.
        dbName: Name of the database that holds the collection.
        uri: Connection URI passed to ``MongoClient``.

    Returns:
        The collection object obtained as ``client[dbName][colName]``.
    """
    client = MongoClient(uri, maxPoolSize=512)
    return client[dbName][colName]
# Open the collection described by the connection settings.
col = get_mongo_collection(colName, dbName, mongoUri)

# Pick documents that have a gcTextReco field, keep only the word list of each,
# and stop after 10 documents.
pipeline = [
    {'$match': {'gcTextReco': {'$exists': True}}},
    {'$project': {'gcTextReco.words': 1}},
    {'$limit': 10},
]
words = col.aggregate(pipeline)

# Rebuild one transcript per document by joining the 'word' value of every
# entry with single spaces, keyed by the document _id.
words_dict = {}
for w in words:
    ww = w['gcTextReco']['words']
    words_dict[w['_id']] = " ".join(e['word'] for e in ww)

# Store each rebuilt transcript under gcTextReco.transcript_fix. A failing
# update is reported and the loop continues with the next document.
for key, value in words_dict.items():
    try:
        col.update_one({"_id": key}, {"$set": {"gcTextReco.transcript_fix": value}})
    except Exception as e:
        print(e)
    else:
        print(f"mongo update OK {key}")
// mongo shell: three successive definitions of the same export pipeline.
// Each `var pipeline` assignment replaces the previous one, so when these
// lines are executed in order only the last definition reaches aggregate().
//
// Variant 1: documents that have gcTextReco are projected to durationStart,
// wid, creator, originalDate, contents, url, title and durationEnd, and the
// result is written to the "export" collection by $out.
// NOTE(review): parseInt(...) is a JavaScript call inside the literal, so it
// appears to be evaluated by the shell on the {"$substr": ...} object itself
// rather than by the server on the substring value - confirm what "wid"
// actually contains with this variant.
var pipeline = [{"$match":{"gcTextReco": {"$exists": true}}}, {"$project": {"_id":0, "durationStart": { "$concat": ["$description.date", "T00:00:00Z"] }, "wid": {"$toString": parseInt({"$substr": [ {"$toString": "$_id"}, 20, -1]},16) }, "creator": { $ifNull: [ "$description.details.Produkcja", "null" ]}, "originalDate": "$description.date", "contents": ["$gcTextReco.transcript_fix"], "url":"$url", "title":"$title", "durationEnd": { "$concat": ["$description.date", "T23:59:59Z"] }}}, {$out: "export"} ]
// Variant 2: "wid" is prefixed with "Filmoteka_" (same parseInt(...) caveat),
// "contents" is taken from gcTextReco.transcript instead of transcript_fix,
// and the pipeline ends with $limit 2 instead of $out.
var pipeline = [{"$match":{"gcTextReco": {"$exists": true}}}, {"$project": {"_id":0, "durationStart": { "$concat": ["$description.date", "T00:00:00Z"] }, "wid":{ "$concat": ["Filmoteka_", {"$toString": parseInt({"$substr": [ {"$toString": "$_id"}, 20, -1]},16) } ] }, "creator": { $ifNull: [ "$description.details.Produkcja", "null" ]}, "originalDate": "$description.date", "contents": ["$gcTextReco.transcript"], "url":"$url", "title":"$title", "durationEnd": { "$concat": ["$description.date", "T23:59:59Z"] }}}, {$limit: 2} ]
// Variant 3: "wid" is "Filmoteka_" followed by the whole _id converted to a
// string, "contents" uses gcTextReco.transcript_fix again, and the result is
// written to the "export3" collection by $out.
var pipeline = [{"$match":{"gcTextReco": {"$exists": true}}}, {"$project": {"_id":0, "durationStart": { "$concat": ["$description.date", "T00:00:00Z"] }, "wid":{ "$concat": ["Filmoteka_", {"$toString": "$_id"} ] }, "creator": { $ifNull: [ "$description.details.Produkcja", "null" ]}, "originalDate": "$description.date", "contents": ["$gcTextReco.transcript_fix"], "url":"$url", "title":"$title", "durationEnd": { "$concat": ["$description.date", "T23:59:59Z"] }}}, {$out: "export3"} ]
// Run whichever pipeline is currently assigned against moviesMeta.
db.moviesMeta.aggregate(pipeline)
// mongo shell: pipeline that writes a sample of documents to "sample100".
// $match keeps documents whose "hash" ends in a0, b0, c0 or d0, that have
// gcsWav.location, and whose gcTextReco.transcript_fix does not match the
// empty-string regex /^$/.
// $project keeps hash and url and adds:
//   plik         - gcsWav.location from character offset 5 to the end
//                  (presumably dropping a "gs://" prefix - confirm)
//   opis         - description.desc
//   transkrypcja - gcTextReco.transcript_fix
// NOTE(review): no aggregate() call follows this definition in the visible
// lines, so it has to be run separately.
var pipeline = [ {$match: {$and: [ {"hash": /[abcd]0$/}, {"gcsWav.location": {"$exists": 1}}, {"gcTextReco.transcript_fix": {"$not": /^$/}} ] }},
{$project: {"_id":0, "hash":1, "plik":{ "$substr": ["$gcsWav.location", 5, -1]}, "url":1,"opis": "$description.desc", "transkrypcja": "$gcTextReco.transcript_fix"}},
{$out: "sample100"}
]
# Download the videos listed on the renirable user page as mp4, extract a wav
# track (-x --audio-format wav) and keep the video file (-k). After each item
# the --exec command uploads the processed file to the wave/ prefix and the
# matching <id>.mp4 to the mp4/ prefix of the archspeechreco bucket, deleting
# each local file once its upload succeeded.
# The name is always used with a "./" prefix so that an id starting with "-"
# is not parsed as an option by gsutil or rm (assumes the relative file name
# produced by --id).
youtube-dl -f mp4 -i --id -x --audio-format wav --add-metadata --write-description \
    --proxy socks5://localhost:9999/ -k \
    --exec 'f={}; gsutil cp "./$f" gs://archspeechreco/wave && rm "./$f" && gsutil cp "./${f%%.*}.mp4" gs://archspeechreco/mp4 && rm "./${f%%.*}.mp4"' \
    'https://www.youtube.com/user/renirable/videos'
# Print the titles of the videos in the playlist through the local SOCKS5 proxy.
# The URL is quoted because it contains "?", which the shell would otherwise
# treat as a glob character.
youtube-dl --proxy socks5://localhost:9999/ --get-title 'https://www.youtube.com/playlist?list=PLE6CBDC963E1806AD'
# Download the playlist as mp4, extract a wav track (-x --audio-format wav) and
# keep the video file (-k). After each item the --exec command uploads the
# processed file to the wave/ prefix and the matching <id>.mp4 to the mp4/
# prefix of the archspeechreco bucket, deleting each local file once its upload
# succeeded.
# The stem is taken from the bare file name with ${f%%.*}; the "./" prefix is
# added only afterwards, so it cannot interfere with stripping the extension,
# while still protecting ids that start with "-" from being parsed as options
# (assumes the relative file name produced by --id).
youtube-dl -f mp4 -i --id -x --audio-format wav --add-metadata --write-description \
    --proxy socks5://localhost:9999/ -k \
    --exec 'f={}; gsutil cp "./$f" gs://archspeechreco/wave && rm "./$f" && gsutil cp "./${f%%.*}.mp4" gs://archspeechreco/mp4 && rm "./${f%%.*}.mp4"' \
    'https://www.youtube.com/playlist?list=PLE6CBDC963E1806AD'
# Download the videos of the channel as mp4, extract a wav track
# (-x --audio-format wav) and keep the video file (-k). After each item the
# --exec command uploads the processed file to the wave/ prefix and the
# matching <id>.mp4 to the mp4/ prefix of the archspeechreco bucket, deleting
# each local file once its upload succeeded.
# The name is always used with a "./" prefix so that an id starting with "-"
# is not parsed as an option by gsutil or rm (assumes the relative file name
# produced by --id).
youtube-dl -f mp4 -i --id -x --audio-format wav --add-metadata --write-description \
    --proxy socks5://localhost:9999/ -k \
    --exec 'f={}; gsutil cp "./$f" gs://archspeechreco/wave && rm "./$f" && gsutil cp "./${f%%.*}.mp4" gs://archspeechreco/mp4 && rm "./${f%%.*}.mp4"' \
    'https://www.youtube.com/channel/UCy91ke1yYCZiFdnZ3vTdY_Q/videos'
# For every entry in sonda/, take the part of its name before the first "."
# and emit the matching wav object URL; gsutil reads that list from stdin (-I)
# and copies the objects into ./sonda_wave/.
for path in sonda/*; do
    [ -e "$path" ] || continue    # empty directory: the glob stays literal
    name=${path##*/}
    echo "gs://archspeechreco/wave/${name%%.*}.wav"
done | gsutil -m cp -I ./sonda_wave/
# For every file in the current directory, treat the part of its name before
# the first "." as a YouTube video id and save that video's title to
# <id>.title. Ids that already have a non-empty .title file are skipped, so
# several files sharing one id trigger a single request and a rerun only
# retries the ids whose title is still missing.
for path in *; do
    [ -e "$path" ] || continue              # empty directory: the glob stays literal
    id=${path%%.*}
    [ -s "./$id.title" ] && continue        # title already fetched
    youtube-dl -q -e --proxy socks5://localhost:9999/ "https://www.youtube.com/watch?v=$id" > "./$id.title"
done
# Fetch the title of each listed video through the local SOCKS5 proxy and save
# it to <id>.title in the current directory. The URL is quoted because it
# contains "?", which the shell would otherwise treat as a glob character.
for id in \
    -7tezSQBZhg \
    0fr0vQfZeKE \
    1FhejGVNFuI \
    3aWb4Te6F84 \
    3xj4AjWgZr0 \
    4kKl_iiMjj4 \
    56I3zYf316s \
    6oUId6Jx1OM \
    75yM0jdkBrs \
    8wg0XEIwYV4 \
    91WKUB2BXBU \
    EBhJzIF1t3M \
    GFwW49KqoW4 \
    QV0BLvTjAYg \
    S3lk1ZcrsH0 \
    U6yoI0yBLQk \
    UPTtFoqySeY \
    U_t7y_ktmLE \
    WZclHUbylVs \
    Wl-n6VHXAJ4 \
    YLr7pwIMW8g \
    Ymo0WUJc7T0 \
    aPHXeR8VdHM \
    cNzas0WtnrU \
    eCWPEy3sriM \
    e_EoPQObDvY \
    fOHdDZg1jQ0 \
    gMRdK0rt8yg \
    nU9y_E3zysc \
    pHoFRQViBg4 \
    tFB6tcVsLQY \
    tWzIfplDy0s \
    uosd3_3KwnY \
    uuFmYozhoNM \
    vFQflTQV-f0 \
    weGOS1cw2BM \
    xzBw5MCjf2U \
    zJeTPTjkcOc
do
    youtube-dl -q -e --proxy socks5://localhost:9999/ "https://www.youtube.com/watch?v=$id" > "$id.title"
done