gcs file uploader, mp4 only
This commit is contained in:
parent
35ab9b8013
commit
40b9efeaab
101
src/storageUpload.py
Normal file
101
src/storageUpload.py
Normal file
@ -0,0 +1,101 @@
|
||||
from google.cloud import storage
|
||||
import sys
|
||||
import urllib
|
||||
from pymongo import MongoClient
|
||||
from bson.objectid import ObjectId
|
||||
import os
|
||||
import datetime
|
||||
|
||||
|
||||
def main():
    """Download every movie not yet stored in GCS, upload it, then clean up.

    For each document returned by getUploadList() the video is fetched to
    ``<_id>.mp4`` in the current working directory, uploaded to the bucket
    as ``mp4/<_id>.mp4`` and the local copy is deleted afterwards.
    Documents whose download produced no local file are skipped.
    """
    # NOTE(review): credentials are hard-coded in the connection URI;
    # consider loading them from the environment or a secrets store.
    uri = "mongodb://speechRecoUser:speech!reco@localhost/archSpeechReco"
    dbName = "archSpeechReco"
    colName = "moviesMeta"
    bucket = 'archspeechreco'

    col = getMongoCollection(colName, dbName, uri)
    toUpload = getUploadList(col)

    for doc in toUpload:
        fileName = ObjectId(doc['_id'])
        localPath = "{}.mp4".format(fileName)

        getVid(doc['url'], fileName)
        if not os.path.isfile(localPath):
            # The download failed (getVid reports the reason), so there is
            # nothing to upload or remove for this document.
            print("{} was not downloaded, skipping upload".format(localPath))
            continue

        upload_blob(bucket, fileName, "mp4/{}.mp4".format(fileName), col)

        # The local copy is only a staging file; drop it whether or not the
        # upload succeeded so the working directory does not fill up.
        try:
            os.remove(localPath)
        except OSError:
            print("{}.mp4 has NOT been removed".format(fileName))
        else:
            print("{}.mp4 has been removed".format(fileName))
|
||||
|
||||
|
||||
def upload_blob(bucket_name, source_file_name, destination_blob_name, col):
    """Upload ``<source_file_name>.mp4`` to the bucket and record it in MongoDB.

    Args:
        bucket_name: Name of the GCS bucket to upload into.
        source_file_name: Base name of the local file (without the ``.mp4``
            suffix); it is also used as the ``_id`` of the document to update.
        destination_blob_name: Object name the file gets inside the bucket.
        col: Collection whose matching document receives a ``gcs`` sub-document
            with the blob location and the upload timestamp.

    Upload and database failures are reported on stdout and not re-raised.
    The database is only touched after a successful upload.
    """
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket_name)
    blob = bucket.blob(destination_blob_name)

    # ``except Exception`` (rather than a bare ``except``) keeps the
    # best-effort behaviour but lets Ctrl-C / SystemExit stop a long upload.
    try:
        blob.upload_from_filename("{}.mp4".format(source_file_name))
    except Exception:
        print("gcs upload failed")
        return

    print('File {}.mp4 uploaded to {}.'.format(
        source_file_name,
        destination_blob_name))

    # NOTE(review): the timestamp is a naive local time stored as a string.
    now = datetime.datetime.now()
    gcs_info = {
        "location": destination_blob_name,
        "uploadDate": now.strftime("%Y-%m-%d %H:%M:%S"),
    }
    try:
        col.update_one(
            {"_id": ObjectId(source_file_name)},
            {"$set": {"gcs": gcs_info}},
        )
    except Exception:
        print("mongo update failed")
    else:
        print("mongo update OK")
|
||||
|
||||
|
||||
def getMongoCollection(colName, dbName, uri):
    """Connect to MongoDB at *uri* and return collection *colName* of *dbName*."""
    database = MongoClient(uri)[dbName]
    return database[colName]
|
||||
|
||||
|
||||
def getUploadList(col):
    """Return the result of aggregating *col* for movies not yet in GCS.

    Each resulting document carries its ``_id`` and a ``url`` field built by
    prefixing the first entry of the ``mp4`` array with the repository host.
    """
    # Match stage: keep documents without a "gcs" field, i.e. movies that
    # have not been uploaded to GCS.
    not_uploaded = {"$match": {"gcs": {"$exists": False}}}

    # Project stage: expose only the url (and _id) keys.
    first_mp4 = {"$arrayElemAt": ["$mp4", 0]}
    url_only = {
        "$project": {
            "url": {"$concat": ["http://repozytorium.fn.org.pl/", first_mp4]}
        }
    }

    # For partial runs, {"$skip": N} and/or {"$limit": N} stages can be
    # appended to the list below.
    return col.aggregate([not_uploaded, url_only])
|
||||
|
||||
|
||||
def getVid(url, out):
    """Download *url* into ``<out>.mp4`` in the current working directory.

    Args:
        url: Address of the video to fetch.
        out: Base name of the local file; the ``.mp4`` suffix is appended.

    Returns:
        True when the file was downloaded, False otherwise. Failures are
        reported on stdout and never raised.
    """
    # ``import urllib`` alone does not load the ``request`` submodule, so
    # import it explicitly instead of relying on another module having done so.
    import urllib.request

    target = "{}.mp4".format(out)
    # ``except Exception`` keeps the best-effort behaviour (bad, empty or
    # missing URLs, network errors) while letting Ctrl-C stop a long download.
    try:
        urllib.request.urlretrieve(url, target)
    except Exception:
        print("wrong URL, can't download")
        # Do not leave a truncated file behind: it could be mistaken for a
        # complete download by the caller.
        try:
            os.remove(target)
        except OSError:
            pass
        return False
    return True
|
||||
|
||||
|
||||
# Run the uploader only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
|
Loading…
Reference in New Issue
Block a user