Update User-Agent header and pass it explicitly to requests.get

This commit is contained in:
Michał Kozłowski 2023-01-07 14:32:45 +01:00
parent 5c3611f972
commit bf6014ba98

View File

@@ -8,8 +8,7 @@ import pickle
import time import time
from pprint import pprint from pprint import pprint
session = requests.Session() headers = {'User-Agent': 'ImageDownloadOcrBot/1.0 (micha9@op.pl) requests/2.28.1'}
session.headers.update({'User-Agent': 'ImageDownloadOcrBot/1.0 (micha9@op.pl)'})
def save_state(index, offset): def save_state(index, offset):
with open("./state.pickle", "wb") as state_file: with open("./state.pickle", "wb") as state_file:
@@ -31,7 +30,7 @@ def main(args):
for n, row in enumerate(tqdm(df.iterrows(), total=len(df))): for n, row in enumerate(tqdm(df.iterrows(), total=len(df))):
try: try:
time.sleep(0.2) time.sleep(0.2)
r = requests.get(f"https:{row[1]['image_url']}", stream=True) r = requests.get(f"https:{row[1]['image_url']}", stream=True, headers=headers)
if r.status_code != 200: if r.status_code != 200:
pprint(r.__dict__) pprint(r.__dict__)
save_state(n, offset) save_state(n, offset)