diff --git a/image_download.py b/image_download.py index 9dee29c..f630b39 100644 --- a/image_download.py +++ b/image_download.py @@ -8,8 +8,7 @@ import pickle import time from pprint import pprint -session = requests.Session() -session.headers.update({'User-Agent': 'ImageDownloadOcrBot/1.0 (micha9@op.pl)'}) +headers = {'User-Agent': 'ImageDownloadOcrBot/1.0 (micha9@op.pl) requests/2.28.1'} def save_state(index, offset): with open("./state.pickle", "wb") as state_file: @@ -31,7 +30,7 @@ def main(args): for n, row in enumerate(tqdm(df.iterrows(), total=len(df))): try: time.sleep(0.2) - r = requests.get(f"https:{row[1]['image_url']}", stream=True) + r = requests.get(f"https:{row[1]['image_url']}", stream=True, headers=headers) if r.status_code != 200: pprint(r.__dict__) save_state(n, offset)