qol update
This commit is contained in:
parent
bf6014ba98
commit
0f3a6eb9da
@ -13,18 +13,21 @@ headers = {'User-Agent': 'ImageDownloadOcrBot/1.0 (micha9@op.pl) requests/2.28.1
|
|||||||
def save_state(index, offset):
|
def save_state(index, offset):
|
||||||
with open("./state.pickle", "wb") as state_file:
|
with open("./state.pickle", "wb") as state_file:
|
||||||
pickle.dump({"row_index": index+offset}, state_file, protocol=pickle.HIGHEST_PROTOCOL)
|
pickle.dump({"row_index": index+offset}, state_file, protocol=pickle.HIGHEST_PROTOCOL)
|
||||||
|
print("Saving state, index: ", index+offset)
|
||||||
|
|
||||||
def main(args):
|
def main(args):
|
||||||
|
|
||||||
df = pd.read_csv(args.file_path, sep="\t")
|
df = pd.read_csv(args.file_path, sep="\t")
|
||||||
offset = 0
|
offset = 0
|
||||||
if not os.path.exists(args.output_folder):
|
if not os.path.exists(args.output_folder):
|
||||||
|
print(f"Creating missing folder: {args.output_folder}")
|
||||||
os.mkdir(args.output_folder)
|
os.mkdir(args.output_folder)
|
||||||
|
|
||||||
if args.from_checkpoint:
|
if args.from_checkpoint:
|
||||||
with open("state.pickle", "rb") as state:
|
with open("state.pickle", "rb") as state:
|
||||||
state_dict = pickle.load(state)
|
state_dict = pickle.load(state)
|
||||||
offset = state_dict["row_index"]
|
offset = state_dict["row_index"]
|
||||||
|
print("Starting from checkpoint, index: ", offset)
|
||||||
df = df[offset:]
|
df = df[offset:]
|
||||||
|
|
||||||
for n, row in enumerate(tqdm(df.iterrows(), total=len(df))):
|
for n, row in enumerate(tqdm(df.iterrows(), total=len(df))):
|
||||||
@ -41,7 +44,8 @@ def main(args):
|
|||||||
title = row[1]['title'].replace("Strona:", "").replace("/", "-")
|
title = row[1]['title'].replace("Strona:", "").replace("/", "-")
|
||||||
image.save(f"{args.output_folder}/{title}.png")
|
image.save(f"{args.output_folder}/{title}.png")
|
||||||
|
|
||||||
if round(sum(os.path.getsize(f"./images/{file}") for file in os.listdir("./images")) * 0.000001, 2) > args.max_folder_size_gb:
|
if round(sum(os.path.getsize(f"./images/{file}") for file in os.listdir("./images")) * 0.000001, 2) > args.max_folder_size_mb:
|
||||||
|
print(f"Limit size of: {args.max_folder_size_mb}, exceeded")
|
||||||
save_state(n, offset)
|
save_state(n, offset)
|
||||||
break
|
break
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@ -59,7 +63,7 @@ if __name__ == "__main__":
|
|||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
parser.add_argument("--file_path", type=str, required=True)
|
parser.add_argument("--file_path", type=str, required=True)
|
||||||
parser.add_argument("--output_folder", type=str, default="./images")
|
parser.add_argument("--output_folder", type=str, default="./images")
|
||||||
parser.add_argument("--max_folder_size_gb", default=5000.0, type=float, required=False)
|
parser.add_argument("--max_folder_size_mb", default=5000.0, type=float, required=False)
|
||||||
parser.add_argument("--from_checkpoint", type=bool, required=False, default=False)
|
parser.add_argument("--from_checkpoint", type=bool, required=False, default=False)
|
||||||
args, left_argv = parser.parse_known_args()
|
args, left_argv = parser.parse_known_args()
|
||||||
main(args)
|
main(args)
|
Loading…
Reference in New Issue
Block a user