jfz-2023-s473564/TaskG00/run.py
2024-01-20 19:53:48 +01:00

33 lines
1.1 KiB
Python

import re2 as re
import csv
import time
def format_surnames(surname_file):
    """Load the lowercased surnames stored in the first column of a CSV file.

    Args:
        surname_file: Path to a UTF-8 encoded CSV file whose first column
            holds the surnames.

    Returns:
        A set with the first cell of every row, stripped of surrounding
        whitespace and lowercased. Blank rows and empty cells are skipped.
        No header detection is performed: a header row, if present, is
        treated like any other row.
    """
    surnames = set()
    # newline='' lets the csv module do its own newline handling.
    with open(surname_file, 'r', encoding='utf-8', newline='') as csvfile:
        for row in csv.reader(csvfile):
            # csv.reader yields an empty list for a blank line; indexing it
            # would raise IndexError.
            if not row:
                continue
            surname = row[0].strip().lower()
            # An empty surname would later become an empty regex alternative
            # that matches every line, so it is dropped here.
            if surname:
                surnames.add(surname)
    return surnames
def find_matching_lines(input_file, output_file, female_surname_file, male_surname_file):
    """Copy every line of input_file that contains a known surname to output_file.

    The surnames from both CSV files are merged and compiled into one
    alternation pattern. Each surname is matched as a literal substring
    (it does not have to be a whole word). The elapsed time is printed
    at the end.

    Relies on the module-level ``setting`` object (regex engine options),
    which must be defined before this function is called.

    Args:
        input_file: Path of the UTF-8 text file to scan line by line.
        output_file: Path of the UTF-8 file that receives the matching lines;
            it is overwritten.
        female_surname_file: Path of the CSV file with female surnames.
        male_surname_file: Path of the CSV file with male surnames.
    """
    # perf_counter is monotonic, so the measurement is immune to clock changes.
    start_time = time.perf_counter()

    all_surnames = format_surnames(female_surname_file) | format_surnames(male_surname_file)
    # An empty alternative would make the pattern match every single line.
    all_surnames.discard("")

    # Escape each surname so characters such as '.' or '(' are matched
    # literally; sort so the compiled pattern is reproducible between runs.
    alternation = "|".join(re.escape(surname) for surname in sorted(all_surnames))
    # With no surnames at all nothing can match; an empty pattern would
    # instead match everything.
    pattern = re.compile(alternation, setting) if alternation else None

    # NOTE(review): the surnames are lowercased but the scanned lines are not,
    # so capitalised occurrences in the text are not matched. Confirm whether
    # case-insensitive matching is intended.
    with open(input_file, 'r', encoding='utf-8') as infile, \
            open(output_file, 'w', encoding='utf-8') as outfile:
        if pattern is not None:
            outfile.writelines(line for line in infile if pattern.search(line))

    elapsed_time = time.perf_counter() - start_time
    print(f"Czas wykonania programu: {elapsed_time} sekundy")
# Regex engine options; find_matching_lines reads this module-level name
# when it compiles the surname pattern.
setting = re.Options()
# Memory budget of 3 GiB, presumably because the alternation of all
# surnames does not fit within the engine's default limit.
setting.max_mem = 3 * (1 << 30)

find_matching_lines(
    'polish_wiki_excerpt.in',
    'output.txt',
    'nazwiska_kobiet.csv',
    'nazwiska_męskie.csv',
)