"""Copy the lines of a text file that contain a known surname to a new file.

Surnames are read from the first column of two CSV files, combined into a
single RE2 alternation, and every input line matching that alternation is
written to the output file.
"""

import csv
import time

import re2 as re

# RE2 options used when compiling the surname pattern. The memory budget is
# set to 3 GiB; presumably the surname alternation is too large for RE2's
# default budget -- TODO confirm.
setting = re.Options()
setting.max_mem = (1 << 30) * 3


def format_surnames(surname_file: str) -> set:
    """Return the lowercased surnames from the first column of a CSV file.

    Blank lines and rows whose first cell is empty or whitespace-only are
    skipped: an empty alternative in the final pattern would match every
    line of the input. Surrounding whitespace is stripped from each surname.

    Args:
        surname_file: Path to a UTF-8 encoded CSV file.

    Returns:
        A set of unique, lowercased surnames.
    """
    surnames = set()
    # newline='' is what the csv module expects so that it can handle line
    # endings embedded in quoted fields itself.
    with open(surname_file, 'r', encoding='utf-8', newline='') as csvfile:
        for row in csv.reader(csvfile):
            if not row:
                continue  # blank line in the CSV
            surname = row[0].strip().lower()
            if surname:
                surnames.add(surname)
    return surnames


def find_matching_lines(input_file: str, output_file: str,
                        female_surname_file: str,
                        male_surname_file: str) -> None:
    """Write every line of ``input_file`` that contains a surname.

    The surnames from both CSV files are merged, escaped so they are matched
    literally, and compiled into one alternation using the module-level
    ``setting`` options. The elapsed time is printed when the run finishes.

    Args:
        input_file: Path to the UTF-8 text file to scan.
        output_file: Path of the UTF-8 file that receives the matching lines;
            it is created or truncated.
        female_surname_file: CSV file with surnames in its first column.
        male_surname_file: CSV file with surnames in its first column.
    """
    start_time = time.perf_counter()

    all_surnames = (format_surnames(female_surname_file)
                    | format_surnames(male_surname_file))

    # Escape each surname so regex metacharacters are matched literally, and
    # sort so the generated pattern does not depend on set iteration order.
    # With no surnames there is nothing to search for; an empty pattern would
    # match every line, so no pattern is compiled in that case.
    pattern = None
    if all_surnames:
        alternation = "|".join(re.escape(name) for name in sorted(all_surnames))
        pattern = re.compile(alternation, setting)

    # NOTE(review): surnames are lowercased but input lines are searched as-is,
    # so the match is case-sensitive against lowercase names. Confirm whether
    # the input is already lowercased or case-insensitive matching was meant.
    with open(input_file, 'r', encoding='utf-8') as infile, \
            open(output_file, 'w', encoding='utf-8') as outfile:
        if pattern is not None:
            outfile.writelines(
                line for line in infile if pattern.search(line)
            )

    elapsed_time = time.perf_counter() - start_time
    print(f"Czas wykonania programu: {elapsed_time} sekundy")


if __name__ == "__main__":
    find_matching_lines('polish_wiki_excerpt.in', 'output.txt',
                        'nazwiska_kobiet.csv', 'nazwiska_męskie.csv')