33 lines
1.1 KiB
Python
33 lines
1.1 KiB
Python
|
import re2 as re
|
||
|
import csv
|
||
|
import time
|
||
|
|
||
|
def format_surnames(surname_file):
    """Load the first column of a CSV file as a set of lower-cased surnames.

    Args:
        surname_file: Path to a UTF-8 encoded CSV file. Only the first
            column of each row is read.

    Returns:
        A set with the lower-cased, non-empty first field of every row.
    """
    # newline='' lets the csv module do its own line-ending handling, as the
    # csv documentation recommends for files handed to csv.reader.
    with open(surname_file, 'r', encoding='utf-8', newline='') as csvfile:
        reader = csv.reader(csvfile)
        # csv.reader yields [] for blank lines, so an unguarded row[0] would
        # raise IndexError. Empty first fields are skipped as well: joined
        # with "|" into a regex they would form an empty alternative that
        # matches every line.
        return {row[0].lower() for row in reader if row and row[0]}
|
||
|
|
||
|
def find_matching_lines(input_file, output_file, female_surname_file, male_surname_file):
    """Copy every line of *input_file* that contains a known surname.

    The surnames from both CSV files are merged into a single alternation
    pattern. Each input line is searched with it, and matching lines are
    written to *output_file* unchanged. The elapsed time is printed at the end.

    Args:
        input_file: Path of the UTF-8 text file to scan line by line.
        output_file: Path of the UTF-8 file that is (over)written with the
            matching lines.
        female_surname_file: CSV file passed to ``format_surnames``.
        male_surname_file: CSV file passed to ``format_surnames``.
    """
    # perf_counter is monotonic, so the measurement is immune to clock changes.
    start_time = time.perf_counter()

    all_surnames = format_surnames(female_surname_file) | format_surnames(male_surname_file)
    # An empty alternative would make the pattern match every single line.
    all_surnames.discard("")

    # NOTE(review): surnames are lower-cased by format_surnames while lines
    # are searched as-is, so capitalised occurrences in the input will not
    # match. Confirm whether case-insensitive matching was intended.
    pattern = None
    if all_surnames:
        # Escape each surname so that characters such as "." or "(" in the
        # data are matched literally instead of being read as regex syntax.
        # `setting` is the module-level options object defined in this file.
        pattern = re.compile("|".join(re.escape(name) for name in all_surnames), setting)

    with open(input_file, 'r', encoding='utf-8') as infile, \
            open(output_file, 'w', encoding='utf-8') as outfile:
        # With no surnames nothing can match; the output file is still
        # created (and truncated), but stays empty.
        if pattern is not None:
            outfile.writelines(line for line in infile if pattern.search(line))

    elapsed_time = time.perf_counter() - start_time
    print(f"Czas wykonania programu: {elapsed_time} sekundy")
|
||
|
|
||
|
# Options object that find_matching_lines reads as a module-level global when
# it compiles the surname pattern; max_mem is raised to 3 * 2**30.
setting = re.Options()
setting.max_mem = 3 * (1 << 30)

find_matching_lines(
    input_file='polish_wiki_excerpt.in',
    output_file='output.txt',
    female_surname_file='nazwiska_kobiet.csv',
    male_surname_file='nazwiska_męskie.csv',
)
|