0bba61bbcd
Modify Makefile - enlarge to 40 parallel crawls. Add 4XX HTTP codes to retry list. Remove processed.final.txt. Probably fix remove_blacklisted.py
19 lines
388 B
Python
Executable File
19 lines
388 B
Python
Executable File
#!/usr/bin/env python3
|
|
import sys
|
|
|
|
def is_blacklisted(line, blacklisted_domains):
    """Return True if *line* contains any blacklisted domain as a substring.

    Args:
        line: Text to check (e.g. one line of input, newline included).
        blacklisted_domains: Iterable of domain strings to search for.

    Returns:
        True when at least one non-empty entry occurs anywhere in ``line``,
        otherwise False.
    """
    # Empty entries are skipped on purpose: "" is a substring of every
    # string, so a single blank entry would otherwise blacklist all input.
    return any(domain and domain in line for domain in blacklisted_domains)
|
|
|
|
|
|
# Load the blacklist: one domain per line, read from the file named by the
# first command-line argument.  Blank lines are dropped because an empty
# entry is a substring of every input line and would filter out everything.
with open(sys.argv[1]) as f:
    blacklisted_domains = [
        domain for domain in (line.rstrip('\n') for line in f) if domain
    ]

# Copy stdin to stdout, omitting every line that mentions a blacklisted
# domain.  Input lines keep their own trailing newline, hence end=''.
for line in sys.stdin:
    if not is_blacklisted(line, blacklisted_domains):
        print(line, end='')
|
|
|
|
|