censor sources
This commit is contained in:
parent
8496f12f16
commit
ced8bd00b6
@ -21,7 +21,6 @@ coon
|
||||
crap
|
||||
cunt
|
||||
damn
|
||||
dick
|
||||
dicker
|
||||
dickerin
|
||||
dickhead
|
||||
@ -40,7 +39,6 @@ flange
|
||||
fuck
|
||||
fudgepacker
|
||||
Goddamn
|
||||
hell
|
||||
Horsedick
|
||||
jizz
|
||||
knobend
|
||||
@ -61,7 +59,6 @@ slut
|
||||
sluttish
|
||||
slutty
|
||||
spunk
|
||||
toss
|
||||
tosser
|
||||
turd
|
||||
twat
|
||||
|
38
fast-aligner/censor_sources.py
Executable file
38
fast-aligner/censor_sources.py
Executable file
@ -0,0 +1,38 @@
|
||||
#!/usr/bin/python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import sys, re, os
|
||||
|
||||
|
||||
# Text that replaces every censored match.
MASK = '*' * 5


def read_words(path):
    """Return the non-blank entries of the word list at *path*.

    Each line holds one entry; trailing whitespace is removed.  Blank lines
    are dropped because an empty alternative in the final regex would match
    at every position of every line.
    """
    with open(path) as word_file:
        stripped = (raw.rstrip() for raw in word_file)
        return [word for word in stripped if word]


def compile_alternation(words, whole_words=False):
    """Compile *words* into a single case-insensitive alternation.

    Entries are escaped, so they are always matched as literal text.  When
    *whole_words* is true the alternation is wrapped in ``\\b`` anchors.
    Returns ``None`` for an empty list, since an empty group cannot be
    compiled into a meaningful pattern.
    """
    if not words:
        return None
    body = '(' + '|'.join(re.escape(word) for word in words) + ')'
    if whole_words:
        body = r'\b' + body + r'\b'
    return re.compile(body, re.IGNORECASE)


def load_profanity_patterns(directory='bad-words'):
    """Build the (whole-word, substring) patterns from *directory*.

    Files whose name starts with ``whole`` feed the whole-word pattern; every
    other regular file feeds the substring pattern.  Sub-directories are
    skipped.  Either element of the returned tuple is ``None`` when its word
    list is empty.
    """
    whole_words = []
    part_words = []
    # Sorted so the alternation order does not depend on directory order.
    for file_name in sorted(os.listdir(directory)):
        path = os.path.join(directory, file_name)
        if not os.path.isfile(path):
            continue
        bucket = whole_words if file_name.startswith('whole') else part_words
        bucket.extend(read_words(path))
    return (compile_alternation(whole_words, whole_words=True),
            compile_alternation(part_words))


def censor_line(line, whole_pattern, parts_pattern):
    """Mask every profanity match in *line*.

    The whole-word pattern is applied first, then the substring pattern is
    applied to that result.  A pattern of ``None`` is ignored.

    Returns a ``(censored_line, changed)`` tuple where *changed* is true when
    at least one substitution was made.
    """
    censored = line
    hits = 0
    for pattern in (whole_pattern, parts_pattern):
        if pattern is not None:
            censored, count = pattern.subn(MASK, censored)
            hits += count
    return censored, hits > 0


def main(argv):
    """Censor the file named by ``argv[1]``.

    Every line is written to stdout with trailing whitespace stripped; each
    line that was altered is also reported on stderr.  Returns the process
    exit status.
    """
    if len(argv) < 2:
        sys.stderr.write('Usage: %s SOURCES_FILE\n' % argv[0])
        return 2

    whole_pattern, parts_pattern = load_profanity_patterns()

    with open(argv[1]) as sources_file:
        for line in sources_file:
            line = line.rstrip()
            censored, changed = censor_line(line, whole_pattern, parts_pattern)
            print(censored)
            if changed:
                sys.stderr.write('Censored: %s to %s\n' % (line, censored))
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
|
Loading…
Reference in New Issue
Block a user