#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""Censor profanity in a text file.

Word lists are read from the ``bad-words`` directory.  Files whose name
starts with ``whole`` contain entries that must match as whole words
(delimited by ``\\b``); every other file contains entries that are
censored wherever they occur, even inside a longer word.

Each line of the file named by the first command-line argument is printed
to stdout with every match replaced by ``*****``.  Every line that was
changed is additionally reported on stderr.
"""
import os
import re
import sys

BAD_WORDS_DIR = 'bad-words'
MASK = '*' * 5


def build_pattern(words, whole_words=False):
    """Compile a case-insensitive alternation of *words*.

    Args:
        words: iterable of word-list entries.  Trailing whitespace
            (including the newline) is stripped from each entry.  Entries
            are inserted into the pattern verbatim, so they are treated as
            regular-expression fragments, exactly as the word files always
            have been.
        whole_words: when true, wrap the alternation in ``\\b`` anchors so
            only whole words match.

    Returns:
        A compiled pattern.  When no usable entry is left, the pattern
        never matches anything.
    """
    # Blank entries must be dropped: an empty alternative matches the empty
    # string, which would make the substitution insert the mask at every
    # position of every line.
    fragments = [w for w in (word.rstrip() for word in words) if w]
    if not fragments:
        # An empty negative lookahead can never succeed, so this pattern is
        # a safe "match nothing" placeholder for an empty word list.
        return re.compile(r'(?!)')
    raw = '(' + '|'.join(fragments) + ')'
    if whole_words:
        raw = r'\b' + raw + r'\b'
    return re.compile(raw, re.IGNORECASE)


def load_word_lists(directory=BAD_WORDS_DIR):
    """Read every word file in *directory*.

    Returns:
        A ``(whole_words, part_words)`` tuple of lists holding the raw
        lines of the ``whole*`` files and of all other files respectively.
    """
    whole_words = []
    part_words = []
    # Sorted so the order of alternatives (and therefore which of several
    # overlapping entries wins) does not depend on directory listing order.
    for file_name in sorted(os.listdir(directory)):
        path = os.path.join(directory, file_name)
        if not os.path.isfile(path):
            continue  # a nested directory is not a word list
        target = whole_words if file_name.startswith('whole') else part_words
        with open(path) as word_file:
            target.extend(word_file)
    return whole_words, part_words


def censor_line(line, whole_pattern, parts_pattern):
    """Mask every match of both patterns in *line*.

    The whole-word pattern is applied first, then the parts pattern is
    applied to the result.

    Returns:
        A ``(censored, changed)`` tuple; *changed* is true when at least
        one substitution was made.
    """
    censored, whole_hits = whole_pattern.subn(MASK, line)
    censored, part_hits = parts_pattern.subn(MASK, censored)
    return censored, bool(whole_hits or part_hits)


def main(argv=None):
    """Run the censor; return the process exit status."""
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        program = argv[0] if argv else 'censor'
        sys.stderr.write('Usage: %s SOURCES_FILE\n' % program)
        return 2

    whole_words, part_words = load_word_lists()
    whole_pattern = build_pattern(whole_words, whole_words=True)
    parts_pattern = build_pattern(part_words)

    with open(argv[1]) as sources_file:
        for line in sources_file:
            line = line.rstrip()
            censored, changed = censor_line(line, whole_pattern, parts_pattern)
            print(censored)
            if changed:
                sys.stderr.write('Censored: %s to %s\n' % (line, censored))
    return 0


if __name__ == '__main__':
    sys.exit(main())