"""Clean up exported wiki ``.txt`` files in place.

Every regular ``.txt`` file directly inside the given directory is rewritten
with its trailing categories list and its ``#acl`` / ``#format`` /
``#language`` / ``## page was renamed from`` lines removed.

Usage: python3 wiki_cleanup.py <wiki_files_directory>
"""
from os import listdir
from os.path import isfile, join
import os
import sys
import re
import subprocess

# argv[1] - wiki files directory

# Patterns are applied in this order; each match is replaced by "".
# Raw strings avoid the invalid "\-" string escape of the earlier literals.
# NOTE(review): none of the "#..." patterns is anchored to the start of a
# line, so they also match when the marker appears in the middle of a line.
_CLEANUP_PATTERNS = (
    # trailing categories list: a run of 3+ dashes, a newline, one more line,
    # then any trailing newlines, all at the very end of the text
    re.compile(r"(-{3,}\n(.*)[\n]*)$"),
    re.compile(r"#acl(.+)[\n]*"),  # acls
    re.compile(r"#format(.+)[\n]*"),  # format metadata
    re.compile(r"#language(.+)[\n]*"),  # language metadata
    re.compile(r"## page was renamed from(.+)[\n]*"),  # page name notice
)


def clean_wiki_text(content):
    """Return *content* with every cleanup pattern removed.

    Args:
        content: Full text of one wiki page.

    Returns:
        The text after each pattern in ``_CLEANUP_PATTERNS`` has been
        substituted with the empty string, in order.
    """
    for pattern in _CLEANUP_PATTERNS:
        content = pattern.sub("", content)
    return content


def clean_file(file_path):
    """Rewrite the UTF-8 text file at *file_path* with its cleaned content.

    Args:
        file_path: Path of the file to clean in place.

    Raises:
        OSError: If the file cannot be opened, read or written.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # "r+" lets the same handle read the old text and then overwrite it.
    with open(file_path, "r+", encoding="utf-8") as wiki_file:
        cleaned = clean_wiki_text(wiki_file.read())
        # Rewind and truncate so no tail of the longer original survives.
        wiki_file.seek(0)
        wiki_file.truncate()
        wiki_file.write(cleaned)


def main(argv=None):
    """Clean every ``.txt`` file directly inside the directory in ``argv[1]``.

    Args:
        argv: Argument vector including the program name; defaults to
            ``sys.argv``.
    """
    if argv is None:
        argv = sys.argv

    if len(argv) < 2:
        print(
            "Argument missing. "
            "Usage: python3 wiki_cleanup.py <wiki_files_directory>"
        )
        return

    path = argv[1]
    # isdir rather than exists: a regular file would make listdir() raise.
    if not os.path.isdir(path):
        print("Error: the path specified is invalid")
        return

    for filename in listdir(path):
        if not isfile(join(path, filename)):
            continue
        name, file_extension = os.path.splitext(filename)
        if file_extension != ".txt":
            continue
        print("Cleaning up %s/%s" % (path, filename))
        # Disabled pandoc invocation, kept for reference:
        # options = ['pandoc', '-f', 'mediawiki', '-t', 'markdown', '-s',
        #            join(path, filename), '-o', join(path, name + ".md")]
        clean_file(join(path, filename))


if __name__ == "__main__":
    main()