"""Strip wiki metadata lines from every ``.txt`` file in a directory.

Usage: python3 wiki_cleanup.py WIKI_DIR

Each ``.txt`` file directly inside WIKI_DIR is rewritten in place with the
trailing categories block and the ``#acl`` / ``#format`` / ``#language`` /
``## page was renamed from`` lines removed.
"""
from os import listdir
from os.path import isfile, join
import os
import sys
import re
import subprocess

# Applied in this order; every match is deleted from the page text.
# Raw strings keep the regex escapes intact (a plain "\-" is an invalid
# string escape and triggers a warning on recent Python versions).
# NOTE(review): the "#..." patterns are not anchored to the start of a line,
# so they also match in the middle of a line — confirm this is intended.
_CLEANUP_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        r"(-{3,}\n(.*)\n*)$",  # trailing categories list
        r"#acl(.+)\n*",  # acls
        r"#format(.+)\n*",  # format metadata
        r"#language(.+)\n*",  # language metadata
        r"## page was renamed from(.+)\n*",  # page name notice
    )
)


def _clean_content(content):
    """Return *content* with every cleanup pattern match removed."""
    for pattern in _CLEANUP_PATTERNS:
        content = pattern.sub("", content)
    return content


def _clean_file(file_path):
    """Rewrite the UTF-8 text file at *file_path* in place, cleaned up."""
    # NOTE: a pandoc mediawiki -> markdown conversion of the file was sketched
    # here originally but was never enabled.
    with open(file_path, "r+", encoding="utf-8") as handle:
        cleaned = _clean_content(handle.read())
        # Rewind and truncate so the shorter cleaned text fully replaces the
        # old content instead of overwriting only its beginning.
        handle.seek(0)
        handle.truncate()
        handle.write(cleaned)


def main(argv=None):
    """Clean up all ``.txt`` files in the directory named by ``argv[1]``.

    Args:
        argv: Argument vector including the program name; defaults to
            ``sys.argv``.

    Prints a usage message when the directory argument is missing and an
    error message when it does not name an existing directory.
    """
    if argv is None:
        argv = sys.argv

    if len(argv) < 2:
        print("Argument missing. Usage: python3 wiki_cleanup.py WIKI_DIR")
        return

    path = argv[1]
    # isdir (not exists): a path to a regular file would make listdir raise.
    if not os.path.isdir(path):
        print("Error: the path specified is invalid")
        return

    for filename in listdir(path):
        file_path = join(path, filename)
        if not isfile(file_path):
            continue
        if os.path.splitext(filename)[1] != ".txt":
            continue
        print("Cleaning up %s/%s" % (path, filename))
        _clean_file(file_path)


if __name__ == "__main__":
    main()