Pre-conversion cleanup

This commit is contained in:
Maciej Wilczyński 2017-04-07 10:24:55 +02:00
parent 2e5a069e73
commit 1ab154541f

34
wiki_cleanup.py Normal file
View File

@@ -0,0 +1,34 @@
from os import listdir
from os.path import isfile, join
import os
import sys
import re
import subprocess
# Pre-conversion cleanup of wiki page sources.
#
# Usage: python3 wiki_cleanup.py <wiki content dir>
#   argv[1] - wiki files directory
#
# Every "*.txt" file located directly inside the directory is rewritten in
# place with the wiki metadata listed in CLEANUP_PATTERNS stripped out.
#
# NOTE: no format conversion is performed here. The original script only
# carried the pandoc invocation as a commented-out option list:
#   ['pandoc', '-f', 'mediawiki', '-t', 'markdown', '-s', <file>.txt, '-o', <file>.md]

# Substitutions applied in this order; every match is replaced by "".
# Compiled once at import time instead of once per processed file, and
# written as raw strings so that "\-" is not an invalid string escape.
CLEANUP_PATTERNS = (
    re.compile(r"(\-{3,}\n(.*)[\n]*)$"),  # trailing categories list
    re.compile(r"#acl(.+)[\n]*"),  # acls
    re.compile(r"#format(.+)[\n]*"),  # format metadata
    re.compile(r"#language(.+)[\n]*"),  # language metadata
    re.compile(r"## page was renamed from(.+)[\n]*"),  # page name notice
)


def clean_wiki_text(content):
    """Return *content* with every CLEANUP_PATTERNS match removed.

    The patterns are applied one after another, in declaration order, so a
    later pattern sees the text already stripped by the earlier ones.
    """
    for pattern in CLEANUP_PATTERNS:
        content = pattern.sub("", content)
    return content


def clean_wiki_file(file_path):
    """Rewrite the UTF-8 text file at *file_path* in place, cleaned up.

    Raises whatever ``open``/``read``/``write`` raise (e.g. ``OSError``,
    ``UnicodeDecodeError``); nothing is caught here.
    """
    with open(file_path, "r+", encoding="utf-8") as wiki_file:
        cleaned = clean_wiki_text(wiki_file.read())
        # Rewind and drop the old content before writing: the cleaned text
        # is never longer than the original, so stale bytes would remain.
        wiki_file.seek(0)
        wiki_file.truncate()
        wiki_file.write(cleaned)
    # The ``with`` block closes the file; no explicit close() is needed.


def main(argv):
    """Clean every ``.txt`` file directly inside the directory ``argv[1]``.

    Prints a usage message when the argument is missing and an error message
    when the path is not an existing directory. Sub-directories are not
    descended into and files with other extensions are left untouched.
    """
    if len(argv) < 2:
        print("Argument missing. Usage: python3 wiki_cleanup.py <wiki content dir>")
        return

    path = argv[1]
    # isdir() rather than exists(): an existing regular file would pass an
    # exists() check and then make listdir() raise NotADirectoryError.
    if not os.path.isdir(path):
        print("Error: the path specified is invalid")
        return

    for filename in listdir(path):
        if not isfile(join(path, filename)):
            continue
        _, file_extension = os.path.splitext(filename)
        if file_extension == ".txt":
            print("Cleaning up %s/%s" % (path, filename))
            clean_wiki_file(join(path, filename))


if __name__ == "__main__":
    main(sys.argv)