Pre-conversion cleanup
This commit is contained in:
parent
2e5a069e73
commit
1ab154541f
34
wiki_cleanup.py
Normal file
34
wiki_cleanup.py
Normal file
@ -0,0 +1,34 @@
|
||||
from os import listdir
|
||||
from os.path import isfile, join
|
||||
import os
|
||||
import sys
|
||||
import re
|
||||
import subprocess
|
||||
|
||||
# argv[1] - wiki files directory
|
||||
|
||||
# Matches a horizontal rule (three or more dashes) followed by a single line
# at the very end of the text, i.e. the trailing categories list.
_TRAILING_CATEGORIES = re.compile(r"-{3,}\n.*\n*$")

# Metadata directives removed from the page text, applied in this order.
# Each pattern consumes the directive, the rest of its line, and any newlines
# that follow it.
# NOTE(review): these patterns are not anchored to the start of a line, so a
# directive name appearing mid-line in body text is stripped as well. That
# matches the historical behaviour; confirm before anchoring with ^.
_METADATA_PATTERNS = (
    re.compile(r"#acl.+\n*"),  # access control lists
    re.compile(r"#format.+\n*"),  # format metadata
    re.compile(r"#language.+\n*"),  # language metadata
    re.compile(r"## page was renamed from.+\n*"),  # page rename notice
)


def clean_wiki_text(content):
    """Return *content* with wiki metadata and the trailing categories removed.

    The trailing categories block is removed first, then every ``#acl``,
    ``#format``, ``#language`` and ``## page was renamed from`` directive.
    Text that contains none of these is returned unchanged.
    """
    content = _TRAILING_CATEGORIES.sub("", content)
    for pattern in _METADATA_PATTERNS:
        content = pattern.sub("", content)
    return content


def main(argv=None):
    """Clean every ``.txt`` file directly inside the directory in ``argv[1]``.

    Files are rewritten in place. Sub-directories and files with any other
    extension are left alone.

    Args:
        argv: Argument vector including the program name; defaults to
            ``sys.argv``.

    Returns:
        0 on success, 1 when the argument is missing or is not a directory.
    """
    if argv is None:
        argv = sys.argv

    if len(argv) < 2:
        print(
            "Argument missing. Usage: python3 wiki_cleanup.py <wiki content dir>",
            file=sys.stderr,
        )
        return 1

    path = argv[1]
    # isdir (rather than exists) so that a path to a regular file is reported
    # as invalid instead of making listdir() raise NotADirectoryError.
    if not os.path.isdir(path):
        print("Error: the path specified is invalid", file=sys.stderr)
        return 1

    for filename in listdir(path):
        full_path = join(path, filename)
        if not isfile(full_path):
            continue
        if os.path.splitext(filename)[1] != ".txt":
            continue

        print("Cleaning up %s/%s" % (path, filename))
        with open(full_path, "r+", encoding="utf-8") as wiki_file:
            cleaned = clean_wiki_text(wiki_file.read())
            # Rewind and truncate so the cleaned text fully replaces the old
            # content; the with-block closes the file afterwards.
            wiki_file.seek(0)
            wiki_file.truncate()
            wiki_file.write(cleaned)

    return 0


if __name__ == "__main__":
    sys.exit(main())
|
||||
|
Loading…
Reference in New Issue
Block a user