Pre-conversion cleanup
This commit is contained in:
parent
2e5a069e73
commit
1ab154541f
34
wiki_cleanup.py
Normal file
34
wiki_cleanup.py
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
from os import listdir
|
||||||
|
from os.path import isfile, join
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import re
|
||||||
|
import subprocess
|
||||||
|
|
||||||
|
# argv[1] - wiki files directory
|
||||||
|
|
||||||
|
if len(sys.argv) < 2:
|
||||||
|
print("Argument missing. Usage: python3 wiki_cleanup.py <wiki content dir>")
|
||||||
|
else:
|
||||||
|
path = sys.argv[1]
|
||||||
|
if os.path.exists(path):
|
||||||
|
files = [ f for f in listdir(path) if isfile(join(path, f)) ]
|
||||||
|
for filename in files:
|
||||||
|
name, file_extension = os.path.splitext(filename)
|
||||||
|
if file_extension == ".txt":
|
||||||
|
print("Cleaning up %s/%s" % (path, filename))
|
||||||
|
#options = ['pandoc','-f','mediawiki', '-t', 'markdown', '-s', join(path,filename), '-o', join(path,name + ".md")]
|
||||||
|
with open(join(path,filename), "r+", encoding="utf-8") as file:
|
||||||
|
content = file.read()
|
||||||
|
content = re.sub("(\-{3,}\n(.*)[\n]*)$", "", content) # remove trailing categories list
|
||||||
|
content = re.sub("#acl(.+)[\n]*", "", content) # remove acls
|
||||||
|
content = re.sub("#format(.+)[\n]*", "", content) # remove format metadata
|
||||||
|
content = re.sub("#language(.+)[\n]*", "", content) # remove language metadata
|
||||||
|
content = re.sub("## page was renamed from(.+)[\n]*", "", content) # remove page name notice
|
||||||
|
file.seek(0)
|
||||||
|
file.truncate()
|
||||||
|
file.write(content)
|
||||||
|
file.close()
|
||||||
|
else:
|
||||||
|
print("Error: the path specified is invalid")
|
||||||
|
|
Loading…
Reference in New Issue
Block a user