Initial commit, moin exporting
This commit is contained in:
commit
f42ee7010d
48
moin_export.py
Normal file
48
moin_export.py
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
from os import listdir
|
||||||
|
from os.path import isfile, join
|
||||||
|
|
||||||
|
|
||||||
|
# argv[1] - moin root directory
|
||||||
|
# argv[2] - output directory
|
||||||
|
|
||||||
|
# Hex escapes found in page directory names: "(" + an even number of
# lowercase hex digits + ")", e.g. "(2d)" for "-" or "(2f)" for "/".
_HEX_ESCAPE = re.compile(r"\(((?:[0-9a-f]{2})+)\)")


def _decode_page_name(name):
    """Return *name* with every "(hex)" escape replaced by its UTF-8 text.

    Escapes whose bytes are not valid UTF-8 are left untouched instead of
    aborting the export. The substitution is a single pass, so text produced
    by one escape is never decoded a second time.
    """
    def _unescape(match):
        try:
            return bytes.fromhex(match.group(1)).decode("utf-8")
        except UnicodeDecodeError:
            return match.group(0)

    return _HEX_ESCAPE.sub(_unescape, name)


def _revision_key(name):
    """Sort key that ranks numeric revision names by value, above any others."""
    if name.isdecimal():
        return (1, int(name), name)
    return (0, 0, name)


def _latest_revision(revisions_dir):
    """Return the file name of the highest revision, or None if there is none.

    Raises OSError if *revisions_dir* is missing or is not a directory.
    """
    names = [
        entry for entry in os.listdir(revisions_dir)
        if os.path.isfile(os.path.join(revisions_dir, entry))
    ]
    if not names:
        return None
    return max(names, key=_revision_key)


def _output_path(output_loc, page_name):
    """Return the export path for *page_name*, or None if no name remains.

    "/" in a decoded name becomes a sub-directory. Empty, "." and ".."
    components are dropped so the result always stays inside *output_loc*.
    """
    parts = [p for p in page_name.split("/") if p not in ("", ".", "..")]
    if not parts:
        return None
    parts[-1] += ".txt"
    return os.path.join(output_loc, *parts)


def _export_page(source_path, target_path):
    """Copy one UTF-8 revision file to *target_path*, creating parent dirs."""
    with open(source_path, "r", encoding="utf-8") as src:
        contents = src.read()
    parent = os.path.dirname(target_path)
    if parent:
        os.makedirs(parent, exist_ok=True)
    # Written as bytes so the platform never rewrites the line endings.
    with open(target_path, "wb") as dst:
        dst.write(contents.encode("utf-8"))


def main(argv):
    """Export the latest revision of every page as a ".txt" file.

    argv[1] is the moin root directory (pages are read from
    "<root>/wiki/data/pages"); argv[2] is the output directory, created if
    missing. Problems with a single page are reported and skipped so one bad
    page cannot abort the whole export.

    Returns 0 when the export ran, 1 for a usage or path error.
    """
    if len(argv) < 3:
        print("Argument missing. Usage: python3 moin_export.py <moin dir> <export dir>")
        return 1

    output_loc = argv[2]
    pages_dir = os.path.join(argv[1], "wiki", "data", "pages")
    if not os.path.isdir(pages_dir):
        print("Error: invalid Moin directory path")
        return 1

    os.makedirs(output_loc, exist_ok=True)

    for page in sorted(os.listdir(pages_dir)):
        revisions_dir = os.path.join(pages_dir, page, "revisions")
        try:
            latest = _latest_revision(revisions_dir)
        except OSError:
            # No "revisions" directory, or the entry is not a page directory.
            latest = None
        if latest is None:
            print("Directory " + page + " is empty.")
            continue

        target = _output_path(output_loc, _decode_page_name(page))
        if target is None:
            print("Error: cannot derive an output name for " + page)
            continue

        try:
            _export_page(os.path.join(revisions_dir, latest), target)
        except (OSError, UnicodeDecodeError) as err:
            print("Error exporting " + page + ": " + str(err))

    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
|
||||||
|
|
Loading…
Reference in New Issue
Block a user