moin_utils/moin_export.py

49 lines
1.5 KiB
Python

#!/usr/bin/env python3
import sys
import os
import re
from os import listdir
from os.path import isfile, join
# argv[1] - moin root directory
# argv[2] - output directory
# Page directory names may contain parenthesised runs of lowercase hex byte
# pairs (e.g. "(2f)"); each run is decoded as UTF-8 to recover the page name.
# Requiring whole pairs keeps odd-length runs from reaching bytes.fromhex().
_QUOTED_RUN = re.compile(r"\((?:[0-9a-f]{2})+\)")


def _decode_page_name(dir_name):
    """Return *dir_name* with every "(hex...)" run replaced by its UTF-8 text.

    Runs whose bytes are not valid UTF-8 are left verbatim rather than
    aborting the export.
    """
    def _unquote(match):
        quoted = match.group(0)
        try:
            return bytes.fromhex(quoted[1:-1]).decode("utf-8")
        except UnicodeDecodeError:
            return quoted

    return _QUOTED_RUN.sub(_unquote, dir_name)


def _latest_revision(revisions_dir):
    """Return the path of the highest-named regular file in *revisions_dir*.

    Returns None when the directory holds no regular files. Propagates the
    OSError raised by listdir() when the directory is missing or is not a
    directory.
    """
    revisions = [f for f in listdir(revisions_dir) if isfile(join(revisions_dir, f))]
    if not revisions:
        return None
    # Same choice as sorting descending and taking the first entry.
    return join(revisions_dir, max(revisions))


def main(argv):
    """Export the latest revision of every page as "<page name>.txt".

    argv[1] is the moin root directory, argv[2] the output directory.
    Problems are reported on stdout and the affected page is skipped, so one
    bad page does not stop the rest of the export.
    """
    if len(argv) < 3:
        print("Argument missing. Usage: python3 moin_export.py <moin dir> <export dir>")
        return

    pages_dir = argv[1] + "/wiki/data/pages"
    # Check the pages directory itself: an existing root without it would
    # otherwise crash in listdir().
    if not os.path.isdir(pages_dir):
        print("Error: invalid Moin directory path")
        return

    output_loc = argv[2]
    for page in listdir(pages_dir):
        revisions_dir = pages_dir + "/" + page + "/revisions"
        try:
            latest = _latest_revision(revisions_dir)
        except (FileNotFoundError, NotADirectoryError):
            latest = None
        except OSError as err:
            print("Could not export " + page + ": " + str(err))
            continue
        if latest is None:
            print("Directory " + page + " is empty.")
            continue

        # Plain concatenation on purpose: os.path.join would discard
        # output_loc if the decoded name began with "/".
        output_filename = output_loc + "/" + _decode_page_name(page) + ".txt"
        try:
            with open(latest, "r", encoding="utf-8") as src:
                contents = src.read()
            # A decoded name may contain "/" (from "(2f)"), so the target
            # directory can be below output_loc; create it on demand.
            os.makedirs(os.path.dirname(output_filename), exist_ok=True)
            with open(output_filename, "wb") as dst:
                dst.write(contents.encode("utf-8"))
        except (OSError, UnicodeDecodeError) as err:
            print("Could not export " + page + ": " + str(err))


if __name__ == "__main__":
    main(sys.argv)