#!/usr/bin/env python3
"""Export the latest revision of every MoinMoin page to a plain-text file.

Usage: python3 moin_export.py <moin_root> <output_dir>

    argv[1] - moin root directory (must contain wiki/data/pages)
    argv[2] - output directory (created if missing)

For every page directory under ``<moin_root>/wiki/data/pages`` the
highest-numbered file in its ``revisions`` sub-directory is copied to
``<output_dir>/<decoded page name>.txt`` as UTF-8.

Origin: commit f42ee7010dab2a4cc25c31755b87580804baf77c
("Initial commit, moin exporting"), Maciej Wilczyński, 2017-04-07.
"""
import sys
import os
import re
from os import listdir
from os.path import isfile, join

# Page directory names carry escaped characters as "(<hex>)" groups, where
# <hex> is the lowercase hex dump of one or more UTF-8 encoded bytes.
_ENCODED_RUN = re.compile(r"\(([0-9a-f]{2,})\)")


def decode_page_name(name):
    """Return *name* with every "(<hex>)" group replaced by its UTF-8 text.

    Groups that are not valid hex (odd number of digits) or not valid UTF-8
    are left untouched instead of aborting the export.
    """
    def _decode(match):
        try:
            return bytes.fromhex(match.group(1)).decode("utf-8")
        except ValueError:  # also covers UnicodeDecodeError
            return match.group(0)

    # A single pass over the original name: decoded output is never
    # re-scanned, so text produced by one group cannot be decoded twice.
    return _ENCODED_RUN.sub(_decode, name)


def latest_revision(revisions_dir):
    """Return the name of the highest-numbered revision file, or None.

    Revision file names are compared as strings, exactly like the original
    reverse sort did. Raises OSError if *revisions_dir* cannot be listed.
    """
    revisions = [
        entry for entry in listdir(revisions_dir)
        if isfile(join(revisions_dir, entry))
    ]
    return max(revisions, default=None)


def export_pages(moin_root, output_dir):
    """Export the newest revision of each page; return the number exported.

    Pages without any revision file, unreadable revisions and unwritable
    targets are reported on stdout and skipped so that one bad page does
    not stop the whole export.
    """
    pages_dir = moin_root + "/wiki/data/pages"
    os.makedirs(output_dir, exist_ok=True)

    exported = 0
    for page in sorted(listdir(pages_dir)):
        revisions_dir = pages_dir + "/" + page + "/revisions"
        try:
            newest = latest_revision(revisions_dir)
        except (FileNotFoundError, NotADirectoryError):
            # No "revisions" directory, or a stray plain file among pages.
            newest = None
        if newest is None:
            print("Directory " + page + " is empty.")
            continue

        try:
            with open(join(revisions_dir, newest), "r",
                      encoding="utf-8") as source:
                contents = source.read()
        except (OSError, UnicodeDecodeError) as err:
            print("File " + newest + " could not be read: " + str(err))
            continue

        # Plain concatenation (not os.path.join) so a decoded name that
        # starts with "/" still lands below output_dir.
        target = output_dir + "/" + decode_page_name(page) + ".txt"
        try:
            # Sub-page names decode to "Parent/Child"; create "Parent" first.
            os.makedirs(os.path.dirname(target), exist_ok=True)
            with open(target, "wb") as destination:
                destination.write(contents.encode("utf-8"))
        except OSError as err:
            print("File " + target + " could not be written: " + str(err))
            continue
        exported += 1
    return exported


def main(argv=None):
    """Run the exporter; return 0 on success, 1 on a usage or path error.

    *argv* defaults to ``sys.argv`` and follows its layout (program name
    first, then the moin root directory and the output directory).
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 3:
        print("Argument missing. "
              "Usage: python3 moin_export.py <moin_root> <output_dir>")
        return 1

    moin_root, output_dir = argv[1], argv[2]
    if not os.path.isdir(moin_root + "/wiki/data/pages"):
        print("Error: invalid Moin directory path")
        return 1

    export_pages(moin_root, output_dir)
    return 0


if __name__ == "__main__":
    sys.exit(main())