From 85ddba33d79aefe569a0c8bae63eb481058e5f10 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maciej=20Wilczy=C5=84ski?=
Date: Fri, 7 Apr 2017 11:43:35 +0200
Subject: [PATCH] Added markdown cleanup, gitignore, and makefile

---
Review notes (text between the "---" line and the diff is not part of the
commit message):

 * Makefile: "cmd &> /dev/null" is a bash extension. make runs recipes with
   /bin/sh, where it parses as "cmd &" followed by an empty redirection, so
   mkdir ran in the background (racing the next recipe line) and its error
   output was not silenced. "mkdir -p" creates the directory only when it is
   missing and needs no redirection.
 * md_cleanup.py: the code-span regex is now non-greedy so two spans on the
   same line are unwrapped separately; the redundant file.close() inside the
   "with" block and the unused subprocess import are gone.
 * convert_markdown.py / md_cleanup.py: the usage messages now name the
   expected arguments.
 * Line structure and indentation were reconstructed from a whitespace-
   collapsed copy of this patch. Verify the context and removed lines
   against the target files before applying. The author address was not
   present in that copy, and the post-image blob ids on the "index" lines of
   the edited files were not recomputed.

 .gitignore          |  3 +++
 Makefile            | 36 ++++++++++++++++++++++++++++++++++++
 convert_markdown.py |  7 ++++---
 md_cleanup.py       | 30 ++++++++++++++++++++++++++++++
 wiki_cleanup.py     |  1 -
 5 files changed, 73 insertions(+), 4 deletions(-)
 create mode 100644 .gitignore
 create mode 100644 Makefile
 create mode 100644 md_cleanup.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..7380cf6
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+md/
+wiki/
+oldwiki/
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..283749a
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,36 @@
+PYTHON = python3
+DIR = $(CURDIR)
+WIKIOUTDIR = $(DIR)/wiki
+MDOUTDIR = $(DIR)/md
+OLDDIR = $(DIR)/oldwiki
+
+.PHONY: default clean clean_md clean_wiki \
+        export initial_cleanup final_cleanup \
+        convert all
+
+default:
+	echo "Moin export and convert -- read Makefile for more details"
+
+clean: clean_md clean_wiki
+
+clean_md:
+	rm -rf $(MDOUTDIR)
+
+clean_wiki:
+	rm -rf $(WIKIOUTDIR)
+
+export: moin_export.py
+	mkdir -p $(WIKIOUTDIR)
+	$(PYTHON) moin_export.py $(OLDDIR) $(WIKIOUTDIR)
+
+initial_cleanup: wiki_cleanup.py
+	$(PYTHON) wiki_cleanup.py $(WIKIOUTDIR)
+
+final_cleanup: md_cleanup.py
+	$(PYTHON) md_cleanup.py $(MDOUTDIR)
+
+convert: convert_markdown.py
+	mkdir -p $(MDOUTDIR)
+	$(PYTHON) convert_markdown.py $(WIKIOUTDIR) $(MDOUTDIR)
+
+all: clean export initial_cleanup convert final_cleanup
diff --git a/convert_markdown.py b/convert_markdown.py
index 957435a..0afc962 100644
--- a/convert_markdown.py
+++ b/convert_markdown.py
@@ -6,17 +6,18 @@ import subprocess
 
 # argv[1] - wiki files directory
 
-if len(sys.argv) < 2:
-    print("Argument missing. Usage: python3 convert_markdown.py ")
+if len(sys.argv) < 3:
+    print("Argument missing. Usage: python3 convert_markdown.py <wiki_dir> <output_dir>")
 else:
     path = sys.argv[1]
+    output_path = sys.argv[2]
     if os.path.exists(path):
         files = [ f for f in listdir(path) if isfile(join(path, f)) ]
         for filename in files:
             name, file_extension = os.path.splitext(filename)
             if file_extension == ".txt":
                 print("Converting %s/%s" % (path, filename))
-                options = ['pandoc','-f','mediawiki','-t','markdown','-s',join(path,filename),'-o',join(path,name + ".md")]
+                options = ['pandoc','-f','mediawiki','-t','markdown','-s',join(path,filename),'-o',join(output_path,name + ".md")]
                 subprocess.call(options)
     else:
         print("Error: the path specified is invalid")
diff --git a/md_cleanup.py b/md_cleanup.py
new file mode 100644
index 0000000..babacb5
--- /dev/null
+++ b/md_cleanup.py
@@ -0,0 +1,30 @@
+from os import listdir
+from os.path import isfile, join
+import os
+import sys
+import re
+
+# argv[1] - markdown files directory
+
+if len(sys.argv) < 2:
+    print("Argument missing. Usage: python3 md_cleanup.py <markdown_dir>")
+else:
+    path = sys.argv[1]
+    if os.path.exists(path):
+        files = [ f for f in listdir(path) if isfile(join(path, f)) ]
+        for filename in files:
+            name, file_extension = os.path.splitext(filename)
+            if file_extension == ".md":
+                print("Cleaning up %s/%s" % (path, filename))
+                with open(join(path,filename), "r+", encoding="utf-8") as file:
+                    content = file.read()
+                    # non-greedy: unwrap each `*...` span on a line separately
+                    content = re.sub(r"`\*(.*?)`[\\]?", r"*\g<1>", content)
+                    # drop a "!" directly before a word (presumably MoinMoin's !WikiName escape)
+                    content = re.sub(r"!(\w+)", r"\g<1>", content)
+                    file.seek(0)
+                    file.truncate()
+                    file.write(content)
+    else:
+        print("Error: the path specified is invalid")
+
diff --git a/wiki_cleanup.py b/wiki_cleanup.py
index 2f7c322..64186c8 100644
--- a/wiki_cleanup.py
+++ b/wiki_cleanup.py
@@ -17,7 +17,6 @@ else:
             name, file_extension = os.path.splitext(filename)
             if file_extension == ".txt":
                 print("Cleaning up %s/%s" % (path, filename))
-                #options = ['pandoc','-f','mediawiki', '-t', 'markdown', '-s', join(path,filename), '-o', join(path,name + ".md")]
                 with open(join(path,filename), "r+", encoding="utf-8") as file:
                     content = file.read()
                     content = re.sub("(\-{3,}\n(.*)[\n]*)$", "", content) # remove trailing categories list