Added markdown cleanup, gitignore, and makefile

This commit is contained in:
Maciej Wilczyński 2017-04-07 11:43:35 +02:00
parent 1ab154541f
commit 85ddba33d7
5 changed files with 73 additions and 4 deletions

3
.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
# Working directories of the wiki export/convert pipeline (see Makefile):
# converted Markdown (md/), exported wiki text (wiki/) and the source wiki
# data passed to moin_export.py (oldwiki/).
md/
wiki/
oldwiki/

36
Makefile Normal file
View File

@ -0,0 +1,36 @@
# Interpreter used to run the helper scripts.
PYTHON     := python3
# Base directory: make's current working directory.
DIR        := $(CURDIR)
# Directory the export step writes to; also the input of the wiki cleanup
# and Markdown conversion steps.
WIKIOUTDIR := $(DIR)/wiki
# Directory the Markdown conversion writes to.
MDOUTDIR   := $(DIR)/md
# Source wiki directory handed to moin_export.py.
OLDDIR     := $(DIR)/oldwiki
# Every target in this file names an action rather than a file it produces,
# so all of them are declared phony.
.PHONY: default clean clean_md clean_wiki \
        export initial_cleanup final_cleanup \
        convert all

# Default goal: print a short hint. The leading '@' stops make from echoing
# the command itself, so the message appears once instead of twice.
default:
	@echo "Moin export and convert -- read Makefile for more details"
# Remove both generated output trees.
clean: clean_md clean_wiki

# Delete the Markdown output directory. The path is quoted because it is
# derived from $(CURDIR): unquoted, a working directory containing spaces
# would be split into several operands and rm -rf would hit the wrong paths.
clean_md:
	rm -rf "$(MDOUTDIR)"

# Delete the exported wiki text directory (quoted for the same reason).
clean_wiki:
	rm -rf "$(WIKIOUTDIR)"
# Export the old wiki from $(OLDDIR) into $(WIKIOUTDIR) with moin_export.py.
# Listing the script as a prerequisite makes the target fail early when the
# script is missing.
export: moin_export.py
	# mkdir -p succeeds if the directory already exists but still reports
	# real failures. The previous '&> /dev/null || true' relied on a bash-only
	# redirection: under a POSIX /bin/sh it is parsed as 'mkdir ... &' followed
	# by '> /dev/null', which backgrounds mkdir and hides every error.
	mkdir -p "$(WIKIOUTDIR)"
	$(PYTHON) $< "$(OLDDIR)" "$(WIKIOUTDIR)"
# Run wiki_cleanup.py over the exported wiki text in $(WIKIOUTDIR).
# $< is the script named as the prerequisite; the directory is quoted so a
# path containing spaces is passed as a single argument.
initial_cleanup: wiki_cleanup.py
	$(PYTHON) $< "$(WIKIOUTDIR)"

# Run md_cleanup.py over the converted Markdown files in $(MDOUTDIR).
final_cleanup: md_cleanup.py
	$(PYTHON) $< "$(MDOUTDIR)"
# Convert the wiki text in $(WIKIOUTDIR) to Markdown files in $(MDOUTDIR)
# using convert_markdown.py.
convert: convert_markdown.py
	# mkdir -p is idempotent and portable; the former
	# 'mkdir ... &> /dev/null || true' used a bash-only redirection that a
	# POSIX /bin/sh treats as a background job, and it swallowed real errors.
	mkdir -p "$(MDOUTDIR)"
	$(PYTHON) $< "$(WIKIOUTDIR)" "$(MDOUTDIR)"
# Full pipeline: clean, export, clean up the wiki text, convert to Markdown,
# clean up the Markdown. The stages must run strictly in this order, and each
# one consumes the previous stage's output. They are therefore invoked one
# after another through $(MAKE) instead of being listed as prerequisites,
# whose relative order is not guaranteed under 'make -j'.
all:
	$(MAKE) clean
	$(MAKE) export
	$(MAKE) initial_cleanup
	$(MAKE) convert
	$(MAKE) final_cleanup

View File

@ -6,17 +6,18 @@ import subprocess
# argv[1] - wiki files directory # argv[1] - wiki files directory
if len(sys.argv) < 2: if len(sys.argv) < 3:
print("Argument missing. Usage: python3 convert_markdown.py <wiki content dir>") print("Argument missing. Usage: python3 convert_markdown.py <wiki content dir> <output dir>")
else: else:
path = sys.argv[1] path = sys.argv[1]
output_path = sys.argv[2]
if os.path.exists(path): if os.path.exists(path):
files = [ f for f in listdir(path) if isfile(join(path, f)) ] files = [ f for f in listdir(path) if isfile(join(path, f)) ]
for filename in files: for filename in files:
name, file_extension = os.path.splitext(filename) name, file_extension = os.path.splitext(filename)
if file_extension == ".txt": if file_extension == ".txt":
print("Converting %s/%s" % (path, filename)) print("Converting %s/%s" % (path, filename))
options = ['pandoc','-f','mediawiki','-t','markdown','-s',join(path,filename),'-o',join(path,name + ".md")] options = ['pandoc','-f','mediawiki','-t','markdown','-s',join(path,filename),'-o',join(output_path,name + ".md")]
subprocess.call(options) subprocess.call(options)
else: else:
print("Error: the path specified is invalid") print("Error: the path specified is invalid")

30
md_cleanup.py Normal file
View File

@ -0,0 +1,30 @@
from os import listdir
from os.path import isfile, join
import os
import sys
import re
import subprocess
# argv[1] - markdown files directory


def clean_markdown(content):
    """Return ``content`` with the Markdown fix-ups applied.

    Two substitutions are performed, in this order:

    1. A backtick-quoted span whose text starts with ``*`` (optionally
       followed by a single backslash) is replaced by ``*`` plus the rest of
       the span, dropping the backticks and the backslash.
    2. A ``!`` placed directly in front of a word is removed; the word itself
       is kept.
    """
    content = re.sub(r"`\*(.*)`[\\]?", r"*\g<1>", content)
    content = re.sub(r"!(\w+)", r"\g<1>", content)
    return content


def main(argv):
    """Clean up every ``.md`` file located directly in the directory ``argv[1]``.

    Returns the process exit status: 0 on success, 1 when the argument is
    missing or does not name a directory, so that callers such as a Makefile
    can detect the failure.
    """
    if len(argv) < 2:
        print("Argument missing. Usage: python3 md_cleanup.py <md content dir>")
        return 1

    path = argv[1]
    # isdir instead of exists: a regular file passes exists() and would then
    # make listdir() raise instead of printing the error message below.
    if not os.path.isdir(path):
        print("Error: the path specified is invalid")
        return 1

    for filename in listdir(path):
        if not isfile(join(path, filename)):
            continue
        if os.path.splitext(filename)[1] != ".md":
            continue
        print("Cleaning up %s/%s" % (path, filename))
        # The with-statement closes the file; no explicit close() is needed.
        with open(join(path, filename), "r+", encoding="utf-8") as file:
            content = file.read()
            cleaned = clean_markdown(content)
            # Rewrite in place only when the clean-up changed something.
            if cleaned != content:
                file.seek(0)
                file.truncate()
                file.write(cleaned)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))

View File

@ -17,7 +17,6 @@ else:
name, file_extension = os.path.splitext(filename) name, file_extension = os.path.splitext(filename)
if file_extension == ".txt": if file_extension == ".txt":
print("Cleaning up %s/%s" % (path, filename)) print("Cleaning up %s/%s" % (path, filename))
#options = ['pandoc','-f','mediawiki', '-t', 'markdown', '-s', join(path,filename), '-o', join(path,name + ".md")]
with open(join(path,filename), "r+", encoding="utf-8") as file: with open(join(path,filename), "r+", encoding="utf-8") as file:
content = file.read() content = file.read()
content = re.sub("(\-{3,}\n(.*)[\n]*)$", "", content) # remove trailing categories list content = re.sub("(\-{3,}\n(.*)[\n]*)$", "", content) # remove trailing categories list