From 0a00fd931865668e0df22409d703ea480acc86fb Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Fri, 25 Aug 2017 14:28:30 +0100 Subject: [PATCH] Add option to include raw templates as cells --- .../refine/importers/WikitextImporter.java | 43 ++++++++++++++++--- .../modules/core/langs/translation-en.json | 1 + .../parser-interfaces/wikitext-parser-ui.html | 2 + .../parser-interfaces/wikitext-parser-ui.js | 6 +++ 4 files changed, 47 insertions(+), 5 deletions(-) diff --git a/main/src/com/google/refine/importers/WikitextImporter.java b/main/src/com/google/refine/importers/WikitextImporter.java index 31df95e4f..7f53eb482 100644 --- a/main/src/com/google/refine/importers/WikitextImporter.java +++ b/main/src/com/google/refine/importers/WikitextImporter.java @@ -19,6 +19,9 @@ import org.sweble.wikitext.parser.nodes.WtItalics; import org.sweble.wikitext.parser.nodes.WtNewline; import org.sweble.wikitext.parser.nodes.WtNode; import org.sweble.wikitext.parser.nodes.WtSection; +import org.sweble.wikitext.parser.nodes.WtTemplate; +import org.sweble.wikitext.parser.nodes.WtTemplateArgument; +import org.sweble.wikitext.parser.nodes.WtTemplateArguments; import org.sweble.wikitext.parser.nodes.WtText; import org.sweble.wikitext.parser.nodes.WtInternalLink; import org.sweble.wikitext.parser.nodes.WtExternalLink; @@ -77,6 +80,7 @@ public class WikitextImporter extends TabularImportingParserBase { JSONUtilities.safePut(options, "guessCellValueTypes", false); JSONUtilities.safePut(options, "blankSpanningCells", true); + JSONUtilities.safePut(options, "includeRawTemplates", false); JSONUtilities.safePut(options, "wikiUrl", "https://en.wikipedia.org/wiki/"); return options; @@ -125,6 +129,7 @@ public class WikitextImporter extends TabularImportingParserBase { private List currentRow; private boolean blankSpanningCells; + private boolean includeRawTemplates; private int rowId; private List spanningCells; @@ -138,8 +143,9 @@ public class WikitextImporter extends TabularImportingParserBase { private int spanningCellIdx; private List internalLinksInCell; - public WikitextTableVisitor(boolean blankSpanningCells) { + public WikitextTableVisitor(boolean blankSpanningCells, boolean includeRawTemplates) { this.blankSpanningCells = blankSpanningCells; + this.includeRawTemplates = includeRawTemplates; caption = null; header = new ArrayList(); rows = new ArrayList>(); @@ -165,7 +171,7 @@ public class WikitextImporter extends TabularImportingParserBase { public void visit(WtNode e) { // Ignore other nodes - // System.out.println(e.getNodeName()); + System.out.println(e.getNodeName()); } /* Table handling */ @@ -343,7 +349,11 @@ public class WikitextImporter extends TabularImportingParserBase { } public void visit(WtName e) { - currentXmlAttr = e.getAsString(); + try { + currentXmlAttr = e.getAsString(); + } catch (UnsupportedOperationException _) { + currentXmlAttr = null; + } } public void visit(WtValue e) { @@ -352,7 +362,6 @@ public class WikitextImporter extends TabularImportingParserBase { /* Link management */ - public void visit(WtInternalLink e) { currentInternalLink = e.getTarget().getAsString(); internalLinksInCell.add(currentInternalLink); @@ -396,6 +405,29 @@ public class WikitextImporter extends TabularImportingParserBase { // already handled, in WtExternalLink, added here for clarity } + /* Templates */ + + public void visit(WtTemplate e) { + if (includeRawTemplates) { + writeText("{{"+e.getName().getAsString()); + WtTemplateArguments args = e.getArgs(); + for (int i = 0; i != args.size(); i++) { + writeText("|"); + iterate(args.get(i)); + } + writeText("}}"); + } + } + + public void visit(WtTemplateArgument e) { + writeText("|"); + if(e.hasName()) { + writeText(e.getName().getAsString()); + writeText("="); + } + iterate(e.getValue()); + } + /* Content blocks */ public void visit(WtParsedWikitextPage e) { @@ -546,7 +578,8 @@ public class WikitextImporter extends TabularImportingParserBase { // Compile the retrieved page boolean blankSpanningCells = JSONUtilities.getBoolean(options, "blankSpanningCells", true); - final WikitextTableVisitor vs = new WikitextTableVisitor(blankSpanningCells); + boolean includeRawTemplates = JSONUtilities.getBoolean(options, "includeRawTemplates", false); + final WikitextTableVisitor vs = new WikitextTableVisitor(blankSpanningCells, includeRawTemplates); vs.go(parsedArticle); WikiTableDataReader dataReader = new WikiTableDataReader(vs); diff --git a/main/webapp/modules/core/langs/translation-en.json b/main/webapp/modules/core/langs/translation-en.json index 7c5f13ed7..2758fd067 100644 --- a/main/webapp/modules/core/langs/translation-en.json +++ b/main/webapp/modules/core/langs/translation-en.json @@ -114,6 +114,7 @@ "store-blank": "Store blank rows", "store-nulls": "Store blank cells as nulls", "blank-spanning-cells": "Pad cells spanning over multiple rows or columns with nulls", + "include-raw-templates": "Include raw templates as wikicode", "wiki-base-url": "Reconcile to wiki with base URL:", "invalid-wikitext": "No table could be parsed. Are you sure this is a valid wiki table?", "store-source": "Store file source
(file names, URLs)
in each row", diff --git a/main/webapp/modules/core/scripts/index/parser-interfaces/wikitext-parser-ui.html b/main/webapp/modules/core/scripts/index/parser-interfaces/wikitext-parser-ui.html index 6b4fee39a..ff41f7841 100644 --- a/main/webapp/modules/core/scripts/index/parser-interfaces/wikitext-parser-ui.html +++ b/main/webapp/modules/core/scripts/index/parser-interfaces/wikitext-parser-ui.html @@ -13,6 +13,8 @@ + + diff --git a/main/webapp/modules/core/scripts/index/parser-interfaces/wikitext-parser-ui.js b/main/webapp/modules/core/scripts/index/parser-interfaces/wikitext-parser-ui.js index 4e9b67879..1b30198a7 100644 --- a/main/webapp/modules/core/scripts/index/parser-interfaces/wikitext-parser-ui.js +++ b/main/webapp/modules/core/scripts/index/parser-interfaces/wikitext-parser-ui.js @@ -87,6 +87,7 @@ Refine.WikitextParserUI.prototype.getOptions = function() { } options.storeBlankRows = this._optionContainerElmts.storeBlankRowsCheckbox[0].checked; options.blankSpanningCells = this._optionContainerElmts.blankSpanningCellsCheckbox[0].checked; + options.includeRawTemplates = this._optionContainerElmts.includeRawTemplatesCheckbox[0].checked; options.guessCellValueTypes = this._optionContainerElmts.guessCellValueTypesCheckbox[0].checked; @@ -113,6 +114,7 @@ Refine.WikitextParserUI.prototype._initialize = function() { $('#or-import-rows2').text($.i18n._('core-index-parser')["rows-data"]); $('#or-import-parseCell').html($.i18n._('core-index-parser')["parse-cell"]); $('#or-import-blankSpanningCells').text($.i18n._('core-index-parser')["blank-spanning-cells"]); + $('#or-import-includeRawTemplates').text($.i18n._('core-index-parser')["include-raw-templates"]); $('#or-import-blank').text($.i18n._('core-index-parser')["store-blank"]); $('#or-import-null').text($.i18n._('core-index-parser')["store-nulls"]); $('#or-import-source').html($.i18n._('core-index-parser')["store-source"]); @@ -142,6 +144,10 @@ Refine.WikitextParserUI.prototype._initialize = function() { this._optionContainerElmts.blankSpanningCellsCheckbox.prop("checked", true); } + if (this._config.includeRawTemplates) { + this._optionContainerElmts.includeRawTemplatesCheckbox.prop("checked", true); + } + if (this._config.storeBlankRows) { this._optionContainerElmts.storeBlankRowsCheckbox.prop("checked", true); }