Include image file names like templates in Wikitable importer

This commit is contained in:
Antonin Delpeuch 2018-02-02 11:22:15 +00:00
parent 843641ca74
commit 1f87a26f4c
4 changed files with 42 additions and 4 deletions

View File

@ -30,6 +30,7 @@ import org.sweble.wikitext.parser.nodes.WtText;
import org.sweble.wikitext.parser.nodes.WtInternalLink; import org.sweble.wikitext.parser.nodes.WtInternalLink;
import org.sweble.wikitext.parser.nodes.WtExternalLink; import org.sweble.wikitext.parser.nodes.WtExternalLink;
import org.sweble.wikitext.parser.nodes.WtHeading; import org.sweble.wikitext.parser.nodes.WtHeading;
import org.sweble.wikitext.parser.nodes.WtImageLink;
import org.sweble.wikitext.parser.nodes.WtLinkTitle; import org.sweble.wikitext.parser.nodes.WtLinkTitle;
import org.sweble.wikitext.parser.nodes.WtLinkTitle.WtNoLinkTitle; import org.sweble.wikitext.parser.nodes.WtLinkTitle.WtNoLinkTitle;
import org.sweble.wikitext.parser.nodes.WtUrl; import org.sweble.wikitext.parser.nodes.WtUrl;
@ -423,8 +424,7 @@ public class WikitextImporter extends TabularImportingParserBase {
public void visit(WtName e) { public void visit(WtName e) {
try { try {
currentXmlAttr = e.getAsString(); currentXmlAttr = e.getAsString();
} catch (UnsupportedOperationException soe) { } catch (UnsupportedOperationException soe) {
currentXmlAttr = null; currentXmlAttr = null;
} }
@ -507,6 +507,14 @@ public class WikitextImporter extends TabularImportingParserBase {
iterate(e.getValue()); iterate(e.getValue());
} }
/**
 * Renders an image link as raw wikicode in the current cell.
 *
 * Nothing is written unless {@code includeRawTemplates} is enabled. When it
 * is, only the link target is emitted, wrapped in double square brackets;
 * other parts of the link (for instance a size such as {@code 70px}) are
 * not reproduced.
 *
 * @param e the image link node being visited
 */
public void visit(WtImageLink e) {
    if (!includeRawTemplates) {
        return;
    }

    // Resolve the target before writing anything, so that a failure cannot
    // leave a dangling "[[" in the cell.
    String target;
    try {
        target = e.getTarget().getAsString();
    } catch (UnsupportedOperationException ignored) {
        // Same guard as visit(WtName): the node cannot be rendered as a
        // plain string, so skip the image instead of aborting the import.
        // NOTE(review): assumes the link target's getAsString() can throw
        // like WtName's does - confirm against the Sweble API.
        return;
    }

    writeText("[[");
    writeText(target);
    writeText("]]");
}
/* Content blocks */ /* Content blocks */
public void visit(WtParsedWikitextPage e) { public void visit(WtParsedWikitextPage e) {

View File

@ -44,6 +44,7 @@ import org.testng.annotations.BeforeTest;
import org.testng.annotations.Test; import org.testng.annotations.Test;
import com.google.refine.importers.WikitextImporter; import com.google.refine.importers.WikitextImporter;
import com.google.refine.util.JSONUtilities;
public class WikitextImporterTests extends ImporterTest { public class WikitextImporterTests extends ImporterTest {
@ -281,6 +282,34 @@ public class WikitextImporterTests extends ImporterTest {
Assert.assertEquals(project.rows.get(1).cells.get(4).value, "http://gnu.org"); Assert.assertEquals(project.rows.get(1).cells.get(4).value, "http://gnu.org");
Assert.assertEquals(project.rows.get(1).cells.get(2).value, "http://microsoft.com/"); Assert.assertEquals(project.rows.get(1).cells.get(2).value, "http://microsoft.com/");
} }
/**
 * Imports a small wikitable containing a template and an image link, and
 * checks that both end up in their cells as raw wikicode (the image link
 * reduced to its file name).
 */
@Test
public void readTableWithTemplates() {
    StringBuilder wikitext = new StringBuilder();
    wikitext.append("\n");
    wikitext.append("{|\n");
    wikitext.append("|-\n");
    wikitext.append("| {{free to read}} || b || c \n");
    wikitext.append("|-\n");
    wikitext.append("| d\n");
    wikitext.append("| [[File:My logo.svg|70px]]\n");
    wikitext.append("| f<br>\n");
    wikitext.append("|-\n");
    wikitext.append("|}\n");

    try {
        prepareOptions(0, 0, true, true, null);
        parse(wikitext.toString());
    } catch (Exception e) {
        Assert.fail("Parsing failed", e);
    }

    // Table shape: three columns, two data rows.
    int columnCount = project.columnModel.columns.size();
    Assert.assertEquals(columnCount, 3);
    int rowCount = project.rows.size();
    Assert.assertEquals(rowCount, 2);
    int firstRowWidth = project.rows.get(0).cells.size();
    Assert.assertEquals(firstRowWidth, 3);

    // The template is preserved verbatim.
    Object templateCell = project.rows.get(0).cells.get(0).value;
    Assert.assertEquals(templateCell, "{{free to read}}");

    // The image link keeps its target but not the size option.
    Object imageCell = project.rows.get(1).cells.get(1).value;
    Assert.assertEquals(imageCell, "[[File:My logo.svg]]");
}
//--helpers-- //--helpers--
@ -298,6 +327,7 @@ public class WikitextImporterTests extends ImporterTest {
whenGetBooleanOption("blankSpanningCells", options, blankSpanningCells); whenGetBooleanOption("blankSpanningCells", options, blankSpanningCells);
whenGetBooleanOption("storeBlankCellsAsNulls", options, true); whenGetBooleanOption("storeBlankCellsAsNulls", options, true);
whenGetBooleanOption("parseReferences", options, true); whenGetBooleanOption("parseReferences", options, true);
whenGetBooleanOption("includeRawTemplates", options, true);
whenGetStringOption("wikiUrl", options, wikiUrl); whenGetStringOption("wikiUrl", options, wikiUrl);
whenGetStringOption("reconService", options, "https://tools.wmflabs.org/openrefine-wikidata/en/api"); whenGetStringOption("reconService", options, "https://tools.wmflabs.org/openrefine-wikidata/en/api");
} }

View File

@ -136,7 +136,7 @@
"store-blank": "Store blank rows", "store-blank": "Store blank rows",
"store-nulls": "Store blank cells as nulls", "store-nulls": "Store blank cells as nulls",
"blank-spanning-cells": "Pad cells spanning over multiple rows or columns with nulls", "blank-spanning-cells": "Pad cells spanning over multiple rows or columns with nulls",
"include-raw-templates": "Include templates as raw wikicode", "include-raw-templates": "Include templates and images as raw wikicode",
"parse-references": "Extract references in additional columns", "parse-references": "Extract references in additional columns",
"wiki-base-url": "Reconcile to wiki with base URL:", "wiki-base-url": "Reconcile to wiki with base URL:",
"invalid-wikitext": "No table could be parsed. Are you sure this is a valid wiki table?", "invalid-wikitext": "No table could be parsed. Are you sure this is a valid wiki table?",

View File

@ -133,7 +133,7 @@
"store-nulls": "Analyser les cellules vides comme nulles", "store-nulls": "Analyser les cellules vides comme nulles",
"lines-into-row": "lignes comme une seule", "lines-into-row": "lignes comme une seule",
"custom": "autre", "custom": "autre",
"include-raw-templates": "Inclure les modèles an tant que wikicode brut", "include-raw-templates": "Inclure les modèles et images comme du wikicode brut",
"quotation-mark": "Des guillemets sont utilisés<br/>pour délimiter les cellules qui contiennent<br/>des séparateurs de colonne", "quotation-mark": "Des guillemets sont utilisés<br/>pour délimiter les cellules qui contiennent<br/>des séparateurs de colonne",
"invalid-wikitext": "Aucun tableau n'a pu être extrait. Êtes-vous sûr·e que c'est un wiki-tableau valide ?", "invalid-wikitext": "Aucun tableau n'a pu être extrait. Êtes-vous sûr·e que c'est un wiki-tableau valide ?",
"json-parser": "Cliquer sur le premier nœud JSON { } correspondant à la première ligne à charger.", "json-parser": "Cliquer sur le premier nœud JSON { } correspondant à la première ligne à charger.",