From e2a22a699458dae9f46140ac8c4753d951e0ba4c Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Fri, 20 Oct 2017 15:32:58 +0100 Subject: [PATCH] Forbid pipe characters in URL references to ease parsing. This is a temporary fix before we do full Wikitext parsing inside references (this needs a change upstream). See https://github.com/sweble/sweble-wikitext/issues/67 . --- .../refine/importers/WikitextImporter.java | 2 +- .../importers/WikitextImporterTests.java | 29 +++++++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/main/src/com/google/refine/importers/WikitextImporter.java b/main/src/com/google/refine/importers/WikitextImporter.java index 1106256b3..9927df528 100644 --- a/main/src/com/google/refine/importers/WikitextImporter.java +++ b/main/src/com/google/refine/importers/WikitextImporter.java @@ -156,7 +156,7 @@ public class WikitextImporter extends TabularImportingParserBase { private int spanningCellIdx; private List internalLinksInCell; - private final Pattern urlPattern = Pattern.compile("\\b(https?|ftp)://[-a-zA-Z0-9+&@#/%?=~_|!:,.;]*[-a-zA-Z0-9+&@#/%=~_|]", + private final Pattern urlPattern = Pattern.compile("\\b(https?|ftp)://[-a-zA-Z0-9+&@#/%?=~_!:,.;]*[-a-zA-Z0-9+&@#/%=~_]", Pattern.CASE_INSENSITIVE); public WikitextTableVisitor(boolean blankSpanningCells, boolean includeRawTemplates) { diff --git a/main/tests/server/src/com/google/refine/tests/importers/WikitextImporterTests.java b/main/tests/server/src/com/google/refine/tests/importers/WikitextImporterTests.java index ec0173189..1686e89dc 100644 --- a/main/tests/server/src/com/google/refine/tests/importers/WikitextImporterTests.java +++ b/main/tests/server/src/com/google/refine/tests/importers/WikitextImporterTests.java @@ -224,6 +224,35 @@ public class WikitextImporterTests extends ImporterTest { Assert.assertEquals(project.rows.get(1).cells.get(4).value, "http://gnu.org"); Assert.assertEquals(project.rows.get(1).cells.get(2).value, "http://microsoft.com/"); } + + 
@Test
+    public void readTableWithReferencesTemplates() {
+        // Citation templates wrapped in named <ref> elements; table inspired from https://www.mediawiki.org/wiki/Help:Tables
+        String input = "{|\n"
+            +"! price\n"
+            +"! fruit\n"
+            +"! merchant\n"
+            +"|-\n"
+            +"| a || b <ref name=\"myref\">{{cite web|url=http://gnu.org|accessdate=2017-08-30}}</ref> || c <ref name=\"ms\"> or {{cite journal|url=http://microsoft.com/|title=BLah}} </ref>\n"
+            +"|-\n"
+            +"| d || e <ref name=\"ms\"/>|| f <ref name=\"myref\" />\n" // row 1 reuses both references by name, swapped
+            +"|-\n"
+            +"|}\n";
+        // The '|' that follows each url= parameter must not be captured as part of the extracted URL.
+        try {
+            prepareOptions(-1, true, true, null);
+            parse(input);
+        } catch (Exception e) {
+            Assert.fail("Parsing failed", e);
+        }
+        Assert.assertEquals(project.columnModel.columns.size(), 5); // 3 headers + 2 reference URL columns
+        Assert.assertEquals(project.rows.get(0).cells.get(1).value, "b"); // cell text without the reference
+        Assert.assertEquals(project.rows.get(0).cells.get(2).value, "http://gnu.org"); // URL stops at the template pipe
+        Assert.assertEquals(project.rows.get(0).cells.get(4).value, "http://microsoft.com/");
+        Assert.assertEquals(project.rows.get(1).cells.get(4).value, "http://gnu.org"); // resolved through name="myref"
+        Assert.assertEquals(project.rows.get(1).cells.get(2).value, "http://microsoft.com/"); // resolved through name="ms"
+    }
+
     //--helpers--

     private void parse(String wikitext) {