Merge pull request #1275 from OpenRefine/wikitext-url-fix
Forbid pipe characters in URL references to ease parsing.
This commit is contained in:
commit
21f4d62474
@ -156,7 +156,7 @@ public class WikitextImporter extends TabularImportingParserBase {
|
|||||||
private int spanningCellIdx;
|
private int spanningCellIdx;
|
||||||
private List<String> internalLinksInCell;
|
private List<String> internalLinksInCell;
|
||||||
|
|
||||||
private final Pattern urlPattern = Pattern.compile("\\b(https?|ftp)://[-a-zA-Z0-9+&@#/%?=~_|!:,.;]*[-a-zA-Z0-9+&@#/%=~_|]",
|
private final Pattern urlPattern = Pattern.compile("\\b(https?|ftp)://[-a-zA-Z0-9+&@#/%?=~_!:,.;]*[-a-zA-Z0-9+&@#/%=~_]",
|
||||||
Pattern.CASE_INSENSITIVE);
|
Pattern.CASE_INSENSITIVE);
|
||||||
|
|
||||||
public WikitextTableVisitor(boolean blankSpanningCells, boolean includeRawTemplates) {
|
public WikitextTableVisitor(boolean blankSpanningCells, boolean includeRawTemplates) {
|
||||||
|
@ -224,6 +224,35 @@ public class WikitextImporterTests extends ImporterTest {
|
|||||||
Assert.assertEquals(project.rows.get(1).cells.get(4).value, "http://gnu.org");
|
Assert.assertEquals(project.rows.get(1).cells.get(4).value, "http://gnu.org");
|
||||||
Assert.assertEquals(project.rows.get(1).cells.get(2).value, "http://microsoft.com/");
|
Assert.assertEquals(project.rows.get(1).cells.get(2).value, "http://microsoft.com/");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void readTableWithReferencesTemplates() {
|
||||||
|
// inspired by https://www.mediawiki.org/wiki/Help:Tables
|
||||||
|
String input = "{|\n"
|
||||||
|
+"! price\n"
|
||||||
|
+"! fruit\n"
|
||||||
|
+"! merchant\n"
|
||||||
|
+"|-\n"
|
||||||
|
+"| a || b <ref name=\"myref\">{{cite web|url=http://gnu.org|accessdate=2017-08-30}}</ref> || c <ref name=\"ms\"> or {{cite journal|url=http://microsoft.com/|title=BLah}} </ref>\n"
|
||||||
|
+"|-\n"
|
||||||
|
+"| d || e <ref name=\"ms\"/>|| f <ref name=\"myref\" />\n"
|
||||||
|
+"|-\n"
|
||||||
|
+"|}\n";
|
||||||
|
|
||||||
|
try {
|
||||||
|
prepareOptions(-1, true, true, null);
|
||||||
|
parse(input);
|
||||||
|
} catch (Exception e) {
|
||||||
|
Assert.fail("Parsing failed", e);
|
||||||
|
}
|
||||||
|
Assert.assertEquals(project.columnModel.columns.size(), 5);
|
||||||
|
Assert.assertEquals(project.rows.get(0).cells.get(1).value, "b");
|
||||||
|
Assert.assertEquals(project.rows.get(0).cells.get(2).value, "http://gnu.org");
|
||||||
|
Assert.assertEquals(project.rows.get(0).cells.get(4).value, "http://microsoft.com/");
|
||||||
|
Assert.assertEquals(project.rows.get(1).cells.get(4).value, "http://gnu.org");
|
||||||
|
Assert.assertEquals(project.rows.get(1).cells.get(2).value, "http://microsoft.com/");
|
||||||
|
}
|
||||||
|
|
||||||
//--helpers--
|
//--helpers--
|
||||||
|
|
||||||
private void parse(String wikitext) {
|
private void parse(String wikitext) {
|
||||||
|
Loading…
Reference in New Issue
Block a user