Merge pull request #1275 from OpenRefine/wikitext-url-fix
Forbid pipe characters in URL references to ease parsing.
This commit is contained in:
commit
21f4d62474
@ -156,7 +156,7 @@ public class WikitextImporter extends TabularImportingParserBase {
|
||||
private int spanningCellIdx;
|
||||
private List<String> internalLinksInCell;
|
||||
|
||||
private final Pattern urlPattern = Pattern.compile("\\b(https?|ftp)://[-a-zA-Z0-9+&@#/%?=~_|!:,.;]*[-a-zA-Z0-9+&@#/%=~_|]",
|
||||
// Matches http, https and ftp URLs, case-insensitively. Neither character class
// after the scheme contains the pipe character, so a match stops at the first '|'.
// The final class also omits '?', '!', ':', ',', '.' and ';', so a match never ends
// on one of those characters.
private final Pattern urlPattern = Pattern.compile("\\b(https?|ftp)://[-a-zA-Z0-9+&@#/%?=~_!:,.;]*[-a-zA-Z0-9+&@#/%=~_]",
|
||||
Pattern.CASE_INSENSITIVE);
|
||||
|
||||
public WikitextTableVisitor(boolean blankSpanningCells, boolean includeRawTemplates) {
|
||||
|
@ -224,6 +224,35 @@ public class WikitextImporterTests extends ImporterTest {
|
||||
Assert.assertEquals(project.rows.get(1).cells.get(4).value, "http://gnu.org");
|
||||
Assert.assertEquals(project.rows.get(1).cells.get(2).value, "http://microsoft.com/");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void readTableWithReferencesTemplates() {
|
||||
// inspired from https://www.mediawiki.org/wiki/Help:Tables
|
||||
String input = "{|\n"
|
||||
+"! price\n"
|
||||
+"! fruit\n"
|
||||
+"! merchant\n"
|
||||
+"|-\n"
|
||||
+"| a || b <ref name=\"myref\">{{cite web|url=http://gnu.org|accessdate=2017-08-30}}</ref> || c <ref name=\"ms\"> or {{cite journal|url=http://microsoft.com/|title=BLah}} </ref>\n"
|
||||
+"|-\n"
|
||||
+"| d || e <ref name=\"ms\"/>|| f <ref name=\"myref\" />\n"
|
||||
+"|-\n"
|
||||
+"|}\n";
|
||||
|
||||
try {
|
||||
prepareOptions(-1, true, true, null);
|
||||
parse(input);
|
||||
} catch (Exception e) {
|
||||
Assert.fail("Parsing failed", e);
|
||||
}
|
||||
Assert.assertEquals(project.columnModel.columns.size(), 5);
|
||||
Assert.assertEquals(project.rows.get(0).cells.get(1).value, "b");
|
||||
Assert.assertEquals(project.rows.get(0).cells.get(2).value, "http://gnu.org");
|
||||
Assert.assertEquals(project.rows.get(0).cells.get(4).value, "http://microsoft.com/");
|
||||
Assert.assertEquals(project.rows.get(1).cells.get(4).value, "http://gnu.org");
|
||||
Assert.assertEquals(project.rows.get(1).cells.get(2).value, "http://microsoft.com/");
|
||||
}
|
||||
|
||||
//--helpers--
|
||||
|
||||
private void parse(String wikitext) {
|
||||
|
Loading…
Reference in New Issue
Block a user