Fix the text format guesser so it doesn't inappropriately guess WikiText (#2924)
* Fix text guesser so it doesn't guess wikitext
  Fixes #2850
  - Add simple magic detector for zip & gzip files to keep it from attempting to guess binary files
  - Add a counter for C0 controls for the same reason
  - Tighten wikitable counters to require marker at beginning of the line, per the specification
  - Refactor to use Apache Commons instead of private counting methods
  - Add tests for most TextGuesser formats
* Remove misplaced duplicate test data file
* Fix LGTM warning + minor cleanups
* Use BoundedInputStream to prevent runaway lines
This commit is contained in:
parent
fb9c8e5fef
commit
a3fab26cca
@ -26,66 +26,92 @@
|
|||||||
******************************************************************************/
|
******************************************************************************/
|
||||||
package com.google.refine.importers;
|
package com.google.refine.importers;
|
||||||
|
|
||||||
|
import java.io.BufferedReader;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileInputStream;
|
import java.io.FileInputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.io.InputStreamReader;
|
import java.io.InputStreamReader;
|
||||||
import java.io.Reader;
|
|
||||||
import java.io.UnsupportedEncodingException;
|
import java.io.UnsupportedEncodingException;
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
import org.apache.commons.io.input.BoundedInputStream;
|
||||||
|
import org.apache.commons.lang.StringUtils;
|
||||||
|
|
||||||
|
import com.google.common.base.CharMatcher;
|
||||||
import com.google.refine.importing.FormatGuesser;
|
import com.google.refine.importing.FormatGuesser;
|
||||||
|
|
||||||
public class TextFormatGuesser implements FormatGuesser {
|
public class TextFormatGuesser implements FormatGuesser {
|
||||||
|
|
||||||
|
private static final int XML_BRACKETS_THRESHOLD = 5;
|
||||||
|
private static final int JSON_BRACES_THRESHOLD = 5;
|
||||||
|
private static final long CONTROLS_THRESHOLD = 10;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String guess(File file, String encoding, String seedFormat) {
|
public String guess(File file, String encoding, String seedFormat) {
|
||||||
try {
|
try(InputStream fis = new FileInputStream(file)) {
|
||||||
InputStream is = new FileInputStream(file);
|
if (isCompressed(file)) {
|
||||||
Reader reader = encoding != null ? new InputStreamReader(is, encoding) : new InputStreamReader(is);
|
return "binary";
|
||||||
|
};
|
||||||
|
|
||||||
try {
|
InputStream bis = new BoundedInputStream(fis, 64 * 1024); // TODO: This seems like a lot
|
||||||
int totalBytes = 0;
|
try (BufferedReader reader = new BufferedReader(
|
||||||
int openBraces = 0;
|
encoding != null ? new InputStreamReader(bis, encoding) : new InputStreamReader(bis))) {
|
||||||
|
int totalChars = 0;
|
||||||
|
long openBraces = 0;
|
||||||
int closeBraces = 0;
|
int closeBraces = 0;
|
||||||
int openAngleBrackets = 0;
|
int openAngleBrackets = 0;
|
||||||
int closeAngleBrackets = 0;
|
int closeAngleBrackets = 0;
|
||||||
int wikiTableBegin = 0;
|
int wikiTableBegin = 0;
|
||||||
|
int wikiTableEnd = 0;
|
||||||
int wikiTableRow = 0;
|
int wikiTableRow = 0;
|
||||||
int trailingPeriods = 0;
|
int trailingPeriods = 0;
|
||||||
|
int controls = 0;
|
||||||
|
|
||||||
char firstChar = ' ';
|
char firstChar = ' ';
|
||||||
boolean foundFirstChar = false;
|
boolean foundFirstChar = false;
|
||||||
|
|
||||||
char[] chars = new char[4096];
|
String line;
|
||||||
int c;
|
while ((line = reader.readLine()) != null && controls < CONTROLS_THRESHOLD) {
|
||||||
while (totalBytes < 64 * 1024 && (c = reader.read(chars)) > 0) {
|
line = line.trim();
|
||||||
String chunk = String.valueOf(chars, 0, c);
|
controls += CharMatcher.javaIsoControl().countIn(line);
|
||||||
openBraces += countSubstrings(chunk, "{");
|
openBraces += line.chars().filter(ch -> ch == '{').count();
|
||||||
closeBraces += countSubstrings(chunk, "}");
|
closeBraces += StringUtils.countMatches(line, "}");
|
||||||
openAngleBrackets += countSubstrings(chunk, "<");
|
openAngleBrackets += StringUtils.countMatches(line, "<");
|
||||||
closeAngleBrackets += countSubstrings(chunk, ">");
|
closeAngleBrackets += StringUtils.countMatches(line, ">");
|
||||||
wikiTableBegin += countSubstrings(chunk, "{|");
|
if (line.startsWith("{|")) {
|
||||||
wikiTableRow += countSubstrings(chunk, "|-");
|
wikiTableBegin++;
|
||||||
trailingPeriods += countLineSuffix(chunk, ".");
|
} else if (line.startsWith("|}")) {
|
||||||
|
wikiTableEnd++;
|
||||||
|
} else if (line.startsWith("|-")) {
|
||||||
|
wikiTableRow++;
|
||||||
|
}
|
||||||
|
if (line.endsWith(".")) {
|
||||||
|
trailingPeriods++;
|
||||||
|
}
|
||||||
|
|
||||||
if (!foundFirstChar) {
|
if (!foundFirstChar) {
|
||||||
chunk = chunk.trim();
|
if (line.length() > 0) {
|
||||||
if (chunk.length() > 0) {
|
firstChar = line.charAt(0);
|
||||||
firstChar = chunk.charAt(0);
|
|
||||||
foundFirstChar = true;
|
foundFirstChar = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
totalBytes += c;
|
totalChars += line.length();
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: Make thresholds proportional to the amount of data read?
|
||||||
|
if (controls >= CONTROLS_THRESHOLD) {
|
||||||
|
return "binary";
|
||||||
}
|
}
|
||||||
|
|
||||||
if (foundFirstChar) {
|
if (foundFirstChar) {
|
||||||
if (wikiTableBegin >= 1 && wikiTableRow >= 2) {
|
if (wikiTableBegin >= 1 && (wikiTableBegin - wikiTableEnd <= 1) && wikiTableRow >= 2) {
|
||||||
return "text/wiki";
|
return "text/wiki";
|
||||||
} if ((firstChar == '{' || firstChar == '[') &&
|
} if ((firstChar == '{' || firstChar == '[') &&
|
||||||
openBraces >= 5 && closeBraces >= 5) {
|
openBraces >= JSON_BRACES_THRESHOLD && closeBraces >= JSON_BRACES_THRESHOLD) {
|
||||||
return "text/json";
|
return "text/json";
|
||||||
} else if (openAngleBrackets >= 5 && closeAngleBrackets >= 5) {
|
} else if (openAngleBrackets >= XML_BRACKETS_THRESHOLD
|
||||||
|
&& closeAngleBrackets >= XML_BRACKETS_THRESHOLD) {
|
||||||
if (trailingPeriods > 0) {
|
if (trailingPeriods > 0) {
|
||||||
return "text/rdf/n3";
|
return "text/rdf/n3";
|
||||||
} else if (firstChar == '<') {
|
} else if (firstChar == '<') {
|
||||||
@ -94,9 +120,6 @@ public class TextFormatGuesser implements FormatGuesser {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
return "text/line-based";
|
return "text/line-based";
|
||||||
} finally {
|
|
||||||
reader.close();
|
|
||||||
is.close();
|
|
||||||
}
|
}
|
||||||
} catch (UnsupportedEncodingException e) {
|
} catch (UnsupportedEncodingException e) {
|
||||||
e.printStackTrace();
|
e.printStackTrace();
|
||||||
@ -106,45 +129,19 @@ public class TextFormatGuesser implements FormatGuesser {
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
static public int countSubstrings(String s, String sub) {
|
private boolean isCompressed(File file) throws IOException {
|
||||||
int count = 0;
|
// Check for common compressed file types to protect ourselves from binary data
|
||||||
int from = 0;
|
try(InputStream is = new FileInputStream(file)) {
|
||||||
while (from < s.length()) {
|
byte[] magic = new byte[4];
|
||||||
int i = s.indexOf(sub, from);
|
int count = is.read(magic);
|
||||||
if (i < 0) {
|
if (count == 4 && Arrays.equals(magic, new byte[] {0x50,0x4B, 0x03, 0x04}) || // zip
|
||||||
break;
|
Arrays.equals(magic, new byte[] {0x50,0x4B, 0x07, 0x08}) ||
|
||||||
} else {
|
(magic[0] == 0x1F && magic[1] == (byte)0x8B) // gzip
|
||||||
from = i + sub.length();
|
) {
|
||||||
count++;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return count;
|
return false;
|
||||||
}
|
|
||||||
|
|
||||||
static public int countLineSuffix(String s, String suffix) {
|
|
||||||
int count = 0;
|
|
||||||
int from = 0;
|
|
||||||
while (from < s.length()) {
|
|
||||||
int lineEnd = s.indexOf('\n', from);
|
|
||||||
if (lineEnd < 0) {
|
|
||||||
break;
|
|
||||||
} else {
|
|
||||||
int i = lineEnd - 1;
|
|
||||||
while (i >= from + suffix.length() - 1) {
|
|
||||||
if (Character.isWhitespace(s.charAt(i))) {
|
|
||||||
i--;
|
|
||||||
} else {
|
|
||||||
String suffix2 = s.subSequence(i - suffix.length() + 1, i + 1).toString();
|
|
||||||
if (suffix2.equals(suffix)) {
|
|
||||||
count++;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
from = lineEnd + 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return count;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
BIN
main/tests/data/Colorado-Municipalities-small-xlsx.gz
Normal file
BIN
main/tests/data/Colorado-Municipalities-small-xlsx.gz
Normal file
Binary file not shown.
@ -0,0 +1,269 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright 2020 OpenRefine committers
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above
|
||||||
|
copyright notice, this list of conditions and the following disclaimer
|
||||||
|
in the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Google Inc. nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
package com.google.refine.importers;
|
||||||
|
|
||||||
|
|
||||||
|
import static org.testng.Assert.assertEquals;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileInputStream;
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
|
import java.io.FileOutputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.io.OutputStreamWriter;
|
||||||
|
import java.nio.charset.Charset;
|
||||||
|
import java.util.zip.GZIPInputStream;
|
||||||
|
|
||||||
|
import org.apache.commons.io.FileUtils;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
import org.testng.annotations.AfterMethod;
|
||||||
|
import org.testng.annotations.BeforeMethod;
|
||||||
|
import org.testng.annotations.BeforeTest;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import com.google.common.io.PatternFilenameFilter;
|
||||||
|
import com.google.refine.importing.FormatGuesser;
|
||||||
|
|
||||||
|
|
||||||
|
public class TextFormatGuesserTests extends ImporterTest {
|
||||||
|
|
||||||
|
FormatGuesser guesser;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
@BeforeTest
|
||||||
|
public void init() {
|
||||||
|
logger = LoggerFactory.getLogger(this.getClass());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
@BeforeMethod
|
||||||
|
public void setUp() {
|
||||||
|
super.setUp();
|
||||||
|
guesser = new TextFormatGuesser();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
@AfterMethod
|
||||||
|
public void tearDown(){
|
||||||
|
guesser = null;
|
||||||
|
super.tearDown();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void xlsTextGuessTest() throws FileNotFoundException, IOException {
|
||||||
|
String dir = ClassLoader.getSystemResource("Colorado-Municipalities-small-xlsx.gz").getPath();
|
||||||
|
InputStream is = new GZIPInputStream(new FileInputStream(new File(dir)));
|
||||||
|
File tmp = File.createTempFile("openrefinetests-textguesser", "");
|
||||||
|
FileUtils.copyInputStreamToFile(is, tmp);
|
||||||
|
String format = guesser.guess(tmp, "UTF-8", "text");
|
||||||
|
assertEquals(format, "binary");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void csvGuesserTest() {
|
||||||
|
extensionGuesserTests("csv", "text/line-based");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(enabled=false) // FIXME: Our JSON guesser doesn't work on small files
|
||||||
|
public void jsonGuesserTest() {
|
||||||
|
extensionGuesserTests("json", "text/json");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void xmlGuesserTest() {
|
||||||
|
extensionGuesserTests("xml", "text/xml");
|
||||||
|
}
|
||||||
|
|
||||||
|
private void extensionGuesserTests(String extension, String expectedFormat) {
|
||||||
|
String dir = ClassLoader.getSystemResource("food.csv").getPath();
|
||||||
|
dir = dir.substring(0, dir.lastIndexOf('/'));
|
||||||
|
File testDataDir = new File(dir);
|
||||||
|
for (String testFile : testDataDir.list(new PatternFilenameFilter(".+\\." + extension))) {
|
||||||
|
String format = guesser.guess(new File(dir, testFile), "UTF-8", "text");
|
||||||
|
logger.info(format + " " + testFile);
|
||||||
|
assertEquals(format, expectedFormat);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void guessWikiTable() throws IOException {
|
||||||
|
String input = "\n"
|
||||||
|
+ "{|\n"
|
||||||
|
+ "|-\n"
|
||||||
|
+ "| a || b<br/>2 || c \n"
|
||||||
|
+ "|-\n"
|
||||||
|
+ "| d || e || f<br>\n"
|
||||||
|
+ "|-\n"
|
||||||
|
+ "|}\n";
|
||||||
|
testWikiTableString(input);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void testWikiTableString(String input) throws IOException, FileNotFoundException {
|
||||||
|
File tmp = File.createTempFile("openrefinetests-textguesser", "");
|
||||||
|
OutputStreamWriter writer = new OutputStreamWriter(
|
||||||
|
new FileOutputStream(tmp),
|
||||||
|
Charset.forName("UTF-8").newEncoder()
|
||||||
|
);
|
||||||
|
writer.write(input);
|
||||||
|
writer.close();
|
||||||
|
String format = guesser.guess(tmp, "UTF-8", "text");
|
||||||
|
assertEquals(format, "text/wiki");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void guessTableWithMisplacedHeaders() throws FileNotFoundException, IOException {
|
||||||
|
String input = "\n"
|
||||||
|
+ "{|\n"
|
||||||
|
+ "|-\n"
|
||||||
|
+ "| a || b<br/>2 || c \n"
|
||||||
|
+ "|-\n"
|
||||||
|
+ "| d\n"
|
||||||
|
+ "! e\n"
|
||||||
|
+ "| f<br>\n"
|
||||||
|
+ "|-\n"
|
||||||
|
+ "|}\n";
|
||||||
|
testWikiTableString(input);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void guessTableWithLinks() throws FileNotFoundException, IOException {
|
||||||
|
|
||||||
|
// Data credits: Wikipedia contributors, https://de.wikipedia.org/w/index.php?title=Agenturen_der_Europäischen_Union&action=edit
|
||||||
|
String input = "\n"
|
||||||
|
+"{|\n"
|
||||||
|
+"|-\n"
|
||||||
|
+"| [[Europäisches Zentrum für die Förderung der Berufsbildung|Cedefop]] || Cedefop || http://www.cedefop.europa.eu/\n"
|
||||||
|
+"|-\n"
|
||||||
|
+"| [[Europäische Stiftung zur Verbesserung der Lebens- und Arbeitsbedingungen]] || EUROFOUND || [http://www.eurofound.europa.eu/]\n"
|
||||||
|
+"|-\n"
|
||||||
|
+"| [[Europäische Beobachtungsstelle für Drogen und Drogensucht]] || EMCDDA || [http://www.emcdda.europa.eu/ europa.eu]\n"
|
||||||
|
+"|-\n"
|
||||||
|
+"|}\n";
|
||||||
|
testWikiTableString(input);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void readStyledTableWithHeader() throws FileNotFoundException, IOException {
|
||||||
|
// Data credits: Wikipedia contributors, https://de.wikipedia.org/w/index.php?title=Agenturen_der_Europäischen_Union&action=edit
|
||||||
|
String input = "\n"
|
||||||
|
+"==Agenturen==\n"
|
||||||
|
+"{| class=\"wikitable sortable\"\n"
|
||||||
|
+"! style=\"text-align:left; width: 60em\" | Offizieller Name\n"
|
||||||
|
+"! style=\"text-align:left; width: 9em\" | Abkürzung\n"
|
||||||
|
+"! style=\"text-align:left; width: 6em\" | Website\n"
|
||||||
|
+"! style=\"text-align:left; width: 15em\" | Standort\n"
|
||||||
|
+"! style=\"text-align:left; width: 18em\" | Staat\n"
|
||||||
|
+"! style=\"text-align:left; width: 6em\" | Gründung\n"
|
||||||
|
+"! style=\"text-align:left; width: 50em\" | Anmerkungen\n"
|
||||||
|
+"|-\n"
|
||||||
|
+"| [[Europäisches Zentrum für die Förderung der Berufsbildung]] || '''Cedefop''' || [http://www.cedefop.europa.eu/] || [[Thessaloniki]] || {{Griechenland}} || 1975 ||\n"
|
||||||
|
+"|-\n"
|
||||||
|
+"| [[Europäische Stiftung zur Verbesserung der Lebens- und Arbeitsbedingungen]] || ''EUROFOUND'' || [http://www.eurofound.europa.eu/] || [[Dublin]] || {{Irland}} || 1975 ||\n"
|
||||||
|
+"|-\n"
|
||||||
|
+"| [[Europäische Beobachtungsstelle für Drogen und Drogensucht]] || EMCDDA || [http://www.emcdda.europa.eu/] || [[Lissabon]] || {{Portugal}} || 1993 ||\n"
|
||||||
|
+"|-\n"
|
||||||
|
+"|}\n";
|
||||||
|
testWikiTableString(input);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void guessTableWithSpanningCells() throws FileNotFoundException, IOException {
|
||||||
|
// inspired from https://www.mediawiki.org/wiki/Help:Tables
|
||||||
|
String input = "{| class=\"wikitable\"\n"
|
||||||
|
+"!colspan=\"6\"|Shopping List\n"
|
||||||
|
+"|-\n"
|
||||||
|
+"|Bread & Butter\n"
|
||||||
|
+"|Pie\n"
|
||||||
|
+"|Buns\n"
|
||||||
|
+"|rowspan=\"2\"|Danish\n"
|
||||||
|
+"|colspan=\"2\"|Croissant\n"
|
||||||
|
+"|-\n"
|
||||||
|
+"|Cheese\n"
|
||||||
|
+"|colspan=\"2\"|Ice cream\n"
|
||||||
|
+"|Butter\n"
|
||||||
|
+"|Yogurt\n"
|
||||||
|
+"|}\n";
|
||||||
|
testWikiTableString(input);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void guessTableWithReferences() throws FileNotFoundException, IOException {
|
||||||
|
// inspired from https://www.mediawiki.org/wiki/Help:Tables
|
||||||
|
String input = "{|\n"
|
||||||
|
+"! price\n"
|
||||||
|
+"! fruit\n"
|
||||||
|
+"! merchant\n"
|
||||||
|
+"|-\n"
|
||||||
|
+"| a || b <ref name=\"myref\"> See [http://gnu.org here]</ref> || c <ref name=\"ms\"> or http://microsoft.com/ </ref>\n"
|
||||||
|
+"|-\n"
|
||||||
|
+"| d || e <ref name=\"ms\"/>|| f <ref name=\"myref\" />\n"
|
||||||
|
+"|-\n"
|
||||||
|
+"|}\n";
|
||||||
|
testWikiTableString(input);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void guessTableWithReferencesTemplates() throws FileNotFoundException, IOException {
|
||||||
|
// inspired from https://www.mediawiki.org/wiki/Help:Tables
|
||||||
|
String input = "{|\n"
|
||||||
|
+"! price\n"
|
||||||
|
+"! fruit\n"
|
||||||
|
+"! merchant\n"
|
||||||
|
+"|-\n"
|
||||||
|
+"| a || b <ref name=\"myref\">{{cite web|url=http://gnu.org|accessdate=2017-08-30}}</ref> || c <ref name=\"ms\"> or {{cite journal|url=http://microsoft.com/|title=BLah}} </ref>\n"
|
||||||
|
+"|-\n"
|
||||||
|
+"| d || e <ref name=\"ms\"/>|| f <ref name=\"myref\" />\n"
|
||||||
|
+"|-\n"
|
||||||
|
+"|}\n";
|
||||||
|
testWikiTableString(input);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void guessTableWithTemplates() throws FileNotFoundException, IOException {
|
||||||
|
String input = "\n"
|
||||||
|
+ "{|\n"
|
||||||
|
+ "|-\n"
|
||||||
|
+ "| {{free to read}} || b || c \n"
|
||||||
|
+ "|-\n"
|
||||||
|
+ "| d\n"
|
||||||
|
+ "| [[File:My logo.svg|70px]]\n"
|
||||||
|
+ "| f<br>\n"
|
||||||
|
+ "|-\n"
|
||||||
|
+ "|}\n";
|
||||||
|
testWikiTableString(input);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
Binary file not shown.
Loading…
Reference in New Issue
Block a user