* Don't count TABs as control characters - fixes #3061
* Add TSV test. Replace info logging with an assert message
This commit is contained in:
parent
62ae8ae946
commit
fc21d58ed1
@ -74,7 +74,7 @@ public class TextFormatGuesser implements FormatGuesser {
|
|||||||
String line;
|
String line;
|
||||||
while ((line = reader.readLine()) != null && controls < CONTROLS_THRESHOLD) {
|
while ((line = reader.readLine()) != null && controls < CONTROLS_THRESHOLD) {
|
||||||
line = line.trim();
|
line = line.trim();
|
||||||
controls += CharMatcher.javaIsoControl().countIn(line);
|
controls += CharMatcher.javaIsoControl().and(CharMatcher.whitespace().negate()).countIn(line);
|
||||||
openBraces += line.chars().filter(ch -> ch == '{').count();
|
openBraces += line.chars().filter(ch -> ch == '{').count();
|
||||||
closeBraces += StringUtils.countMatches(line, "}");
|
closeBraces += StringUtils.countMatches(line, "}");
|
||||||
openAngleBrackets += StringUtils.countMatches(line, "<");
|
openAngleBrackets += StringUtils.countMatches(line, "<");
|
||||||
|
@ -83,6 +83,7 @@ public class TextFormatGuesserTests extends ImporterTest {
|
|||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void xlsTextGuessTest() throws FileNotFoundException, IOException {
|
public void xlsTextGuessTest() throws FileNotFoundException, IOException {
|
||||||
|
// Test an XLSX file without the correct file extension
|
||||||
String dir = ClassLoader.getSystemResource("Colorado-Municipalities-small-xlsx.gz").getPath();
|
String dir = ClassLoader.getSystemResource("Colorado-Municipalities-small-xlsx.gz").getPath();
|
||||||
InputStream is = new GZIPInputStream(new FileInputStream(new File(dir)));
|
InputStream is = new GZIPInputStream(new FileInputStream(new File(dir)));
|
||||||
File tmp = File.createTempFile("openrefinetests-textguesser", "");
|
File tmp = File.createTempFile("openrefinetests-textguesser", "");
|
||||||
@ -96,6 +97,11 @@ public class TextFormatGuesserTests extends ImporterTest {
|
|||||||
extensionGuesserTests("csv", "text/line-based");
|
extensionGuesserTests("csv", "text/line-based");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void tsvGuesserTest() {
|
||||||
|
extensionGuesserTests("tsv", "text/line-based");
|
||||||
|
}
|
||||||
|
|
||||||
@Test(enabled=false) // FIXME: Our JSON guesser doesn't work on small files
|
@Test(enabled=false) // FIXME: Our JSON guesser doesn't work on small files
|
||||||
public void jsonGuesserTest() {
|
public void jsonGuesserTest() {
|
||||||
extensionGuesserTests("json", "text/json");
|
extensionGuesserTests("json", "text/json");
|
||||||
@ -112,8 +118,7 @@ public class TextFormatGuesserTests extends ImporterTest {
|
|||||||
File testDataDir = new File(dir);
|
File testDataDir = new File(dir);
|
||||||
for (String testFile : testDataDir.list(new PatternFilenameFilter(".+\\." + extension))) {
|
for (String testFile : testDataDir.list(new PatternFilenameFilter(".+\\." + extension))) {
|
||||||
String format = guesser.guess(new File(dir, testFile), "UTF-8", "text");
|
String format = guesser.guess(new File(dir, testFile), "UTF-8", "text");
|
||||||
logger.info(format + " " + testFile);
|
assertEquals(format, expectedFormat, "Format guess failed for " + testFile);
|
||||||
assertEquals(format, expectedFormat);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user