diff --git a/main/src/com/google/refine/importers/TextFormatGuesser.java b/main/src/com/google/refine/importers/TextFormatGuesser.java index d607340df..a5c8f072b 100644 --- a/main/src/com/google/refine/importers/TextFormatGuesser.java +++ b/main/src/com/google/refine/importers/TextFormatGuesser.java @@ -74,7 +74,7 @@ public class TextFormatGuesser implements FormatGuesser { String line; while ((line = reader.readLine()) != null && controls < CONTROLS_THRESHOLD) { line = line.trim(); - controls += CharMatcher.javaIsoControl().countIn(line); + controls += CharMatcher.javaIsoControl().and(CharMatcher.whitespace().negate()).countIn(line); openBraces += line.chars().filter(ch -> ch == '{').count(); closeBraces += StringUtils.countMatches(line, "}"); openAngleBrackets += StringUtils.countMatches(line, "<"); diff --git a/main/tests/server/src/com/google/refine/importers/TextFormatGuesserTests.java b/main/tests/server/src/com/google/refine/importers/TextFormatGuesserTests.java index a159bb9e6..adf3ff4c4 100644 --- a/main/tests/server/src/com/google/refine/importers/TextFormatGuesserTests.java +++ b/main/tests/server/src/com/google/refine/importers/TextFormatGuesserTests.java @@ -83,6 +83,7 @@ public class TextFormatGuesserTests extends ImporterTest { @Test public void xlsTextGuessTest() throws FileNotFoundException, IOException { + // Test an XLSX file without the correct file extension String dir = ClassLoader.getSystemResource("Colorado-Municipalities-small-xlsx.gz").getPath(); InputStream is = new GZIPInputStream(new FileInputStream(new File(dir))); File tmp = File.createTempFile("openrefinetests-textguesser", ""); @@ -96,6 +97,11 @@ public class TextFormatGuesserTests extends ImporterTest { extensionGuesserTests("csv", "text/line-based"); } + @Test + public void tsvGuesserTest() { + extensionGuesserTests("tsv", "text/line-based"); + } + @Test(enabled=false) // FIXME: Our JSON guesser doesn't work on small files public void jsonGuesserTest() { extensionGuesserTests("json", "text/json"); @@ -112,8 +118,7 @@ public class TextFormatGuesserTests extends ImporterTest { File testDataDir = new File(dir); for (String testFile : testDataDir.list(new PatternFilenameFilter(".+\\." + extension))) { String format = guesser.guess(new File(dir, testFile), "UTF-8", "text"); - logger.info(format + " " + testFile); - assertEquals(format, expectedFormat); + assertEquals(format, expectedFormat, "Format guess failed for " + testFile); } }