Stricter detection of JSON and XML formats on import, by checking the first non-whitespace character.
git-svn-id: http://google-refine.googlecode.com/svn/trunk@2266 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
parent
2deae9d785
commit
7935dfd60e
@ -25,6 +25,9 @@ public class TextFormatGuesser implements FormatGuesser {
|
|||||||
int openAngleBrackets = 0;
|
int openAngleBrackets = 0;
|
||||||
int closeAngleBrackets = 0;
|
int closeAngleBrackets = 0;
|
||||||
|
|
||||||
|
char firstChar = ' ';
|
||||||
|
boolean foundFirstChar = false;
|
||||||
|
|
||||||
char[] chars = new char[4096];
|
char[] chars = new char[4096];
|
||||||
int c;
|
int c;
|
||||||
while (totalBytes < 64 * 1024 && (c = reader.read(chars)) > 0) {
|
while (totalBytes < 64 * 1024 && (c = reader.read(chars)) > 0) {
|
||||||
@ -34,16 +37,26 @@ public class TextFormatGuesser implements FormatGuesser {
|
|||||||
openAngleBrackets += countSubstrings(chunk, "<");
|
openAngleBrackets += countSubstrings(chunk, "<");
|
||||||
closeAngleBrackets += countSubstrings(chunk, ">");
|
closeAngleBrackets += countSubstrings(chunk, ">");
|
||||||
|
|
||||||
|
if (!foundFirstChar) {
|
||||||
|
chunk = chunk.trim();
|
||||||
|
if (chunk.length() > 0) {
|
||||||
|
firstChar = chunk.charAt(0);
|
||||||
|
foundFirstChar = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
totalBytes += c;
|
totalBytes += c;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (openBraces >= 5 && closeBraces >= 5) {
|
if (foundFirstChar) {
|
||||||
return "text/json";
|
if ((firstChar == '{' || firstChar == '[') &&
|
||||||
} else if (openAngleBrackets >= 5 && closeAngleBrackets >= 5) {
|
openBraces >= 5 && closeBraces >= 5) {
|
||||||
return "text/xml";
|
return "text/json";
|
||||||
} else {
|
} else if (firstChar == '<' &&
|
||||||
return "text/line-based";
|
openAngleBrackets >= 5 && closeAngleBrackets >= 5) {
|
||||||
|
return "text/xml";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
return "text/line-based";
|
||||||
} finally {
|
} finally {
|
||||||
is.close();
|
is.close();
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user