XML importer should stop trying to skip over initial non-XML content after a fixed number of characters (100).

git-svn-id: http://google-refine.googlecode.com/svn/trunk@2336 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
David Huynh 2011-10-18 15:25:31 +00:00
parent 9710521ef8
commit 223074bb25

View File

@ -317,11 +317,16 @@ public class XmlImporter extends TreeImportingParserBase {
return factory.createXMLStreamReader(wrapPrefixRemovingInputStream(inputStream));
}
final static private InputStream wrapPrefixRemovingInputStream(InputStream inputStream) throws IOException {
final static private InputStream wrapPrefixRemovingInputStream(InputStream inputStream)
throws XMLStreamException, IOException {
PushbackInputStream pis = new PushbackInputStream(inputStream);
int b;
while ((b = pis.read()) >= 0) {
if (b == '<') {
int count = 0;
while (count < 100 && (b = pis.read()) >= 0) {
if (++count > 100) {
throw new XMLStreamException(
"File starts with too much non-XML content to skip over");
} else if (b == '<') {
pis.unread(b);
break;
}