For XML files, skip not just leading whitespace but every byte that precedes the first '<'.

git-svn-id: http://google-refine.googlecode.com/svn/trunk@2313 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
David Huynh 2011-10-10 20:51:00 +00:00
parent 99830b2ea9
commit 1a14d82393

View File

@ -314,14 +314,14 @@ public class XmlImporter extends TreeImportingParserBase {
factory.setProperty(XMLInputFactory.IS_COALESCING, true); factory.setProperty(XMLInputFactory.IS_COALESCING, true);
factory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, true); factory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, true);
return factory.createXMLStreamReader(wrapPrefixSpaceRemovingInputStream(inputStream)); return factory.createXMLStreamReader(wrapPrefixRemovingInputStream(inputStream));
} }
final static private InputStream wrapPrefixSpaceRemovingInputStream(InputStream inputStream) throws IOException { final static private InputStream wrapPrefixRemovingInputStream(InputStream inputStream) throws IOException {
PushbackInputStream pis = new PushbackInputStream(inputStream); PushbackInputStream pis = new PushbackInputStream(inputStream);
int b; int b;
while ((b = pis.read()) >= 0) { while ((b = pis.read()) >= 0) {
if (!Character.isWhitespace(b)) { if (b == '<') {
pis.unread(b); pis.unread(b);
break; break;
} }