From 223074bb2539402358e80b0ab5f971b3ca13a30c Mon Sep 17 00:00:00 2001 From: David Huynh Date: Tue, 18 Oct 2011 15:25:31 +0000 Subject: [PATCH] Xml importer should stop trying to skip over initial non-xml content after some number of characters. git-svn-id: http://google-refine.googlecode.com/svn/trunk@2336 7d457c2a-affb-35e4-300a-418c747d4874 --- main/src/com/google/refine/importers/XmlImporter.java | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/main/src/com/google/refine/importers/XmlImporter.java b/main/src/com/google/refine/importers/XmlImporter.java index 6219bbc59..5900de6fb 100644 --- a/main/src/com/google/refine/importers/XmlImporter.java +++ b/main/src/com/google/refine/importers/XmlImporter.java @@ -317,11 +317,16 @@ public class XmlImporter extends TreeImportingParserBase { return factory.createXMLStreamReader(wrapPrefixRemovingInputStream(inputStream)); } - final static private InputStream wrapPrefixRemovingInputStream(InputStream inputStream) throws IOException { + final static private InputStream wrapPrefixRemovingInputStream(InputStream inputStream) + throws XMLStreamException, IOException { PushbackInputStream pis = new PushbackInputStream(inputStream); int b; - while ((b = pis.read()) >= 0) { - if (b == '<') { + int count = 0; + while (count < 100 && (b = pis.read()) >= 0) { + if (++count > 100) { + throw new XMLStreamException( + "File starts with too much non-XML content to skip over"); + } else if (b == '<') { pis.unread(b); break; }