Fixed XML parsing error caused by whitespace in front of the <?xml ...?> declaration.

git-svn-id: http://google-refine.googlecode.com/svn/trunk@2246 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
David Huynh 2011-09-19 09:06:36 +00:00
parent 66cf0b6596
commit db3bbb5c86

View File

@ -37,6 +37,7 @@ import java.io.File;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.PushbackInputStream;
import java.util.List; import java.util.List;
import javax.servlet.ServletException; import javax.servlet.ServletException;
@ -81,6 +82,7 @@ public class XmlImporter extends TreeImportingParserBase {
JSONObject firstFileRecord = fileRecords.get(0); JSONObject firstFileRecord = fileRecords.get(0);
File file = ImportingUtilities.getFile(job, firstFileRecord); File file = ImportingUtilities.getFile(job, firstFileRecord);
InputStream is = new FileInputStream(file); InputStream is = new FileInputStream(file);
try { try {
XMLStreamReader parser = createXMLStreamReader(is); XMLStreamReader parser = createXMLStreamReader(is);
PreviewParsingState state = new PreviewParsingState(); PreviewParsingState state = new PreviewParsingState();
@ -200,13 +202,15 @@ public class XmlImporter extends TreeImportingParserBase {
new XmlParser(inputStream), rootColumnGroup, limit, options, exceptions); new XmlParser(inputStream), rootColumnGroup, limit, options, exceptions);
} catch (XMLStreamException e) { } catch (XMLStreamException e) {
exceptions.add(e); exceptions.add(e);
} catch (IOException e) {
exceptions.add(e);
} }
} }
static public class XmlParser implements TreeReader { static public class XmlParser implements TreeReader {
final protected XMLStreamReader parser; final protected XMLStreamReader parser;
public XmlParser(InputStream inputStream) throws XMLStreamException { public XmlParser(InputStream inputStream) throws XMLStreamException, IOException {
parser = createXMLStreamReader(inputStream); parser = createXMLStreamReader(inputStream);
} }
@ -305,12 +309,23 @@ public class XmlImporter extends TreeImportingParserBase {
} }
} }
final static private XMLStreamReader createXMLStreamReader(InputStream inputStream) throws XMLStreamException { final static private XMLStreamReader createXMLStreamReader(InputStream inputStream) throws XMLStreamException, IOException {
XMLInputFactory factory = XMLInputFactory.newInstance(); XMLInputFactory factory = XMLInputFactory.newInstance();
factory.setProperty(XMLInputFactory.IS_COALESCING, true); factory.setProperty(XMLInputFactory.IS_COALESCING, true);
factory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, true); factory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, true);
return factory.createXMLStreamReader(inputStream); return factory.createXMLStreamReader(wrapPrefixSpaceRemovingInputStream(inputStream));
} }
/**
 * Wraps the given stream so that whitespace preceding the document is
 * dropped before an XML parser sees it. An XML declaration
 * ({@code <?xml ...?>}) is only accepted at the very start of the input,
 * so stray leading whitespace would otherwise make parsing fail.
 * <p>
 * A UTF-8 byte order mark is not whitespace, so a document that starts
 * with a BOM followed by whitespace needs separate handling: the
 * whitespace after the BOM is dropped as well, and the BOM itself is
 * handed back unchanged at the head of the returned stream.
 * <p>
 * Whitespace is decided byte by byte with
 * {@link Character#isWhitespace(int)}; the stream is not decoded.
 *
 * @param inputStream the raw document bytes; bytes are consumed from it
 * @return a stream positioned at the first non-whitespace byte (or at the
 *         BOM, when one is present), or at end of stream if the input held
 *         nothing but whitespace
 * @throws IOException if reading from {@code inputStream} fails
 */
final static private InputStream wrapPrefixSpaceRemovingInputStream(InputStream inputStream) throws IOException {
    // Byte sequence of the UTF-8 byte order mark.
    final int[] utf8Bom = { 0xEF, 0xBB, 0xBF };

    // Pushback capacity: the whole BOM plus the single non-whitespace byte
    // that skipLeadingWhitespace pushes back after the BOM.
    PushbackInputStream pis = new PushbackInputStream(inputStream, utf8Bom.length + 1);

    skipLeadingWhitespace(pis);

    // Look at up to three bytes to see whether the content starts with a BOM.
    // Everything read here is remembered so it can be pushed back verbatim.
    byte[] head = new byte[utf8Bom.length];
    int headLength = 0;
    int matched = 0;
    while (matched < utf8Bom.length) {
        int b = pis.read();
        if (b < 0) {
            break;
        }
        head[headLength++] = (byte) b;
        if (b != utf8Bom[matched]) {
            break;
        }
        matched++;
    }

    if (matched == utf8Bom.length) {
        // Whitespace between the BOM and the document is just as fatal to
        // the XML declaration as whitespace at the very start.
        skipLeadingWhitespace(pis);
    }

    // Restore the inspected bytes (a full BOM, or whatever non-BOM bytes
    // were read) in front of anything already pushed back.
    pis.unread(head, 0, headLength);
    return pis;
}

/**
 * Consumes bytes from {@code pis} until a non-whitespace byte or end of
 * stream is reached; the non-whitespace byte, if any, is pushed back.
 */
final static private void skipLeadingWhitespace(PushbackInputStream pis) throws IOException {
    int b;
    while ((b = pis.read()) >= 0) {
        if (!Character.isWhitespace(b)) {
            pis.unread(b);
            break;
        }
    }
}
} }