diff --git a/main/src/com/google/refine/importers/TreeImporter.java b/main/src/com/google/refine/importers/TreeImporter.java index f69b9b73c..c8290cce8 100644 --- a/main/src/com/google/refine/importers/TreeImporter.java +++ b/main/src/com/google/refine/importers/TreeImporter.java @@ -16,7 +16,7 @@ import com.google.refine.model.Cell; import com.google.refine.model.Column; import com.google.refine.model.Project; -public class TreeImporter { +public abstract class TreeImporter { final static Logger logger = LoggerFactory.getLogger("TreeImporter"); /** @@ -90,7 +90,7 @@ public class TreeImporter { public List> rows = new LinkedList>(); } - static public void sortRecordElementCandidates(List list) { + static protected void sortRecordElementCandidates(List list) { Collections.sort(list, new Comparator() { public int compare(RecordElementCandidate o1, RecordElementCandidate o2) { return o2.count - o1.count; diff --git a/main/src/com/google/refine/importers/XmlImportUtilities.java b/main/src/com/google/refine/importers/XmlImportUtilities.java index 6c145c981..76ab83d6b 100644 --- a/main/src/com/google/refine/importers/XmlImportUtilities.java +++ b/main/src/com/google/refine/importers/XmlImportUtilities.java @@ -8,6 +8,7 @@ import java.util.List; import java.util.Map; import java.util.Map.Entry; +import javax.servlet.ServletException; import javax.xml.stream.XMLInputFactory; import javax.xml.stream.XMLStreamConstants; import javax.xml.stream.XMLStreamException; @@ -16,6 +17,7 @@ import javax.xml.stream.XMLStreamReader; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.google.refine.importers.parsers.TreeParser; import com.google.refine.model.Cell; import com.google.refine.model.Project; import com.google.refine.model.Row; @@ -23,13 +25,11 @@ import com.google.refine.model.Row; public class XmlImportUtilities extends TreeImporter { final static Logger logger = LoggerFactory.getLogger("XmlImporterUtilities"); - static public String[] detectPathFromTag(InputStream inputStream, String tag) { + static public String[] detectPathFromTag(TreeParser parser, String tag) { try { - XMLStreamReader parser = XMLInputFactory.newInstance().createXMLStreamReader(inputStream); - while (parser.hasNext()) { int eventType = parser.next(); - if (eventType == XMLStreamConstants.START_ELEMENT) { + if (eventType == XMLStreamConstants.START_ELEMENT) { //FIXME uses Xml List path = detectRecordElement(parser, tag); if (path != null) { String[] path2 = new String[path.size()]; @@ -47,13 +47,13 @@ public class XmlImportUtilities extends TreeImporter { return null; } - + /** * Looks for an element with the given tag name in the Xml being parsed, returning the path hierarchy to reach it. * * @param parser * @param tag - * The Xml element name (can be qualified) to search for + * The element name (can be qualified) to search for * @return * If the tag is found, an array of strings is returned. * If the tag is at the top level, the tag will be the only item in the array. @@ -61,33 +61,43 @@ public class XmlImportUtilities extends TreeImporter { * Null if the the tag is not found. * @throws XMLStreamException */ - static protected List detectRecordElement(XMLStreamReader parser, String tag) throws XMLStreamException { - if(parser.getEventType() == XMLStreamConstants.START_DOCUMENT) - parser.next(); - String localName = parser.getLocalName(); - String fullName = composeName(parser.getPrefix(), localName); - if (tag.equals(parser.getLocalName()) || tag.equals(fullName)) { - List path = new LinkedList(); - path.add(localName); + static protected List detectRecordElement(TreeParser parser, String tag) throws ServletException { + try{ + if(parser.getEventType() == XMLStreamConstants.START_DOCUMENT) //FIXME uses Xml, and is not generic + parser.next(); + + String localName = parser.getLocalName(); + String fullName = composeName(parser.getPrefix(), localName); + if (tag.equals(parser.getLocalName()) || tag.equals(fullName)) { + List path = new LinkedList(); + path.add(localName); - return path; - } + return path; + } - while (parser.hasNext()) { - int eventType = parser.next(); - if (eventType == XMLStreamConstants.END_ELEMENT) { - break; - } else if (eventType == XMLStreamConstants.START_ELEMENT) { - List path = detectRecordElement(parser, tag); - if (path != null) { - path.add(0, localName); - return path; + while (parser.hasNext()) { + int eventType = parser.next(); + if (eventType == XMLStreamConstants.END_ELEMENT) { //FIXME uses Xml, and is not generic + break; + } else if (eventType == XMLStreamConstants.START_ELEMENT) { //FIXME uses Xml, and is not generic + List path = detectRecordElement(parser, tag); + if (path != null) { + path.add(0, localName); + return path; + } } } + }catch(ServletException e){ + // silent + // e.printStackTrace(); } return null; } - + + static protected String composeName(String prefix, String localName) { + return prefix != null && prefix.length() > 0 ? (prefix + ":" + localName) : localName; + } + /** * Seeks for recurring XML element in an InputStream * which are likely candidates for being data records @@ -334,10 +344,6 @@ public class XmlImportUtilities extends TreeImporter { } } - static protected String composeName(String prefix, String localName) { - return prefix != null && prefix.length() > 0 ? (prefix + ":" + localName) : localName; - } - /** * * @param project diff --git a/main/src/com/google/refine/importers/XmlImporter.java b/main/src/com/google/refine/importers/XmlImporter.java index 7b23428ec..2117dc26a 100644 --- a/main/src/com/google/refine/importers/XmlImporter.java +++ b/main/src/com/google/refine/importers/XmlImporter.java @@ -11,6 +11,8 @@ import org.slf4j.LoggerFactory; import com.google.refine.ProjectMetadata; import com.google.refine.importers.TreeImporter.ImportColumnGroup; +import com.google.refine.importers.parsers.TreeParser; +import com.google.refine.importers.parsers.XmlParser; import com.google.refine.model.Project; public class XmlImporter implements StreamImporter { @@ -44,9 +46,19 @@ public class XmlImporter implements StreamImporter { } if (options.containsKey("importer-record-tag")) { - recordPath = XmlImportUtilities.detectPathFromTag( - new ByteArrayInputStream(buffer, 0, bytes_read), + InputStream iStream = new ByteArrayInputStream(buffer, 0, bytes_read); + + TreeParser parser = null; + try{ + parser = new XmlParser(iStream); + recordPath = XmlImportUtilities.detectPathFromTag( + parser, options.getProperty("importer-record-tag")); + }catch(Exception e){ + // silent + // e.printStackTrace(); + } + } else { recordPath = XmlImportUtilities.detectRecordElement( new ByteArrayInputStream(buffer, 0, bytes_read)); diff --git a/main/src/com/google/refine/importers/parsers/TreeParser.java b/main/src/com/google/refine/importers/parsers/TreeParser.java new file mode 100644 index 000000000..a13517d8f --- /dev/null +++ b/main/src/com/google/refine/importers/parsers/TreeParser.java @@ -0,0 +1,11 @@ +package com.google.refine.importers.parsers; + +import javax.servlet.ServletException; + +public interface TreeParser { + public int next() throws ServletException; + public int getEventType(); + public boolean hasNext() throws ServletException; + public String getLocalName(); + public String getPrefix(); +} diff --git a/main/src/com/google/refine/importers/parsers/XmlParser.java b/main/src/com/google/refine/importers/parsers/XmlParser.java new file mode 100644 index 000000000..d56ee571a --- /dev/null +++ b/main/src/com/google/refine/importers/parsers/XmlParser.java @@ -0,0 +1,54 @@ +package com.google.refine.importers.parsers; + +import java.io.InputStream; + +import javax.servlet.ServletException; +import javax.xml.stream.FactoryConfigurationError; +import javax.xml.stream.XMLInputFactory; +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamReader; + +public class XmlParser implements TreeParser{ + XMLStreamReader parser = null; + + public XmlParser(InputStream inputStream){ + try { + parser = XMLInputFactory.newInstance().createXMLStreamReader(inputStream); + } catch (XMLStreamException e) { + // silent + // e.printStackTrace(); + } catch (FactoryConfigurationError e) { + // silent + // e.printStackTrace(); + } + } + + public int next() throws ServletException{ + try { + return parser.next(); + } catch (XMLStreamException e) { + //TODO log and return + throw new ServletException(e.getMessage()); + } + } + + public int getEventType(){ + return parser.getEventType(); + } + + public boolean hasNext() throws ServletException{ + try { + return parser.hasNext(); + } catch (XMLStreamException e) { + throw new ServletException(e.getMessage()); + } + } + + public String getLocalName(){ + return parser.getLocalName(); + } + + public String getPrefix(){ + return parser.getPrefix(); + } +} diff --git a/main/tests/server/src/com/google/refine/tests/importers/XmlImportUtilitiesStub.java b/main/tests/server/src/com/google/refine/tests/importers/XmlImportUtilitiesStub.java index 5ad0bc824..b6525b798 100644 --- a/main/tests/server/src/com/google/refine/tests/importers/XmlImportUtilitiesStub.java +++ b/main/tests/server/src/com/google/refine/tests/importers/XmlImportUtilitiesStub.java @@ -2,15 +2,17 @@ package com.google.refine.tests.importers; import java.util.List; +import javax.servlet.ServletException; import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamReader; import com.google.refine.importers.XmlImportUtilities; +import com.google.refine.importers.parsers.TreeParser; import com.google.refine.model.Project; public class XmlImportUtilitiesStub extends XmlImportUtilities { - public List detectRecordElementWrapper(XMLStreamReader parser, String tag) throws XMLStreamException{ + public List detectRecordElementWrapper(TreeParser parser, String tag) throws ServletException{ return super.detectRecordElement(parser, tag); } diff --git a/main/tests/server/src/com/google/refine/tests/importers/XmlImportUtilitiesTests.java b/main/tests/server/src/com/google/refine/tests/importers/XmlImportUtilitiesTests.java index b4c6e94f1..eda8bd792 100644 --- a/main/tests/server/src/com/google/refine/tests/importers/XmlImportUtilitiesTests.java +++ b/main/tests/server/src/com/google/refine/tests/importers/XmlImportUtilitiesTests.java @@ -6,6 +6,7 @@ import java.io.UnsupportedEncodingException; import java.util.ArrayList; import java.util.List; +import javax.servlet.ServletException; import javax.xml.stream.FactoryConfigurationError; import javax.xml.stream.XMLInputFactory; import javax.xml.stream.XMLStreamException; @@ -21,6 +22,7 @@ import org.testng.annotations.Test; import com.google.refine.importers.TreeImporter.ImportColumn; import com.google.refine.importers.TreeImporter.ImportColumnGroup; import com.google.refine.importers.TreeImporter.ImportRecord; +import com.google.refine.importers.parsers.XmlParser; import com.google.refine.model.Project; import com.google.refine.model.Row; import com.google.refine.tests.RefineTest; @@ -67,8 +69,9 @@ public class XmlImportUtilitiesTests extends RefineTest { public void detectPathFromTagTest(){ loadXml("author1genre1"); String tag = "library"; - - String[] response = XmlImportUtilitiesStub.detectPathFromTag(inputStream, tag); + //createParser(); + XmlParser xmlParser = new XmlParser(inputStream); + String[] response = XmlImportUtilitiesStub.detectPathFromTag(xmlParser, tag); Assert.assertNotNull(response); Assert.assertEquals(response.length, 1); Assert.assertEquals(response[0], "library"); @@ -78,7 +81,10 @@ public class XmlImportUtilitiesTests extends RefineTest { public void detectPathFromTagWithNestedElement(){ loadXml("author1genre1"); String tag = "book"; - String[] response = XmlImportUtilitiesStub.detectPathFromTag(inputStream, tag); + + //createParser(); + XmlParser xmlParser = new XmlParser(inputStream); + String[] response = XmlImportUtilitiesStub.detectPathFromTag(xmlParser, tag); Assert.assertNotNull(response); Assert.assertEquals(response.length, 2); Assert.assertEquals(response[0], "library"); @@ -88,14 +94,15 @@ public class XmlImportUtilitiesTests extends RefineTest { @Test public void detectRecordElementTest(){ loadXml("author1genre1"); - createParser(); + //createParser(); + XmlParser xmlParser = new XmlParser(inputStream); String tag="library"; List response = new ArrayList(); try { - response = SUT.detectRecordElementWrapper(parser, tag); - } catch (XMLStreamException e) { - Assert.fail(); + response = SUT.detectRecordElementWrapper(xmlParser, tag); + } catch (ServletException e) { + Assert.fail(e.getMessage()); } Assert.assertNotNull(response); Assert.assertEquals(response.size(), 1); @@ -105,14 +112,15 @@ public class XmlImportUtilitiesTests extends RefineTest { @Test public void detectRecordElementCanHandleWithNestedElements(){ loadXml("author1genre1"); - createParser(); + //createParser(); + XmlParser xmlParser = new XmlParser(inputStream); String tag="book"; List response = new ArrayList(); try { - response = SUT.detectRecordElementWrapper(parser, tag); - } catch (XMLStreamException e) { - Assert.fail(); + response = SUT.detectRecordElementWrapper(xmlParser, tag); + } catch (ServletException e) { + Assert.fail(e.getMessage()); } Assert.assertNotNull(response); Assert.assertEquals(response.size(), 2); @@ -123,14 +131,15 @@ public class XmlImportUtilitiesTests extends RefineTest { @Test public void detectRecordElementIsNullForUnfoundTag(){ loadXml("author1genre1"); - createParser(); + //createParser(); + XmlParser xmlParser = new XmlParser(inputStream); String tag=""; List response = new ArrayList(); try { - response = SUT.detectRecordElementWrapper(parser, tag); - } catch (XMLStreamException e) { - Assert.fail(); + response = SUT.detectRecordElementWrapper(xmlParser, tag); + } catch (ServletException e) { + Assert.fail(e.getMessage()); } Assert.assertNull(response); }