From 31073d77121d70dc9e81976184892cb7574f04df Mon Sep 17 00:00:00 2001 From: Tom Morris Date: Fri, 7 Oct 2011 19:06:53 +0000 Subject: [PATCH] Refactor importer interfaces to narrow exceptions thrown and handled git-svn-id: http://google-refine.googlecode.com/svn/trunk@2296 7d457c2a-affb-35e4-300a-418c747d4874 --- .../google/refine/importers/JsonImporter.java | 38 ++++---- .../google/refine/importers/XmlImporter.java | 24 ++--- .../refine/importers/tree/TreeReader.java | 19 ++-- .../importers/tree/TreeReaderException.java | 23 +++++ .../importers/tree/XmlImportUtilities.java | 95 +++++++++---------- .../importers/XmlImportUtilitiesTests.java | 5 +- 6 files changed, 114 insertions(+), 90 deletions(-) create mode 100644 main/src/com/google/refine/importers/tree/TreeReaderException.java diff --git a/main/src/com/google/refine/importers/JsonImporter.java b/main/src/com/google/refine/importers/JsonImporter.java index bedbf7e63..caf4b453c 100644 --- a/main/src/com/google/refine/importers/JsonImporter.java +++ b/main/src/com/google/refine/importers/JsonImporter.java @@ -40,8 +40,6 @@ import java.io.InputStream; import java.io.Reader; import java.util.List; -import javax.servlet.ServletException; - import org.codehaus.jackson.JsonFactory; import org.codehaus.jackson.JsonParseException; import org.codehaus.jackson.JsonParser; @@ -55,6 +53,7 @@ import com.google.refine.ProjectMetadata; import com.google.refine.importers.tree.ImportColumnGroup; import com.google.refine.importers.tree.TreeImportingParserBase; import com.google.refine.importers.tree.TreeReader; +import com.google.refine.importers.tree.TreeReaderException; import com.google.refine.importing.ImportingJob; import com.google.refine.importing.ImportingUtilities; import com.google.refine.model.Project; @@ -132,7 +131,7 @@ public class JsonImporter extends TreeImportingParserBase { JsonToken token = parser.nextToken(); state.tokenCount++; return parseForPreview(parser, state, token); - } catch (Exception e) { + } catch (IOException e) { return null; } } @@ -158,7 +157,7 @@ public class JsonImporter extends TreeImportingParserBase { default: break loop; } - } catch (Exception e) { + } catch (IOException e) { break; } } @@ -182,7 +181,7 @@ public class JsonImporter extends TreeImportingParserBase { Object element = parseForPreview(parser, state, token); JSONUtilities.append(result, element); } - } catch (Exception e) { + } catch (IOException e) { break; } } @@ -213,7 +212,7 @@ public class JsonImporter extends TreeImportingParserBase { public JSONTreeReader(Reader reader) { try { parser = factory.createJsonParser(reader); - } catch (Exception e) { + } catch (IOException e) { e.printStackTrace(); } } @@ -251,12 +250,12 @@ public class JsonImporter extends TreeImportingParserBase { } @Override - public Token current() throws ServletException { + public Token current() { return this.mapToToken(parser.getCurrentToken()); } @Override - public String getFieldName() throws ServletException{ + public String getFieldName() throws TreeReaderException { try { String text = parser.getCurrentName(); @@ -271,8 +270,8 @@ public class JsonImporter extends TreeImportingParserBase { //end of workaround return text; - } catch (Exception e) { - throw new ServletException(e); + } catch (IOException e) { + throw new TreeReaderException(e); } } @@ -285,32 +284,33 @@ public class JsonImporter extends TreeImportingParserBase { } @Override - public String getFieldValue() throws ServletException { + public String getFieldValue() throws TreeReaderException { try { return parser.getText(); - } catch (Exception e) { - throw new ServletException(e); + } catch (IOException e) { + throw new TreeReaderException(e); } } @Override - public boolean hasNext() throws ServletException { + public boolean hasNext() { return true; //FIXME fairly obtuse, is there a better way (advancing, then rewinding?) } @Override - public Token next() throws ServletException { + public Token next() throws TreeReaderException { JsonToken next; try { next = parser.nextToken(); } catch (JsonParseException e) { - throw new ServletException(e); + throw new TreeReaderException(e); } catch (IOException e) { - throw new ServletException(e); + throw new TreeReaderException(e); } + // TODO just return null here? if(next == null) { - throw new ServletException("No more Json Tokens in stream"); + throw new TreeReaderException("No more Json Tokens in stream"); } //The following is a workaround for inconsistent Jackson JsonParser @@ -318,7 +318,7 @@ public class JsonImporter extends TreeImportingParserBase { try { this.thisTokenIsAFieldName = true; this.lastFieldName = parser.getCurrentName(); - } catch (Exception e) { + } catch (IOException e) { //silent } }else if(next == JsonToken.START_ARRAY || next == JsonToken.START_OBJECT){ diff --git a/main/src/com/google/refine/importers/XmlImporter.java b/main/src/com/google/refine/importers/XmlImporter.java index 292bf46b3..3affe393a 100644 --- a/main/src/com/google/refine/importers/XmlImporter.java +++ b/main/src/com/google/refine/importers/XmlImporter.java @@ -40,7 +40,6 @@ import java.io.InputStream; import java.io.PushbackInputStream; import java.util.List; -import javax.servlet.ServletException; import javax.xml.stream.XMLInputFactory; import javax.xml.stream.XMLStreamConstants; import javax.xml.stream.XMLStreamException; @@ -55,6 +54,7 @@ import com.google.refine.ProjectMetadata; import com.google.refine.importers.tree.ImportColumnGroup; import com.google.refine.importers.tree.TreeImportingParserBase; import com.google.refine.importers.tree.TreeReader; +import com.google.refine.importers.tree.TreeReaderException; import com.google.refine.importing.ImportingJob; import com.google.refine.importing.ImportingUtilities; import com.google.refine.model.Project; @@ -215,26 +215,26 @@ public class XmlImporter extends TreeImportingParserBase { } @Override - public Token next() throws ServletException { + public Token next() throws TreeReaderException { try { if (!parser.hasNext()) { - throw new ServletException("End of XML stream"); + throw new TreeReaderException("End of XML stream"); } } catch (XMLStreamException e) { - throw new ServletException(e); + throw new TreeReaderException(e); } int currentToken = -1; try { currentToken = parser.next(); } catch (XMLStreamException e) { - throw new ServletException(e); + throw new TreeReaderException(e); } return mapToToken(currentToken); } - protected Token mapToToken(int token) throws ServletException { + protected Token mapToToken(int token) { switch(token){ case XMLStreamConstants.START_ELEMENT: return Token.StartEntity; case XMLStreamConstants.END_ELEMENT: return Token.EndEntity; @@ -256,24 +256,24 @@ public class XmlImporter extends TreeImportingParserBase { } @Override - public Token current() throws ServletException{ + public Token current() throws TreeReaderException { return this.mapToToken(parser.getEventType()); } @Override - public boolean hasNext() throws ServletException{ + public boolean hasNext() throws TreeReaderException { try { return parser.hasNext(); } catch (XMLStreamException e) { - throw new ServletException(e); + throw new TreeReaderException(e); } } @Override - public String getFieldName() throws ServletException{ - try{ + public String getFieldName() throws TreeReaderException { + try { return parser.getLocalName(); - }catch(IllegalStateException e){ + } catch (IllegalStateException e) { return null; } } diff --git a/main/src/com/google/refine/importers/tree/TreeReader.java b/main/src/com/google/refine/importers/tree/TreeReader.java index 732f89558..b1c9cf842 100644 --- a/main/src/com/google/refine/importers/tree/TreeReader.java +++ b/main/src/com/google/refine/importers/tree/TreeReader.java @@ -33,6 +33,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. package com.google.refine.importers.tree; +/** + * Interface for all tree-shaped parsers. + * + * This is effectively part of the contract for {@link TreeImportingParserBase}. + */ public interface TreeReader { public enum Token { Ignorable, @@ -42,14 +47,14 @@ public interface TreeReader { //append additional tokens only if necessary (most should be just mapped to Value or Ignorable) } - public Token current() throws Exception; //aka getCurrentToken - - public boolean hasNext() throws Exception; - public Token next() throws Exception; - - public String getFieldName() throws Exception; //aka getFieldName + public Token current() throws TreeReaderException; // aka getCurrentToken + + public boolean hasNext() throws TreeReaderException; + public Token next() throws TreeReaderException; + + public String getFieldName() throws TreeReaderException; public String getPrefix(); - public String getFieldValue() throws Exception; + public String getFieldValue() throws TreeReaderException; public int getAttributeCount(); public String getAttributeValue(int index); diff --git a/main/src/com/google/refine/importers/tree/TreeReaderException.java b/main/src/com/google/refine/importers/tree/TreeReaderException.java new file mode 100644 index 000000000..bf604b216 --- /dev/null +++ b/main/src/com/google/refine/importers/tree/TreeReaderException.java @@ -0,0 +1,23 @@ +package com.google.refine.importers.tree; + + +/** + * An Exception from the TreeReader interface methods. + */ +public class TreeReaderException extends Exception { + + private static final long serialVersionUID = 1L; + + public TreeReaderException(String message, Throwable cause) { + super(message, cause); + } + + public TreeReaderException(String message) { + super(message); + } + + public TreeReaderException(Throwable cause) { + super(cause); + } + +} diff --git a/main/src/com/google/refine/importers/tree/XmlImportUtilities.java b/main/src/com/google/refine/importers/tree/XmlImportUtilities.java index 5a4183c8a..46d7ca531 100644 --- a/main/src/com/google/refine/importers/tree/XmlImportUtilities.java +++ b/main/src/com/google/refine/importers/tree/XmlImportUtilities.java @@ -53,24 +53,19 @@ import com.google.refine.model.Row; public class XmlImportUtilities extends TreeImportUtilities { final static Logger logger = LoggerFactory.getLogger("XmlImportUtilities"); - static public String[] detectPathFromTag(TreeReader parser, String tag) { - try { - while (parser.hasNext()) { - Token eventType = parser.next(); - if (eventType == Token.StartEntity) {//XMLStreamConstants.START_ELEMENT) { - List path = detectRecordElement(parser, tag); - if (path != null) { - String[] path2 = new String[path.size()]; + static public String[] detectPathFromTag(TreeReader parser, String tag) throws TreeReaderException { + while (parser.hasNext()) { + Token eventType = parser.next(); + if (eventType == Token.StartEntity) {//XMLStreamConstants.START_ELEMENT) { + List path = detectRecordElement(parser, tag); + if (path != null) { + String[] path2 = new String[path.size()]; - path.toArray(path2); + path.toArray(path2); - return path2; - } + return path2; } } - } catch (Exception e) { - // silent - // e.printStackTrace(); } return null; @@ -89,36 +84,31 @@ public class XmlImportUtilities extends TreeImportUtilities { * null if the the tag is not found. * @throws ServletException */ - static protected List detectRecordElement(TreeReader parser, String tag) throws Exception { - try{ - if(parser.current() == Token.Ignorable) { - parser.next(); - } + static protected List detectRecordElement(TreeReader parser, String tag) throws TreeReaderException { + if(parser.current() == Token.Ignorable) { + parser.next(); + } - String localName = parser.getFieldName(); - String fullName = composeName(parser.getPrefix(), localName); - if (tag.equals(parser.getFieldName()) || tag.equals(fullName)) { - List path = new LinkedList(); - path.add(localName); + String localName = parser.getFieldName(); + String fullName = composeName(parser.getPrefix(), localName); + if (tag.equals(parser.getFieldName()) || tag.equals(fullName)) { + List path = new LinkedList(); + path.add(localName); - return path; - } + return path; + } - while (parser.hasNext()) { - Token eventType = parser.next(); - if (eventType == Token.EndEntity) {//XMLStreamConstants.END_ELEMENT) { - break; - } else if (eventType == Token.StartEntity) {//XMLStreamConstants.START_ELEMENT) { - List path = detectRecordElement(parser, tag); - if (path != null) { - path.add(0, localName); - return path; - } + while (parser.hasNext()) { + Token eventType = parser.next(); + if (eventType == Token.EndEntity) {//XMLStreamConstants.END_ELEMENT) { + break; + } else if (eventType == Token.StartEntity) {//XMLStreamConstants.START_ELEMENT) { + List path = detectRecordElement(parser, tag); + if (path != null) { + path.add(0, localName); + return path; } } - } catch (Exception e) { - // silent - // e.printStackTrace(); } return null; } @@ -154,9 +144,9 @@ public class XmlImportUtilities extends TreeImportUtilities { } } } - } catch (Exception e) { + } catch (TreeReaderException e) { // silent - // e.printStackTrace(); + e.printStackTrace(); } if (candidates.size() > 0) { @@ -186,7 +176,8 @@ public class XmlImportUtilities extends TreeImportUtilities { if (parser.getFieldValue().trim().length() > 0) { textNodeCount++; } - }catch(Exception e){ + }catch(TreeReaderException e){ + e.printStackTrace(); //silent } } else if (eventType == Token.StartEntity) { @@ -209,9 +200,9 @@ public class XmlImportUtilities extends TreeImportUtilities { } } } - } catch (Exception e) { + } catch (TreeReaderException e) { // silent - // e.printStackTrace(); + e.printStackTrace(); } if (immediateChildCandidateMap.size() > 0) { @@ -273,9 +264,10 @@ public class XmlImportUtilities extends TreeImportUtilities { Token eventType = parser.next(); if (eventType == Token.StartEntity) { findRecord(project, parser, recordPath, 0, rootColumnGroup, limit); + logger.info("Project rows after findRecord = "+project.rows.size()); } } - } catch (Exception e) { + } catch (TreeReaderException e) { logger.error("Exception from XML parse",e); } } @@ -298,7 +290,7 @@ public class XmlImportUtilities extends TreeImportUtilities { int pathIndex, ImportColumnGroup rootColumnGroup, int limit - ) throws Exception { + ) throws TreeReaderException { logger.trace("findRecord(Project, TreeReader, String[], int, ImportColumnGroup"); if(parser.current() == Token.Ignorable){//XMLStreamConstants.START_DOCUMENT){ @@ -315,6 +307,7 @@ public class XmlImportUtilities extends TreeImportUtilities { while (parser.hasNext() && (limit <= 0 || project.rows.size() < limit)) { Token eventType = parser.next(); if (eventType == Token.StartEntity) { + // TODO: find instead of process?? findRecord(project, parser, recordPath, pathIndex + 1, rootColumnGroup, limit); } else if (eventType == Token.EndEntity) { break; @@ -337,7 +330,7 @@ public class XmlImportUtilities extends TreeImportUtilities { } } - static protected void skip(TreeReader parser) throws Exception { + static protected void skip(TreeReader parser) throws TreeReaderException { while (parser.hasNext()) { Token eventType = parser.next(); if (eventType == Token.StartEntity) {//XMLStreamConstants.START_ELEMENT) { @@ -360,7 +353,7 @@ public class XmlImportUtilities extends TreeImportUtilities { Project project, TreeReader parser, ImportColumnGroup rootColumnGroup - ) throws Exception { + ) throws TreeReaderException { logger.trace("processRecord(Project,TreeReader,ImportColumnGroup)"); ImportRecord record = new ImportRecord(); @@ -380,7 +373,7 @@ public class XmlImportUtilities extends TreeImportUtilities { Project project, TreeReader parser, ImportColumnGroup rootColumnGroup - ) throws Exception { + ) throws TreeReaderException { logger.trace("processFieldAsRecord(Project,TreeReader,ImportColumnGroup)"); String text = parser.getFieldValue().trim(); @@ -431,7 +424,7 @@ public class XmlImportUtilities extends TreeImportUtilities { TreeReader parser, ImportColumnGroup columnGroup, ImportRecord record - ) throws Exception { + ) throws TreeReaderException { logger.trace("processSubRecord(Project,TreeReader,ImportColumnGroup,ImportRecord)"); if(parser.current() == Token.Ignorable) { @@ -486,6 +479,8 @@ public class XmlImportUtilities extends TreeImportUtilities { } } else if (eventType == Token.EndEntity) { break; + } else { + logger.info("unknown event type " + eventType); } } diff --git a/main/tests/server/src/com/google/refine/tests/importers/XmlImportUtilitiesTests.java b/main/tests/server/src/com/google/refine/tests/importers/XmlImportUtilitiesTests.java index ef745e3cf..f40a5ba6e 100644 --- a/main/tests/server/src/com/google/refine/tests/importers/XmlImportUtilitiesTests.java +++ b/main/tests/server/src/com/google/refine/tests/importers/XmlImportUtilitiesTests.java @@ -55,6 +55,7 @@ import com.google.refine.importers.tree.ImportColumn; import com.google.refine.importers.tree.ImportColumnGroup; import com.google.refine.importers.tree.ImportRecord; import com.google.refine.importers.tree.TreeReader; +import com.google.refine.importers.tree.TreeReaderException; import com.google.refine.model.Project; import com.google.refine.model.Row; import com.google.refine.tests.RefineTest; @@ -100,7 +101,7 @@ public class XmlImportUtilitiesTests extends RefineTest { } @Test - public void detectPathFromTagXmlTest(){ + public void detectPathFromTagXmlTest() throws TreeReaderException{ loadData("author1genre1"); String tag = "library"; @@ -113,7 +114,7 @@ public class XmlImportUtilitiesTests extends RefineTest { } @Test - public void detectPathFromTagWithNestedElementXml(){ + public void detectPathFromTagWithNestedElementXml() throws TreeReaderException{ loadData("author1genre1"); String tag = "book";