Refactor importer interfaces to narrow exceptions thrown and handled

git-svn-id: http://google-refine.googlecode.com/svn/trunk@2296 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
Tom Morris 2011-10-07 19:06:53 +00:00
parent 50927b33dc
commit 31073d7712
6 changed files with 114 additions and 90 deletions

View File

@ -40,8 +40,6 @@ import java.io.InputStream;
import java.io.Reader; import java.io.Reader;
import java.util.List; import java.util.List;
import javax.servlet.ServletException;
import org.codehaus.jackson.JsonFactory; import org.codehaus.jackson.JsonFactory;
import org.codehaus.jackson.JsonParseException; import org.codehaus.jackson.JsonParseException;
import org.codehaus.jackson.JsonParser; import org.codehaus.jackson.JsonParser;
@ -55,6 +53,7 @@ import com.google.refine.ProjectMetadata;
import com.google.refine.importers.tree.ImportColumnGroup; import com.google.refine.importers.tree.ImportColumnGroup;
import com.google.refine.importers.tree.TreeImportingParserBase; import com.google.refine.importers.tree.TreeImportingParserBase;
import com.google.refine.importers.tree.TreeReader; import com.google.refine.importers.tree.TreeReader;
import com.google.refine.importers.tree.TreeReaderException;
import com.google.refine.importing.ImportingJob; import com.google.refine.importing.ImportingJob;
import com.google.refine.importing.ImportingUtilities; import com.google.refine.importing.ImportingUtilities;
import com.google.refine.model.Project; import com.google.refine.model.Project;
@ -132,7 +131,7 @@ public class JsonImporter extends TreeImportingParserBase {
JsonToken token = parser.nextToken(); JsonToken token = parser.nextToken();
state.tokenCount++; state.tokenCount++;
return parseForPreview(parser, state, token); return parseForPreview(parser, state, token);
} catch (Exception e) { } catch (IOException e) {
return null; return null;
} }
} }
@ -158,7 +157,7 @@ public class JsonImporter extends TreeImportingParserBase {
default: default:
break loop; break loop;
} }
} catch (Exception e) { } catch (IOException e) {
break; break;
} }
} }
@ -182,7 +181,7 @@ public class JsonImporter extends TreeImportingParserBase {
Object element = parseForPreview(parser, state, token); Object element = parseForPreview(parser, state, token);
JSONUtilities.append(result, element); JSONUtilities.append(result, element);
} }
} catch (Exception e) { } catch (IOException e) {
break; break;
} }
} }
@ -213,7 +212,7 @@ public class JsonImporter extends TreeImportingParserBase {
public JSONTreeReader(Reader reader) { public JSONTreeReader(Reader reader) {
try { try {
parser = factory.createJsonParser(reader); parser = factory.createJsonParser(reader);
} catch (Exception e) { } catch (IOException e) {
e.printStackTrace(); e.printStackTrace();
} }
} }
@ -251,12 +250,12 @@ public class JsonImporter extends TreeImportingParserBase {
} }
@Override @Override
public Token current() throws ServletException { public Token current() {
return this.mapToToken(parser.getCurrentToken()); return this.mapToToken(parser.getCurrentToken());
} }
@Override @Override
public String getFieldName() throws ServletException{ public String getFieldName() throws TreeReaderException {
try { try {
String text = parser.getCurrentName(); String text = parser.getCurrentName();
@ -271,8 +270,8 @@ public class JsonImporter extends TreeImportingParserBase {
//end of workaround //end of workaround
return text; return text;
} catch (Exception e) { } catch (IOException e) {
throw new ServletException(e); throw new TreeReaderException(e);
} }
} }
@ -285,32 +284,33 @@ public class JsonImporter extends TreeImportingParserBase {
} }
@Override @Override
public String getFieldValue() throws ServletException { public String getFieldValue() throws TreeReaderException {
try { try {
return parser.getText(); return parser.getText();
} catch (Exception e) { } catch (IOException e) {
throw new ServletException(e); throw new TreeReaderException(e);
} }
} }
@Override @Override
public boolean hasNext() throws ServletException { public boolean hasNext() {
return true; //FIXME fairly obtuse, is there a better way (advancing, then rewinding?) return true; //FIXME fairly obtuse, is there a better way (advancing, then rewinding?)
} }
@Override @Override
public Token next() throws ServletException { public Token next() throws TreeReaderException {
JsonToken next; JsonToken next;
try { try {
next = parser.nextToken(); next = parser.nextToken();
} catch (JsonParseException e) { } catch (JsonParseException e) {
throw new ServletException(e); throw new TreeReaderException(e);
} catch (IOException e) { } catch (IOException e) {
throw new ServletException(e); throw new TreeReaderException(e);
} }
// TODO just return null here?
if(next == null) { if(next == null) {
throw new ServletException("No more Json Tokens in stream"); throw new TreeReaderException("No more Json Tokens in stream");
} }
//The following is a workaround for inconsistent Jackson JsonParser //The following is a workaround for inconsistent Jackson JsonParser
@ -318,7 +318,7 @@ public class JsonImporter extends TreeImportingParserBase {
try { try {
this.thisTokenIsAFieldName = true; this.thisTokenIsAFieldName = true;
this.lastFieldName = parser.getCurrentName(); this.lastFieldName = parser.getCurrentName();
} catch (Exception e) { } catch (IOException e) {
//silent //silent
} }
}else if(next == JsonToken.START_ARRAY || next == JsonToken.START_OBJECT){ }else if(next == JsonToken.START_ARRAY || next == JsonToken.START_OBJECT){

View File

@ -40,7 +40,6 @@ import java.io.InputStream;
import java.io.PushbackInputStream; import java.io.PushbackInputStream;
import java.util.List; import java.util.List;
import javax.servlet.ServletException;
import javax.xml.stream.XMLInputFactory; import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamConstants; import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamException;
@ -55,6 +54,7 @@ import com.google.refine.ProjectMetadata;
import com.google.refine.importers.tree.ImportColumnGroup; import com.google.refine.importers.tree.ImportColumnGroup;
import com.google.refine.importers.tree.TreeImportingParserBase; import com.google.refine.importers.tree.TreeImportingParserBase;
import com.google.refine.importers.tree.TreeReader; import com.google.refine.importers.tree.TreeReader;
import com.google.refine.importers.tree.TreeReaderException;
import com.google.refine.importing.ImportingJob; import com.google.refine.importing.ImportingJob;
import com.google.refine.importing.ImportingUtilities; import com.google.refine.importing.ImportingUtilities;
import com.google.refine.model.Project; import com.google.refine.model.Project;
@ -215,26 +215,26 @@ public class XmlImporter extends TreeImportingParserBase {
} }
@Override @Override
public Token next() throws ServletException { public Token next() throws TreeReaderException {
try { try {
if (!parser.hasNext()) { if (!parser.hasNext()) {
throw new ServletException("End of XML stream"); throw new TreeReaderException("End of XML stream");
} }
} catch (XMLStreamException e) { } catch (XMLStreamException e) {
throw new ServletException(e); throw new TreeReaderException(e);
} }
int currentToken = -1; int currentToken = -1;
try { try {
currentToken = parser.next(); currentToken = parser.next();
} catch (XMLStreamException e) { } catch (XMLStreamException e) {
throw new ServletException(e); throw new TreeReaderException(e);
} }
return mapToToken(currentToken); return mapToToken(currentToken);
} }
protected Token mapToToken(int token) throws ServletException { protected Token mapToToken(int token) {
switch(token){ switch(token){
case XMLStreamConstants.START_ELEMENT: return Token.StartEntity; case XMLStreamConstants.START_ELEMENT: return Token.StartEntity;
case XMLStreamConstants.END_ELEMENT: return Token.EndEntity; case XMLStreamConstants.END_ELEMENT: return Token.EndEntity;
@ -256,24 +256,24 @@ public class XmlImporter extends TreeImportingParserBase {
} }
@Override @Override
public Token current() throws ServletException{ public Token current() throws TreeReaderException {
return this.mapToToken(parser.getEventType()); return this.mapToToken(parser.getEventType());
} }
@Override @Override
public boolean hasNext() throws ServletException{ public boolean hasNext() throws TreeReaderException {
try { try {
return parser.hasNext(); return parser.hasNext();
} catch (XMLStreamException e) { } catch (XMLStreamException e) {
throw new ServletException(e); throw new TreeReaderException(e);
} }
} }
@Override @Override
public String getFieldName() throws ServletException{ public String getFieldName() throws TreeReaderException {
try{ try {
return parser.getLocalName(); return parser.getLocalName();
}catch(IllegalStateException e){ } catch (IllegalStateException e) {
return null; return null;
} }
} }

View File

@ -33,6 +33,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package com.google.refine.importers.tree; package com.google.refine.importers.tree;
/**
* Interface for all tree-shaped parsers.
*
* This is effectively part of the contract for {@link TreeImportingParserBase}.
*/
public interface TreeReader { public interface TreeReader {
public enum Token { public enum Token {
Ignorable, Ignorable,
@ -42,14 +47,14 @@ public interface TreeReader {
//append additional tokens only if necessary (most should be just mapped to Value or Ignorable) //append additional tokens only if necessary (most should be just mapped to Value or Ignorable)
} }
public Token current() throws Exception; //aka getCurrentToken public Token current() throws TreeReaderException; // aka getCurrentToken
public boolean hasNext() throws Exception; public boolean hasNext() throws TreeReaderException;
public Token next() throws Exception; public Token next() throws TreeReaderException;
public String getFieldName() throws Exception; //aka getFieldName public String getFieldName() throws TreeReaderException;
public String getPrefix(); public String getPrefix();
public String getFieldValue() throws Exception; public String getFieldValue() throws TreeReaderException;
public int getAttributeCount(); public int getAttributeCount();
public String getAttributeValue(int index); public String getAttributeValue(int index);

View File

@ -0,0 +1,23 @@
package com.google.refine.importers.tree;
/**
 * Checked exception raised by the methods of the {@code TreeReader} interface.
 *
 * <p>Reader implementations use it to report failures of their underlying
 * parser, either with a plain message or by wrapping the original cause.
 */
public class TreeReaderException extends Exception {

    private static final long serialVersionUID = 1L;

    /**
     * Creates an exception that carries only a description of the problem.
     *
     * @param message human-readable description of the failure
     */
    public TreeReaderException(String message) {
        super(message);
    }

    /**
     * Creates an exception that wraps a lower-level failure.
     *
     * @param cause the underlying exception that triggered this one
     */
    public TreeReaderException(Throwable cause) {
        super(cause);
    }

    /**
     * Creates an exception with both a description and the underlying failure.
     *
     * @param message human-readable description of the failure
     * @param cause the underlying exception that triggered this one
     */
    public TreeReaderException(String message, Throwable cause) {
        super(message, cause);
    }
}

View File

@ -53,24 +53,19 @@ import com.google.refine.model.Row;
public class XmlImportUtilities extends TreeImportUtilities { public class XmlImportUtilities extends TreeImportUtilities {
final static Logger logger = LoggerFactory.getLogger("XmlImportUtilities"); final static Logger logger = LoggerFactory.getLogger("XmlImportUtilities");
static public String[] detectPathFromTag(TreeReader parser, String tag) { static public String[] detectPathFromTag(TreeReader parser, String tag) throws TreeReaderException {
try { while (parser.hasNext()) {
while (parser.hasNext()) { Token eventType = parser.next();
Token eventType = parser.next(); if (eventType == Token.StartEntity) {//XMLStreamConstants.START_ELEMENT) {
if (eventType == Token.StartEntity) {//XMLStreamConstants.START_ELEMENT) { List<String> path = detectRecordElement(parser, tag);
List<String> path = detectRecordElement(parser, tag); if (path != null) {
if (path != null) { String[] path2 = new String[path.size()];
String[] path2 = new String[path.size()];
path.toArray(path2); path.toArray(path2);
return path2; return path2;
}
} }
} }
} catch (Exception e) {
// silent
// e.printStackTrace();
} }
return null; return null;
@ -89,36 +84,31 @@ public class XmlImportUtilities extends TreeImportUtilities {
* null if the tag is not found. * null if the tag is not found.
* @throws ServletException * @throws ServletException
*/ */
static protected List<String> detectRecordElement(TreeReader parser, String tag) throws Exception { static protected List<String> detectRecordElement(TreeReader parser, String tag) throws TreeReaderException {
try{ if(parser.current() == Token.Ignorable) {
if(parser.current() == Token.Ignorable) { parser.next();
parser.next(); }
}
String localName = parser.getFieldName(); String localName = parser.getFieldName();
String fullName = composeName(parser.getPrefix(), localName); String fullName = composeName(parser.getPrefix(), localName);
if (tag.equals(parser.getFieldName()) || tag.equals(fullName)) { if (tag.equals(parser.getFieldName()) || tag.equals(fullName)) {
List<String> path = new LinkedList<String>(); List<String> path = new LinkedList<String>();
path.add(localName); path.add(localName);
return path; return path;
} }
while (parser.hasNext()) { while (parser.hasNext()) {
Token eventType = parser.next(); Token eventType = parser.next();
if (eventType == Token.EndEntity) {//XMLStreamConstants.END_ELEMENT) { if (eventType == Token.EndEntity) {//XMLStreamConstants.END_ELEMENT) {
break; break;
} else if (eventType == Token.StartEntity) {//XMLStreamConstants.START_ELEMENT) { } else if (eventType == Token.StartEntity) {//XMLStreamConstants.START_ELEMENT) {
List<String> path = detectRecordElement(parser, tag); List<String> path = detectRecordElement(parser, tag);
if (path != null) { if (path != null) {
path.add(0, localName); path.add(0, localName);
return path; return path;
}
} }
} }
} catch (Exception e) {
// silent
// e.printStackTrace();
} }
return null; return null;
} }
@ -154,9 +144,9 @@ public class XmlImportUtilities extends TreeImportUtilities {
} }
} }
} }
} catch (Exception e) { } catch (TreeReaderException e) {
// silent // silent
// e.printStackTrace(); e.printStackTrace();
} }
if (candidates.size() > 0) { if (candidates.size() > 0) {
@ -186,7 +176,8 @@ public class XmlImportUtilities extends TreeImportUtilities {
if (parser.getFieldValue().trim().length() > 0) { if (parser.getFieldValue().trim().length() > 0) {
textNodeCount++; textNodeCount++;
} }
}catch(Exception e){ }catch(TreeReaderException e){
e.printStackTrace();
//silent //silent
} }
} else if (eventType == Token.StartEntity) { } else if (eventType == Token.StartEntity) {
@ -209,9 +200,9 @@ public class XmlImportUtilities extends TreeImportUtilities {
} }
} }
} }
} catch (Exception e) { } catch (TreeReaderException e) {
// silent // silent
// e.printStackTrace(); e.printStackTrace();
} }
if (immediateChildCandidateMap.size() > 0) { if (immediateChildCandidateMap.size() > 0) {
@ -273,9 +264,10 @@ public class XmlImportUtilities extends TreeImportUtilities {
Token eventType = parser.next(); Token eventType = parser.next();
if (eventType == Token.StartEntity) { if (eventType == Token.StartEntity) {
findRecord(project, parser, recordPath, 0, rootColumnGroup, limit); findRecord(project, parser, recordPath, 0, rootColumnGroup, limit);
logger.info("Project rows after findRecord = "+project.rows.size());
} }
} }
} catch (Exception e) { } catch (TreeReaderException e) {
logger.error("Exception from XML parse",e); logger.error("Exception from XML parse",e);
} }
} }
@ -298,7 +290,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
int pathIndex, int pathIndex,
ImportColumnGroup rootColumnGroup, ImportColumnGroup rootColumnGroup,
int limit int limit
) throws Exception { ) throws TreeReaderException {
logger.trace("findRecord(Project, TreeReader, String[], int, ImportColumnGroup"); logger.trace("findRecord(Project, TreeReader, String[], int, ImportColumnGroup");
if(parser.current() == Token.Ignorable){//XMLStreamConstants.START_DOCUMENT){ if(parser.current() == Token.Ignorable){//XMLStreamConstants.START_DOCUMENT){
@ -315,6 +307,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
while (parser.hasNext() && (limit <= 0 || project.rows.size() < limit)) { while (parser.hasNext() && (limit <= 0 || project.rows.size() < limit)) {
Token eventType = parser.next(); Token eventType = parser.next();
if (eventType == Token.StartEntity) { if (eventType == Token.StartEntity) {
// TODO: find instead of process??
findRecord(project, parser, recordPath, pathIndex + 1, rootColumnGroup, limit); findRecord(project, parser, recordPath, pathIndex + 1, rootColumnGroup, limit);
} else if (eventType == Token.EndEntity) { } else if (eventType == Token.EndEntity) {
break; break;
@ -337,7 +330,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
} }
} }
static protected void skip(TreeReader parser) throws Exception { static protected void skip(TreeReader parser) throws TreeReaderException {
while (parser.hasNext()) { while (parser.hasNext()) {
Token eventType = parser.next(); Token eventType = parser.next();
if (eventType == Token.StartEntity) {//XMLStreamConstants.START_ELEMENT) { if (eventType == Token.StartEntity) {//XMLStreamConstants.START_ELEMENT) {
@ -360,7 +353,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
Project project, Project project,
TreeReader parser, TreeReader parser,
ImportColumnGroup rootColumnGroup ImportColumnGroup rootColumnGroup
) throws Exception { ) throws TreeReaderException {
logger.trace("processRecord(Project,TreeReader,ImportColumnGroup)"); logger.trace("processRecord(Project,TreeReader,ImportColumnGroup)");
ImportRecord record = new ImportRecord(); ImportRecord record = new ImportRecord();
@ -380,7 +373,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
Project project, Project project,
TreeReader parser, TreeReader parser,
ImportColumnGroup rootColumnGroup ImportColumnGroup rootColumnGroup
) throws Exception { ) throws TreeReaderException {
logger.trace("processFieldAsRecord(Project,TreeReader,ImportColumnGroup)"); logger.trace("processFieldAsRecord(Project,TreeReader,ImportColumnGroup)");
String text = parser.getFieldValue().trim(); String text = parser.getFieldValue().trim();
@ -431,7 +424,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
TreeReader parser, TreeReader parser,
ImportColumnGroup columnGroup, ImportColumnGroup columnGroup,
ImportRecord record ImportRecord record
) throws Exception { ) throws TreeReaderException {
logger.trace("processSubRecord(Project,TreeReader,ImportColumnGroup,ImportRecord)"); logger.trace("processSubRecord(Project,TreeReader,ImportColumnGroup,ImportRecord)");
if(parser.current() == Token.Ignorable) { if(parser.current() == Token.Ignorable) {
@ -486,6 +479,8 @@ public class XmlImportUtilities extends TreeImportUtilities {
} }
} else if (eventType == Token.EndEntity) { } else if (eventType == Token.EndEntity) {
break; break;
} else {
logger.info("unknown event type " + eventType);
} }
} }

View File

@ -55,6 +55,7 @@ import com.google.refine.importers.tree.ImportColumn;
import com.google.refine.importers.tree.ImportColumnGroup; import com.google.refine.importers.tree.ImportColumnGroup;
import com.google.refine.importers.tree.ImportRecord; import com.google.refine.importers.tree.ImportRecord;
import com.google.refine.importers.tree.TreeReader; import com.google.refine.importers.tree.TreeReader;
import com.google.refine.importers.tree.TreeReaderException;
import com.google.refine.model.Project; import com.google.refine.model.Project;
import com.google.refine.model.Row; import com.google.refine.model.Row;
import com.google.refine.tests.RefineTest; import com.google.refine.tests.RefineTest;
@ -100,7 +101,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
} }
@Test @Test
public void detectPathFromTagXmlTest(){ public void detectPathFromTagXmlTest() throws TreeReaderException{
loadData("<?xml version=\"1.0\"?><library><book id=\"1\"><author>author1</author><genre>genre1</genre></book></library>"); loadData("<?xml version=\"1.0\"?><library><book id=\"1\"><author>author1</author><genre>genre1</genre></book></library>");
String tag = "library"; String tag = "library";
@ -113,7 +114,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
} }
@Test @Test
public void detectPathFromTagWithNestedElementXml(){ public void detectPathFromTagWithNestedElementXml() throws TreeReaderException{
loadData("<?xml version=\"1.0\"?><library><book id=\"1\"><author>author1</author><genre>genre1</genre></book></library>"); loadData("<?xml version=\"1.0\"?><library><book id=\"1\"><author>author1</author><genre>genre1</genre></book></library>");
String tag = "book"; String tag = "book";