Refactor importer interfaces to narrow exceptions thrown and handled

git-svn-id: http://google-refine.googlecode.com/svn/trunk@2296 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
Tom Morris 2011-10-07 19:06:53 +00:00
parent 50927b33dc
commit 31073d7712
6 changed files with 114 additions and 90 deletions

View File

@ -40,8 +40,6 @@ import java.io.InputStream;
import java.io.Reader;
import java.util.List;
import javax.servlet.ServletException;
import org.codehaus.jackson.JsonFactory;
import org.codehaus.jackson.JsonParseException;
import org.codehaus.jackson.JsonParser;
@ -55,6 +53,7 @@ import com.google.refine.ProjectMetadata;
import com.google.refine.importers.tree.ImportColumnGroup;
import com.google.refine.importers.tree.TreeImportingParserBase;
import com.google.refine.importers.tree.TreeReader;
import com.google.refine.importers.tree.TreeReaderException;
import com.google.refine.importing.ImportingJob;
import com.google.refine.importing.ImportingUtilities;
import com.google.refine.model.Project;
@ -132,7 +131,7 @@ public class JsonImporter extends TreeImportingParserBase {
JsonToken token = parser.nextToken();
state.tokenCount++;
return parseForPreview(parser, state, token);
} catch (Exception e) {
} catch (IOException e) {
return null;
}
}
@ -158,7 +157,7 @@ public class JsonImporter extends TreeImportingParserBase {
default:
break loop;
}
} catch (Exception e) {
} catch (IOException e) {
break;
}
}
@ -182,7 +181,7 @@ public class JsonImporter extends TreeImportingParserBase {
Object element = parseForPreview(parser, state, token);
JSONUtilities.append(result, element);
}
} catch (Exception e) {
} catch (IOException e) {
break;
}
}
@ -213,7 +212,7 @@ public class JsonImporter extends TreeImportingParserBase {
public JSONTreeReader(Reader reader) {
try {
parser = factory.createJsonParser(reader);
} catch (Exception e) {
} catch (IOException e) {
e.printStackTrace();
}
}
@ -251,12 +250,12 @@ public class JsonImporter extends TreeImportingParserBase {
}
@Override
public Token current() throws ServletException {
public Token current() {
return this.mapToToken(parser.getCurrentToken());
}
@Override
public String getFieldName() throws ServletException{
public String getFieldName() throws TreeReaderException {
try {
String text = parser.getCurrentName();
@ -271,8 +270,8 @@ public class JsonImporter extends TreeImportingParserBase {
//end of workaround
return text;
} catch (Exception e) {
throw new ServletException(e);
} catch (IOException e) {
throw new TreeReaderException(e);
}
}
@ -285,32 +284,33 @@ public class JsonImporter extends TreeImportingParserBase {
}
@Override
public String getFieldValue() throws ServletException {
public String getFieldValue() throws TreeReaderException {
try {
return parser.getText();
} catch (Exception e) {
throw new ServletException(e);
} catch (IOException e) {
throw new TreeReaderException(e);
}
}
@Override
public boolean hasNext() throws ServletException {
public boolean hasNext() {
return true; //FIXME fairly obtuse, is there a better way (advancing, then rewinding?)
}
@Override
public Token next() throws ServletException {
public Token next() throws TreeReaderException {
JsonToken next;
try {
next = parser.nextToken();
} catch (JsonParseException e) {
throw new ServletException(e);
throw new TreeReaderException(e);
} catch (IOException e) {
throw new ServletException(e);
throw new TreeReaderException(e);
}
// TODO just return null here?
if(next == null) {
throw new ServletException("No more Json Tokens in stream");
throw new TreeReaderException("No more Json Tokens in stream");
}
//The following is a workaround for inconsistent Jackson JsonParser
@ -318,7 +318,7 @@ public class JsonImporter extends TreeImportingParserBase {
try {
this.thisTokenIsAFieldName = true;
this.lastFieldName = parser.getCurrentName();
} catch (Exception e) {
} catch (IOException e) {
//silent
}
}else if(next == JsonToken.START_ARRAY || next == JsonToken.START_OBJECT){

View File

@ -40,7 +40,6 @@ import java.io.InputStream;
import java.io.PushbackInputStream;
import java.util.List;
import javax.servlet.ServletException;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
@ -55,6 +54,7 @@ import com.google.refine.ProjectMetadata;
import com.google.refine.importers.tree.ImportColumnGroup;
import com.google.refine.importers.tree.TreeImportingParserBase;
import com.google.refine.importers.tree.TreeReader;
import com.google.refine.importers.tree.TreeReaderException;
import com.google.refine.importing.ImportingJob;
import com.google.refine.importing.ImportingUtilities;
import com.google.refine.model.Project;
@ -215,26 +215,26 @@ public class XmlImporter extends TreeImportingParserBase {
}
@Override
public Token next() throws ServletException {
public Token next() throws TreeReaderException {
try {
if (!parser.hasNext()) {
throw new ServletException("End of XML stream");
throw new TreeReaderException("End of XML stream");
}
} catch (XMLStreamException e) {
throw new ServletException(e);
throw new TreeReaderException(e);
}
int currentToken = -1;
try {
currentToken = parser.next();
} catch (XMLStreamException e) {
throw new ServletException(e);
throw new TreeReaderException(e);
}
return mapToToken(currentToken);
}
protected Token mapToToken(int token) throws ServletException {
protected Token mapToToken(int token) {
switch(token){
case XMLStreamConstants.START_ELEMENT: return Token.StartEntity;
case XMLStreamConstants.END_ELEMENT: return Token.EndEntity;
@ -256,24 +256,24 @@ public class XmlImporter extends TreeImportingParserBase {
}
@Override
public Token current() throws ServletException{
public Token current() throws TreeReaderException {
return this.mapToToken(parser.getEventType());
}
@Override
public boolean hasNext() throws ServletException{
public boolean hasNext() throws TreeReaderException {
try {
return parser.hasNext();
} catch (XMLStreamException e) {
throw new ServletException(e);
throw new TreeReaderException(e);
}
}
@Override
public String getFieldName() throws ServletException{
try{
public String getFieldName() throws TreeReaderException {
try {
return parser.getLocalName();
}catch(IllegalStateException e){
} catch (IllegalStateException e) {
return null;
}
}

View File

@ -33,6 +33,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package com.google.refine.importers.tree;
/**
* Interface for all tree-shaped parsers.
*
* This is effectively part of the contract for {@link TreeImportingParserBase}.
*/
public interface TreeReader {
public enum Token {
Ignorable,
@ -42,14 +47,14 @@ public interface TreeReader {
//append additional tokens only if necessary (most should be just mapped to Value or Ignorable)
}
public Token current() throws Exception; //aka getCurrentToken
public boolean hasNext() throws Exception;
public Token next() throws Exception;
public String getFieldName() throws Exception; //aka getFieldName
public Token current() throws TreeReaderException; // aka getCurrentToken
public boolean hasNext() throws TreeReaderException;
public Token next() throws TreeReaderException;
public String getFieldName() throws TreeReaderException;
public String getPrefix();
public String getFieldValue() throws Exception;
public String getFieldValue() throws TreeReaderException;
public int getAttributeCount();
public String getAttributeValue(int index);

View File

@ -0,0 +1,23 @@
package com.google.refine.importers.tree;
/**
* An Exception from the TreeReader interface methods.
*/
public class TreeReaderException extends Exception {
private static final long serialVersionUID = 1L;
public TreeReaderException(String message, Throwable cause) {
super(message, cause);
}
public TreeReaderException(String message) {
super(message);
}
public TreeReaderException(Throwable cause) {
super(cause);
}
}

View File

@ -53,24 +53,19 @@ import com.google.refine.model.Row;
public class XmlImportUtilities extends TreeImportUtilities {
final static Logger logger = LoggerFactory.getLogger("XmlImportUtilities");
static public String[] detectPathFromTag(TreeReader parser, String tag) {
try {
while (parser.hasNext()) {
Token eventType = parser.next();
if (eventType == Token.StartEntity) {//XMLStreamConstants.START_ELEMENT) {
List<String> path = detectRecordElement(parser, tag);
if (path != null) {
String[] path2 = new String[path.size()];
static public String[] detectPathFromTag(TreeReader parser, String tag) throws TreeReaderException {
while (parser.hasNext()) {
Token eventType = parser.next();
if (eventType == Token.StartEntity) {//XMLStreamConstants.START_ELEMENT) {
List<String> path = detectRecordElement(parser, tag);
if (path != null) {
String[] path2 = new String[path.size()];
path.toArray(path2);
path.toArray(path2);
return path2;
}
return path2;
}
}
} catch (Exception e) {
// silent
// e.printStackTrace();
}
return null;
@ -89,36 +84,31 @@ public class XmlImportUtilities extends TreeImportUtilities {
* null if the the tag is not found.
* @throws ServletException
*/
static protected List<String> detectRecordElement(TreeReader parser, String tag) throws Exception {
try{
if(parser.current() == Token.Ignorable) {
parser.next();
}
static protected List<String> detectRecordElement(TreeReader parser, String tag) throws TreeReaderException {
if(parser.current() == Token.Ignorable) {
parser.next();
}
String localName = parser.getFieldName();
String fullName = composeName(parser.getPrefix(), localName);
if (tag.equals(parser.getFieldName()) || tag.equals(fullName)) {
List<String> path = new LinkedList<String>();
path.add(localName);
String localName = parser.getFieldName();
String fullName = composeName(parser.getPrefix(), localName);
if (tag.equals(parser.getFieldName()) || tag.equals(fullName)) {
List<String> path = new LinkedList<String>();
path.add(localName);
return path;
}
return path;
}
while (parser.hasNext()) {
Token eventType = parser.next();
if (eventType == Token.EndEntity) {//XMLStreamConstants.END_ELEMENT) {
break;
} else if (eventType == Token.StartEntity) {//XMLStreamConstants.START_ELEMENT) {
List<String> path = detectRecordElement(parser, tag);
if (path != null) {
path.add(0, localName);
return path;
}
while (parser.hasNext()) {
Token eventType = parser.next();
if (eventType == Token.EndEntity) {//XMLStreamConstants.END_ELEMENT) {
break;
} else if (eventType == Token.StartEntity) {//XMLStreamConstants.START_ELEMENT) {
List<String> path = detectRecordElement(parser, tag);
if (path != null) {
path.add(0, localName);
return path;
}
}
} catch (Exception e) {
// silent
// e.printStackTrace();
}
return null;
}
@ -154,9 +144,9 @@ public class XmlImportUtilities extends TreeImportUtilities {
}
}
}
} catch (Exception e) {
} catch (TreeReaderException e) {
// silent
// e.printStackTrace();
e.printStackTrace();
}
if (candidates.size() > 0) {
@ -186,7 +176,8 @@ public class XmlImportUtilities extends TreeImportUtilities {
if (parser.getFieldValue().trim().length() > 0) {
textNodeCount++;
}
}catch(Exception e){
}catch(TreeReaderException e){
e.printStackTrace();
//silent
}
} else if (eventType == Token.StartEntity) {
@ -209,9 +200,9 @@ public class XmlImportUtilities extends TreeImportUtilities {
}
}
}
} catch (Exception e) {
} catch (TreeReaderException e) {
// silent
// e.printStackTrace();
e.printStackTrace();
}
if (immediateChildCandidateMap.size() > 0) {
@ -273,9 +264,10 @@ public class XmlImportUtilities extends TreeImportUtilities {
Token eventType = parser.next();
if (eventType == Token.StartEntity) {
findRecord(project, parser, recordPath, 0, rootColumnGroup, limit);
logger.info("Project rows after findRecord = "+project.rows.size());
}
}
} catch (Exception e) {
} catch (TreeReaderException e) {
logger.error("Exception from XML parse",e);
}
}
@ -298,7 +290,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
int pathIndex,
ImportColumnGroup rootColumnGroup,
int limit
) throws Exception {
) throws TreeReaderException {
logger.trace("findRecord(Project, TreeReader, String[], int, ImportColumnGroup");
if(parser.current() == Token.Ignorable){//XMLStreamConstants.START_DOCUMENT){
@ -315,6 +307,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
while (parser.hasNext() && (limit <= 0 || project.rows.size() < limit)) {
Token eventType = parser.next();
if (eventType == Token.StartEntity) {
// TODO: find instead of process??
findRecord(project, parser, recordPath, pathIndex + 1, rootColumnGroup, limit);
} else if (eventType == Token.EndEntity) {
break;
@ -337,7 +330,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
}
}
static protected void skip(TreeReader parser) throws Exception {
static protected void skip(TreeReader parser) throws TreeReaderException {
while (parser.hasNext()) {
Token eventType = parser.next();
if (eventType == Token.StartEntity) {//XMLStreamConstants.START_ELEMENT) {
@ -360,7 +353,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
Project project,
TreeReader parser,
ImportColumnGroup rootColumnGroup
) throws Exception {
) throws TreeReaderException {
logger.trace("processRecord(Project,TreeReader,ImportColumnGroup)");
ImportRecord record = new ImportRecord();
@ -380,7 +373,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
Project project,
TreeReader parser,
ImportColumnGroup rootColumnGroup
) throws Exception {
) throws TreeReaderException {
logger.trace("processFieldAsRecord(Project,TreeReader,ImportColumnGroup)");
String text = parser.getFieldValue().trim();
@ -431,7 +424,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
TreeReader parser,
ImportColumnGroup columnGroup,
ImportRecord record
) throws Exception {
) throws TreeReaderException {
logger.trace("processSubRecord(Project,TreeReader,ImportColumnGroup,ImportRecord)");
if(parser.current() == Token.Ignorable) {
@ -486,6 +479,8 @@ public class XmlImportUtilities extends TreeImportUtilities {
}
} else if (eventType == Token.EndEntity) {
break;
} else {
logger.info("unknown event type " + eventType);
}
}

View File

@ -55,6 +55,7 @@ import com.google.refine.importers.tree.ImportColumn;
import com.google.refine.importers.tree.ImportColumnGroup;
import com.google.refine.importers.tree.ImportRecord;
import com.google.refine.importers.tree.TreeReader;
import com.google.refine.importers.tree.TreeReaderException;
import com.google.refine.model.Project;
import com.google.refine.model.Row;
import com.google.refine.tests.RefineTest;
@ -100,7 +101,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
}
@Test
public void detectPathFromTagXmlTest(){
public void detectPathFromTagXmlTest() throws TreeReaderException{
loadData("<?xml version=\"1.0\"?><library><book id=\"1\"><author>author1</author><genre>genre1</genre></book></library>");
String tag = "library";
@ -113,7 +114,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
}
@Test
public void detectPathFromTagWithNestedElementXml(){
public void detectPathFromTagWithNestedElementXml() throws TreeReaderException{
loadData("<?xml version=\"1.0\"?><library><book id=\"1\"><author>author1</author><genre>genre1</genre></book></library>");
String tag = "book";