Added support for Excel 2007 XML file format.

git-svn-id: http://google-refine.googlecode.com/svn/trunk@73 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
David Huynh 2010-02-08 23:44:33 +00:00
parent 736c6ec1de
commit cd376c7532
7 changed files with 45 additions and 34 deletions

View File

@ -1,15 +1,16 @@
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
<classpathentry kind="src" path="src/main/java"/>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
<classpathentry kind="lib" path="lib/servlet-api-2.5.jar" sourcepath="lib-src/servlet-api-2.5-sources.jar"/>
<classpathentry kind="lib" path="lib/jetty-6.1.22.jar" sourcepath="lib-src/jetty-6.1.22-sources.jar"/>
<classpathentry kind="lib" path="lib/jetty-util-6.1.22.jar" sourcepath="lib-src/jetty-util-6.1.22-sources.jar"/>
<classpathentry kind="lib" path="lib/log4j-1.2.15.jar" sourcepath="lib-src/log4j-1.2.15-sources.jar"/>
<classpathentry kind="lib" path="lib/commons-codec-1.3.jar" sourcepath="lib-src/commons-codec-1.3-sources.jar"/>
<classpathentry kind="lib" path="lib/commons-lang-2.4.jar" sourcepath="lib-src/commons-lang-2.4-sources.jar"/>
<classpathentry kind="lib" path="lib/poi-3.6.jar" sourcepath="lib-src/poi-3.6-sources.jar"/>
<classpathentry kind="lib" path="lib/cos-05Nov2002.jar"/>
<classpathentry kind="lib" path="lib/json-20100208.jar" sourcepath="lib-src/json-20100208-sources.jar"/>
<classpathentry kind="output" path="build/classes"/>
</classpath>
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
<classpathentry kind="src" path="src/main/java"/>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
<classpathentry kind="lib" path="lib/servlet-api-2.5.jar" sourcepath="lib-src/servlet-api-2.5-sources.jar"/>
<classpathentry kind="lib" path="lib/jetty-6.1.22.jar" sourcepath="lib-src/jetty-6.1.22-sources.jar"/>
<classpathentry kind="lib" path="lib/jetty-util-6.1.22.jar" sourcepath="lib-src/jetty-util-6.1.22-sources.jar"/>
<classpathentry kind="lib" path="lib/log4j-1.2.15.jar" sourcepath="lib-src/log4j-1.2.15-sources.jar"/>
<classpathentry kind="lib" path="lib/commons-codec-1.3.jar" sourcepath="lib-src/commons-codec-1.3-sources.jar"/>
<classpathentry kind="lib" path="lib/commons-lang-2.4.jar" sourcepath="lib-src/commons-lang-2.4-sources.jar"/>
<classpathentry kind="lib" path="lib/cos-05Nov2002.jar"/>
<classpathentry kind="lib" path="lib/json-20100208.jar" sourcepath="lib-src/json-20100208-sources.jar"/>
<classpathentry kind="lib" path="lib/poi-3.6.jar"/>
<classpathentry kind="lib" path="lib/poi-ooxml-3.6.jar"/>
<classpathentry kind="output" path="build/classes"/>
</classpath>

BIN
lib/dom4j-1.6.1.jar Normal file

Binary file not shown.

BIN
lib/poi-ooxml-3.6.jar Normal file

Binary file not shown.

Binary file not shown.

BIN
lib/xmlbeans-2.3.0.jar Normal file

Binary file not shown.

View File

@ -189,15 +189,18 @@ public class CreateProjectCommand extends Command {
"application/x-ms-excel".equals(contentType) ||
"application/vnd.ms-excel".equals(contentType) ||
"application/x-excel".equals(contentType) ||
"application/xls".equals(contentType) ||
"application/x-xls".equals(contentType)) {
"application/xls".equals(contentType)) {
return new ExcelImporter();
return new ExcelImporter(false);
} else if("application/x-xls".equals(contentType)) {
return new ExcelImporter(true);
}
} else if (fileName != null) {
fileName = fileName.toLowerCase();
if (fileName.endsWith(".xls")) { // Note: we can't handle .xlsx yet
return new ExcelImporter();
if (fileName.endsWith(".xls")) {
return new ExcelImporter(false);
} else if (fileName.endsWith(".xlsx")) {
return new ExcelImporter(true);
}
}

View File

@ -7,11 +7,11 @@ import java.util.List;
import java.util.Properties;
import org.apache.commons.lang.NotImplementedException;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import com.metaweb.gridworks.model.Cell;
import com.metaweb.gridworks.model.Column;
@ -19,6 +19,11 @@ import com.metaweb.gridworks.model.Project;
import com.metaweb.gridworks.model.Row;
public class ExcelImporter implements Importer {
/**
 * Selects the workbook implementation used by read(): when true the input
 * stream is parsed with XSSFWorkbook, when false with HSSFWorkbook over a
 * POIFSFileSystem.
 */
final protected boolean _xmlBased;
/**
 * Creates an importer bound to one of the two Excel workbook readers.
 *
 * @param xmlBased true to read the stream with XSSFWorkbook,
 *                 false to read it with HSSFWorkbook
 */
public ExcelImporter(boolean xmlBased) {
_xmlBased = xmlBased;
}
public boolean takesReader() {
return false;
@ -33,9 +38,11 @@ public class ExcelImporter implements Importer {
public void read(InputStream inputStream, Project project,
Properties options) throws Exception {
POIFSFileSystem fs = new POIFSFileSystem(inputStream);
HSSFWorkbook wb = new HSSFWorkbook(fs);
HSSFSheet sheet = wb.getSheetAt(0);
Workbook wb = _xmlBased ?
new XSSFWorkbook(inputStream) :
new HSSFWorkbook(new POIFSFileSystem(inputStream));
Sheet sheet = wb.getSheetAt(0);
int firstRow = sheet.getFirstRowNum();
int lastRow = sheet.getLastRowNum();
@ -48,7 +55,7 @@ public class ExcelImporter implements Importer {
* Find the header row
*/
for (; r <= lastRow; r++) {
HSSFRow row = sheet.getRow(r);
org.apache.poi.ss.usermodel.Row row = sheet.getRow(r);
if (row == null) {
continue;
}
@ -60,7 +67,7 @@ public class ExcelImporter implements Importer {
nonBlankHeaderStrings = new ArrayList<String>(lastCell - firstCell + 1);
for (int c = firstCell; c <= lastCell; c++) {
HSSFCell cell = row.getCell(c);
org.apache.poi.ss.usermodel.Cell cell = row.getCell(c);
if (cell != null) {
String text = cell.getStringCellValue().trim();
if (text.length() > 0) {
@ -93,7 +100,7 @@ public class ExcelImporter implements Importer {
* Now process the data rows
*/
for (; r <= lastRow; r++) {
HSSFRow row = sheet.getRow(r);
org.apache.poi.ss.usermodel.Row row = sheet.getRow(r);
if (row == null) {
continue;
}
@ -109,21 +116,21 @@ public class ExcelImporter implements Importer {
continue;
}
HSSFCell cell = row.getCell(c);
org.apache.poi.ss.usermodel.Cell cell = row.getCell(c);
if (cell == null) {
continue;
}
int cellType = cell.getCellType();
if (cellType == HSSFCell.CELL_TYPE_ERROR ||
cellType == HSSFCell.CELL_TYPE_BLANK) {
if (cellType == org.apache.poi.ss.usermodel.Cell.CELL_TYPE_ERROR ||
cellType == org.apache.poi.ss.usermodel.Cell.CELL_TYPE_BLANK) {
continue;
}
Object value = null;
if (cellType == HSSFCell.CELL_TYPE_BOOLEAN) {
if (cellType == org.apache.poi.ss.usermodel.Cell.CELL_TYPE_BOOLEAN) {
value = cell.getBooleanCellValue();
} else if (cellType == HSSFCell.CELL_TYPE_NUMERIC) {
} else if (cellType == org.apache.poi.ss.usermodel.Cell.CELL_TYPE_NUMERIC) {
value = cell.getNumericCellValue();
} else {
String text = cell.getStringCellValue().trim();