Added support for Excel 2007 XML file format.

git-svn-id: http://google-refine.googlecode.com/svn/trunk@73 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
David Huynh 2010-02-08 23:44:33 +00:00
parent 736c6ec1de
commit cd376c7532
7 changed files with 45 additions and 34 deletions

View File

@ -8,8 +8,9 @@
<classpathentry kind="lib" path="lib/log4j-1.2.15.jar" sourcepath="lib-src/log4j-1.2.15-sources.jar"/> <classpathentry kind="lib" path="lib/log4j-1.2.15.jar" sourcepath="lib-src/log4j-1.2.15-sources.jar"/>
<classpathentry kind="lib" path="lib/commons-codec-1.3.jar" sourcepath="lib-src/commons-codec-1.3-sources.jar"/> <classpathentry kind="lib" path="lib/commons-codec-1.3.jar" sourcepath="lib-src/commons-codec-1.3-sources.jar"/>
<classpathentry kind="lib" path="lib/commons-lang-2.4.jar" sourcepath="lib-src/commons-lang-2.4-sources.jar"/> <classpathentry kind="lib" path="lib/commons-lang-2.4.jar" sourcepath="lib-src/commons-lang-2.4-sources.jar"/>
<classpathentry kind="lib" path="lib/poi-3.6.jar" sourcepath="lib-src/poi-3.6-sources.jar"/>
<classpathentry kind="lib" path="lib/cos-05Nov2002.jar"/> <classpathentry kind="lib" path="lib/cos-05Nov2002.jar"/>
<classpathentry kind="lib" path="lib/json-20100208.jar" sourcepath="lib-src/json-20100208-sources.jar"/> <classpathentry kind="lib" path="lib/json-20100208.jar" sourcepath="lib-src/json-20100208-sources.jar"/>
<classpathentry kind="lib" path="lib/poi-3.6.jar"/>
<classpathentry kind="lib" path="lib/poi-ooxml-3.6.jar"/>
<classpathentry kind="output" path="build/classes"/> <classpathentry kind="output" path="build/classes"/>
</classpath> </classpath>

BIN
lib/dom4j-1.6.1.jar Normal file

Binary file not shown.

BIN
lib/poi-ooxml-3.6.jar Normal file

Binary file not shown.

Binary file not shown.

BIN
lib/xmlbeans-2.3.0.jar Normal file

Binary file not shown.

View File

@ -189,15 +189,18 @@ public class CreateProjectCommand extends Command {
"application/x-ms-excel".equals(contentType) || "application/x-ms-excel".equals(contentType) ||
"application/vnd.ms-excel".equals(contentType) || "application/vnd.ms-excel".equals(contentType) ||
"application/x-excel".equals(contentType) || "application/x-excel".equals(contentType) ||
"application/xls".equals(contentType) || "application/xls".equals(contentType)) {
"application/x-xls".equals(contentType)) {
return new ExcelImporter(); return new ExcelImporter(false);
} else if("application/x-xls".equals(contentType)) {
return new ExcelImporter(true);
} }
} else if (fileName != null) { } else if (fileName != null) {
fileName = fileName.toLowerCase(); fileName = fileName.toLowerCase();
if (fileName.endsWith(".xls")) { // Note: we can't handle .xlsx yet if (fileName.endsWith(".xls")) {
return new ExcelImporter(); return new ExcelImporter(false);
} else if (fileName.endsWith(".xlsx")) {
return new ExcelImporter(true);
} }
} }

View File

@ -7,11 +7,11 @@ import java.util.List;
import java.util.Properties; import java.util.Properties;
import org.apache.commons.lang.NotImplementedException; import org.apache.commons.lang.NotImplementedException;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import com.metaweb.gridworks.model.Cell; import com.metaweb.gridworks.model.Cell;
import com.metaweb.gridworks.model.Column; import com.metaweb.gridworks.model.Column;
@ -19,6 +19,11 @@ import com.metaweb.gridworks.model.Project;
import com.metaweb.gridworks.model.Row; import com.metaweb.gridworks.model.Row;
public class ExcelImporter implements Importer { public class ExcelImporter implements Importer {
final protected boolean _xmlBased;
public ExcelImporter(boolean xmlBased) {
_xmlBased = xmlBased;
}
public boolean takesReader() { public boolean takesReader() {
return false; return false;
@ -33,9 +38,11 @@ public class ExcelImporter implements Importer {
public void read(InputStream inputStream, Project project, public void read(InputStream inputStream, Project project,
Properties options) throws Exception { Properties options) throws Exception {
POIFSFileSystem fs = new POIFSFileSystem(inputStream); Workbook wb = _xmlBased ?
HSSFWorkbook wb = new HSSFWorkbook(fs); new XSSFWorkbook(inputStream) :
HSSFSheet sheet = wb.getSheetAt(0); new HSSFWorkbook(new POIFSFileSystem(inputStream));
Sheet sheet = wb.getSheetAt(0);
int firstRow = sheet.getFirstRowNum(); int firstRow = sheet.getFirstRowNum();
int lastRow = sheet.getLastRowNum(); int lastRow = sheet.getLastRowNum();
@ -48,7 +55,7 @@ public class ExcelImporter implements Importer {
* Find the header row * Find the header row
*/ */
for (; r <= lastRow; r++) { for (; r <= lastRow; r++) {
HSSFRow row = sheet.getRow(r); org.apache.poi.ss.usermodel.Row row = sheet.getRow(r);
if (row == null) { if (row == null) {
continue; continue;
} }
@ -60,7 +67,7 @@ public class ExcelImporter implements Importer {
nonBlankHeaderStrings = new ArrayList<String>(lastCell - firstCell + 1); nonBlankHeaderStrings = new ArrayList<String>(lastCell - firstCell + 1);
for (int c = firstCell; c <= lastCell; c++) { for (int c = firstCell; c <= lastCell; c++) {
HSSFCell cell = row.getCell(c); org.apache.poi.ss.usermodel.Cell cell = row.getCell(c);
if (cell != null) { if (cell != null) {
String text = cell.getStringCellValue().trim(); String text = cell.getStringCellValue().trim();
if (text.length() > 0) { if (text.length() > 0) {
@ -93,7 +100,7 @@ public class ExcelImporter implements Importer {
* Now process the data rows * Now process the data rows
*/ */
for (; r <= lastRow; r++) { for (; r <= lastRow; r++) {
HSSFRow row = sheet.getRow(r); org.apache.poi.ss.usermodel.Row row = sheet.getRow(r);
if (row == null) { if (row == null) {
continue; continue;
} }
@ -109,21 +116,21 @@ public class ExcelImporter implements Importer {
continue; continue;
} }
HSSFCell cell = row.getCell(c); org.apache.poi.ss.usermodel.Cell cell = row.getCell(c);
if (cell == null) { if (cell == null) {
continue; continue;
} }
int cellType = cell.getCellType(); int cellType = cell.getCellType();
if (cellType == HSSFCell.CELL_TYPE_ERROR || if (cellType == org.apache.poi.ss.usermodel.Cell.CELL_TYPE_ERROR ||
cellType == HSSFCell.CELL_TYPE_BLANK) { cellType == org.apache.poi.ss.usermodel.Cell.CELL_TYPE_BLANK) {
continue; continue;
} }
Object value = null; Object value = null;
if (cellType == HSSFCell.CELL_TYPE_BOOLEAN) { if (cellType == org.apache.poi.ss.usermodel.Cell.CELL_TYPE_BOOLEAN) {
value = cell.getBooleanCellValue(); value = cell.getBooleanCellValue();
} else if (cellType == HSSFCell.CELL_TYPE_NUMERIC) { } else if (cellType == org.apache.poi.ss.usermodel.Cell.CELL_TYPE_NUMERIC) {
value = cell.getNumericCellValue(); value = cell.getNumericCellValue();
} else { } else {
String text = cell.getStringCellValue().trim(); String text = cell.getStringCellValue().trim();