Implemented back-end of customizable tabular exporting support.

git-svn-id: http://google-refine.googlecode.com/svn/trunk@2225 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
David Huynh 2011-08-30 19:19:46 +00:00
parent 2b351233e7
commit a8815956cd
14 changed files with 715 additions and 278 deletions

View File

@ -101,7 +101,9 @@ public class Engine implements Jsonizable {
int c = project.rows.size();
for (int rowIndex = 0; rowIndex < c; rowIndex++) {
Row row = project.rows.get(rowIndex);
visitor.visit(project, rowIndex, row);
if (visitor.visit(project, rowIndex, row)) {
break;
}
}
} finally {
visitor.end(project);

View File

@ -62,7 +62,9 @@ public class ConjunctiveFilteredRows implements FilteredRows {
for (int rowIndex = 0; rowIndex < c; rowIndex++) {
Row row = project.rows.get(rowIndex);
if (matchRow(project, rowIndex, row)) {
visitRow(project, visitor, rowIndex, row);
if (visitRow(project, visitor, rowIndex, row)) {
break;
}
}
}
} finally {
@ -70,8 +72,8 @@ public class ConjunctiveFilteredRows implements FilteredRows {
}
}
protected void visitRow(Project project, RowVisitor visitor, int rowIndex, Row row) {
visitor.visit(project, rowIndex, row);
protected boolean visitRow(Project project, RowVisitor visitor, int rowIndex, Row row) {
return visitor.visit(project, rowIndex, row);
}
protected boolean matchRow(Project project, int rowIndex, Row row) {

View File

@ -99,7 +99,7 @@ public abstract class Command {
if (request == null) {
throw new IllegalArgumentException("parameter 'request' should not be null");
}
String json = request.getParameter("engine");
try{
return (json == null) ? null : ParsingUtilities.evaluateJsonStringToObject(json);

View File

@ -35,7 +35,8 @@ package com.google.refine.commands.project;
import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.Enumeration;
import java.util.Properties;
@ -75,27 +76,38 @@ public class ExportRowsCommand extends Command {
try {
Project project = getProject(request);
Engine engine = getEngine(request, project);
String format = request.getParameter("format");
Properties options = getRequestParameters(request);
Properties params = getRequestParameters(request);
String format = params.getProperty("format");
Exporter exporter = ExporterRegistry.getExporter(format);
if (exporter == null) {
exporter = new CsvExporter('\t');
}
response.setCharacterEncoding("UTF-8");
response.setHeader("Content-Type", exporter.getContentType());
String contentType = params.getProperty("contentType");
if (contentType == null) {
contentType = exporter.getContentType();
}
response.setHeader("Content-Type", contentType);
if (exporter instanceof WriterExporter) {
PrintWriter writer = response.getWriter();
((WriterExporter) exporter).export(project, options, engine, writer);
writer.flush();
String encoding = params.getProperty("encoding");
response.setCharacterEncoding(encoding != null ? encoding : "UTF-8");
Writer writer = encoding == null ?
response.getWriter() :
new OutputStreamWriter(response.getOutputStream(), encoding);
((WriterExporter) exporter).export(project, params, engine, writer);
writer.close();
} else if (exporter instanceof StreamExporter) {
response.setCharacterEncoding("UTF-8");
OutputStream stream = response.getOutputStream();
((StreamExporter) exporter).export(project, options, engine, stream);
stream.flush();
// } else if (exporter instanceof UrlExporter) {
// ((UrlExporter) exporter).export(project, options, engine);
((StreamExporter) exporter).export(project, params, engine, stream);
stream.close();
// } else if (exporter instanceof UrlExporter) {
// ((UrlExporter) exporter).export(project, options, engine);
} else {
// TODO: Should this use ServletException instead of respondException?
respondException(response, new RuntimeException("Unknown exporter type"));

View File

@ -35,21 +35,19 @@ package com.google.refine.exporters;
import java.io.IOException;
import java.io.Writer;
import java.util.Calendar;
import java.util.Date;
import java.util.List;
import java.util.Properties;
import org.json.JSONException;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import au.com.bytecode.opencsv.CSVWriter;
import com.google.refine.browsing.Engine;
import com.google.refine.browsing.FilteredRows;
import com.google.refine.browsing.RowVisitor;
import com.google.refine.model.Column;
import com.google.refine.model.Project;
import com.google.refine.model.Row;
import com.google.refine.util.JSONUtilities;
import com.google.refine.util.ParsingUtilities;
public class CsvExporter implements WriterExporter{
@ -66,83 +64,65 @@ public class CsvExporter implements WriterExporter{
}
@Override
public void export(Project project, Properties options, Engine engine, Writer writer) throws IOException {
boolean printColumnHeader = true;
if (options != null && options.getProperty("printColumnHeader") != null) {
printColumnHeader = Boolean.parseBoolean(options.getProperty("printColumnHeader"));
public void export(Project project, Properties params, Engine engine, final Writer writer)
throws IOException {
String optionsString = params == null ? null : params.getProperty("options");
JSONObject options = null;
if (optionsString != null) {
try {
options = ParsingUtilities.evaluateJsonStringToObject(optionsString);
} catch (JSONException e) {
// Ignore and keep options null.
}
}
RowVisitor visitor = new RowVisitor() {
CSVWriter csvWriter;
boolean printColumnHeader = true;
boolean isFirstRow = true; //the first row should also add the column headers
public RowVisitor init(CSVWriter writer, boolean printColumnHeader) {
this.csvWriter = writer;
this.printColumnHeader = printColumnHeader;
return this;
final String separator = options == null ? Character.toString(this.separator) :
JSONUtilities.getString(options, "separator", Character.toString(this.separator));
final String lineSeparator = options == null ? CSVWriter.DEFAULT_LINE_END :
JSONUtilities.getString(options, "lineSeparator", CSVWriter.DEFAULT_LINE_END);
final boolean printColumnHeader =
(params != null && params.getProperty("printColumnHeader") != null) ?
Boolean.parseBoolean(params.getProperty("printColumnHeader")) :
true;
final CSVWriter csvWriter =
new CSVWriter(writer, separator.charAt(0), CSVWriter.DEFAULT_QUOTE_CHARACTER, lineSeparator);
TabularSerializer serializer = new TabularSerializer() {
@Override
public void startFile(JSONObject options) {
}
@Override
public boolean visit(Project project, int rowIndex, Row row) {
int size = project.columnModel.columns.size();
public void endFile() {
}
String[] cols = new String[size];
String[] vals = new String[size];
int i = 0;
for (Column col : project.columnModel.columns) {
int cellIndex = col.getCellIndex();
cols[i] = col.getName();
Object value = row.getCellValue(cellIndex);
if (value != null) {
if (value instanceof String) {
vals[i] = (String) value;
} else if (value instanceof Calendar) {
vals[i] = ParsingUtilities.dateToString(((Calendar) value).getTime());
} else if (value instanceof Date) {
vals[i] = ParsingUtilities.dateToString((Date) value);
} else {
vals[i] = value.toString();
}
@Override
public void addRow(List<CellData> cells, boolean isHeader) {
if (!isHeader || printColumnHeader) {
String[] strings = new String[cells.size()];
for (int i = 0; i < strings.length; i++) {
CellData cellData = cells.get(i);
strings[i] =
(cellData != null && cellData.text != null) ?
cellData.text :
"";
}
i++;
}
if (printColumnHeader && isFirstRow) {
csvWriter.writeNext(cols,false);
isFirstRow = false; //switch off flag
}
csvWriter.writeNext(vals,false);
return false;
}
@Override
public void start(Project project) {
// nothing to do
}
@Override
public void end(Project project) {
try {
csvWriter.close();
} catch (IOException e) {
logger.error("CsvExporter could not close writer : " + e.getMessage());
csvWriter.writeNext(strings, false);
}
}
}.init(new CSVWriter(writer, separator), printColumnHeader);
FilteredRows filteredRows = engine.getAllFilteredRows();
filteredRows.accept(project, visitor);
};
CustomizableTabularExporterUtilities.exportRows(
project, engine, params, serializer);
csvWriter.close();
}
@Override
public String getContentType() {
return "application/x-unknown";
return "text/plain";
}
}

View File

@ -0,0 +1,355 @@
/*
Copyright 2010, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.exporters;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.TimeZone;
import org.apache.commons.lang.StringUtils;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import com.google.refine.ProjectManager;
import com.google.refine.browsing.Engine;
import com.google.refine.browsing.FilteredRows;
import com.google.refine.browsing.RowVisitor;
import com.google.refine.exporters.TabularSerializer.CellData;
import com.google.refine.model.Cell;
import com.google.refine.model.Column;
import com.google.refine.model.Project;
import com.google.refine.model.Recon;
import com.google.refine.model.Row;
import com.google.refine.preference.PreferenceStore;
import com.google.refine.util.JSONUtilities;
import com.google.refine.util.ParsingUtilities;
/**
 * Shared driver for the tabular exporters: it reads the export options,
 * walks the rows selected by the engine, formats each cell, and hands the
 * resulting rows to a {@link TabularSerializer}.
 */
abstract public class CustomizableTabularExporterUtilities {
    /**
     * Exports the rows currently selected by {@code engine} through
     * {@code serializer}.
     *
     * <p>The {@code "options"} property of {@code params}, when present and
     * parseable as JSON, may carry these keys:
     * <ul>
     * <li>{@code outputColumnHeaders} (boolean) - emit a header row first;</li>
     * <li>{@code outputBlankRows} (boolean) - emit rows whose cells are all blank;</li>
     * <li>{@code limit} (int) - stop after this many emitted rows; values
     *     {@code <= 0} mean no limit;</li>
     * <li>{@code columns} (array of objects) - the columns to export, in order,
     *     each with a {@code name} and optional {@code reconSettings} /
     *     {@code dateSettings}. When absent, every project column is exported
     *     with default formatting.</li>
     * </ul>
     *
     * @param project    the project whose rows are exported
     * @param engine     supplies the filtered rows to visit
     * @param params     request parameters; may be {@code null}, in which case
     *                   defaults are used
     * @param serializer receives the start/end notifications and the rows
     */
    static public void exportRows(
            final Project project,
            final Engine engine,
            Properties params,
            final TabularSerializer serializer) {

        // A missing parameter set is treated the same as a missing "options" entry.
        String optionsString = params == null ? null : params.getProperty("options");
        JSONObject optionsTemp = null;
        if (optionsString != null) {
            try {
                optionsTemp = ParsingUtilities.evaluateJsonStringToObject(optionsString);
            } catch (JSONException e) {
                // Ignore and keep options null.
            }
        }
        final JSONObject options = optionsTemp;

        final boolean outputColumnHeaders = options == null ? true :
            JSONUtilities.getBoolean(options, "outputColumnHeaders", true);
        // NOTE(review): the default differs depending on whether an options object
        // was supplied at all (false) or merely lacks the key (true) - confirm
        // which one is intended. Behavior is kept as-is.
        final boolean outputBlankRows = options == null ? false :
            JSONUtilities.getBoolean(options, "outputBlankRows", true);
        final int limit = options == null ? -1 :
            JSONUtilities.getInt(options, "limit", -1);

        final List<String> columnNames;
        final Map<String, CellFormatter> columnNameToFormatter =
            new HashMap<String, CustomizableTabularExporterUtilities.CellFormatter>();

        JSONArray columnOptionArray = options == null ? null :
            JSONUtilities.getArray(options, "columns");
        if (columnOptionArray == null) {
            // No explicit column list: export every column with default formatting.
            List<Column> columns = project.columnModel.columns;
            columnNames = new ArrayList<String>(columns.size());
            for (Column column : columns) {
                String name = column.getName();
                columnNames.add(name);
                columnNameToFormatter.put(name, new CellFormatter());
            }
        } else {
            int count = columnOptionArray.length();
            columnNames = new ArrayList<String>(count);
            for (int i = 0; i < count; i++) {
                JSONObject columnOptions = JSONUtilities.getObjectElement(columnOptionArray, i);
                if (columnOptions != null) {
                    String name = JSONUtilities.getString(columnOptions, "name", null);
                    if (name != null) {
                        columnNames.add(name);
                        columnNameToFormatter.put(name, new CellFormatter(columnOptions));
                    }
                }
            }
        }

        RowVisitor visitor = new RowVisitor() {
            // Number of rows actually handed to the serializer (header excluded).
            int rowCount = 0;

            @Override
            public void start(Project project) {
                serializer.startFile(options);
                if (outputColumnHeaders) {
                    List<CellData> cells =
                        new ArrayList<TabularSerializer.CellData>(columnNames.size());
                    for (String name : columnNames) {
                        // Header cells use the column name as both value and text.
                        cells.add(new CellData(name, name, name, null));
                    }
                    serializer.addRow(cells, true);
                }
            }

            @Override
            public boolean visit(Project project, int rowIndex, Row row) {
                List<CellData> cells =
                    new ArrayList<TabularSerializer.CellData>(columnNames.size());
                int nonBlankCount = 0;
                for (String columnName : columnNames) {
                    Column column = project.columnModel.getColumnByName(columnName);
                    CellData cellData = null;
                    // Column names can come from the request; if the lookup yields
                    // nothing, emit a blank cell so the row stays aligned with the
                    // header instead of failing on a null column.
                    if (column != null) {
                        CellFormatter formatter = columnNameToFormatter.get(columnName);
                        cellData = formatter.format(
                            project,
                            column,
                            row.getCell(column.getCellIndex()));
                    }
                    cells.add(cellData);
                    if (cellData != null) {
                        nonBlankCount++;
                    }
                }
                if (nonBlankCount > 0 || outputBlankRows) {
                    serializer.addRow(cells, false);
                    rowCount++;
                }
                // Returning true signals that the configured row limit was reached.
                return limit > 0 && rowCount >= limit;
            }

            @Override
            public void end(Project project) {
                serializer.endFile();
            }
        };

        FilteredRows filteredRows = engine.getAllFilteredRows();
        filteredRows.accept(project, visitor);
    }

    /** What to output for a cell whose reconciliation judgment is "matched". */
    private enum ReconOutputMode {
        ENTITY_NAME,
        ENTITY_ID,
        CELL_CONTENT
    }

    /** How Date/Calendar cell values are rendered as text. */
    private enum DateFormatMode {
        ISO_8601,
        SHORT_LOCALE,
        MEDIUM_LOCALE,
        LONG_LOCALE,
        FULL_LOCALE,
        CUSTOM
    }

    /**
     * Per-column formatting settings plus the logic that turns a {@link Cell}
     * into a {@link CellData}.
     */
    static private class CellFormatter {
        ReconOutputMode recon_outputMode = ReconOutputMode.ENTITY_NAME;
        boolean recon_blankUnmatchedCells = false;
        boolean recon_linkToEntityPages = true;

        DateFormatMode date_formatMode = DateFormatMode.ISO_8601;
        String date_custom = null;
        boolean date_useLocalTimeZone = false;
        boolean date_omitTime = false;

        DateFormat dateFormatter;

        // Lazily built by buildIdentifierSpaceToUrlMap(); maps a service "url"
        // to its view "url" template.
        Map<String, String> identifierSpaceToUrl = null;

        /**
         * Creates a formatter with default settings. The date formatter must be
         * set up here as well: otherwise formatting a Date/Calendar cell would
         * dereference a null {@code dateFormatter}.
         */
        CellFormatter() {
            initializeDateFormatter();
        }

        /**
         * Creates a formatter from one entry of the {@code columns} option.
         *
         * @param options the column's option object; {@code reconSettings} and
         *                {@code dateSettings} are both optional
         */
        CellFormatter(JSONObject options) {
            JSONObject reconSettings = JSONUtilities.getObject(options, "reconSettings");
            if (reconSettings != null) {
                String reconOutputString = JSONUtilities.getString(reconSettings, "output", null);
                if ("entity-name".equals(reconOutputString)) {
                    recon_outputMode = ReconOutputMode.ENTITY_NAME;
                } else if ("entity-id".equals(reconOutputString)) {
                    recon_outputMode = ReconOutputMode.ENTITY_ID;
                } else if ("cell-content".equals(reconOutputString)) {
                    recon_outputMode = ReconOutputMode.CELL_CONTENT;
                }

                recon_blankUnmatchedCells = JSONUtilities.getBoolean(
                    reconSettings, "blankUnmatchedCells", recon_blankUnmatchedCells);
                recon_linkToEntityPages = JSONUtilities.getBoolean(
                    reconSettings, "linkToEntityPages", recon_linkToEntityPages);
            }

            JSONObject dateSettings = JSONUtilities.getObject(options, "dateSettings");
            if (dateSettings != null) {
                String dateFormatString = JSONUtilities.getString(dateSettings, "format", null);
                if ("iso-8601".equals(dateFormatString)) {
                    date_formatMode = DateFormatMode.ISO_8601;
                } else if ("locale-short".equals(dateFormatString)) {
                    date_formatMode = DateFormatMode.SHORT_LOCALE;
                } else if ("locale-medium".equals(dateFormatString)) {
                    date_formatMode = DateFormatMode.MEDIUM_LOCALE;
                } else if ("locale-long".equals(dateFormatString)) {
                    date_formatMode = DateFormatMode.LONG_LOCALE;
                } else if ("locale-full".equals(dateFormatString)) {
                    date_formatMode = DateFormatMode.FULL_LOCALE;
                } else if ("custom".equals(dateFormatString)) {
                    date_formatMode = DateFormatMode.CUSTOM;
                }

                date_custom = JSONUtilities.getString(dateSettings, "custom", null);
                date_useLocalTimeZone = JSONUtilities.getBoolean(
                    dateSettings, "useLocalTimeZone", date_useLocalTimeZone);
                date_omitTime = JSONUtilities.getBoolean(
                    dateSettings, "omitTime", date_omitTime);

                // A custom mode without a pattern cannot work; use ISO 8601 instead.
                if (date_formatMode == DateFormatMode.CUSTOM &&
                    (date_custom == null || date_custom.isEmpty())) {
                    date_formatMode = DateFormatMode.ISO_8601;
                }
            }

            initializeDateFormatter();
        }

        /** Builds {@code dateFormatter} from the current date settings. */
        private void initializeDateFormatter() {
            switch (date_formatMode) {
            case SHORT_LOCALE:
                dateFormatter = createLocaleFormatter(DateFormat.SHORT);
                break;
            case MEDIUM_LOCALE:
                dateFormatter = createLocaleFormatter(DateFormat.MEDIUM);
                break;
            case LONG_LOCALE:
                dateFormatter = createLocaleFormatter(DateFormat.LONG);
                break;
            case FULL_LOCALE:
                dateFormatter = createLocaleFormatter(DateFormat.FULL);
                break;
            case CUSTOM:
                try {
                    dateFormatter = new SimpleDateFormat(date_custom);
                } catch (IllegalArgumentException e) {
                    // The pattern is user-supplied; an invalid one should not abort
                    // the whole export, so fall back to ISO 8601.
                    date_formatMode = DateFormatMode.ISO_8601;
                    dateFormatter = createIso8601Formatter();
                }
                break;
            default:
                dateFormatter = createIso8601Formatter();
            }

            if (!date_useLocalTimeZone) {
                dateFormatter.setTimeZone(TimeZone.getTimeZone("UTC"));
            }
        }

        /** Creates a locale-style formatter, with or without the time part. */
        private DateFormat createLocaleFormatter(int style) {
            return date_omitTime ?
                DateFormat.getDateInstance(style) :
                DateFormat.getDateTimeInstance(style, style);
        }

        /** Creates the ISO 8601 formatter, with or without the time part. */
        private DateFormat createIso8601Formatter() {
            // NOTE(review): the 'Z' suffix is a literal, so with useLocalTimeZone
            // the text still claims UTC - confirm whether an offset is wanted.
            return date_omitTime ?
                new SimpleDateFormat("yyyy-MM-dd") :
                new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'");
        }

        /**
         * Converts a cell into export data.
         *
         * @param project the project being exported
         * @param column  the column the cell belongs to; must not be {@code null}
         * @param cell    the cell to format; may be {@code null}
         * @return the formatted cell, or {@code null} when the cell is absent,
         *         has no value, or is an unmatched reconciled cell and
         *         blanking of unmatched cells is enabled
         */
        CellData format(Project project, Column column, Cell cell) {
            if (cell == null) {
                return null;
            }

            String link = null;
            String text = null;

            Recon recon = cell.recon;
            if (recon != null) {
                // A "matched" judgment without a match object is handled like an
                // unmatched cell rather than dereferencing null.
                if (recon.judgment == Recon.Judgment.Matched && recon.match != null) {
                    if (recon_outputMode == ReconOutputMode.ENTITY_NAME) {
                        text = recon.match.name;
                    } else if (recon_outputMode == ReconOutputMode.ENTITY_ID) {
                        text = recon.match.id;
                    } // else: output cell content

                    if (recon_linkToEntityPages) {
                        buildIdentifierSpaceToUrlMap();

                        String service = recon.service;
                        String viewUrl = identifierSpaceToUrl.get(service);
                        if (viewUrl != null) {
                            link = StringUtils.replace(viewUrl, "{{id}}", recon.match.id);
                        }
                    }
                } else if (recon_blankUnmatchedCells) {
                    return null;
                }
            }

            Object value = cell.value;
            if (value == null) {
                return null;
            }

            if (text == null) {
                if (value instanceof String) {
                    text = (String) value;
                } else if (value instanceof Calendar) {
                    text = dateFormatter.format(((Calendar) value).getTime());
                } else if (value instanceof Date) {
                    text = dateFormatter.format((Date) value);
                } else {
                    text = value.toString();
                }
            }
            return new CellData(column.getName(), value, text, link);
        }

        /**
         * Populates {@code identifierSpaceToUrl} on first use from the
         * {@code reconciliation.standardServices} preference. Entries lacking a
         * "view" object, a service "url" or a view "url" are skipped.
         */
        void buildIdentifierSpaceToUrlMap() {
            if (identifierSpaceToUrl != null) {
                return;
            }

            identifierSpaceToUrl = new HashMap<String, String>();

            PreferenceStore ps = ProjectManager.singleton.getPreferenceStore();
            Object stored = ps.get("reconciliation.standardServices");
            if (!(stored instanceof JSONArray)) {
                // Nothing stored, or not in the expected shape: no links available.
                return;
            }

            JSONArray services = (JSONArray) stored;
            int count = services.length();
            for (int i = 0; i < count; i++) {
                JSONObject service = JSONUtilities.getObjectElement(services, i);
                if (service == null) {
                    continue;
                }

                JSONObject view = JSONUtilities.getObject(service, "view");
                if (view != null) {
                    String url = JSONUtilities.getString(service, "url", null);
                    String viewUrl = JSONUtilities.getString(view, "url", null);
                    if (url != null && viewUrl != null) {
                        identifierSpaceToUrl.put(url, viewUrl);
                    }
                }
            }
        }
    }
}

View File

@ -40,9 +40,14 @@ abstract public class ExporterRegistry {
static final private Map<String, Exporter> s_formatToExporter = new HashMap<String, Exporter>();
static {
s_formatToExporter.put("html", new HtmlTableExporter());
s_formatToExporter.put("xls", new XlsExporter());
s_formatToExporter.put("csv", new CsvExporter());
s_formatToExporter.put("tsv", new CsvExporter('\t'));
s_formatToExporter.put("*sv", new CsvExporter());
s_formatToExporter.put("xls", new XlsExporter(false));
s_formatToExporter.put("xlsx", new XlsExporter(true));
s_formatToExporter.put("html", new HtmlTableExporter());
s_formatToExporter.put("template", new TemplatingExporter());
}

View File

@ -35,16 +35,15 @@ package com.google.refine.exporters;
import java.io.IOException;
import java.io.Writer;
import java.util.List;
import java.util.Properties;
import org.apache.commons.lang.StringEscapeUtils;
import org.json.JSONObject;
import com.google.refine.ProjectManager;
import com.google.refine.browsing.Engine;
import com.google.refine.browsing.FilteredRows;
import com.google.refine.browsing.RowVisitor;
import com.google.refine.model.Cell;
import com.google.refine.model.Column;
import com.google.refine.model.Project;
import com.google.refine.model.Row;
public class HtmlTableExporter implements WriterExporter {
@ -54,79 +53,71 @@ public class HtmlTableExporter implements WriterExporter {
}
@Override
public void export(Project project, Properties options, Engine engine, Writer writer) throws IOException {
writer.write("<html>\n");
writer.write("<head><title>");
writer.write(ProjectManager.singleton.getProjectMetadata(project.id).getName());
writer.write("</title></head>\n");
writer.write("<body>\n");
writer.write("<table>\n");
public void export(final Project project, Properties params, Engine engine, final Writer writer)
throws IOException {
writer.write("<tr>");
{
for (Column column : project.columnModel.columns) {
writer.write("<th>");
writer.write(column.getName());
writer.write("</th>");
TabularSerializer serializer = new TabularSerializer() {
@Override
public void startFile(JSONObject options) {
try {
writer.write("<html>\n");
writer.write("<head><title>");
writer.write(ProjectManager.singleton.getProjectMetadata(project.id).getName());
writer.write("</title></head>\n");
writer.write("<body>\n");
writer.write("<table>\n");
} catch (IOException e) {
// Ignore
}
}
}
writer.write("</tr>\n");
{
RowVisitor visitor = new RowVisitor() {
Writer writer;
public RowVisitor init(Writer writer) {
this.writer = writer;
return this;
@Override
public void endFile() {
try {
writer.write("</table>\n");
writer.write("</body>\n");
writer.write("</html>\n");
} catch (IOException e) {
// Ignore
}
@Override
public void start(Project project) {
// nothing to do
}
@Override
public void end(Project project) {
// nothing to do
}
@Override
public boolean visit(Project project, int rowIndex, Row row) {
try {
writer.write("<tr>");
for (Column column : project.columnModel.columns) {
}
@Override
public void addRow(List<CellData> cells, boolean isHeader) {
try {
writer.write("<tr>");
if (isHeader) {
for (CellData cellData : cells) {
writer.write("<th>");
writer.write((cellData != null && cellData.text != null) ? cellData.text : "");
writer.write("</th>");
}
} else {
for (CellData cellData : cells) {
writer.write("<td>");
int cellIndex = column.getCellIndex();
if (cellIndex < row.cells.size()) {
Cell cell = row.cells.get(cellIndex);
if (cell != null && cell.value != null) {
Object v = cell.value;
writer.write(v instanceof String ? ((String) v) : v.toString());
if (cellData != null && cellData.text != null) {
if (cellData.link != null) {
writer.write("<a href=\"");
writer.write(StringEscapeUtils.escapeHtml(cellData.link));
writer.write("\">");
}
writer.write(cellData.text);
if (cellData.link != null) {
writer.write("</a>");
}
}
writer.write("</td>");
}
writer.write("</tr>\n");
} catch (IOException e) {
// ignore
}
return false;
writer.write("</tr>");
} catch (IOException e) {
// Ignore
}
}.init(writer);
FilteredRows filteredRows = engine.getAllFilteredRows();
filteredRows.accept(project, visitor);
}
}
};
writer.write("</table>\n");
writer.write("</body>\n");
writer.write("</html>\n");
CustomizableTabularExporterUtilities.exportRows(
project, engine, params, serializer);
}
}

View File

@ -0,0 +1,28 @@
package com.google.refine.exporters;
import java.util.List;
import org.json.JSONObject;
/**
 * Receives the output of a tabular export: one notification when the file
 * starts, one call per row, and one notification when the file ends.
 */
public interface TabularSerializer {

    /**
     * Immutable holder for a single exported cell.
     */
    public static class CellData {
        /** Name of the column the cell belongs to. */
        public final String columnName;
        /** The cell's underlying value. */
        public final Object value;
        /** Text rendering of the cell. */
        public final String text;
        /** Optional hyperlink for the cell; may be {@code null}. */
        public final String link;

        /**
         * @param columnName name of the column the cell belongs to
         * @param value      the cell's underlying value
         * @param text       text rendering of the cell
         * @param link       hyperlink for the cell, or {@code null} for none
         */
        public CellData(String columnName, Object value, String text, String link) {
            this.link = link;
            this.text = text;
            this.value = value;
            this.columnName = columnName;
        }
    }

    /**
     * Called once before any row is added.
     *
     * @param options the export options object; may be {@code null}
     */
    void startFile(JSONObject options);

    /**
     * Called once after the last row has been added.
     */
    void endFile();

    /**
     * Adds one row to the output.
     *
     * @param cells    the row's cells in column order; individual entries may
     *                 be {@code null} for blank cells
     * @param isHeader {@code true} when the row is the column-header row
     */
    void addRow(List<CellData> cells, boolean isHeader);
}

View File

@ -37,128 +37,98 @@ import java.io.IOException;
import java.io.OutputStream;
import java.util.Calendar;
import java.util.Date;
import java.util.List;
import java.util.Properties;
import org.apache.poi.hssf.usermodel.HSSFHyperlink;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.json.JSONObject;
import com.google.refine.ProjectManager;
import com.google.refine.browsing.Engine;
import com.google.refine.browsing.FilteredRows;
import com.google.refine.browsing.RowVisitor;
import com.google.refine.model.Cell;
import com.google.refine.model.Column;
import com.google.refine.model.Project;
import com.google.refine.model.Row;
public class XlsExporter implements StreamExporter {
final private boolean xml;
public XlsExporter(boolean xml) {
this.xml = xml;
}
@Override
public String getContentType() {
return "application/xls";
return xml ? "application/xlsx" : "application/xls";
}
@Override
public void export(Project project, Properties options, Engine engine,
public void export(final Project project, Properties params, Engine engine,
OutputStream outputStream) throws IOException {
Workbook wb = new HSSFWorkbook();
Sheet s = wb.createSheet();
wb.setSheetName(0, ProjectManager.singleton.getProjectMetadata(project.id).getName());
final Workbook wb = xml ? new XSSFWorkbook() : new HSSFWorkbook();
int rowCount = 0;
{
org.apache.poi.ss.usermodel.Row r = s.createRow(rowCount++);
TabularSerializer serializer = new TabularSerializer() {
Sheet s;
int rowCount = 0;
int cellCount = 0;
for (Column column : project.columnModel.columns) {
if (cellCount++ > 255) {
// TODO: Warn user about truncated data
} else {
org.apache.poi.ss.usermodel.Cell c = r.createCell(cellCount);
c.setCellValue(column.getName());
}
@Override
public void startFile(JSONObject options) {
s = wb.createSheet();
wb.setSheetName(0, ProjectManager.singleton.getProjectMetadata(project.id).getName());
}
}
{
RowVisitor visitor = new RowVisitor() {
Sheet sheet;
int rowCount;
@Override
public void endFile() {
}
@Override
public void addRow(List<CellData> cells, boolean isHeader) {
Row r = s.createRow(rowCount++);
public RowVisitor init(Sheet sheet, int rowCount) {
this.sheet = sheet;
this.rowCount = rowCount;
return this;
}
@Override
public void start(Project project) {
// nothing to do
}
@Override
public void end(Project project) {
// nothing to do
}
@Override
public boolean visit(Project project, int rowIndex, Row row) {
org.apache.poi.ss.usermodel.Row r = sheet.createRow(rowCount++);
int cellCount = 0;
for (Column column : project.columnModel.columns) {
if (cellCount++ > 255) {
// TODO: Warn user about truncated data
} else {
org.apache.poi.ss.usermodel.Cell c = r.createCell(cellCount);
int cellIndex = column.getCellIndex();
if (cellIndex < row.cells.size()) {
Cell cell = row.cells.get(cellIndex);
if (cell != null) {
if (cell.recon != null && cell.recon.match != null) {
c.setCellValue(cell.recon.match.name);
HSSFHyperlink hl = new HSSFHyperlink(HSSFHyperlink.LINK_URL);
hl.setLabel(cell.recon.match.name);
hl.setAddress("http://www.freebase.com/view" + cell.recon.match.id);
c.setHyperlink(hl);
} else if (cell.value != null) {
Object v = cell.value;
if (v instanceof Number) {
c.setCellValue(((Number) v).doubleValue());
} else if (v instanceof Boolean) {
c.setCellValue(((Boolean) v).booleanValue());
} else if (v instanceof Date) {
c.setCellValue((Date) v);
} else if (v instanceof Calendar) {
c.setCellValue((Calendar) v);
} else if (v instanceof String) {
String s = (String) v;
if (s.length() > 32767) {
// The maximum length of cell contents (text) is 32,767 characters
s = s.substring(0, 32767);
}
c.setCellValue(s);
}
}
for (int i = 0; i < cells.size(); i++) {
Cell c = r.createCell(i);
if (i == 255 && cells.size() > 256) {
c.setCellValue("ERROR: TOO MANY COLUMNS");
} else {
CellData cellData = cells.get(i);
if (cellData != null && cellData.text != null && cellData.value != null) {
Object v = cellData.value;
if (v instanceof Number) {
c.setCellValue(((Number) v).doubleValue());
} else if (v instanceof Boolean) {
c.setCellValue(((Boolean) v).booleanValue());
} else if (v instanceof Date) {
c.setCellValue((Date) v);
} else if (v instanceof Calendar) {
c.setCellValue((Calendar) v);
} else {
String s = cellData.text;
if (s.length() > 32767) {
// The maximum length of cell contents (text) is 32,767 characters
s = s.substring(0, 32767);
}
c.setCellValue(s);
}
if (cellData.link != null) {
HSSFHyperlink hl = new HSSFHyperlink(HSSFHyperlink.LINK_URL);
hl.setLabel(cellData.text);
hl.setAddress(cellData.link);
}
}
}
return false;
}
}.init(s, rowCount);
FilteredRows filteredRows = engine.getAllFilteredRows();
filteredRows.accept(project, visitor);
}
}
};
CustomizableTabularExporterUtilities.exportRows(
project, engine, params, serializer);
wb.write(outputStream);
outputStream.flush();

View File

@ -24,8 +24,12 @@
<td><div class="grid-layout grid-layout-for-text layout-tightest"><table>
<tr>
<td width="1%"><input type="radio" name="custom-tabular-exporter-format" value="excel" /></td>
<td>Excel <input type="checkbox" bind="xlsxCheckbox" /> in XML (.xlsx)</td>
<td width="1%"><input type="radio" name="custom-tabular-exporter-format" value="xls" /></td>
<td>Excel (.xls)</td>
</tr>
<tr>
<td width="1%"><input type="radio" name="custom-tabular-exporter-format" value="xlsx" /></td>
<td>Excel in XML (.xlsx)</td>
</tr>
<tr>
<td width="1%"><input type="radio" name="custom-tabular-exporter-format" value="html" /></td>
@ -115,8 +119,10 @@
</td>
<td><div class="grid-layout layout-tighter"><table>
<tr>
<td width="1%"><input type="checkbox" bind="outputColumnHeadersCheckbox" checked /></td><td>Output column headers</td>
<td width="1%"><input type="checkbox" bind="outputBlankRowsCheckbox" /></td><td>Output blank rows</td>
<td width="1%"><input type="checkbox" bind="outputColumnHeadersCheckbox" checked /></td>
<td width="50%">Output column headers</td>
<td width="1%"><input type="checkbox" bind="outputBlankRowsCheckbox" /></td>
<td width="50%">Output blank rows</td>
</tr>
</table></div></td>
</tr>
@ -137,6 +143,10 @@
</div>
<div class="dialog-footer" bind="dialogFooter"><div class="grid-layout layout-tightest layout-full"><table><tr>
<td><button class="button" bind="previewButton">Preview</button></td>
<td width="1%"><input type="checkbox" bind="exportAllRowsCheckbox" /></td>
<td>Ignore facets and filters and export all rows</td>
<td width="1%"><button class="button button-primary" bind="exportButton">Export</button></td>
<td width="1%"><button class="button" bind="cancelButton">Cancel</button></td>
</tr></table></div></div>

View File

@ -47,6 +47,27 @@ function CustomTabularExporterDialog(options) {
this._createDialog(options);
};
// Export formats known to the dialog, keyed by format identifier. Each entry's
// `extension` is looked up at export time and passed along as the file
// extension for the export request.
CustomTabularExporterDialog.formats = {
'csv': {
extension: 'csv'
},
'tsv': {
extension: 'tsv'
},
// Separator-delimited output with a user-chosen separator.
'*sv': {
extension: 'txt'
},
'html': {
extension: 'html'
},
'xls': {
extension: 'xls'
},
'xlsx': {
extension: 'xlsx'
}
};
CustomTabularExporterDialog.prototype._createDialog = function(options) {
var self = this;
@ -145,7 +166,6 @@ CustomTabularExporterDialog.prototype._configureUIFromOptionCode = function(opti
this._elmts.encodingInput[0].value = options.encoding;
this._elmts.outputColumnHeadersCheckbox.attr('checked', (options.outputColumnHeaders) ? 'checked' : '');
this._elmts.outputBlankRowsCheckbox.attr('checked', (options.outputBlankRows) ? 'checked' : '');
this._elmts.xlsxCheckbox.attr('checked', (options.xlsx) ? 'checked' : '');
if (options.columns != null) {
var self = this;
@ -169,8 +189,52 @@ CustomTabularExporterDialog.prototype._dismiss = function() {
};
// Submits the export in preview mode; the dialog stays open.
CustomTabularExporterDialog.prototype._preview = function() {
this._postExport(true);
};
// Submits the real (non-preview) export and then closes the dialog.
CustomTabularExporterDialog.prototype._commit = function() {
this._postExport(false);
this._dismiss();
};
/**
 * Builds the export form from the dialog's current settings and submits it
 * to the "refine-export" window.
 *
 * @param preview when truthy, the export is limited to 10 rows and no
 *                download-forcing content type is sent; otherwise the
 *                content type is overridden so the browser downloads the file.
 */
CustomTabularExporterDialog.prototype._postExport = function(preview) {
  var exportAllRows = this._elmts.exportAllRowsCheckbox[0].checked;
  var options = this._getOptionCode();

  // Format and encoding travel as their own form fields, not inside "options".
  var format = options.format;
  var encoding = options.encoding;
  delete options.format;
  delete options.encoding;

  if (preview) {
    options.limit = 10;
  }

  // Tolerate a format with no registered entry (e.g. nothing selected) instead
  // of throwing on the property access; the form builder accepts a missing
  // extension.
  var formatInfo = CustomTabularExporterDialog.formats[format];
  var ext = formatInfo ? formatInfo.extension : undefined;

  // The engine (facets/filters) is included unless all rows are requested.
  var form = ExporterManager.prepareExportRowsForm(format, !exportAllRows, ext);
  $('<input />')
    .attr("name", "options")
    .attr("value", JSON.stringify(options))
    .appendTo(form);
  if (encoding) {
    $('<input />')
      .attr("name", "encoding")
      .attr("value", encoding)
      .appendTo(form);
  }
  if (!preview) {
    $('<input />')
      .attr("name", "contentType")
      .attr("value", "application/x-unknown") // force download
      .appendTo(form);
  }

  // The form must be attached to the document while it is submitted.
  document.body.appendChild(form);

  window.open("about:blank", "refine-export");
  form.submit();

  document.body.removeChild(form);
};
CustomTabularExporterDialog.prototype._selectColumn = function(columnName) {
@ -229,10 +293,14 @@ CustomTabularExporterDialog.prototype._getOptionCode = function() {
format: this._dialog.find('input[name="custom-tabular-exporter-format"]:checked').val()
};
if (options.format == 'excel') {
options.xlsx = this._elmts.xlsxCheckbox[0].checked;
} else if (options.format != 'html') {
options.separator = String.decodeSeparator(this._elmts.separatorInput.val());
if (options.format == 'tsv' || options.format == 'csv' || options.format == '*sv') {
if (options.format == 'tsv') {
options.separator = '\t';
} else if (options.format == 'csv') {
options.separator = ',';
} else {
options.separator = String.decodeSeparator(this._elmts.separatorInput.val());
}
options.lineSeparator = String.decodeSeparator(this._elmts.lineSeparatorInput.val());
options.encoding = this._elmts.encodingInput.val();
}

View File

@ -114,18 +114,29 @@ ExporterManager.handlers.exportTripleloader = function(format) {
};
ExporterManager.handlers.exportRows = function(format, ext) {
var form = ExporterManager.prepareExportRowsForm(format, true, ext);
$('<input />')
.attr("name", "contentType")
.attr("value", "application/x-unknown") // force download
.appendTo(form);
document.body.appendChild(form);
window.open("about:blank", "refine-export");
form.submit();
document.body.removeChild(form);
};
ExporterManager.prepareExportRowsForm = function(format, includeEngine, ext) {
var name = $.trim(theProject.metadata.name.replace(/\W/g, ' ')).replace(/\s+/g, '-');
var form = document.createElement("form");
$(form)
.css("display", "none")
.attr("method", "post")
.attr("action", "/command/core/export-rows/" + name + "." + ext)
.attr("action", "/command/core/export-rows/" + name + ((ext) ? ("." + ext) : ""))
.attr("target", "refine-export");
$('<input />')
.attr("name", "engine")
.attr("value", JSON.stringify(ui.browsingEngine.getJSON()))
.appendTo(form);
$('<input />')
.attr("name", "project")
.attr("value", theProject.id)
@ -134,13 +145,14 @@ ExporterManager.handlers.exportRows = function(format, ext) {
.attr("name", "format")
.attr("value", format)
.appendTo(form);
document.body.appendChild(form);
window.open("about:blank", "refine-export");
form.submit();
document.body.removeChild(form);
if (includeEngine) {
$('<input />')
.attr("name", "engine")
.attr("value", JSON.stringify(ui.browsingEngine.getJSON()))
.appendTo(form);
}
return form;
};
ExporterManager.handlers.exportProject = function() {

View File

@ -49,12 +49,14 @@ String.prototype.contains = function(s) {
/**
 * Escapes a separator string for display/editing: backslash, carriage return,
 * newline and tab become the two-character sequences \\, \r, \n and \t.
 *
 * Every occurrence is escaped (a string-pattern replace() would only handle
 * the first one), in a single pass so the inserted backslashes are never
 * re-escaped.
 *
 * @param s the raw separator string
 * @return the escaped string
 */
String.encodeSeparator = function(s) {
  return s.replace(/[\\\r\n\t]/g, function(ch) {
    switch (ch) {
      case "\\": return "\\\\";
      case "\r": return "\\r";
      case "\n": return "\\n";
      default:   return "\\t";
    }
  });
};
/**
 * Reverses the separator escaping: the two-character sequences \n, \r, \t and
 * \\ become newline, carriage return, tab and a single backslash.
 *
 * Decoding is done in one left-to-right pass over every escape sequence.
 * Sequential string replaces would only touch the first occurrence of each
 * sequence and would misread an escaped backslash followed by n/r/t
 * (e.g. "\\\\n" must decode to backslash + "n", not backslash + newline).
 *
 * @param s the escaped separator string
 * @return the decoded string
 */
String.decodeSeparator = function(s) {
  return s.replace(/\\([nrt\\])/g, function(match, code) {
    switch (code) {
      case "n": return "\n";
      case "r": return "\r";
      case "t": return "\t";
      default:  return "\\";
    }
  });
};