diff --git a/main/src/com/google/refine/browsing/Engine.java b/main/src/com/google/refine/browsing/Engine.java index 7924dcd13..0b19b1f22 100644 --- a/main/src/com/google/refine/browsing/Engine.java +++ b/main/src/com/google/refine/browsing/Engine.java @@ -101,7 +101,9 @@ public class Engine implements Jsonizable { int c = project.rows.size(); for (int rowIndex = 0; rowIndex < c; rowIndex++) { Row row = project.rows.get(rowIndex); - visitor.visit(project, rowIndex, row); + if (visitor.visit(project, rowIndex, row)) { + break; + } } } finally { visitor.end(project); diff --git a/main/src/com/google/refine/browsing/util/ConjunctiveFilteredRows.java b/main/src/com/google/refine/browsing/util/ConjunctiveFilteredRows.java index 344e0dcfb..6b021785f 100644 --- a/main/src/com/google/refine/browsing/util/ConjunctiveFilteredRows.java +++ b/main/src/com/google/refine/browsing/util/ConjunctiveFilteredRows.java @@ -62,7 +62,9 @@ public class ConjunctiveFilteredRows implements FilteredRows { for (int rowIndex = 0; rowIndex < c; rowIndex++) { Row row = project.rows.get(rowIndex); if (matchRow(project, rowIndex, row)) { - visitRow(project, visitor, rowIndex, row); + if (visitRow(project, visitor, rowIndex, row)) { + break; + } } } } finally { @@ -70,8 +72,8 @@ public class ConjunctiveFilteredRows implements FilteredRows { } } - protected void visitRow(Project project, RowVisitor visitor, int rowIndex, Row row) { - visitor.visit(project, rowIndex, row); + protected boolean visitRow(Project project, RowVisitor visitor, int rowIndex, Row row) { + return visitor.visit(project, rowIndex, row); } protected boolean matchRow(Project project, int rowIndex, Row row) { diff --git a/main/src/com/google/refine/commands/Command.java b/main/src/com/google/refine/commands/Command.java index eb1485e5c..1dd25510f 100644 --- a/main/src/com/google/refine/commands/Command.java +++ b/main/src/com/google/refine/commands/Command.java @@ -99,7 +99,7 @@ public abstract class Command { if (request == null) { throw new IllegalArgumentException("parameter 'request' should not be null"); } - + String json = request.getParameter("engine"); try{ return (json == null) ? null : ParsingUtilities.evaluateJsonStringToObject(json); diff --git a/main/src/com/google/refine/commands/project/ExportRowsCommand.java b/main/src/com/google/refine/commands/project/ExportRowsCommand.java index d781d5da1..664064b19 100644 --- a/main/src/com/google/refine/commands/project/ExportRowsCommand.java +++ b/main/src/com/google/refine/commands/project/ExportRowsCommand.java @@ -35,7 +35,8 @@ package com.google.refine.commands.project; import java.io.IOException; import java.io.OutputStream; -import java.io.PrintWriter; +import java.io.OutputStreamWriter; +import java.io.Writer; import java.util.Enumeration; import java.util.Properties; @@ -75,27 +76,38 @@ public class ExportRowsCommand extends Command { try { Project project = getProject(request); Engine engine = getEngine(request, project); - String format = request.getParameter("format"); - Properties options = getRequestParameters(request); - + Properties params = getRequestParameters(request); + + String format = params.getProperty("format"); Exporter exporter = ExporterRegistry.getExporter(format); if (exporter == null) { exporter = new CsvExporter('\t'); } - - response.setCharacterEncoding("UTF-8"); - response.setHeader("Content-Type", exporter.getContentType()); - + + String contentType = params.getProperty("contentType"); + if (contentType == null) { + contentType = exporter.getContentType(); + } + response.setHeader("Content-Type", contentType); + if (exporter instanceof WriterExporter) { - PrintWriter writer = response.getWriter(); - ((WriterExporter) exporter).export(project, options, engine, writer); - writer.flush(); + String encoding = params.getProperty("encoding"); + + response.setCharacterEncoding(encoding != null ? encoding : "UTF-8"); + Writer writer = encoding == null ? + response.getWriter() : + new OutputStreamWriter(response.getOutputStream(), encoding); + + ((WriterExporter) exporter).export(project, params, engine, writer); + writer.close(); } else if (exporter instanceof StreamExporter) { + response.setCharacterEncoding("UTF-8"); + OutputStream stream = response.getOutputStream(); - ((StreamExporter) exporter).export(project, options, engine, stream); - stream.flush(); -// } else if (exporter instanceof UrlExporter) { -// ((UrlExporter) exporter).export(project, options, engine); + ((StreamExporter) exporter).export(project, params, engine, stream); + stream.close(); +// } else if (exporter instanceof UrlExporter) { +// ((UrlExporter) exporter).export(project, options, engine); } else { // TODO: Should this use ServletException instead of respondException? respondException(response, new RuntimeException("Unknown exporter type")); diff --git a/main/src/com/google/refine/exporters/CsvExporter.java b/main/src/com/google/refine/exporters/CsvExporter.java index b3563bb7e..e3b801b42 100644 --- a/main/src/com/google/refine/exporters/CsvExporter.java +++ b/main/src/com/google/refine/exporters/CsvExporter.java @@ -35,21 +35,19 @@ package com.google.refine.exporters; import java.io.IOException; import java.io.Writer; -import java.util.Calendar; -import java.util.Date; +import java.util.List; import java.util.Properties; +import org.json.JSONException; +import org.json.JSONObject; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import au.com.bytecode.opencsv.CSVWriter; import com.google.refine.browsing.Engine; -import com.google.refine.browsing.FilteredRows; -import com.google.refine.browsing.RowVisitor; -import com.google.refine.model.Column; import com.google.refine.model.Project; -import com.google.refine.model.Row; +import com.google.refine.util.JSONUtilities; import com.google.refine.util.ParsingUtilities; public class CsvExporter implements WriterExporter{ @@ -66,83 +64,65 @@ public class CsvExporter implements WriterExporter{ } @Override - public void export(Project project, Properties options, Engine engine, Writer writer) throws IOException { - boolean printColumnHeader = true; - - if (options != null && options.getProperty("printColumnHeader") != null) { - printColumnHeader = Boolean.parseBoolean(options.getProperty("printColumnHeader")); + public void export(Project project, Properties params, Engine engine, final Writer writer) + throws IOException { + + String optionsString = params == null ? null : params.getProperty("options"); + JSONObject options = null; + if (optionsString != null) { + try { + options = ParsingUtilities.evaluateJsonStringToObject(optionsString); + } catch (JSONException e) { + // Ignore and keep options null. + } } - - RowVisitor visitor = new RowVisitor() { - CSVWriter csvWriter; - boolean printColumnHeader = true; - boolean isFirstRow = true; //the first row should also add the column headers - - public RowVisitor init(CSVWriter writer, boolean printColumnHeader) { - this.csvWriter = writer; - this.printColumnHeader = printColumnHeader; - return this; + + final String separator = options == null ? Character.toString(this.separator) : + JSONUtilities.getString(options, "separator", Character.toString(this.separator)); + final String lineSeparator = options == null ? CSVWriter.DEFAULT_LINE_END : + JSONUtilities.getString(options, "lineSeparator", CSVWriter.DEFAULT_LINE_END); + + final boolean printColumnHeader = + (params != null && params.getProperty("printColumnHeader") != null) ? + Boolean.parseBoolean(params.getProperty("printColumnHeader")) : + true; + + final CSVWriter csvWriter = + new CSVWriter(writer, separator.charAt(0), CSVWriter.DEFAULT_QUOTE_CHARACTER, lineSeparator); + + TabularSerializer serializer = new TabularSerializer() { + @Override + public void startFile(JSONObject options) { } @Override - public boolean visit(Project project, int rowIndex, Row row) { - int size = project.columnModel.columns.size(); + public void endFile() { + } - String[] cols = new String[size]; - String[] vals = new String[size]; - - int i = 0; - for (Column col : project.columnModel.columns) { - int cellIndex = col.getCellIndex(); - cols[i] = col.getName(); - - Object value = row.getCellValue(cellIndex); - if (value != null) { - if (value instanceof String) { - vals[i] = (String) value; - } else if (value instanceof Calendar) { - vals[i] = ParsingUtilities.dateToString(((Calendar) value).getTime()); - } else if (value instanceof Date) { - vals[i] = ParsingUtilities.dateToString((Date) value); - } else { - vals[i] = value.toString(); - } + @Override + public void addRow(List cells, boolean isHeader) { + if (!isHeader || printColumnHeader) { + String[] strings = new String[cells.size()]; + for (int i = 0; i < strings.length; i++) { + CellData cellData = cells.get(i); + strings[i] = + (cellData != null && cellData.text != null) ? + cellData.text : + ""; } - i++; - } - - if (printColumnHeader && isFirstRow) { - csvWriter.writeNext(cols,false); - isFirstRow = false; //switch off flag - } - csvWriter.writeNext(vals,false); - - return false; - } - - @Override - public void start(Project project) { - // nothing to do - } - - @Override - public void end(Project project) { - try { - csvWriter.close(); - } catch (IOException e) { - logger.error("CsvExporter could not close writer : " + e.getMessage()); + csvWriter.writeNext(strings, false); } } - - }.init(new CSVWriter(writer, separator), printColumnHeader); - - FilteredRows filteredRows = engine.getAllFilteredRows(); - filteredRows.accept(project, visitor); + }; + + CustomizableTabularExporterUtilities.exportRows( + project, engine, params, serializer); + + csvWriter.close(); } @Override public String getContentType() { - return "application/x-unknown"; + return "text/plain"; } - } diff --git a/main/src/com/google/refine/exporters/CustomizableTabularExporterUtilities.java b/main/src/com/google/refine/exporters/CustomizableTabularExporterUtilities.java new file mode 100644 index 000000000..584dda642 --- /dev/null +++ b/main/src/com/google/refine/exporters/CustomizableTabularExporterUtilities.java @@ -0,0 +1,355 @@ +/* + +Copyright 2010, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +package com.google.refine.exporters; + +import java.text.DateFormat; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Calendar; +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.TimeZone; + +import org.apache.commons.lang.StringUtils; +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; + +import com.google.refine.ProjectManager; +import com.google.refine.browsing.Engine; +import com.google.refine.browsing.FilteredRows; +import com.google.refine.browsing.RowVisitor; +import com.google.refine.exporters.TabularSerializer.CellData; +import com.google.refine.model.Cell; +import com.google.refine.model.Column; +import com.google.refine.model.Project; +import com.google.refine.model.Recon; +import com.google.refine.model.Row; +import com.google.refine.preference.PreferenceStore; +import com.google.refine.util.JSONUtilities; +import com.google.refine.util.ParsingUtilities; + +abstract public class CustomizableTabularExporterUtilities { + static public void exportRows( + final Project project, + final Engine engine, + Properties params, + final TabularSerializer serializer) { + + String optionsString = params.getProperty("options"); + JSONObject optionsTemp = null; + if (optionsString != null) { + try { + optionsTemp = ParsingUtilities.evaluateJsonStringToObject(optionsString); + } catch (JSONException e) { + // Ignore and keep options null. + } + } + final JSONObject options = optionsTemp; + + final boolean outputColumnHeaders = options == null ? true : + JSONUtilities.getBoolean(options, "outputColumnHeaders", true); + final boolean outputBlankRows = options == null ? false : + JSONUtilities.getBoolean(options, "outputBlankRows", true); + final int limit = options == null ? -1 : + JSONUtilities.getInt(options, "limit", -1); + + final List columnNames; + final Map columnNameToFormatter = + new HashMap(); + + JSONArray columnOptionArray = options == null ? null : + JSONUtilities.getArray(options, "columns"); + if (columnOptionArray == null) { + List columns = project.columnModel.columns; + + columnNames = new ArrayList(columns.size()); + for (Column column : columns) { + String name = column.getName(); + columnNames.add(name); + columnNameToFormatter.put(name, new CellFormatter()); + } + } else { + int count = columnOptionArray.length(); + + columnNames = new ArrayList(count); + for (int i = 0; i < count; i++) { + JSONObject columnOptions = JSONUtilities.getObjectElement(columnOptionArray, i); + if (columnOptions != null) { + String name = JSONUtilities.getString(columnOptions, "name", null); + if (name != null) { + columnNames.add(name); + columnNameToFormatter.put(name, new CellFormatter(columnOptions)); + } + } + } + } + + RowVisitor visitor = new RowVisitor() { + int rowCount = 0; + + @Override + public void start(Project project) { + serializer.startFile(options); + if (outputColumnHeaders) { + List cells = new ArrayList(columnNames.size()); + for (String name : columnNames) { + cells.add(new CellData(name, name, name, null)); + } + serializer.addRow(cells, true); + } + } + + @Override + public boolean visit(Project project, int rowIndex, Row row) { + List cells = new ArrayList(columnNames.size()); + int nonBlankCount = 0; + + for (String columnName : columnNames) { + Column column = project.columnModel.getColumnByName(columnName); + CellFormatter formatter = columnNameToFormatter.get(columnName); + CellData cellData = formatter.format( + project, + column, + row.getCell(column.getCellIndex())); + + cells.add(cellData); + if (cellData != null) { + nonBlankCount++; + } + } + + if (nonBlankCount > 0 || outputBlankRows) { + serializer.addRow(cells, false); + rowCount++; + } + + return limit > 0 && rowCount >= limit; + } + + @Override + public void end(Project project) { + serializer.endFile(); + } + }; + + FilteredRows filteredRows = engine.getAllFilteredRows(); + filteredRows.accept(project, visitor); + } + + private enum ReconOutputMode { + ENTITY_NAME, + ENTITY_ID, + CELL_CONTENT + } + private enum DateFormatMode { + ISO_8601, + SHORT_LOCALE, + MEDIUM_LOCALE, + LONG_LOCALE, + FULL_LOCALE, + CUSTOM + } + + static private class CellFormatter { + ReconOutputMode recon_outputMode = ReconOutputMode.ENTITY_NAME; + boolean recon_blankUnmatchedCells = false; + boolean recon_linkToEntityPages = true; + + DateFormatMode date_formatMode = DateFormatMode.ISO_8601; + String date_custom = null; + boolean date_useLocalTimeZone = false; + boolean date_omitTime = false; + + DateFormat dateFormatter; + + Map identifierSpaceToUrl = null; + + CellFormatter() {} + + CellFormatter(JSONObject options) { + JSONObject reconSettings = JSONUtilities.getObject(options, "reconSettings"); + if (reconSettings != null) { + String reconOutputString = JSONUtilities.getString(reconSettings, "output", null); + if ("entity-name".equals(reconOutputString)) { + recon_outputMode = ReconOutputMode.ENTITY_NAME; + } else if ("entity-id".equals(reconOutputString)) { + recon_outputMode = ReconOutputMode.ENTITY_ID; + } else if ("cell-content".equals(reconOutputString)) { + recon_outputMode = ReconOutputMode.CELL_CONTENT; + } + + recon_blankUnmatchedCells = JSONUtilities.getBoolean(reconSettings, "blankUnmatchedCells", recon_blankUnmatchedCells); + recon_linkToEntityPages = JSONUtilities.getBoolean(reconSettings, "linkToEntityPages", recon_linkToEntityPages); + } + JSONObject dateSettings = JSONUtilities.getObject(options, "dateSettings"); + if (dateSettings != null) { + String dateFormatString = JSONUtilities.getString(dateSettings, "format", null); + if ("iso-8601".equals(dateFormatString)) { + date_formatMode = DateFormatMode.ISO_8601; + } else if ("locale-short".equals(dateFormatString)) { + date_formatMode = DateFormatMode.SHORT_LOCALE; + } else if ("locale-medium".equals(dateFormatString)) { + date_formatMode = DateFormatMode.MEDIUM_LOCALE; + } else if ("locale-long".equals(dateFormatString)) { + date_formatMode = DateFormatMode.LONG_LOCALE; + } else if ("locale-full".equals(dateFormatString)) { + date_formatMode = DateFormatMode.FULL_LOCALE; + } else if ("custom".equals(dateFormatString)) { + date_formatMode = DateFormatMode.CUSTOM; + } + + date_custom = JSONUtilities.getString(dateSettings, "custom", null); + date_useLocalTimeZone = JSONUtilities.getBoolean(dateSettings, "useLocalTimeZone", date_useLocalTimeZone); + date_omitTime = JSONUtilities.getBoolean(dateSettings, "omitTime", date_omitTime); + + if (date_formatMode == DateFormatMode.CUSTOM && + (date_custom == null || date_custom.isEmpty())) { + date_formatMode = DateFormatMode.ISO_8601; + } + } + + switch (date_formatMode) { + case SHORT_LOCALE: + dateFormatter = date_omitTime ? + SimpleDateFormat.getDateInstance(SimpleDateFormat.SHORT) : + SimpleDateFormat.getDateTimeInstance(SimpleDateFormat.SHORT, SimpleDateFormat.SHORT); + break; + case MEDIUM_LOCALE: + dateFormatter = date_omitTime ? + SimpleDateFormat.getDateInstance(SimpleDateFormat.MEDIUM) : + SimpleDateFormat.getDateTimeInstance(SimpleDateFormat.MEDIUM, SimpleDateFormat.MEDIUM); + break; + case LONG_LOCALE: + dateFormatter = date_omitTime ? + SimpleDateFormat.getDateInstance(SimpleDateFormat.LONG) : + SimpleDateFormat.getDateTimeInstance(SimpleDateFormat.LONG, SimpleDateFormat.LONG); + break; + case FULL_LOCALE: + dateFormatter = date_omitTime ? + SimpleDateFormat.getDateInstance(SimpleDateFormat.FULL) : + SimpleDateFormat.getDateTimeInstance(SimpleDateFormat.FULL, SimpleDateFormat.FULL); + break; + case CUSTOM: + dateFormatter = new SimpleDateFormat(date_custom); + break; + + default: + dateFormatter = date_omitTime ? + new SimpleDateFormat("yyyy-MM-dd") : + new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"); + } + + if (!date_useLocalTimeZone) { + dateFormatter.setTimeZone(TimeZone.getTimeZone("UTC")); + } + } + + CellData format(Project project, Column column, Cell cell) { + if (cell != null) { + String link = null; + String text = null; + + if (cell.recon != null) { + Recon recon = cell.recon; + if (recon.judgment == Recon.Judgment.Matched) { + if (recon_outputMode == ReconOutputMode.ENTITY_NAME) { + text = recon.match.name; + } else if (recon_outputMode == ReconOutputMode.ENTITY_ID) { + text = recon.match.id; + } // else: output cell content + + if (recon_linkToEntityPages) { + buildIdentifierSpaceToUrlMap(); + + String service = recon.service; + String viewUrl = identifierSpaceToUrl.get(service); + if (viewUrl != null) { + link = StringUtils.replace(viewUrl, "{{id}}", recon.match.id); + } + } + } else if (recon_blankUnmatchedCells) { + return null; + } + } + + Object value = cell.value; + if (value != null) { + if (text == null) { + if (value instanceof String) { + text = (String) value; + } else if (value instanceof Calendar) { + text = dateFormatter.format(((Calendar) value).getTime()); + } else if (value instanceof Date) { + text = dateFormatter.format((Date) value); + } else { + text = value.toString(); + } + } + return new CellData(column.getName(), value, text, link); + } + } + return null; + } + + void buildIdentifierSpaceToUrlMap() { + if (identifierSpaceToUrl != null) { + return; + } + + identifierSpaceToUrl = new HashMap(); + + PreferenceStore ps = ProjectManager.singleton.getPreferenceStore(); + JSONArray services = (JSONArray) ps.get("reconciliation.standardServices"); + if (services != null) { + int count = services.length(); + + for (int i = 0; i < count; i++) { + JSONObject service = JSONUtilities.getObjectElement(services, i); + JSONObject view = JSONUtilities.getObject(service, "view"); + if (view != null) { + String url = JSONUtilities.getString(service, "url", null); + String viewUrl = JSONUtilities.getString(view, "url", null); + if (url != null && viewUrl != null) { + identifierSpaceToUrl.put(url, viewUrl); + } + } + } + } + } + } +} diff --git a/main/src/com/google/refine/exporters/ExporterRegistry.java b/main/src/com/google/refine/exporters/ExporterRegistry.java index a7dc2692a..1a742ab2c 100644 --- a/main/src/com/google/refine/exporters/ExporterRegistry.java +++ b/main/src/com/google/refine/exporters/ExporterRegistry.java @@ -40,9 +40,14 @@ abstract public class ExporterRegistry { static final private Map s_formatToExporter = new HashMap(); static { - s_formatToExporter.put("html", new HtmlTableExporter()); - s_formatToExporter.put("xls", new XlsExporter()); s_formatToExporter.put("csv", new CsvExporter()); + s_formatToExporter.put("tsv", new CsvExporter('\t')); + s_formatToExporter.put("*sv", new CsvExporter()); + + s_formatToExporter.put("xls", new XlsExporter(false)); + s_formatToExporter.put("xlsx", new XlsExporter(true)); + + s_formatToExporter.put("html", new HtmlTableExporter()); s_formatToExporter.put("template", new TemplatingExporter()); } diff --git a/main/src/com/google/refine/exporters/HtmlTableExporter.java b/main/src/com/google/refine/exporters/HtmlTableExporter.java index 2dfe98b86..460b6b71f 100644 --- a/main/src/com/google/refine/exporters/HtmlTableExporter.java +++ b/main/src/com/google/refine/exporters/HtmlTableExporter.java @@ -35,16 +35,15 @@ package com.google.refine.exporters; import java.io.IOException; import java.io.Writer; +import java.util.List; import java.util.Properties; +import org.apache.commons.lang.StringEscapeUtils; +import org.json.JSONObject; + import com.google.refine.ProjectManager; import com.google.refine.browsing.Engine; -import com.google.refine.browsing.FilteredRows; -import com.google.refine.browsing.RowVisitor; -import com.google.refine.model.Cell; -import com.google.refine.model.Column; import com.google.refine.model.Project; -import com.google.refine.model.Row; public class HtmlTableExporter implements WriterExporter { @@ -54,79 +53,71 @@ public class HtmlTableExporter implements WriterExporter { } @Override - public void export(Project project, Properties options, Engine engine, Writer writer) throws IOException { - writer.write("\n"); - writer.write(""); - writer.write(ProjectManager.singleton.getProjectMetadata(project.id).getName()); - writer.write("\n"); - - writer.write("\n"); - writer.write("\n"); + public void export(final Project project, Properties params, Engine engine, final Writer writer) + throws IOException { - writer.write(""); - { - for (Column column : project.columnModel.columns) { - writer.write(""); + TabularSerializer serializer = new TabularSerializer() { + @Override + public void startFile(JSONObject options) { + try { + writer.write("\n"); + writer.write(""); + writer.write(ProjectManager.singleton.getProjectMetadata(project.id).getName()); + writer.write("\n"); + + writer.write("\n"); + writer.write("
"); - writer.write(column.getName()); - writer.write("
\n"); + } catch (IOException e) { + // Ignore + } } - } - writer.write("\n"); - - { - RowVisitor visitor = new RowVisitor() { - Writer writer; - - public RowVisitor init(Writer writer) { - this.writer = writer; - return this; + + @Override + public void endFile() { + try { + writer.write("
\n"); + writer.write("\n"); + writer.write("\n"); + } catch (IOException e) { + // Ignore } - - @Override - public void start(Project project) { - // nothing to do - } - - @Override - public void end(Project project) { - // nothing to do - } - - @Override - public boolean visit(Project project, int rowIndex, Row row) { - try { - writer.write(""); - - for (Column column : project.columnModel.columns) { + } + + @Override + public void addRow(List cells, boolean isHeader) { + try { + writer.write(""); + if (isHeader) { + for (CellData cellData : cells) { + writer.write(""); + writer.write((cellData != null && cellData.text != null) ? cellData.text : ""); + writer.write(""); + } + } else { + for (CellData cellData : cells) { writer.write(""); - - int cellIndex = column.getCellIndex(); - if (cellIndex < row.cells.size()) { - Cell cell = row.cells.get(cellIndex); - if (cell != null && cell.value != null) { - Object v = cell.value; - writer.write(v instanceof String ? ((String) v) : v.toString()); + if (cellData != null && cellData.text != null) { + if (cellData.link != null) { + writer.write(""); + } + writer.write(cellData.text); + if (cellData.link != null) { + writer.write(""); } } - writer.write(""); } - - writer.write("\n"); - } catch (IOException e) { - // ignore } - return false; + writer.write(""); + } catch (IOException e) { + // Ignore } - }.init(writer); - - FilteredRows filteredRows = engine.getAllFilteredRows(); - filteredRows.accept(project, visitor); - } + } + }; - writer.write("\n"); - writer.write("\n"); - writer.write("\n"); + CustomizableTabularExporterUtilities.exportRows( + project, engine, params, serializer); } - } diff --git a/main/src/com/google/refine/exporters/TabularSerializer.java b/main/src/com/google/refine/exporters/TabularSerializer.java new file mode 100644 index 000000000..8e61224a0 --- /dev/null +++ b/main/src/com/google/refine/exporters/TabularSerializer.java @@ -0,0 +1,28 @@ +package com.google.refine.exporters; + +import java.util.List; + +import org.json.JSONObject; + + +public interface TabularSerializer { + static public class CellData { + final public String columnName; + final public Object value; + final public String text; + final public String link; + + public CellData(String columnName, Object value, String text, String link) { + this.columnName = columnName; + this.value = value; + this.text = text; + this.link = link; + } + } + + public void startFile(JSONObject options); + + public void endFile(); + + public void addRow(List cells, boolean isHeader); +} diff --git a/main/src/com/google/refine/exporters/XlsExporter.java b/main/src/com/google/refine/exporters/XlsExporter.java index f53c0c037..a499e04e2 100644 --- a/main/src/com/google/refine/exporters/XlsExporter.java +++ b/main/src/com/google/refine/exporters/XlsExporter.java @@ -37,128 +37,98 @@ import java.io.IOException; import java.io.OutputStream; import java.util.Calendar; import java.util.Date; +import java.util.List; import java.util.Properties; import org.apache.poi.hssf.usermodel.HSSFHyperlink; import org.apache.poi.hssf.usermodel.HSSFWorkbook; +import org.apache.poi.ss.usermodel.Cell; +import org.apache.poi.ss.usermodel.Row; import org.apache.poi.ss.usermodel.Sheet; import org.apache.poi.ss.usermodel.Workbook; +import org.apache.poi.xssf.usermodel.XSSFWorkbook; +import org.json.JSONObject; import com.google.refine.ProjectManager; import com.google.refine.browsing.Engine; -import com.google.refine.browsing.FilteredRows; -import com.google.refine.browsing.RowVisitor; -import com.google.refine.model.Cell; -import com.google.refine.model.Column; import com.google.refine.model.Project; -import com.google.refine.model.Row; public class XlsExporter implements StreamExporter { - + final private boolean xml; + + public XlsExporter(boolean xml) { + this.xml = xml; + } + @Override public String getContentType() { - return "application/xls"; + return xml ? "application/xlsx" : "application/xls"; } @Override - public void export(Project project, Properties options, Engine engine, + public void export(final Project project, Properties params, Engine engine, OutputStream outputStream) throws IOException { - Workbook wb = new HSSFWorkbook(); - Sheet s = wb.createSheet(); - wb.setSheetName(0, ProjectManager.singleton.getProjectMetadata(project.id).getName()); + final Workbook wb = xml ? new XSSFWorkbook() : new HSSFWorkbook(); - int rowCount = 0; - - { - org.apache.poi.ss.usermodel.Row r = s.createRow(rowCount++); + TabularSerializer serializer = new TabularSerializer() { + Sheet s; + int rowCount = 0; - int cellCount = 0; - for (Column column : project.columnModel.columns) { - if (cellCount++ > 255) { - // TODO: Warn user about truncated data - } else { - org.apache.poi.ss.usermodel.Cell c = r.createCell(cellCount); - c.setCellValue(column.getName()); - } + @Override + public void startFile(JSONObject options) { + s = wb.createSheet(); + wb.setSheetName(0, ProjectManager.singleton.getProjectMetadata(project.id).getName()); } - } - - { - RowVisitor visitor = new RowVisitor() { - Sheet sheet; - int rowCount; + + @Override + public void endFile() { + } + + @Override + public void addRow(List cells, boolean isHeader) { + Row r = s.createRow(rowCount++); - public RowVisitor init(Sheet sheet, int rowCount) { - this.sheet = sheet; - this.rowCount = rowCount; - return this; - } - - @Override - public void start(Project project) { - // nothing to do - } - - @Override - public void end(Project project) { - // nothing to do - } - - @Override - public boolean visit(Project project, int rowIndex, Row row) { - org.apache.poi.ss.usermodel.Row r = sheet.createRow(rowCount++); - - int cellCount = 0; - for (Column column : project.columnModel.columns) { - if (cellCount++ > 255) { - // TODO: Warn user about truncated data - } else { - org.apache.poi.ss.usermodel.Cell c = r.createCell(cellCount); - - int cellIndex = column.getCellIndex(); - if (cellIndex < row.cells.size()) { - Cell cell = row.cells.get(cellIndex); - if (cell != null) { - if (cell.recon != null && cell.recon.match != null) { - c.setCellValue(cell.recon.match.name); - - HSSFHyperlink hl = new HSSFHyperlink(HSSFHyperlink.LINK_URL); - hl.setLabel(cell.recon.match.name); - hl.setAddress("http://www.freebase.com/view" + cell.recon.match.id); - - c.setHyperlink(hl); - } else if (cell.value != null) { - Object v = cell.value; - - if (v instanceof Number) { - c.setCellValue(((Number) v).doubleValue()); - } else if (v instanceof Boolean) { - c.setCellValue(((Boolean) v).booleanValue()); - } else if (v instanceof Date) { - c.setCellValue((Date) v); - } else if (v instanceof Calendar) { - c.setCellValue((Calendar) v); - } else if (v instanceof String) { - String s = (String) v; - if (s.length() > 32767) { - // The maximum length of cell contents (text) is 32,767 characters - s = s.substring(0, 32767); - } - c.setCellValue(s); - } - } + for (int i = 0; i < cells.size(); i++) { + Cell c = r.createCell(i); + if (i == 255 && cells.size() > 256) { + c.setCellValue("ERROR: TOO MANY COLUMNS"); + } else { + CellData cellData = cells.get(i); + + if (cellData != null && cellData.text != null && cellData.value != null) { + Object v = cellData.value; + if (v instanceof Number) { + c.setCellValue(((Number) v).doubleValue()); + } else if (v instanceof Boolean) { + c.setCellValue(((Boolean) v).booleanValue()); + } else if (v instanceof Date) { + c.setCellValue((Date) v); + } else if (v instanceof Calendar) { + c.setCellValue((Calendar) v); + } else { + String s = cellData.text; + if (s.length() > 32767) { + // The maximum length of cell contents (text) is 32,767 characters + s = s.substring(0, 32767); } + c.setCellValue(s); + } + + if (cellData.link != null) { + HSSFHyperlink hl = new HSSFHyperlink(HSSFHyperlink.LINK_URL); + hl.setLabel(cellData.text); + hl.setAddress(cellData.link); } } + } - return false; } - }.init(s, rowCount); - - FilteredRows filteredRows = engine.getAllFilteredRows(); - filteredRows.accept(project, visitor); - } + } + }; + + CustomizableTabularExporterUtilities.exportRows( + project, engine, params, serializer); wb.write(outputStream); outputStream.flush(); diff --git a/main/webapp/modules/core/scripts/dialogs/custom-tabular-exporter-dialog.html b/main/webapp/modules/core/scripts/dialogs/custom-tabular-exporter-dialog.html index fd885169f..ca0ff9d2d 100644 --- a/main/webapp/modules/core/scripts/dialogs/custom-tabular-exporter-dialog.html +++ b/main/webapp/modules/core/scripts/dialogs/custom-tabular-exporter-dialog.html @@ -24,8 +24,12 @@
- - + + + + + + @@ -115,8 +119,10 @@ @@ -137,6 +143,10 @@
Excel in XML (.xlsx)Excel (.xls)
Excel in XML (.xlsx)
- - + + + +
Output column headersOutput blank rowsOutput column headersOutput blank rows
+ + + +
Ignore facets and filters and export all rows
diff --git a/main/webapp/modules/core/scripts/dialogs/custom-tabular-exporter-dialog.js b/main/webapp/modules/core/scripts/dialogs/custom-tabular-exporter-dialog.js index 6dfc009e0..56bd4a2a4 100644 --- a/main/webapp/modules/core/scripts/dialogs/custom-tabular-exporter-dialog.js +++ b/main/webapp/modules/core/scripts/dialogs/custom-tabular-exporter-dialog.js @@ -47,6 +47,27 @@ function CustomTabularExporterDialog(options) { this._createDialog(options); }; +CustomTabularExporterDialog.formats = { + 'csv': { + extension: 'csv' + }, + 'tsv': { + extension: 'tsv' + }, + '*sv': { + extension: 'txt' + }, + 'html': { + extension: 'html' + }, + 'xls': { + extension: 'xls' + }, + 'xlsx': { + extension: 'xlsx' + } +}; + CustomTabularExporterDialog.prototype._createDialog = function(options) { var self = this; @@ -145,7 +166,6 @@ CustomTabularExporterDialog.prototype._configureUIFromOptionCode = function(opti this._elmts.encodingInput[0].value = options.encoding; this._elmts.outputColumnHeadersCheckbox.attr('checked', (options.outputColumnHeaders) ? 'checked' : ''); this._elmts.outputBlankRowsCheckbox.attr('checked', (options.outputBlankRows) ? 'checked' : ''); - this._elmts.xlsxCheckbox.attr('checked', (options.xlsx) ? 'checked' : ''); if (options.columns != null) { var self = this; @@ -169,8 +189,52 @@ CustomTabularExporterDialog.prototype._dismiss = function() { }; CustomTabularExporterDialog.prototype._preview = function() { + this._postExport(true); +}; + +CustomTabularExporterDialog.prototype._commit = function() { + this._postExport(false); + this._dismiss(); +}; + +CustomTabularExporterDialog.prototype._postExport = function(preview) { + var exportAllRowsCheckbox = this._elmts.exportAllRowsCheckbox[0].checked; var options = this._getOptionCode(); - console.log(options); + + var format = options.format; + var encoding = options.encoding; + + delete options.format; + delete options.encoding; + if (preview) { + options.limit = 10; + } + + var ext = CustomTabularExporterDialog.formats[format].extension; + var form = ExporterManager.prepareExportRowsForm(format, !exportAllRowsCheckbox, ext); + $('') + .attr("name", "options") + .attr("value", JSON.stringify(options)) + .appendTo(form); + if (encoding) { + $('') + .attr("name", "encoding") + .attr("value", encoding) + .appendTo(form); + } + if (!preview) { + $('') + .attr("name", "contentType") + .attr("value", "application/x-unknown") // force download + .appendTo(form); + } + + document.body.appendChild(form); + + window.open("about:blank", "refine-export"); + form.submit(); + + document.body.removeChild(form); }; CustomTabularExporterDialog.prototype._selectColumn = function(columnName) { @@ -229,10 +293,14 @@ CustomTabularExporterDialog.prototype._getOptionCode = function() { format: this._dialog.find('input[name="custom-tabular-exporter-format"]:checked').val() }; - if (options.format == 'excel') { - options.xlsx = this._elmts.xlsxCheckbox[0].checked; - } else if (options.format != 'html') { - options.separator = String.decodeSeparator(this._elmts.separatorInput.val()); + if (options.format == 'tsv' || options.format == 'csv' || options.format == '*sv') { + if (options.format == 'tsv') { + options.separator = '\t'; + } else if (options.format == 'csv') { + options.separator = ','; + } else { + options.separator = String.decodeSeparator(this._elmts.separatorInput.val()); + } options.lineSeparator = String.decodeSeparator(this._elmts.lineSeparatorInput.val()); options.encoding = this._elmts.encodingInput.val(); } diff --git a/main/webapp/modules/core/scripts/project/exporters.js b/main/webapp/modules/core/scripts/project/exporters.js index e21d0c2cd..d884ca89b 100644 --- a/main/webapp/modules/core/scripts/project/exporters.js +++ b/main/webapp/modules/core/scripts/project/exporters.js @@ -114,18 +114,29 @@ ExporterManager.handlers.exportTripleloader = function(format) { }; ExporterManager.handlers.exportRows = function(format, ext) { + var form = ExporterManager.prepareExportRowsForm(format, true, ext); + $('') + .attr("name", "contentType") + .attr("value", "application/x-unknown") // force download + .appendTo(form); + + document.body.appendChild(form); + + window.open("about:blank", "refine-export"); + form.submit(); + + document.body.removeChild(form); +}; + +ExporterManager.prepareExportRowsForm = function(format, includeEngine, ext) { var name = $.trim(theProject.metadata.name.replace(/\W/g, ' ')).replace(/\s+/g, '-'); var form = document.createElement("form"); $(form) .css("display", "none") .attr("method", "post") - .attr("action", "/command/core/export-rows/" + name + "." + ext) + .attr("action", "/command/core/export-rows/" + name + ((ext) ? ("." + ext) : "")) .attr("target", "refine-export"); - $('') - .attr("name", "engine") - .attr("value", JSON.stringify(ui.browsingEngine.getJSON())) - .appendTo(form); $('') .attr("name", "project") .attr("value", theProject.id) @@ -134,13 +145,14 @@ ExporterManager.handlers.exportRows = function(format, ext) { .attr("name", "format") .attr("value", format) .appendTo(form); - - document.body.appendChild(form); - - window.open("about:blank", "refine-export"); - form.submit(); - - document.body.removeChild(form); + if (includeEngine) { + $('') + .attr("name", "engine") + .attr("value", JSON.stringify(ui.browsingEngine.getJSON())) + .appendTo(form); + } + + return form; }; ExporterManager.handlers.exportProject = function() { diff --git a/main/webapp/modules/core/scripts/util/string.js b/main/webapp/modules/core/scripts/util/string.js index 16091e52d..5004d2bbd 100644 --- a/main/webapp/modules/core/scripts/util/string.js +++ b/main/webapp/modules/core/scripts/util/string.js @@ -49,12 +49,14 @@ String.prototype.contains = function(s) { String.encodeSeparator = function(s) { return s.replace("\\", "\\\\") + .replace("\r", "\\r") .replace("\n", "\\n") .replace("\t", "\\t"); }; String.decodeSeparator = function(s) { return s.replace("\\n", "\n") + .replace("\\r", "\r") .replace("\\t", "\t") .replace("\\\\", "\\"); };