From 6811f54f31f4e49dc9d3dc6ac08c11015c8516fa Mon Sep 17 00:00:00 2001 From: David Huynh Date: Thu, 4 Mar 2010 07:02:03 +0000 Subject: [PATCH] Fixed quoting bug in tripleloader transposer. Implemented tripleloader exporter. git-svn-id: http://google-refine.googlecode.com/svn/trunk@194 7d457c2a-affb-35e4-300a-418c747d4874 --- .../metaweb/gridworks/GridworksServlet.java | 11 +- .../commands/info/ExportRowsCommand.java | 95 +++----------- .../util/PreviewProtographCommand.java | 7 +- .../metaweb/gridworks/exporters/Exporter.java | 14 ++ .../exporters/TripleloaderExporter.java | 31 +++++ .../gridworks/exporters/TsvExporter.java | 73 +++++++++++ .../TripleLoaderTransposedNodeFactory.java | 123 +++++++++--------- src/main/webapp/scripts/project/menu-bar.js | 25 +++- .../scripts/protograph/schema-alignment.js | 1 + 9 files changed, 232 insertions(+), 148 deletions(-) create mode 100644 src/main/java/com/metaweb/gridworks/exporters/Exporter.java create mode 100644 src/main/java/com/metaweb/gridworks/exporters/TripleloaderExporter.java create mode 100644 src/main/java/com/metaweb/gridworks/exporters/TsvExporter.java diff --git a/src/main/java/com/metaweb/gridworks/GridworksServlet.java b/src/main/java/com/metaweb/gridworks/GridworksServlet.java index 4c9ea3c17..47eb4a186 100644 --- a/src/main/java/com/metaweb/gridworks/GridworksServlet.java +++ b/src/main/java/com/metaweb/gridworks/GridworksServlet.java @@ -119,8 +119,7 @@ public class GridworksServlet extends HttpServlet { protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { ProjectManager.initialize(); - String commandName = request.getPathInfo().substring(1); - Command command = _commands.get(commandName); + Command command = _commands.get(getCommandName(request)); if (command != null) { command.doPost(request, response); } @@ -129,11 +128,15 @@ public class GridworksServlet extends HttpServlet { protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { ProjectManager.initialize(); - String commandName = request.getPathInfo().substring(1); - Command command = _commands.get(commandName); + Command command = _commands.get(getCommandName(request)); if (command != null) { command.doGet(request, response); } } + protected String getCommandName(HttpServletRequest request) { + String commandName = request.getPathInfo().substring(1); + int slash = commandName.indexOf('/'); + return slash > 0 ? commandName.substring(0, slash) : commandName; + } } diff --git a/src/main/java/com/metaweb/gridworks/commands/info/ExportRowsCommand.java b/src/main/java/com/metaweb/gridworks/commands/info/ExportRowsCommand.java index f5bdb1e10..32e9f738e 100644 --- a/src/main/java/com/metaweb/gridworks/commands/info/ExportRowsCommand.java +++ b/src/main/java/com/metaweb/gridworks/commands/info/ExportRowsCommand.java @@ -1,20 +1,19 @@ package com.metaweb.gridworks.commands.info; -import java.io.IOException; + import java.io.IOException; import java.io.PrintWriter; +import java.util.Properties; import javax.servlet.ServletException; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import com.metaweb.gridworks.browsing.Engine; -import com.metaweb.gridworks.browsing.FilteredRows; -import com.metaweb.gridworks.browsing.RowVisitor; import com.metaweb.gridworks.commands.Command; -import com.metaweb.gridworks.model.Cell; -import com.metaweb.gridworks.model.Column; +import com.metaweb.gridworks.exporters.Exporter; +import com.metaweb.gridworks.exporters.TripleloaderExporter; +import com.metaweb.gridworks.exporters.TsvExporter; import com.metaweb.gridworks.model.Project; -import com.metaweb.gridworks.model.Row; public class ExportRowsCommand extends Command { @@ -24,86 +23,24 @@ public class ExportRowsCommand extends Command { try { Project project = getProject(request); Engine engine = getEngine(request, project); - - response.setCharacterEncoding("UTF-8"); - response.setHeader("Content-Type", "text/plain"); + String format = request.getParameter("format"); PrintWriter writer = response.getWriter(); - boolean first = true; - for (Column column : project.columnModel.columns) { - if (first) { - first = false; - } else { - writer.print("\t"); - } - writer.print(column.getHeaderLabel()); + Exporter exporter = null; + if ("tripleloader".equalsIgnoreCase(format)) { + exporter = new TripleloaderExporter(); + } else { + exporter = new TsvExporter(); } - writer.print("\n"); - { - RowVisitor visitor = new RowVisitor() { - PrintWriter writer; - - public RowVisitor init(PrintWriter writer) { - this.writer = writer; - return this; - } - - public boolean visit(Project project, int rowIndex, Row row, boolean contextual) { - boolean first = true; - for (Column column : project.columnModel.columns) { - if (first) { - first = false; - } else { - writer.print("\t"); - } - - int cellIndex = column.getCellIndex(); - if (cellIndex < row.cells.size()) { - Cell cell = row.cells.get(cellIndex); - if (cell != null && cell.value != null) { - writer.print(cell.value); - } - } - } - writer.print("\n"); - - return false; - } - }.init(writer); - - FilteredRows filteredRows = engine.getAllFilteredRows(true); - filteredRows.accept(project, visitor); - } + response.setCharacterEncoding("UTF-8"); + response.setHeader("Content-Type", exporter.getContentType()); + + exporter.export(project, new Properties(), engine, writer); + } catch (Exception e) { respondException(response, e); } } - - static protected class RowAccumulator implements RowVisitor { - final public int start; - final public int limit; - - public int total; - - public RowAccumulator(int start, int limit) { - this.start = start; - this.limit = limit; - } - - public boolean visit(Project project, int rowIndex, Row row, boolean contextual) { - boolean r = false; - - if (total >= start && total < start + limit) { - r = internalVisit(rowIndex, row); - } - total++; - return r; - } - - protected boolean internalVisit(int rowIndex, Row row) { - return false; - } - } } diff --git a/src/main/java/com/metaweb/gridworks/commands/util/PreviewProtographCommand.java b/src/main/java/com/metaweb/gridworks/commands/util/PreviewProtographCommand.java index 7669132ae..f5ec62cfa 100644 --- a/src/main/java/com/metaweb/gridworks/commands/util/PreviewProtographCommand.java +++ b/src/main/java/com/metaweb/gridworks/commands/util/PreviewProtographCommand.java @@ -1,6 +1,7 @@ package com.metaweb.gridworks.commands.util; import java.io.IOException; +import java.io.StringWriter; import javax.servlet.ServletException; import javax.servlet.http.HttpServletRequest; @@ -35,12 +36,14 @@ public class PreviewProtographCommand extends Command { sb.append("{ "); { - TripleLoaderTransposedNodeFactory nodeFactory = new TripleLoaderTransposedNodeFactory(); + StringWriter stringWriter = new StringWriter(); + TripleLoaderTransposedNodeFactory nodeFactory = new TripleLoaderTransposedNodeFactory(stringWriter); Transposer.transpose(project, protograph, protograph.getRootNode(0), nodeFactory); + nodeFactory.flush(); sb.append("\"tripleloader\" : "); - sb.append(JSONObject.quote(nodeFactory.getLoad())); + sb.append(JSONObject.quote(stringWriter.toString())); } { diff --git a/src/main/java/com/metaweb/gridworks/exporters/Exporter.java b/src/main/java/com/metaweb/gridworks/exporters/Exporter.java new file mode 100644 index 000000000..2274d55d4 --- /dev/null +++ b/src/main/java/com/metaweb/gridworks/exporters/Exporter.java @@ -0,0 +1,14 @@ +package com.metaweb.gridworks.exporters; + +import java.io.IOException; +import java.io.Writer; +import java.util.Properties; + +import com.metaweb.gridworks.browsing.Engine; +import com.metaweb.gridworks.model.Project; + +public interface Exporter { + public String getContentType(); + + public void export(Project project, Properties options, Engine engine, Writer writer) throws IOException; +} diff --git a/src/main/java/com/metaweb/gridworks/exporters/TripleloaderExporter.java b/src/main/java/com/metaweb/gridworks/exporters/TripleloaderExporter.java new file mode 100644 index 000000000..6caa586ee --- /dev/null +++ b/src/main/java/com/metaweb/gridworks/exporters/TripleloaderExporter.java @@ -0,0 +1,31 @@ +package com.metaweb.gridworks.exporters; + +import java.io.IOException; +import java.io.Writer; +import java.util.Properties; + +import com.metaweb.gridworks.browsing.Engine; +import com.metaweb.gridworks.model.Project; +import com.metaweb.gridworks.protograph.Protograph; +import com.metaweb.gridworks.protograph.transpose.Transposer; +import com.metaweb.gridworks.protograph.transpose.TripleLoaderTransposedNodeFactory; + +public class TripleloaderExporter implements Exporter { + public String getContentType() { + return "application/x-unknown"; + } + + public void export(Project project, Properties options, Engine engine, + Writer writer) throws IOException { + + if (project.protograph != null) { + Protograph protograph = project.protograph; + + TripleLoaderTransposedNodeFactory nodeFactory = new TripleLoaderTransposedNodeFactory(writer); + + Transposer.transpose(project, protograph, protograph.getRootNode(0), nodeFactory); + nodeFactory.flush(); + } + } + +} diff --git a/src/main/java/com/metaweb/gridworks/exporters/TsvExporter.java b/src/main/java/com/metaweb/gridworks/exporters/TsvExporter.java new file mode 100644 index 000000000..7abde768f --- /dev/null +++ b/src/main/java/com/metaweb/gridworks/exporters/TsvExporter.java @@ -0,0 +1,73 @@ +package com.metaweb.gridworks.exporters; + +import java.io.IOException; +import java.io.Writer; +import java.util.Properties; + +import com.metaweb.gridworks.browsing.Engine; +import com.metaweb.gridworks.browsing.FilteredRows; +import com.metaweb.gridworks.browsing.RowVisitor; +import com.metaweb.gridworks.model.Cell; +import com.metaweb.gridworks.model.Column; +import com.metaweb.gridworks.model.Project; +import com.metaweb.gridworks.model.Row; + +public class TsvExporter implements Exporter { + public String getContentType() { + return "text/plain"; + } + + public void export(Project project, Properties options, Engine engine, Writer writer) throws IOException { + boolean first = true; + for (Column column : project.columnModel.columns) { + if (first) { + first = false; + } else { + writer.write("\t"); + } + writer.write(column.getHeaderLabel()); + } + writer.write("\n"); + + { + RowVisitor visitor = new RowVisitor() { + Writer writer; + + public RowVisitor init(Writer writer) { + this.writer = writer; + return this; + } + + public boolean visit(Project project, int rowIndex, Row row, boolean contextual) { + boolean first = true; + try { + for (Column column : project.columnModel.columns) { + if (first) { + first = false; + } else { + writer.write("\t"); + } + + int cellIndex = column.getCellIndex(); + if (cellIndex < row.cells.size()) { + Cell cell = row.cells.get(cellIndex); + if (cell != null && cell.value != null) { + Object v = cell.value; + writer.write(v instanceof String ? ((String) v) : v.toString()); + } + } + } + writer.write("\n"); + } catch (IOException e) { + // ignore + } + return false; + } + }.init(writer); + + FilteredRows filteredRows = engine.getAllFilteredRows(true); + filteredRows.accept(project, visitor); + } + } + +} diff --git a/src/main/java/com/metaweb/gridworks/protograph/transpose/TripleLoaderTransposedNodeFactory.java b/src/main/java/com/metaweb/gridworks/protograph/transpose/TripleLoaderTransposedNodeFactory.java index 6460284eb..a5c3c5504 100644 --- a/src/main/java/com/metaweb/gridworks/protograph/transpose/TripleLoaderTransposedNodeFactory.java +++ b/src/main/java/com/metaweb/gridworks/protograph/transpose/TripleLoaderTransposedNodeFactory.java @@ -1,5 +1,7 @@ package com.metaweb.gridworks.protograph.transpose; +import java.io.IOException; +import java.io.Writer; import java.util.HashMap; import java.util.LinkedList; import java.util.List; @@ -20,28 +22,52 @@ import com.metaweb.gridworks.protograph.FreebaseTopicNode; import com.metaweb.gridworks.protograph.ValueNode; public class TripleLoaderTransposedNodeFactory implements TransposedNodeFactory { - protected List rootNodes = new LinkedList(); - protected StringBuffer stringBuffer; + protected boolean start = true; + protected Writer writer; + protected WritingTransposedNode lastRootNode; protected Map varPool = new HashMap(); protected Map newTopicVars = new HashMap(); - public String getLoad() { - stringBuffer = new StringBuffer(); - for (WritingTransposedNode node : rootNodes) { - node.write(null, null); + public TripleLoaderTransposedNodeFactory(Writer writer) { + this.writer = writer; + } + + public void flush() { + if (lastRootNode != null) { + lastRootNode.write(null, null); + lastRootNode = null; } - return stringBuffer.toString(); } protected void writeLine(String line) { - if (stringBuffer.length() > 0) { - stringBuffer.append('\n'); + try { + if (start) { + start = false; + } else { + writer.write('\n'); + } + writer.write(line); + } catch (IOException e) { + // ignore } - stringBuffer.append(line); } - protected void writeLine(String subject, String predicate, String object) { + protected void writeLine(String subject, String predicate, Object object) { if (subject != null && object != null) { - writeLine("{ 's' : '" + subject + "', 'p' : '" + predicate + "', 'o' : " + object + " }"); + String s = object instanceof String ? + JSONObject.quote((String) object) : object.toString(); + + writeLine("{ \"s\" : \"" + subject + "\", \"p\" : \"" + predicate + "\", \"o\" : " + s + " }"); + } + } + protected void writeLine(String subject, String predicate, Object object, String lang) { + if (subject != null && object != null) { + String s = object instanceof String ? + JSONObject.quote((String) object) : object.toString(); + + writeLine("{ \"s\" : \"" + + subject + "\", \"p\" : \"" + + predicate + "\", \"o\" : " + + s + ", \"lang\" : \"" + lang + "\" }"); } } @@ -90,12 +116,12 @@ public class TripleLoaderTransposedNodeFactory implements TransposedNodeFactory sb.append(", "); } sb.append("\"" + properties.get(i).id + "\": "); - sb.append(s); + sb.append(s instanceof String ? JSONObject.quote(s) : s.toString()); } } sb.append(" }"); - writeLine(subject, predicate, sb.toString()); + writeLine(subject, predicate, sb); return null; } @@ -130,8 +156,8 @@ public class TripleLoaderTransposedNodeFactory implements TransposedNodeFactory id = "$" + node.columnName.replaceAll("\\W+", "_") + "_" + var; - writeLine("{ 's' : '" + id + "', 'p' : 'type', 'o' : '" + node.type.id + "' }"); - writeLine("{ 's' : '" + id + "', 'p' : 'name', 'o' : " + JSONObject.quote(cell.value.toString()) + " }"); + writeLine(id, "type", node.type.id); + writeLine(id, "name", cell.value); if (cell.recon != null) { newTopicVars.put(cell.recon.id, id); @@ -147,7 +173,7 @@ public class TripleLoaderTransposedNodeFactory implements TransposedNodeFactory writeChildren(id); - return JSONObject.quote(id); + return id; } } @@ -162,28 +188,15 @@ public class TripleLoaderTransposedNodeFactory implements TransposedNodeFactory } public String write(String subject, String predicate) { - String object = cell.value instanceof String ? - JSONObject.quote((String) cell.value) : cell.value.toString(); - if (subject != null) { if ("/type/text".equals(node.lang)) { - writeLine( - "{ 's' : '" + subject + - "', 'p' : '" + predicate + - "', 'o' : " + object + - ", 'lang' : '" + node.lang + - "' }" - ); + writeLine(subject, predicate, cell.value, node.lang); } else { - writeLine( - "{ 's' : '" + subject + - "', 'p' : '" + predicate + - "', 'o' : " + object + " }" - ); + writeLine(subject, predicate, cell.value); } } - return object; + return cell.value.toString(); } } @@ -197,7 +210,7 @@ public class TripleLoaderTransposedNodeFactory implements TransposedNodeFactory } public String write(String subject, String predicate) { - writeLine(subject, "key", JSONObject.quote(node.namespace.id + "/" + cell.value)); + writeLine(subject, "key", node.namespace.id + "/" + cell.value); return null; } @@ -211,12 +224,10 @@ public class TripleLoaderTransposedNodeFactory implements TransposedNodeFactory } public String write(String subject, String predicate) { - String object = JSONObject.quote(node.topic.id); + writeLine(subject, predicate, node.topic.id); + writeChildren(node.topic.id); - writeLine(subject, predicate, object); - writeChildren(object); - - return object; + return node.topic.id; } } @@ -228,26 +239,13 @@ public class TripleLoaderTransposedNodeFactory implements TransposedNodeFactory } public String write(String subject, String predicate) { - String object = node.value instanceof String ? - JSONObject.quote((String) node.value) : node.value.toString(); - if ("/type/text".equals(node.lang)) { - writeLine( - "{ 's' : '" + subject + - "', 'p' : '" + predicate + - "', 'o' : " + object + - ", 'lang' : '" + node.lang + - "' }" - ); + writeLine(subject, predicate, node.value, node.lang); } else { - writeLine( - "{ 's' : '" + subject + - "', 'p' : '" + predicate + - "', 'o' : " + object + " }" - ); + writeLine(subject, predicate, node.value); } - return object; + return node.value.toString(); } } @@ -309,9 +307,9 @@ public class TripleLoaderTransposedNodeFactory implements TransposedNodeFactory } protected void processTransposedNode( - WritingTransposedNode tnode, - TransposedNode parentNode, - FreebaseProperty property + WritingTransposedNode tnode, + TransposedNode parentNode, + FreebaseProperty property ) { if (parentNode != null) { if (parentNode instanceof TransposedNodeWithChildren) { @@ -320,7 +318,14 @@ public class TripleLoaderTransposedNodeFactory implements TransposedNodeFactory parentNode2.properties.add(property); } } else { - rootNodes.add(tnode); + addRootNode(tnode); } } + + protected void addRootNode(WritingTransposedNode tnode) { + if (lastRootNode != null) { + lastRootNode.write(null, null); + } + lastRootNode = tnode; + } } diff --git a/src/main/webapp/scripts/project/menu-bar.js b/src/main/webapp/scripts/project/menu-bar.js index d167f76c6..d1a62d536 100644 --- a/src/main/webapp/scripts/project/menu-bar.js +++ b/src/main/webapp/scripts/project/menu-bar.js @@ -14,8 +14,17 @@ MenuBar.prototype._initializeUI = function() { this._createTopLevelMenuItem("Data Set", [ { - label: "Export Filtered Rows", - click: function() { self._doExportRows(); } + "label": "Export Filtered Rows", + "submenu": [ + { + "label": "Tab-Separated Value", + "click": function() { self._doExportRows("tsv", "tsv"); } + }, + { + "label": "Tripleloader", + "click": function() { self._doExportRows("tripleloader", "txt"); } + } + ] } ]); this._createTopLevelMenuItem("Schemas", [ @@ -123,18 +132,26 @@ MenuBar.prototype._deactivateMenu = function() { this._mode = "inactive"; }; -MenuBar.prototype._doExportRows = function() { +MenuBar.prototype._doExportRows = function(format, ext) { var form = document.createElement("form"); $(form) .css("display", "none") .attr("method", "post") - .attr("action", "/command/export-rows?project=" + theProject.id) + .attr("action", "/command/export-rows/gridworks_" + theProject.id + "." + ext) .attr("target", "gridworks-export"); $('') .attr("name", "engine") .attr("value", JSON.stringify(ui.browsingEngine.getJSON())) .appendTo(form); + $('') + .attr("name", "project") + .attr("value", theProject.id) + .appendTo(form); + $('') + .attr("name", "format") + .attr("value", format) + .appendTo(form); document.body.appendChild(form); diff --git a/src/main/webapp/scripts/protograph/schema-alignment.js b/src/main/webapp/scripts/protograph/schema-alignment.js index da2886d66..4e25a010e 100644 --- a/src/main/webapp/scripts/protograph/schema-alignment.js +++ b/src/main/webapp/scripts/protograph/schema-alignment.js @@ -166,6 +166,7 @@ SchemaAlignmentDialog.prototype._constructFooter = function(footer) { "save-protograph", {}, { protograph: JSON.stringify(protograph) }, + {}, { onDone: function() { DialogSystem.dismissUntil(self._level - 1);