From ad3a174abd495621fc146ef9e28a959ac1f4e459 Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Tue, 4 Jul 2017 23:14:19 +0200 Subject: [PATCH] Starting to migrate data extension to standard reconciliation services --- .../commands/recon/ExtendDataCommand.java | 65 +++ .../recon/PreviewExtendDataCommand.java | 208 ++++++++ .../model/changes/DataExtensionChange.java | 469 ++++++++++++++++++ .../recon/ReconciledDataExtensionJob.java | 453 +++++++++++++++++ .../operations/recon/ExtendDataOperation.java | 314 ++++++++++++ .../webapp/modules/core/MOD-INF/controller.js | 4 + .../dialogs/extend-data-preview-dialog.js | 424 ++++++++++++++++ .../add-column-by-reconciliation.html | 27 + .../extend-data-preview-dialog.html | 26 + .../views/data-table/menu-edit-column.js | 73 +++ .../views/extend-data-preview-dialog.less | 71 +++ 11 files changed, 2134 insertions(+) create mode 100644 main/src/com/google/refine/commands/recon/ExtendDataCommand.java create mode 100644 main/src/com/google/refine/commands/recon/PreviewExtendDataCommand.java create mode 100644 main/src/com/google/refine/model/changes/DataExtensionChange.java create mode 100644 main/src/com/google/refine/model/recon/ReconciledDataExtensionJob.java create mode 100644 main/src/com/google/refine/operations/recon/ExtendDataOperation.java create mode 100644 main/webapp/modules/core/scripts/dialogs/extend-data-preview-dialog.js create mode 100644 main/webapp/modules/core/scripts/views/data-table/add-column-by-reconciliation.html create mode 100644 main/webapp/modules/core/scripts/views/data-table/extend-data-preview-dialog.html create mode 100644 main/webapp/modules/core/styles/views/extend-data-preview-dialog.less diff --git a/main/src/com/google/refine/commands/recon/ExtendDataCommand.java b/main/src/com/google/refine/commands/recon/ExtendDataCommand.java new file mode 100644 index 000000000..119e90442 --- /dev/null +++ b/main/src/com/google/refine/commands/recon/ExtendDataCommand.java @@ -0,0 +1,65 @@ +/* + 
+Copyright 2010, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+
+*/
+
+package com.google.refine.commands.recon;
+
+import javax.servlet.http.HttpServletRequest;
+
+import org.json.JSONObject;
+
+import com.google.refine.commands.EngineDependentCommand;
+// NOTE(review): this patch also adds
+// main/src/com/google/refine/operations/recon/ExtendDataOperation.java;
+// confirm whether the freebase class below is still the intended one.
+import com.google.refine.freebase.operations.ExtendDataOperation;
+import com.google.refine.model.AbstractOperation;
+import com.google.refine.model.Project;
+import com.google.refine.util.ParsingUtilities;
+
+/**
+ * Command that turns an HTTP request into an {@link ExtendDataOperation}.
+ *
+ * <p>Three request parameters are read: {@code baseColumnName},
+ * {@code columnInsertIndex} (parsed as a decimal integer) and
+ * {@code extension} (parsed as a JSON object).</p>
+ */
+public class ExtendDataCommand extends EngineDependentCommand {
+
+    /**
+     * Builds the operation described by the request.
+     *
+     * @param project      the project the command targets (not read here)
+     * @param request      source of the three parameters listed on the class
+     * @param engineConfig engine configuration, handed to the operation as is
+     * @return a new {@link ExtendDataOperation}
+     * @throws Exception if {@code columnInsertIndex} is not an integer
+     *         ({@link NumberFormatException}) or if parsing {@code extension}
+     *         fails
+     */
+    @Override
+    protected AbstractOperation createOperation(Project project,
+            HttpServletRequest request, JSONObject engineConfig) throws Exception {
+
+        final String baseColumnName = request.getParameter("baseColumnName");
+
+        final String insertIndexParam = request.getParameter("columnInsertIndex");
+        final int columnInsertIndex = Integer.parseInt(insertIndexParam);
+
+        final JSONObject extension = ParsingUtilities.evaluateJsonStringToObject(
+                request.getParameter("extension"));
+
+        return new ExtendDataOperation(engineConfig, baseColumnName, extension, columnInsertIndex);
+    }
+
+}
diff --git a/main/src/com/google/refine/commands/recon/PreviewExtendDataCommand.java b/main/src/com/google/refine/commands/recon/PreviewExtendDataCommand.java
new file mode 100644
index 000000000..ea655bb5d
--- /dev/null
+++ b/main/src/com/google/refine/commands/recon/PreviewExtendDataCommand.java
@@ -0,0 +1,208 @@
+/*
+
+Copyright 2010, Google Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+    * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+    * Neither the name of Google Inc. 
nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +package com.google.refine.commands.recon; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.json.JSONArray; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.commands.Command; +import com.google.refine.model.recon.ReconciledDataExtensionJob; +import com.google.refine.model.recon.ReconciledDataExtensionJob.ColumnInfo; +import com.google.refine.model.recon.ReconciledDataExtensionJob.DataExtension; +import com.google.refine.model.Cell; +import com.google.refine.model.Project; +import com.google.refine.model.ReconCandidate; +import com.google.refine.model.Row; +import com.google.refine.model.Column; +import com.google.refine.model.recon.ReconConfig; +import com.google.refine.model.recon.StandardReconConfig; +import 
com.google.refine.util.ParsingUtilities;
+
+/**
+ * Command that previews a data extension for a few rows of a column.
+ *
+ * <p>The column must carry a {@link StandardReconConfig}; the service URL
+ * stored in that configuration is the endpoint the extension query is sent
+ * to. The reply is streamed back to the client as JSON.</p>
+ */
+public class PreviewExtendDataCommand extends Command {
+
+    /**
+     * Handles the preview request.
+     *
+     * <p>Request parameters read here:</p>
+     * <ul>
+     *   <li>{@code columnName} - name of the reconciled column;</li>
+     *   <li>{@code rowIndices} - JSON array of row indices to preview;</li>
+     *   <li>{@code extension} - JSON object describing the extension.</li>
+     * </ul>
+     *
+     * <p>On success the response is
+     * {@code {"code":"ok","columns":[{"names":[..],"path":[..]},..],"rows":[[..],..]}}.
+     * For every requested row that lies inside the project:</p>
+     * <ul>
+     *   <li>if its matched id has at least one extension row, one array is
+     *       written per extension row: the topic name on the first array
+     *       ({@code null} on the following ones), then the extension cells,
+     *       with {@link ReconCandidate} cells written as
+     *       {@code {"id":..,"name":..}};</li>
+     *   <li>otherwise a one-element array holding {@code {"id":..,"name":..}}
+     *       for a matched cell, or {@code ""} for an unmatched one.</li>
+     * </ul>
+     *
+     * <p>Row indices outside the project are ignored. Problems detected up
+     * front are reported as {@code {"code":"error","message":..}}; anything
+     * thrown later goes through {@code respondException}.</p>
+     */
+    @Override
+    public void doPost(HttpServletRequest request, HttpServletResponse response)
+            throws ServletException, IOException {
+
+        try {
+            Project project = getProject(request);
+            String columnName = request.getParameter("columnName");
+
+            String rowIndicesString = request.getParameter("rowIndices");
+            if (rowIndicesString == null) {
+                respond(response, "{ \"code\" : \"error\", \"message\" : \"No row indices specified\" }");
+                return;
+            }
+
+            String jsonString = request.getParameter("extension");
+            JSONObject json = ParsingUtilities.evaluateJsonStringToObject(jsonString);
+
+            JSONArray rowIndices = ParsingUtilities.evaluateJsonStringToArray(rowIndicesString);
+            int length = rowIndices.length();
+
+            Column column = project.columnModel.getColumnByName(columnName);
+            if (column == null) {
+                // Previously this surfaced as a NullPointerException on getCellIndex().
+                respond(response, "{ \"code\" : \"error\", \"message\" : \"No column with the given name.\" }");
+                return;
+            }
+            int cellIndex = column.getCellIndex();
+
+            // get the endpoint to extract data from
+            // (instanceof is false for null, so no separate null check is needed)
+            ReconConfig cfg = column.getReconConfig();
+            if (!(cfg instanceof StandardReconConfig)) {
+                respond(response, "{ \"code\" : \"error\", \"message\" : \"This column has not been reconciled with a standard service.\" }");
+                return;
+            }
+            String endpoint = ((StandardReconConfig) cfg).service;
+
+            // topicNames and topicIds are parallel lists with one entry per
+            // in-range row; ids is the set of distinct matched ids to query.
+            List<String> topicNames = new ArrayList<String>();
+            List<String> topicIds = new ArrayList<String>();
+            Set<String> ids = new HashSet<String>();
+            for (int i = 0; i < length; i++) {
+                int rowIndex = rowIndices.getInt(i);
+                if (rowIndex >= 0 && rowIndex < project.rows.size()) {
+                    Row row = project.rows.get(rowIndex);
+                    Cell cell = row.getCell(cellIndex);
+                    if (cell != null && cell.recon != null && cell.recon.match != null) {
+                        topicNames.add(cell.recon.match.name);
+                        topicIds.add(cell.recon.match.id);
+                        ids.add(cell.recon.match.id);
+                    } else {
+                        // Keep the row in the output, but do not put null into
+                        // the id set: there is nothing to query for it.
+                        topicNames.add(null);
+                        topicIds.add(null);
+                    }
+                }
+            }
+
+            Map<String, ReconCandidate> reconCandidateMap = new HashMap<String, ReconCandidate>();
+            ReconciledDataExtensionJob job = new ReconciledDataExtensionJob(json, endpoint);
+            Map<String, DataExtension> map = job.extend(ids, reconCandidateMap);
+
+            response.setCharacterEncoding("UTF-8");
+            response.setHeader("Content-Type", "application/json");
+
+            JSONWriter writer = new JSONWriter(response.getWriter());
+            writer.object();
+            writer.key("code"); writer.value("ok");
+            writer.key("columns");
+                writer.array();
+                for (ColumnInfo info : job.columns) {
+                    writer.object();
+                    writer.key("names");
+                        writer.array();
+                        for (String name : info.names) {
+                            writer.value(name);
+                        }
+                        writer.endArray();
+                    writer.key("path");
+                        writer.array();
+                        for (String id : info.path) {
+                            writer.value(id);
+                        }
+                        writer.endArray();
+                    writer.endObject();
+                }
+                writer.endArray();
+
+            writer.key("rows");
+                writer.array();
+                for (int r = 0; r < topicNames.size(); r++) {
+                    String id = topicIds.get(r);
+                    String topicName = topicNames.get(r);
+
+                    if (id != null && map.containsKey(id)) {
+                        DataExtension ext = map.get(id);
+                        boolean first = true;
+
+                        if (ext.data.length > 0) {
+                            for (Object[] row : ext.data) {
+                                writer.array();
+                                // the topic name is only shown on the first extension row
+                                if (first) {
+                                    writer.value(topicName);
+                                    first = false;
+                                } else {
+                                    writer.value(null);
+                                }
+
+                                for (Object cell : row) {
+                                    if (cell instanceof ReconCandidate) {
+                                        ReconCandidate rc = (ReconCandidate) cell;
+                                        writer.object();
+                                        writer.key("id"); writer.value(rc.id);
+                                        writer.key("name"); writer.value(rc.name);
+                                        writer.endObject();
+                                    } else {
+                                        writer.value(cell);
+                                    }
+                                }
+
+                                writer.endArray();
+                            }
+                            continue;
+                        }
+                    }
+
+                    // no extension data for this row: emit just the topic itself
+                    writer.array();
+                    if (id != null) {
+                        writer.object();
+                        writer.key("id"); writer.value(id);
+                        writer.key("name"); writer.value(topicName);
+                        writer.endObject();
+                    } else {
+                        writer.value("");
+                    }
+                    writer.endArray();
+                }
+                writer.endArray();
+
+            writer.endObject();
+        } catch (Exception e) {
+            respondException(response, e);
+        }
+    }
+}
diff --git a/main/src/com/google/refine/model/changes/DataExtensionChange.java b/main/src/com/google/refine/model/changes/DataExtensionChange.java
new file mode 
100644 index 000000000..c39d13ea6 --- /dev/null +++ b/main/src/com/google/refine/model/changes/DataExtensionChange.java @@ -0,0 +1,469 @@ +/* + +Copyright 2010, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+
+*/
+
+package com.google.refine.model.changes;
+
+import java.io.IOException;
+import java.io.LineNumberReader;
+import java.io.Serializable;
+import java.io.Writer;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+import org.json.JSONException;
+import org.json.JSONObject;
+import org.json.JSONWriter;
+
+// NOTE(review): FreebaseType is still referenced below although its import is
+// commented out - confirm where the type is supposed to come from.
+// import com.google.refine.freebase.FreebaseType;
+import com.google.refine.model.recon.DataExtensionReconConfig;
+import com.google.refine.model.recon.FreebaseDataExtensionJob.DataExtension;
+import com.google.refine.history.Change;
+import com.google.refine.model.Cell;
+import com.google.refine.model.Column;
+import com.google.refine.model.ModelException;
+import com.google.refine.model.Project;
+import com.google.refine.model.Recon;
+import com.google.refine.model.Recon.Judgment;
+import com.google.refine.model.ReconCandidate;
+import com.google.refine.model.ReconStats;
+import com.google.refine.model.Row;
+import com.google.refine.util.ParsingUtilities;
+import com.google.refine.util.Pool;
+
+/**
+ * Change that adds the columns produced by a data extension to a project.
+ *
+ * <p>The first {@link #apply(Project)} computes the new row list from
+ * {@code _rowIndices} / {@code _dataExtensions} and remembers both the old and
+ * the new rows; later calls simply reinstall the remembered new rows.
+ * {@link #revert(Project)} puts the old rows back and removes the columns.</p>
+ */
+public class DataExtensionChange implements Change {
+    final protected String              _baseColumnName;
+    final protected int                 _columnInsertIndex;
+
+    final protected List<String>        _columnNames;
+    final protected List<FreebaseType>  _columnTypes;
+
+    // _rowIndices.get(i) is the row that _dataExtensions.get(i) belongs to
+    final protected List<Integer>       _rowIndices;
+    final protected List<DataExtension> _dataExtensions;
+
+    protected long                      _historyEntryID;
+    // -1 until apply() has allocated the cell indices for the new columns
+    protected int                       _firstNewCellIndex = -1;
+    protected List<Row>                 _oldRows;
+    protected List<Row>                 _newRows;
+
+    /**
+     * Creates a change that has not been applied yet.
+     *
+     * @param baseColumnName    name of the column the extension is based on
+     * @param columnInsertIndex position at which the first new column is inserted
+     * @param columnNames       names of the new columns
+     * @param columnTypes       types of the new columns, parallel to {@code columnNames}
+     * @param rowIndices        rows for which extension data exists
+     * @param dataExtensions    extension data, parallel to {@code rowIndices}
+     * @param historyEntryID    id passed to the recon objects created by this change
+     */
+    public DataExtensionChange(
+        String baseColumnName,
+        int columnInsertIndex,
+        List<String> columnNames,
+        List<FreebaseType> columnTypes,
+        List<Integer> rowIndices,
+        List<DataExtension> dataExtensions,
+        long historyEntryID
+    ) {
+        _baseColumnName = baseColumnName;
+        _columnInsertIndex = columnInsertIndex;
+
+        _columnNames = columnNames;
+        _columnTypes = columnTypes;
+
+        _rowIndices = rowIndices;
+        _dataExtensions = dataExtensions;
+
+        _historyEntryID = historyEntryID;
+    }
+
+    /**
+     * Recreates a change from saved state (used by {@link #load}); the old
+     * and new rows are supplied instead of being computed.
+     */
+    protected DataExtensionChange(
+        String              baseColumnName,
+        int                 columnInsertIndex,
+
+        List<String>        columnNames,
+        List<FreebaseType>  columnTypes,
+
+        List<Integer>       rowIndices,
+        List<DataExtension> dataExtensions,
+        int                 firstNewCellIndex,
+        List<Row>           oldRows,
+        List<Row>           newRows
+    ) {
+        _baseColumnName = baseColumnName;
+        _columnInsertIndex = columnInsertIndex;
+
+        _columnNames = columnNames;
+        _columnTypes = columnTypes;
+
+        _rowIndices = rowIndices;
+        _dataExtensions = dataExtensions;
+
+        _firstNewCellIndex = firstNewCellIndex;
+        _oldRows = oldRows;
+        _newRows = newRows;
+    }
+
+    /**
+     * Installs the extended rows and the new columns in the project.
+     *
+     * <p>Rows before the next indexed row are copied unchanged. An indexed
+     * row receives the first extension row; each further extension row either
+     * reuses the following project row (when that row is blank in both the
+     * base column and the key column) or is inserted as a brand new row.</p>
+     */
+    @Override
+    public void apply(Project project) {
+        synchronized (project) {
+            if (_firstNewCellIndex < 0) {
+                // first application: reserve one cell index per new column
+                _firstNewCellIndex = project.columnModel.allocateNewCellIndex();
+                for (int i = 1; i < _columnNames.size(); i++) {
+                    project.columnModel.allocateNewCellIndex();
+                }
+
+                _oldRows = new ArrayList<Row>(project.rows);
+
+                _newRows = new ArrayList<Row>(project.rows.size());
+
+                int cellIndex = project.columnModel.getColumnByName(_baseColumnName).getCellIndex();
+                int keyCellIndex = project.columnModel.columns.get(project.columnModel.getKeyColumnIndex()).getCellIndex();
+                int index = 0;
+
+                // rowIndex / dataExtension always describe the next row that
+                // has extension data; past the end rowIndex is the row count
+                int rowIndex = index < _rowIndices.size() ? _rowIndices.get(index) : _oldRows.size();
+                DataExtension dataExtension = index < _rowIndices.size() ? _dataExtensions.get(index) : null;
+
+                index++;
+
+                // recon objects are shared between cells that refer to the same id
+                Map<String, Recon> reconMap = new HashMap<String, Recon>();
+
+                for (int r = 0; r < _oldRows.size(); r++) {
+                    Row oldRow = _oldRows.get(r);
+                    if (r < rowIndex) {
+                        _newRows.add(oldRow.dup());
+                        continue;
+                    }
+
+                    if (dataExtension == null || dataExtension.data.length == 0) {
+                        _newRows.add(oldRow);
+                    } else {
+                        Row firstNewRow = oldRow.dup();
+                        extendRow(firstNewRow, dataExtension, 0, reconMap);
+                        _newRows.add(firstNewRow);
+
+                        int r2 = r + 1;
+                        for (int subR = 1; subR < dataExtension.data.length; subR++) {
+                            if (r2 < project.rows.size()) {
+                                Row oldRow2 = project.rows.get(r2);
+                                if (oldRow2.isCellBlank(cellIndex) &&
+                                    oldRow2.isCellBlank(keyCellIndex)) {
+
+                                    // the following row is a continuation row: reuse it
+                                    Row newRow = oldRow2.dup();
+                                    extendRow(newRow, dataExtension, subR, reconMap);
+
+                                    _newRows.add(newRow);
+                                    r2++;
+
+                                    continue;
+                                }
+                            }
+
+                            Row newRow = new Row(cellIndex + _columnNames.size());
+                            extendRow(newRow, dataExtension, subR, reconMap);
+
+                            _newRows.add(newRow);
+                        }
+
+                        r = r2 - 1; // r will be incremented by the for loop anyway
+                    }
+
+                    rowIndex = index < _rowIndices.size() ? _rowIndices.get(index) : _oldRows.size();
+                    dataExtension = index < _rowIndices.size() ? _dataExtensions.get(index) : null;
+                    index++;
+                }
+            }
+
+            project.rows.clear();
+            project.rows.addAll(_newRows);
+
+            for (int i = 0; i < _columnNames.size(); i++) {
+                String name = _columnNames.get(i);
+                int cellIndex = _firstNewCellIndex + i;
+
+                Column column = new Column(cellIndex, name);
+                column.setReconConfig(new DataExtensionReconConfig(_columnTypes.get(i)));
+                column.setReconStats(ReconStats.create(project, cellIndex));
+
+                try {
+                    project.columnModel.addColumn(_columnInsertIndex + i, column, true);
+
+                    // the column might have been renamed to avoid collision
+                    _columnNames.set(i, column.getName());
+                } catch (ModelException e) {
+                    // won't get here since we set the avoid collision flag
+                }
+            }
+
+            project.update();
+        }
+    }
+
+    /**
+     * Writes one extension row into the new cells of {@code row}.
+     *
+     * <p>A {@link ReconCandidate} value becomes a cell holding the candidate's
+     * name and a matched {@link Recon}; one {@code Recon} is created per
+     * candidate id and reused through {@code reconMap}. Any other value is
+     * stored as a plain cell.</p>
+     *
+     * @param row               row to modify
+     * @param dataExtension     extension the values come from
+     * @param extensionRowIndex index into {@code dataExtension.data}
+     * @param reconMap          cache of recon objects keyed by candidate id
+     */
+    protected void extendRow(
+        Row row,
+        DataExtension dataExtension,
+        int extensionRowIndex,
+        Map<String, Recon> reconMap
+    ) {
+        Object[] values = dataExtension.data[extensionRowIndex];
+        for (int c = 0; c < values.length; c++) {
+            Object value = values[c];
+            Cell cell = null;
+
+            if (value instanceof ReconCandidate) {
+                ReconCandidate rc = (ReconCandidate) value;
+                Recon recon;
+                if (reconMap.containsKey(rc.id)) {
+                    recon = reconMap.get(rc.id);
+                } else {
+                    // NOTE(review): still creates a Freebase recon with service
+                    // "mql" - confirm this is intended for non-Freebase services.
+                    recon = Recon.makeFreebaseRecon(_historyEntryID);
+                    recon.addCandidate(rc);
+                    recon.service = "mql";
+                    recon.match = rc;
+                    recon.matchRank = 0;
+                    recon.judgment = Judgment.Matched;
+                    recon.judgmentAction = "auto";
+                    recon.judgmentBatchSize = 1;
+
+                    reconMap.put(rc.id, recon);
+                }
+                cell = new Cell(rc.name, recon);
+            } else {
+                cell = new Cell((Serializable) value, null);
+            }
+
+            row.setCell(_firstNewCellIndex + c, cell);
+        }
+    }
+
+    /**
+     * Restores the rows saved by {@link #apply(Project)} and removes the
+     * columns it inserted.
+     */
+    @Override
+    public void revert(Project project) {
+        synchronized (project) {
+            project.rows.clear();
+            project.rows.addAll(_oldRows);
+
+            // the new columns sit contiguously at _columnInsertIndex, so
+            // removing at that index repeatedly drops all of them
+            for (int i = 0; i < _columnNames.size(); i++) {
+                project.columnModel.columns.remove(_columnInsertIndex);
+            }
+
+            project.update();
+        }
+    }
+
+    /**
+     * Serializes the change as {@code key=value} header lines, each count
+     * header being followed by that many payload lines, and terminates it
+     * with the {@code /ec/} marker.
+     *
+     * @throws IOException if writing fails or if a column type or recon
+     *         candidate cannot be serialized to JSON
+     */
+    @Override
+    public void save(Writer writer, Properties options) throws IOException {
+        writer.write("baseColumnName="); writer.write(_baseColumnName); writer.write('\n');
+        writer.write("columnInsertIndex="); writer.write(Integer.toString(_columnInsertIndex)); writer.write('\n');
+        writer.write("columnNameCount="); writer.write(Integer.toString(_columnNames.size())); writer.write('\n');
+        for (String name : _columnNames) {
+            writer.write(name); writer.write('\n');
+        }
+        writer.write("columnTypeCount="); writer.write(Integer.toString(_columnTypes.size())); writer.write('\n');
+        for (FreebaseType type : _columnTypes) {
+            try {
+                JSONWriter jsonWriter = new JSONWriter(writer);
+
+                type.write(jsonWriter, options);
+            } catch (JSONException e) {
+                // A swallowed failure would leave a truncated line that load() cannot parse.
+                throw new IOException("Failed to serialize the type of an extension column", e);
+            }
+            writer.write('\n');
+        }
+        writer.write("rowIndexCount="); writer.write(Integer.toString(_rowIndices.size())); writer.write('\n');
+        for (Integer rowIndex : _rowIndices) {
+            writer.write(rowIndex.toString()); writer.write('\n');
+        }
+        writer.write("dataExtensionCount="); writer.write(Integer.toString(_dataExtensions.size())); writer.write('\n');
+        for (DataExtension dataExtension : _dataExtensions) {
+            if (dataExtension == null) {
+                // an empty line stands for "no extension for this row"
+                writer.write('\n');
+                continue;
+            }
+
+            writer.write(Integer.toString(dataExtension.data.length)); writer.write('\n');
+
+            for (Object[] values : dataExtension.data) {
+                for (Object value : values) {
+                    if (value == null) {
+                        writer.write("null");
+                    } else if (value instanceof ReconCandidate) {
+                        try {
+                            JSONWriter jsonWriter = new JSONWriter(writer);
+                            ((ReconCandidate) value).write(jsonWriter, options);
+                        } catch (JSONException e) {
+                            throw new IOException("Failed to serialize a recon candidate", e);
+                        }
+                    } else if (value instanceof String) {
+                        writer.write(JSONObject.quote((String) value));
+                    } else {
+                        writer.write(value.toString());
+                    }
+                    writer.write('\n');
+                }
+            }
+        }
+
+        writer.write("firstNewCellIndex="); writer.write(Integer.toString(_firstNewCellIndex)); writer.write('\n');
+
+        writer.write("newRowCount="); writer.write(Integer.toString(_newRows.size())); writer.write('\n');
+        for (Row row : _newRows) {
+            row.save(writer, options);
+            writer.write('\n');
+        }
+        writer.write("oldRowCount="); writer.write(Integer.toString(_oldRows.size())); writer.write('\n');
+        for (Row row : _oldRows) {
+            row.save(writer, options);
+            writer.write('\n');
+        }
+        writer.write("/ec/\n"); // end of change marker
+    }
+
+    /**
+     * Reads a change written by {@link #save(Writer, Properties)}.
+     *
+     * <p>Header lines are read until the {@code /ec/} marker or end of input.
+     * Lines that contain no {@code '='} are skipped. The data extensions rely
+     * on the column names having been read already, which is the order
+     * {@code save} writes them in.</p>
+     *
+     * @param reader source positioned at the first header line
+     * @param pool   pool passed to {@code Row.load}
+     * @return the reconstructed change
+     * @throws Exception if a count or index is not a valid integer, or if one
+     *         of the delegated loaders fails
+     */
+    static public Change load(LineNumberReader reader, Pool pool) throws Exception {
+        String baseColumnName = null;
+        int columnInsertIndex = -1;
+
+        List<String> columnNames = null;
+        List<FreebaseType> columnTypes = null;
+
+        List<Integer> rowIndices = null;
+        List<DataExtension> dataExtensions = null;
+
+        List<Row> oldRows = null;
+        List<Row> newRows = null;
+
+        int firstNewCellIndex = -1;
+
+        String line;
+        while ((line = reader.readLine()) != null && !"/ec/".equals(line)) {
+            int equal = line.indexOf('=');
+            if (equal < 0) {
+                // not a key=value header; slicing it would throw
+                continue;
+            }
+            String field = line.substring(0, equal);
+            String value = line.substring(equal + 1);
+
+            if ("baseColumnName".equals(field)) {
+                baseColumnName = value;
+            } else if ("columnInsertIndex".equals(field)) {
+                columnInsertIndex = Integer.parseInt(value);
+            } else if ("firstNewCellIndex".equals(field)) {
+                firstNewCellIndex = Integer.parseInt(value);
+            } else if ("rowIndexCount".equals(field)) {
+                int count = Integer.parseInt(value);
+
+                rowIndices = new ArrayList<Integer>(count);
+                for (int i = 0; i < count; i++) {
+                    line = reader.readLine();
+                    if (line != null) {
+                        rowIndices.add(Integer.parseInt(line));
+                    }
+                }
+            } else if ("columnNameCount".equals(field)) {
+                int count = Integer.parseInt(value);
+
+                columnNames = new ArrayList<String>(count);
+                for (int i = 0; i < count; i++) {
+                    line = reader.readLine();
+                    if (line != null) {
+                        columnNames.add(line);
+                    }
+                }
+            } else if ("columnTypeCount".equals(field)) {
+                int count = Integer.parseInt(value);
+
+                columnTypes = new ArrayList<FreebaseType>(count);
+                for (int i = 0; i < count; i++) {
+                    line = reader.readLine();
+                    columnTypes.add(FreebaseType.load(ParsingUtilities.evaluateJsonStringToObject(line)));
+                }
+            } else if ("dataExtensionCount".equals(field)) {
+                int count = Integer.parseInt(value);
+
+                dataExtensions = new ArrayList<DataExtension>(count);
+                for (int i = 0; i < count; i++) {
+                    line = reader.readLine();
+
+                    if (line == null) {
+                        continue;
+                    }
+
+                    if (line.length() == 0) {
+                        // empty line: save() wrote a null extension here
+                        dataExtensions.add(null);
+                        continue;
+                    }
+
+                    int rowCount = Integer.parseInt(line);
+                    Object[][] data = new Object[rowCount][];
+
+                    for (int r = 0; r < rowCount; r++) {
+                        Object[] row = new Object[columnNames.size()];
+                        for (int c = 0; c < columnNames.size(); c++) {
+                            line = reader.readLine();
+
+                            // NOTE(review): save() also writes "null", quoted
+                            // strings and toString() values - confirm that
+                            // loadStreaming copes with those.
+                            row[c] = ReconCandidate.loadStreaming(line);
+                        }
+
+                        data[r] = row;
+                    }
+
+                    dataExtensions.add(new DataExtension(data));
+                }
+            } else if ("oldRowCount".equals(field)) {
+                int count = Integer.parseInt(value);
+
+                oldRows = new ArrayList<Row>(count);
+                for (int i = 0; i < count; i++) {
+                    line = reader.readLine();
+                    if (line != null) {
+                        oldRows.add(Row.load(line, pool));
+                    }
+                }
+            } else if ("newRowCount".equals(field)) {
+                int count = Integer.parseInt(value);
+
+                newRows = new ArrayList<Row>(count);
+                for (int i = 0; i < count; i++) {
+                    line = reader.readLine();
+                    if (line != null) {
+                        newRows.add(Row.load(line, pool));
+                    }
+                }
+            }
+
+        }
+
+        DataExtensionChange change = new DataExtensionChange(
+            baseColumnName,
+            columnInsertIndex,
+            columnNames,
+            columnTypes,
+            rowIndices,
+            dataExtensions,
+            firstNewCellIndex,
+            oldRows,
+            newRows
+        );
+
+
+        return change;
+    }
+}
diff --git a/main/src/com/google/refine/model/recon/ReconciledDataExtensionJob.java b/main/src/com/google/refine/model/recon/ReconciledDataExtensionJob.java
new file mode 100644
index 000000000..30f619c1e
--- /dev/null
+++ 
b/main/src/com/google/refine/model/recon/ReconciledDataExtensionJob.java @@ -0,0 +1,453 @@ +/* + +Copyright 2010, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +/** + * + */ +package com.google.refine.model.recon; + +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.Serializable; +import java.io.StringWriter; +import java.io.Writer; +import java.net.URL; +import java.net.URLConnection; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +// import com.google.refine.freebase.FreebaseType; +import com.google.refine.model.ReconCandidate; +import com.google.refine.model.recon.StandardReconConfig; +import com.google.refine.util.JSONUtilities; +import com.google.refine.util.ParsingUtilities; + +public class ReconciledDataExtensionJob { + static public class DataExtension { + final public Object[][] data; + + public DataExtension(Object[][] data) { + this.data = data; + } + } + + static public class ColumnInfo { + final public List names; + final public List path; + // final public FreebaseType expectedType; + // TODO + + protected ColumnInfo(List names, List path /*, FreebaseType expectedType */) { + this.names = names; + this.path = path; + // this.expectedType = expectedType; + } + } + + final public JSONObject extension; + final public String endpoint; + final public int columnCount; + final public List columns = new ArrayList(); + + public ReconciledDataExtensionJob(JSONObject obj, String endpoint) throws JSONException { + this.extension = obj; + this.endpoint = endpoint; + this.columnCount = (obj.has("properties") && !obj.isNull("properties")) ? 
+ countColumns(obj.getJSONArray("properties"), columns, new ArrayList(), new ArrayList()) : 0; + } + + public Map extend( + Set ids, + Map reconCandidateMap + ) throws Exception { + StringWriter writer = new StringWriter(); + formulateQuery(ids, extension, writer); + + // Extract the order of properties + JSONArray origProperties = extension.getJSONArray("properties"); + List properties = new ArrayList(); + int l = origProperties.length(); + for (int i = 0; i < l; i++) { + properties.add(origProperties.getJSONObject(i).getString("id")); + } + + String query = writer.toString(); + InputStream is = performQuery(this.endpoint, query); + try { + String s = ParsingUtilities.inputStreamToString(is); + JSONObject o = ParsingUtilities.evaluateJsonStringToObject(s); + + Map map = new HashMap(); + if (o.has("rows")){ + JSONObject records = o.getJSONObject("rows"); + + // for each identifier + for (String id : ids) { + if (records.has(id)) { + JSONObject record = records.getJSONObject(id); + + ReconciledDataExtensionJob.DataExtension ext = collectResult(record, properties, reconCandidateMap); + + if (ext != null) { + map.put(id, ext); + } + } + } + } + + return map; + } finally { + is.close(); + } + } + + static protected InputStream performQuery(String endpoint, String query) throws IOException { + URL url = new URL(endpoint); + + URLConnection connection = url.openConnection(); + connection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded"); + connection.setConnectTimeout(5000); + connection.setDoOutput(true); + + DataOutputStream dos = new DataOutputStream(connection.getOutputStream()); + try { + String body = "extend=" + ParsingUtilities.encode(query); + + dos.writeBytes(body); + } finally { + dos.flush(); + dos.close(); + } + + connection.connect(); + + return connection.getInputStream(); + } + + + protected ReconciledDataExtensionJob.DataExtension collectResult( + JSONObject record, + List properties, + Map reconCandidateMap + ) throws JSONException 
{ + List rows = new ArrayList(); + + // for each property + int colindex = 0; + for(String pid : properties) { + JSONArray values = record.getJSONArray(pid); + if (values == null) { + continue; + } + + // for each value + for(int rowindex = 0; rowindex < values.length(); rowindex++) { + JSONObject val = values.getJSONObject(rowindex); + // store a reconciled value + if(val.has("id")) { + storeCell(rows, rowindex, colindex, val, reconCandidateMap); + } else if(val.has("str")) { + // store a bare string + String str = val.getString("str"); + storeStr(rows, rowindex, colindex, str); + } + // TODO other cases for other types of values (dates, booleans, …) + } + colindex++; + } + + + // collectResult(rows, extension.getJSONArray("properties"), obj, 0, 0, reconCandidateMap); + + Object[][] data = new Object[rows.size()][columnCount]; + rows.toArray(data); + + return new DataExtension(data); + } + + protected void storeStr( + List rows, + int row, + int col, + String str + ) throws JSONException { + while (row >= rows.size()) { + rows.add(new Object[columnCount]); + } + rows.get(row)[col] = str; + } + + protected void storeCell( + List rows, + int row, + int col, + Object value, + Map reconCandidateMap + ) { + while (row >= rows.size()) { + rows.add(new Object[columnCount]); + } + rows.get(row)[col] = value; + } + + protected void storeCell( + List rows, + int row, + int col, + JSONObject obj, + Map reconCandidateMap + ) throws JSONException { + String id = obj.getString("id"); + ReconCandidate rc; + if (reconCandidateMap.containsKey(id)) { + rc = reconCandidateMap.get(id); + } else { + rc = new ReconCandidate( + obj.getString("id"), + obj.getString("name"), + JSONUtilities.getStringArray(obj, "type"), + 100 + ); + + reconCandidateMap.put(id, rc); + } + + storeCell(rows, row, col, rc, reconCandidateMap); + } + /* + protected int[] collectResult( + List rows, + JSONObject extNode, + JSONObject resultNode, + int startRowIndex, + int startColumnIndex, + Map reconCandidateMap 
+ ) throws JSONException { + String propertyID = extNode.getString("id"); + // String expectedTypeID = extNode.getJSONObject("expected").getString("id"); + + JSONArray a = resultNode != null && resultNode.has(propertyID) && !resultNode.isNull(propertyID) ? + resultNode.getJSONArray(propertyID) : null; + + if ("/type/key".equals(expectedTypeID)) { + if (a != null) { + int l = a.length(); + for (int r = 0; r < l; r++) { + Object o = a.isNull(r) ? null : a.get(r); + if (o instanceof JSONObject) { + storeStr(rows, startRowIndex++, startColumnIndex, (JSONObject) o, reconCandidateMap); + } + } + } + + // note that we still take up a column even if we don't have any data + return new int[] { startRowIndex, startColumnIndex + 1 }; + } else if (expectedTypeID.startsWith("/type/")) { + if (a != null) { + int l = a.length(); + for (int r = 0; r < l; r++) { + Object o = a.isNull(r) ? null : a.get(r); + if (o instanceof Serializable) { + storeCell(rows, startRowIndex++, startColumnIndex, o, reconCandidateMap); + } + } + } + + // note that we still take up a column even if we don't have any data + return new int[] { startRowIndex, startColumnIndex + 1 }; + } else { + boolean hasSubProperties = (extNode.has("properties") && !extNode.isNull("properties")); + boolean isOwnColumn = !hasSubProperties || (extNode.has("included") && extNode.getBoolean("included")); + + if (a != null && a.length() > 0) { + int maxColIndex = startColumnIndex; + + int l = a.length(); + for (int r = 0; r < l; r++) { + Object v = a.isNull(r) ? null : a.get(r); + JSONObject o = v != null && v instanceof JSONObject ? 
(JSONObject) v : null; + + int startColumnIndex2 = startColumnIndex; + int startRowIndex2 = startRowIndex; + + if (isOwnColumn) { + if (o != null) { + storeCell(rows, startRowIndex2++, startColumnIndex2++, o, reconCandidateMap); + } else { + storeCell(rows, startRowIndex2++, startColumnIndex2++, v, reconCandidateMap); + } + } + + if (hasSubProperties && o != null) { + int[] rowcol = collectResult( + rows, + extNode.getJSONArray("properties"), + o, + startRowIndex, + startColumnIndex2, + reconCandidateMap + ); + + startRowIndex2 = rowcol[0]; + startColumnIndex2 = rowcol[1]; + } + + startRowIndex = startRowIndex2; + maxColIndex = Math.max(maxColIndex, startColumnIndex2); + } + + return new int[] { startRowIndex, maxColIndex }; + } else { + return new int[] { + startRowIndex, + startColumnIndex + countColumns(extNode, null, new ArrayList(), new ArrayList()) + }; + } + } + } + + protected int[] collectResult( + List rows, + JSONArray subProperties, + JSONObject resultNode, + int startRowIndex, + int startColumnIndex, + Map reconCandidateMap + ) throws JSONException { + int maxStartRowIndex = startRowIndex; + + int k = subProperties.length(); + for (int c = 0; c < k; c++) { + int[] rowcol = collectResult( + rows, + subProperties.getJSONObject(c), + resultNode, + startRowIndex, + startColumnIndex, + reconCandidateMap + ); + + maxStartRowIndex = Math.max(maxStartRowIndex, rowcol[0]); + startColumnIndex = rowcol[1]; + } + + return new int[] { maxStartRowIndex, startColumnIndex }; + }*/ + + + + static protected void formulateQuery(Set ids, JSONObject node, Writer writer) throws JSONException { + JSONWriter jsonWriter = new JSONWriter(writer); + + jsonWriter.object(); + + jsonWriter.key("ids"); + jsonWriter.array(); + for (String id : ids) { + if (id != null) { + jsonWriter.value(id); + } + } + jsonWriter.endArray(); + + jsonWriter.key("properties"); + jsonWriter.array(); + JSONArray properties = node.getJSONArray("properties"); + int l = properties.length(); + + for (int i 
= 0; i < l; i++) { + JSONObject property = properties.getJSONObject(i); + jsonWriter.object(); + jsonWriter.key("id"); + jsonWriter.value(property.getString("id")); + // TODO translate constraints as below + jsonWriter.endObject(); + } + jsonWriter.endArray(); + jsonWriter.endObject(); + } + + + static protected int countColumns(JSONObject obj, List columns, List names, List path) throws JSONException { + String name = obj.getString("name"); + + List names2 = null; + List path2 = null; + if (columns != null) { + names2 = new ArrayList(names); + names2.add(name); + + path2 = new ArrayList(path); + path2.add(obj.getString("id")); + } + + if (obj.has("properties") && !obj.isNull("properties")) { + boolean included = (obj.has("included") && obj.getBoolean("included")); + if (included && columns != null) { + // JSONObject expected = obj.getJSONObject("expected"); + + columns.add(new ColumnInfo(names2, path2 + /* new FreebaseType(expected.getString("id"), expected.getString("name")) */)); + } + + return (included ? 1 : 0) + + countColumns(obj.getJSONArray("properties"), columns, names2, path2); + } else { + if (columns != null) { + // JSONObject expected = obj.getJSONObject("expected"); + + columns.add(new ColumnInfo(names2, path2 + /* new FreebaseType(expected.getString("id"), expected.getString("name")) */ )); + } + return 1; + } + } + + static protected int countColumns(JSONArray a, List columns, List names, List path) throws JSONException { + int c = 0; + int l = a.length(); + for (int i = 0; i < l; i++) { + c += countColumns(a.getJSONObject(i), columns, names, path); + } + return c; + } +} diff --git a/main/src/com/google/refine/operations/recon/ExtendDataOperation.java b/main/src/com/google/refine/operations/recon/ExtendDataOperation.java new file mode 100644 index 000000000..12f24392a --- /dev/null +++ b/main/src/com/google/refine/operations/recon/ExtendDataOperation.java @@ -0,0 +1,314 @@ +/* + +Copyright 2010, Google Inc. +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +package com.google.refine.operations.recon; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.Set; + +import org.apache.commons.lang.StringUtils; +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.browsing.Engine; +import com.google.refine.browsing.FilteredRows; +import com.google.refine.browsing.RowVisitor; +import com.google.refine.model.changes.DataExtensionChange; +import com.google.refine.model.recon.DataExtensionJob; +import com.google.refine.model.recon.DataExtensionJob.ColumnInfo; +import com.google.refine.model.recon.DataExtensionJob.DataExtension; +import com.google.refine.history.HistoryEntry; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Cell; +import com.google.refine.model.Column; +import com.google.refine.model.Project; +import com.google.refine.model.ReconCandidate; +import com.google.refine.model.Row; +import com.google.refine.model.changes.CellAtRow; +import com.google.refine.operations.EngineDependentOperation; +import com.google.refine.operations.OperationRegistry; +import com.google.refine.process.LongRunningProcess; +import com.google.refine.process.Process; + +public class ExtendDataOperation extends EngineDependentOperation { + final protected String _baseColumnName; + final protected JSONObject _extension; + final protected int _columnInsertIndex; + + static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { + JSONObject engineConfig = obj.getJSONObject("engineConfig"); + + return new ExtendDataOperation( + engineConfig, + obj.getString("baseColumnName"), + obj.getJSONObject("extension"), + obj.getInt("columnInsertIndex") + ); + } + + public ExtendDataOperation( + JSONObject engineConfig, + String baseColumnName, + JSONObject extension, + int columnInsertIndex + ) 
{ + super(engineConfig); + + _baseColumnName = baseColumnName; + _extension = extension; + _columnInsertIndex = columnInsertIndex; + } + + @Override + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); + writer.key("description"); writer.value(getBriefDescription(null)); + writer.key("engineConfig"); writer.value(getEngineConfig()); + writer.key("columnInsertIndex"); writer.value(_columnInsertIndex); + writer.key("baseColumnName"); writer.value(_baseColumnName); + writer.key("extension"); writer.value(_extension); + writer.endObject(); + } + + @Override + protected String getBriefDescription(Project project) { + return "Extend data at index " + _columnInsertIndex + + " based on column " + _baseColumnName; + } + + protected String createDescription(Column column, List cellsAtRows) { + return "Extend data at index " + _columnInsertIndex + + " based on column " + column.getName() + + " by filling " + cellsAtRows.size(); + } + + @Override + public Process createProcess(Project project, Properties options) throws Exception { + return new ExtendDataProcess( + project, + getEngineConfig(), + getBriefDescription(null) + ); + } + + public class ExtendDataProcess extends LongRunningProcess implements Runnable { + final protected Project _project; + final protected JSONObject _engineConfig; + final protected long _historyEntryID; + protected int _cellIndex; + protected FreebaseDataExtensionJob _job; + + public ExtendDataProcess( + Project project, + JSONObject engineConfig, + String description + ) throws JSONException { + super(description); + _project = project; + _engineConfig = engineConfig; + _historyEntryID = HistoryEntry.allocateID(); + + _job = new FreebaseDataExtensionJob(_extension); + } + + @Override + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("id"); 
writer.value(hashCode()); + writer.key("description"); writer.value(_description); + writer.key("immediate"); writer.value(false); + writer.key("status"); writer.value(_thread == null ? "pending" : (_thread.isAlive() ? "running" : "done")); + writer.key("progress"); writer.value(_progress); + writer.endObject(); + } + + @Override + protected Runnable getRunnable() { + return this; + } + + protected void populateRowsWithMatches(List rowIndices) throws Exception { + Engine engine = new Engine(_project); + engine.initializeFromJSON(_engineConfig); + + Column column = _project.columnModel.getColumnByName(_baseColumnName); + if (column == null) { + throw new Exception("No column named " + _baseColumnName); + } + + _cellIndex = column.getCellIndex(); + + FilteredRows filteredRows = engine.getAllFilteredRows(); + filteredRows.accept(_project, new RowVisitor() { + List _rowIndices; + + public RowVisitor init(List rowIndices) { + _rowIndices = rowIndices; + return this; + } + + @Override + public void start(Project project) { + // nothing to do + } + + @Override + public void end(Project project) { + // nothing to do + } + + @Override + public boolean visit(Project project, int rowIndex, Row row) { + Cell cell = row.getCell(_cellIndex); + if (cell != null && cell.recon != null && cell.recon.match != null) { + _rowIndices.add(rowIndex); + } + + return false; + } + }.init(rowIndices)); + } + + protected int extendRows( + List rowIndices, + List dataExtensions, + int from, + int limit, + Map reconCandidateMap + ) { + Set ids = new HashSet(); + + int end; + for (end = from; end < limit && ids.size() < 10; end++) { + int index = rowIndices.get(end); + Row row = _project.rows.get(index); + Cell cell = row.getCell(_cellIndex); + + ids.add(cell.recon.match.id); + } + + Map map = null; + try { + map = _job.extend(ids, reconCandidateMap); + } catch (Exception e) { + map = new HashMap(); + } + + for (int i = from; i < end; i++) { + int index = rowIndices.get(i); + Row row = 
_project.rows.get(index); + Cell cell = row.getCell(_cellIndex); + String guid = cell.recon.match.id; + + if (map.containsKey(guid)) { + dataExtensions.add(map.get(guid)); + } else { + dataExtensions.add(null); + } + } + + return end; + } + + @Override + public void run() { + List rowIndices = new ArrayList(); + List dataExtensions = new ArrayList(); + + try { + populateRowsWithMatches(rowIndices); + } catch (Exception e2) { + // TODO : Not sure what to do here? + e2.printStackTrace(); + } + + int start = 0; + Map reconCandidateMap = new HashMap(); + + while (start < rowIndices.size()) { + int end = extendRows(rowIndices, dataExtensions, start, rowIndices.size(), reconCandidateMap); + start = end; + + _progress = end * 100 / rowIndices.size(); + try { + Thread.sleep(200); + } catch (InterruptedException e) { + if (_canceled) { + break; + } + } + } + + if (!_canceled) { + List columnNames = new ArrayList(); + for (ColumnInfo info : _job.columns) { + columnNames.add(StringUtils.join(info.names, " - ")); + } + + List columnTypes = new ArrayList(); + for (ColumnInfo info : _job.columns) { + columnTypes.add(info.expectedType); + } + + HistoryEntry historyEntry = new HistoryEntry( + _historyEntryID, + _project, + _description, + ExtendDataOperation.this, + new DataExtensionChange( + _baseColumnName, + _columnInsertIndex, + columnNames, + columnTypes, + rowIndices, + dataExtensions, + _historyEntryID) + ); + + _project.history.addEntry(historyEntry); + _project.processManager.onDoneProcess(this); + } + } + } +} diff --git a/main/webapp/modules/core/MOD-INF/controller.js b/main/webapp/modules/core/MOD-INF/controller.js index d7dde610b..404e55070 100644 --- a/main/webapp/modules/core/MOD-INF/controller.js +++ b/main/webapp/modules/core/MOD-INF/controller.js @@ -121,6 +121,8 @@ function registerCommands() { RS.registerCommand(module, "recon-clear-one-cell", new Packages.com.google.refine.commands.recon.ReconClearOneCellCommand()); RS.registerCommand(module, 
"recon-clear-similar-cells", new Packages.com.google.refine.commands.recon.ReconClearSimilarCellsCommand()); RS.registerCommand(module, "recon-copy-across-columns", new Packages.com.google.refine.commands.recon.ReconCopyAcrossColumnsCommand()); + RS.registerCommand(module, "preview-extend-data", new Packages.com.google.refine.commands.recon.PreviewExtendDataCommand()); + RS.registerCommand(module, "extend-data", new Packages.com.google.refine.commands.recon.ExtendDataCommand()); RS.registerCommand(module, "guess-types-of-column", new Packages.com.google.refine.commands.recon.GuessTypesOfColumnCommand()); @@ -367,6 +369,7 @@ function init() { "styles/index/default-importing-sources.less", "styles/views/data-table-view.less", // for the preview table's styles + "styles/views/extend-data-preview-dialog.less", "styles/index/fixed-width-parser-ui.less", "styles/index/xml-parser-ui.less", "styles/index/json-parser-ui.less" @@ -431,6 +434,7 @@ function init() { "scripts/reconciliation/standard-service-panel.js", "scripts/dialogs/expression-preview-dialog.js", + "scripts/dialogs/extend-data-preview-dialog.js", "scripts/dialogs/clustering-dialog.js", "scripts/dialogs/scatterplot-dialog.js", "scripts/dialogs/templating-exporter-dialog.js", diff --git a/main/webapp/modules/core/scripts/dialogs/extend-data-preview-dialog.js b/main/webapp/modules/core/scripts/dialogs/extend-data-preview-dialog.js new file mode 100644 index 000000000..ca27f02d9 --- /dev/null +++ b/main/webapp/modules/core/scripts/dialogs/extend-data-preview-dialog.js @@ -0,0 +1,424 @@ +/* + +Copyright 2010, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + */ + +function ExtendReconciledDataPreviewDialog(column, columnIndex, rowIndices, onDone) { + this._column = column; + this._columnIndex = columnIndex; + this._rowIndices = rowIndices; + this._onDone = onDone; + this._extension = { properties: [] }; + + var self = this; + this._dialog = $(DOM.loadHTML("core", "scripts/views/data-table/extend-data-preview-dialog.html")); + this._elmts = DOM.bind(this._dialog); + this._elmts.dialogHeader.html("Add columns by reconciled column " + column.name); + this._elmts.resetButton.click(function() { + self._extension.properties = []; + self._update(); + }); + + this._elmts.okButton.click(function() { + if (self._extension.properties.length === 0) { + alert("Please add some properties first."); + } else { + DialogSystem.dismissUntil(self._level - 1); + self._onDone(self._extension); + } + }); + this._elmts.cancelButton.click(function() { + DialogSystem.dismissUntil(self._level - 1); + }); + + var dismissBusy = DialogSystem.showBusy(); + var type = (column.reconConfig) && (column.reconConfig.type) ? 
column.reconConfig.type.id : ""; + + this._proposePropertiesUrl = null; + this._fetchColumnUrl = null; + this._serviceMetadata = null; + if ("reconConfig" in column) { + var service = column.reconConfig.service; + var serviceMetadata = ReconciliationManager.getServiceFromUrl(service); + this._serviceMetadata = serviceMetadata; + if ("extend" in serviceMetadata) { + var extend = serviceMetadata.extend; + if ("propose_properties" in extend) { + var endpoint = extend.propose_properties; + this._proposePropertiesUrl = endpoint.service_url + endpoint.service_path; + } + if ("fetch_column" in extend) { + var endpoint = extend.fetch_column; + this._fetchColumnUrl = endpoint.service_url + endpoint.service_path; + } + } + } + + ExtendReconciledDataPreviewDialog.getAllProperties(this._proposePropertiesUrl, type, function(properties) { + dismissBusy(); + self._show(properties); + }); +} + +ExtendReconciledDataPreviewDialog.getAllProperties = function(url, typeID, onDone) { + if(url == null) { + onDone([]); + } else { + var done = false; + $.getJSON( + url +"?type=" + typeID + "&callback=?", + null, + function(data) { + if (done) return; + done = true; + + var allProperties = []; + for (var i = 0; i < data.properties.length; i++) { + var property = data.properties[i]; + var property2 = { + id: property.id, + name: property.name + }; + /*if ("id2" in property) { + property2.expected = property.schema2; + property2.properties = [{ + id: property.id2, + name: property.name2, + expected: property.expects + }]; + } else { + property2.expected = property.expects; + } */ + allProperties.push(property2); + } + allProperties.sort(function(a, b) { return a.name.localeCompare(b.name); }); + + onDone(allProperties); + } + ); + + window.setTimeout(function() { + if (done) return; + + done = true; + onDone([]); + }, 7000); // time to give up? 
+ } +}; + +ExtendReconciledDataPreviewDialog.prototype._show = function(properties) { + this._level = DialogSystem.showDialog(this._dialog); + + var n = this._elmts.suggestedPropertyContainer.offset().top + + this._elmts.suggestedPropertyContainer.outerHeight(true) - + this._elmts.addPropertyInput.offset().top; + + this._elmts.previewContainer.height(Math.floor(n)); + + var self = this; + var container = this._elmts.suggestedPropertyContainer; + var renderSuggestedProperty = function(property) { + var label = ("properties" in property) ? (property.name + " » " + property.properties[0].name) : property.name; + var div = $('
').addClass("suggested-property").appendTo(container); + + $('') + .attr("href", "javascript:{}") + .html(label) + .appendTo(div) + .click(function() { + self._addProperty(property); + }); + }; + for (var i = 0; i < properties.length; i++) { + renderSuggestedProperty(properties[i]); + } + + var suggestConfig = $.extend({}, this._serviceMetadata.suggest.property); + suggestConfig.key = null; + suggestConfig.query_param_name = "prefix"; + /* var suggestConfig = { + filter: '(all type:/type/property)' + }; + if ((this._column.reconConfig) && (this._column.reconConfig.type)) { + suggestConfig.filter = '(all type:/type/property (any namespace:/type/object namespace:' + this._column.reconConfig.type.id + '))'; + } */ + + this._elmts.addPropertyInput.suggestP(suggestConfig).bind("fb-select", function(evt, data) { + var expected = data.expected_type; + self._addProperty({ + id : data.id, + name: data.name, + /* expected: { + id: expected.id, + name: expected.name + } */ + }); + }); +}; + +ExtendReconciledDataPreviewDialog.prototype._update = function() { + this._elmts.previewContainer.empty().text("Querying THE service..."); + + var self = this; + var params = { + project: theProject.id, + columnName: this._column.name + }; + + $.post( + "command/core/preview-extend-data?" 
+ $.param(params), + { + rowIndices: JSON.stringify(this._rowIndices), + extension: JSON.stringify(this._extension) + }, + function(data) { + self._renderPreview(data); + }, + "json" + ).fail(function(data) { + console.log(data); + }); +}; + +ExtendReconciledDataPreviewDialog.prototype._addProperty = function(p) { + var addSeveralToList = function(properties, oldProperties) { + for (var i = 0; i < properties.length; i++) { + addToList(properties[i], oldProperties); + } + }; + var addToList = function(property, oldProperties) { + for (var i = 0; i < oldProperties.length; i++) { + var oldProperty = oldProperties[i]; + if (oldProperty.id == property.id) { + if ("included" in property) { + oldProperty.included = "included" in oldProperty ? + (oldProperty.included || property.included) : + property.included; + } + + if ("properties" in property) { + if ("properties" in oldProperty) { + addSeveralToList(property.properties, oldProperty.properties); + } else { + oldProperty.properties = property.properties; + } + } + return; + } + } + + oldProperties.push(property); + }; + + addToList(p, this._extension.properties); + + this._update(); +}; + +ExtendReconciledDataPreviewDialog.prototype._renderPreview = function(data) { + var self = this; + var container = this._elmts.previewContainer.empty(); + if (data.code == "error") { + container.text("Error."); + return; + } + + var table = $('')[0]; + var trHead = table.insertRow(table.rows.length); + $('
').appendTo(trHead).text(this._column.name); + + var renderColumnHeader = function(column) { + var th = $('').appendTo(trHead); + + $('').html(column.names.join(" » ")).appendTo(th); + $('
').appendTo(th); + + $('') + .text("remove") + .addClass("action") + .attr("title", "Remove this column") + .click(function() { + self._removeProperty(column.path); + }).appendTo(th); + + $('') + .text("constrain") + .addClass("action") + .attr("title", "Add constraints to this column") + .click(function() { + self._constrainProperty(column.path); + }).appendTo(th); + }; + for (var c = 0; c < data.columns.length; c++) { + renderColumnHeader(data.columns[c]); + } + + for (var r = 0; r < data.rows.length; r++) { + var tr = table.insertRow(table.rows.length); + var row = data.rows[r]; + + for (var c = 0; c < row.length; c++) { + var td = tr.insertCell(tr.cells.length); + var cell = row[c]; + if (cell !== null) { + if ($.isPlainObject(cell)) { + $('').attr("href", "http://www.freebase.com/view" + cell.id).text(cell.name).appendTo(td); + } else { + $('').text(cell).appendTo(td); + } + } + } + } + + container.append(table); +}; + +ExtendReconciledDataPreviewDialog.prototype._removeProperty = function(path) { + var removeFromList = function(path, index, properties) { + var id = path[index]; + + for (var i = properties.length - 1; i >= 0; i--) { + var property = properties[i]; + if (property.id == id) { + if (index === path.length - 1) { + if ("included" in property) { + delete property.included; + } + } else if ("properties" in property && property.properties.length > 0) { + removeFromList(path, index + 1, property.properties); + } + + if (!("properties" in property) || property.properties.length === 0) { + properties.splice(i, 1); + } + + return; + } + } + }; + + removeFromList(path, 0, this._extension.properties); + + this._update(); +}; + +ExtendReconciledDataPreviewDialog.prototype._findProperty = function(path) { + var find = function(path, index, properties) { + var id = path[index]; + + for (var i = properties.length - 1; i >= 0; i--) { + var property = properties[i]; + if (property.id == id) { + if (index === path.length - 1) { + return property; + } else if 
("properties" in property && property.properties.length > 0) { + return find(path, index + 1, property.properties); + } + break; + } + } + + return null; + }; + + return find(path, 0, this._extension.properties); +}; + +ExtendReconciledDataPreviewDialog.prototype._constrainProperty = function(path) { + var self = this; + var property = this._findProperty(path); + + var frame = DialogSystem.createDialog(); + frame.width("500px"); + + var header = $('
').addClass("dialog-header").text("Constrain " + path.join(" > ")).appendTo(frame); + var body = $('
').addClass("dialog-body").appendTo(frame); + var footer = $('
').addClass("dialog-footer").appendTo(frame); + + body.html( + '
' + + '' + + '' + + '
' + + 'Enter MQL query constraints as JSON' + + '
' + + '' + + '
' + ); + var bodyElmts = DOM.bind(body); + + if ("constraints" in property) { + bodyElmts.textarea[0].value = JSON.stringify(property.constraints, null, 2); + } else { + bodyElmts.textarea[0].value = JSON.stringify({ "limit" : 10 }, null, 2); + } + + footer.html( + '' + + '' + ); + var footerElmts = DOM.bind(footer); + + var level = DialogSystem.showDialog(frame); + var dismiss = function() { + DialogSystem.dismissUntil(level - 1); + }; + + footerElmts.cancelButton.click(dismiss); + footerElmts.okButton.click(function() { + try { + var o = JSON.parse(bodyElmts.textarea[0].value); + if (o === undefined) { + alert("Please ensure that the JSON you enter is valid."); + return; + } + + if ($.isArray(o) && o.length == 1) { + o = o[0]; + } + if (!$.isPlainObject(o)) { + alert("The JSON you enter must be an object, that is, it is of this form { ... }."); + return; + } + + property.constraints = o; + + dismiss(); + + self._update(); + } catch (e) { + //console.log(e); + } + }); + + bodyElmts.textarea.focus(); +}; + diff --git a/main/webapp/modules/core/scripts/views/data-table/add-column-by-reconciliation.html b/main/webapp/modules/core/scripts/views/data-table/add-column-by-reconciliation.html new file mode 100644 index 000000000..7695cd02d --- /dev/null +++ b/main/webapp/modules/core/scripts/views/data-table/add-column-by-reconciliation.html @@ -0,0 +1,27 @@ +
+
+
+
+ + + + + + + + + + + + + + + +
+
+ +
+ diff --git a/main/webapp/modules/core/scripts/views/data-table/extend-data-preview-dialog.html b/main/webapp/modules/core/scripts/views/data-table/extend-data-preview-dialog.html new file mode 100644 index 000000000..f92e09fb9 --- /dev/null +++ b/main/webapp/modules/core/scripts/views/data-table/extend-data-preview-dialog.html @@ -0,0 +1,26 @@ +
+
+
+
+ + + + + + + + + + + + + + + +
Add PropertyPreview
Suggested Properties
+
+ +
\ No newline at end of file diff --git a/main/webapp/modules/core/scripts/views/data-table/menu-edit-column.js b/main/webapp/modules/core/scripts/views/data-table/menu-edit-column.js index 6e06addc3..85878a045 100644 --- a/main/webapp/modules/core/scripts/views/data-table/menu-edit-column.js +++ b/main/webapp/modules/core/scripts/views/data-table/menu-edit-column.js @@ -146,6 +146,74 @@ DataTableColumnHeaderUI.extendMenu(function(column, columnHeaderUI, menu) { }); }; + var doAddColumnByReconciliation = function() { + var columnIndex = Refine.columnNameToColumnIndex(column.name); + var o = DataTableView.sampleVisibleRows(column); + new ExtendReconciledDataPreviewDialog( + column, + columnIndex, + o.rowIndices, + function(extension) { + Refine.postProcess( + "core", + "extend-data", + { + baseColumnName: column.name, + columnInsertIndex: columnIndex + 1 + }, + { + extension: JSON.stringify(extension) + }, + { rowsChanged: true, modelsChanged: true } + ); + } + ); */ + }; + +/* + var doAddColumnByReconciliation = function() { + var frame = $( + DOM.loadHTML("core", "scripts/views/data-table/add-column-by-reconciliation.html")); + + var elmts = DOM.bind(frame); + elmts.dialogHeader.text($.i18n._('core-views')["add-by-recon"]); + + elmts.suggestedPropertyHeader.html('Suggested properties'); + elmts.previewHeader.html('Preview'); + elmts.addPropertyHeader.html('Add property'); + elmts.okButton.html($.i18n._('core-buttons')["ok"]); + elmts.cancelButton.text($.i18n._('core-buttons')["cancel"]); + + var level = DialogSystem.showDialog(frame); + var dismiss = function() { DialogSystem.dismissUntil(level - 1); }; + + elmts.cancelButton.click(dismiss); + elmts.okButton.click(function() { + var columnName = $.trim(elmts.columnNameInput[0].value); + if (!columnName.length) { + alert($.i18n._('core-views')["warning-col-name"]); + return; + } + + Refine.postCoreProcess( + "add-column-by-fetching-urls", + { + baseColumnName: column.name, + urlExpression: 
previewWidget.getExpression(true), + newColumnName: columnName, + columnInsertIndex: columnIndex + 1, + delay: elmts.throttleDelayInput[0].value, + onError: $('input[name="dialog-onerror-choice"]:checked')[0].value, + cacheResponses: $('input[name="dialog-cache-responses"]')[0].checked, + }, + null, + { modelsChanged: true } + ); + dismiss(); + }); + }; +*/ + var doRemoveColumn = function() { Refine.postCoreProcess( "remove-column", @@ -298,6 +366,11 @@ DataTableColumnHeaderUI.extendMenu(function(column, columnHeaderUI, menu) { label: $.i18n._('core-views')["add-by-urls"]+"...", click: doAddColumnByFetchingURLs }, + { + id: "core/add-column-by-reconciliation", + label: $.i18n._('core-views')["add-by-recon"]+"...", + click: doAddColumnByReconciliation + }, {}, { id: "core/rename-column", diff --git a/main/webapp/modules/core/styles/views/extend-data-preview-dialog.less b/main/webapp/modules/core/styles/views/extend-data-preview-dialog.less new file mode 100644 index 000000000..006e87306 --- /dev/null +++ b/main/webapp/modules/core/styles/views/extend-data-preview-dialog.less @@ -0,0 +1,71 @@ +/* + +Copyright 2010, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +@import-less url("../theme.less"); + +.extend-data-preview-dialog .suggested-property-container { + border: 1px solid #aaa; + padding: 5px; + overflow: auto; + height: 375px; + } + +.extend-data-preview-dialog .suggested-property { + padding: 5px; + } + +.extend-data-preview-dialog input.property-suggest { + display: block; + padding: 2%; + width: 96%; + } + +.extend-data-preview-dialog .preview-container { + border: 1px solid #aaa; + overflow: auto; + } + +.extend-data-preview-dialog .preview-container table { + border-collapse: collapse; + } + +.extend-data-preview-dialog .preview-container td, .extend-data-preview-dialog .preview-container th { + padding: 3px 5px; + border-bottom: 1px solid #ddd; + border-right: 1px solid #ddd; + } + +.extend-data-preview-dialog .preview-container th img { + vertical-align: top; + margin-left: 5px; + }