From ad3a174abd495621fc146ef9e28a959ac1f4e459 Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Tue, 4 Jul 2017 23:14:19 +0200 Subject: [PATCH 01/11] Starting to migrate data extension to standard reconciliation services --- .../commands/recon/ExtendDataCommand.java | 65 +++ .../recon/PreviewExtendDataCommand.java | 208 ++++++++ .../model/changes/DataExtensionChange.java | 469 ++++++++++++++++++ .../recon/ReconciledDataExtensionJob.java | 453 +++++++++++++++++ .../operations/recon/ExtendDataOperation.java | 314 ++++++++++++ .../webapp/modules/core/MOD-INF/controller.js | 4 + .../dialogs/extend-data-preview-dialog.js | 424 ++++++++++++++++ .../add-column-by-reconciliation.html | 27 + .../extend-data-preview-dialog.html | 26 + .../views/data-table/menu-edit-column.js | 73 +++ .../views/extend-data-preview-dialog.less | 71 +++ 11 files changed, 2134 insertions(+) create mode 100644 main/src/com/google/refine/commands/recon/ExtendDataCommand.java create mode 100644 main/src/com/google/refine/commands/recon/PreviewExtendDataCommand.java create mode 100644 main/src/com/google/refine/model/changes/DataExtensionChange.java create mode 100644 main/src/com/google/refine/model/recon/ReconciledDataExtensionJob.java create mode 100644 main/src/com/google/refine/operations/recon/ExtendDataOperation.java create mode 100644 main/webapp/modules/core/scripts/dialogs/extend-data-preview-dialog.js create mode 100644 main/webapp/modules/core/scripts/views/data-table/add-column-by-reconciliation.html create mode 100644 main/webapp/modules/core/scripts/views/data-table/extend-data-preview-dialog.html create mode 100644 main/webapp/modules/core/styles/views/extend-data-preview-dialog.less diff --git a/main/src/com/google/refine/commands/recon/ExtendDataCommand.java b/main/src/com/google/refine/commands/recon/ExtendDataCommand.java new file mode 100644 index 000000000..119e90442 --- /dev/null +++ b/main/src/com/google/refine/commands/recon/ExtendDataCommand.java @@ -0,0 +1,65 @@ +/* + 
+Copyright 2010, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+
+*/
+
+package com.google.refine.commands.recon;
+
+import javax.servlet.http.HttpServletRequest;
+
+import org.json.JSONObject;
+
+import com.google.refine.commands.EngineDependentCommand;
+import com.google.refine.model.AbstractOperation;
+import com.google.refine.model.Project;
+import com.google.refine.operations.recon.ExtendDataOperation;
+import com.google.refine.util.ParsingUtilities;
+
+/** Command that turns an "extend data" request into an {@link ExtendDataOperation}. */
+public class ExtendDataCommand extends EngineDependentCommand {
+    /** Builds the operation from the baseColumnName, columnInsertIndex and extension parameters. */
+    @Override
+    protected AbstractOperation createOperation(Project project,
+            HttpServletRequest request, JSONObject engineConfig) throws Exception {
+        String baseColumnName = request.getParameter("baseColumnName");
+        int columnInsertIndex = Integer.parseInt(request.getParameter("columnInsertIndex"));
+
+        String jsonString = request.getParameter("extension");
+        JSONObject extension = ParsingUtilities.evaluateJsonStringToObject(jsonString);
+
+        return new ExtendDataOperation(
+            engineConfig,
+            baseColumnName,
+            extension,
+            columnInsertIndex
+        );
+    }
+}
diff --git a/main/src/com/google/refine/commands/recon/PreviewExtendDataCommand.java b/main/src/com/google/refine/commands/recon/PreviewExtendDataCommand.java
new file mode 100644
index 000000000..ea655bb5d
--- /dev/null
+++ b/main/src/com/google/refine/commands/recon/PreviewExtendDataCommand.java
@@ -0,0 +1,208 @@
+/*
+
+Copyright 2010, Google Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+    * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+    * Neither the name of Google Inc.
nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +package com.google.refine.commands.recon; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.json.JSONArray; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.commands.Command; +import com.google.refine.model.recon.ReconciledDataExtensionJob; +import com.google.refine.model.recon.ReconciledDataExtensionJob.ColumnInfo; +import com.google.refine.model.recon.ReconciledDataExtensionJob.DataExtension; +import com.google.refine.model.Cell; +import com.google.refine.model.Project; +import com.google.refine.model.ReconCandidate; +import com.google.refine.model.Row; +import com.google.refine.model.Column; +import com.google.refine.model.recon.ReconConfig; +import com.google.refine.model.recon.StandardReconConfig; +import 
com.google.refine.util.ParsingUtilities;
+
+/**
+ * Previews a data extension for the matched cells of a reconciled column; the project is not modified.
+ */
+public class PreviewExtendDataCommand extends Command {
+    /**
+     * Reads the request parameters columnName, rowIndices (JSON array) and extension
+     * (JSON object), then responds with the preview columns and rows or an error object.
+     */
+    @Override
+    public void doPost(HttpServletRequest request, HttpServletResponse response)
+            throws ServletException, IOException {
+        try {
+            Project project = getProject(request);
+            String columnName = request.getParameter("columnName");
+
+            String rowIndicesString = request.getParameter("rowIndices");
+            if (rowIndicesString == null) {
+                respond(response, "{ \"code\" : \"error\", \"message\" : \"No row indices specified\" }");
+                return;
+            }
+
+            String jsonString = request.getParameter("extension");
+            JSONObject json = ParsingUtilities.evaluateJsonStringToObject(jsonString);
+
+            JSONArray rowIndices = ParsingUtilities.evaluateJsonStringToArray(rowIndicesString);
+            int length = rowIndices.length();
+            Column column = project.columnModel.getColumnByName(columnName);
+            if (column == null) {
+                // answer with an error object instead of failing on column.getCellIndex()
+                respond(response, "{ \"code\" : \"error\", \"message\" : \"No such column\" }");
+                return;
+            }
+            int cellIndex = column.getCellIndex();
+
+            // get the endpoint to extract data from
+            ReconConfig cfg = column.getReconConfig();
+            if (!(cfg instanceof StandardReconConfig)) {
+                respond(response, "{ \"code\" : \"error\", \"message\" : \"This column has not been reconciled with a standard service.\" }");
+                return;
+            }
+            String endpoint = ((StandardReconConfig) cfg).service;
+
+            // topicNames/topicIds: one entry per valid row index (null when unmatched); ids: real ids only
+            List<String> topicNames = new ArrayList<String>();
+            List<String> topicIds = new ArrayList<String>();
+            Set<String> ids = new HashSet<String>();
+            for (int i = 0; i < length; i++) {
+                int rowIndex = rowIndices.getInt(i);
+                if (rowIndex >= 0 && rowIndex < project.rows.size()) {
+                    Row row = project.rows.get(rowIndex);
+                    Cell cell = row.getCell(cellIndex);
+                    if (cell != null && cell.recon != null && cell.recon.match != null) {
+                        topicNames.add(cell.recon.match.name);
+                        topicIds.add(cell.recon.match.id);
+                        ids.add(cell.recon.match.id);
+                    } else {
+                        topicNames.add(null);
+                        topicIds.add(null);
+                    }
+                }
+            }
+
+            Map<String, ReconCandidate> reconCandidateMap = new HashMap<String, ReconCandidate>();
+            ReconciledDataExtensionJob job = new ReconciledDataExtensionJob(json, endpoint);
+            Map<String, DataExtension> map = job.extend(ids, reconCandidateMap);
+
+            response.setCharacterEncoding("UTF-8");
+            response.setHeader("Content-Type", "application/json");
+
+            JSONWriter writer = new JSONWriter(response.getWriter());
+            writer.object();
+            writer.key("code"); writer.value("ok");
+            writer.key("columns");
+                writer.array();
+                for (ColumnInfo info : job.columns) {
+                    writer.object();
+                    writer.key("names");
+                        writer.array();
+                        for (String name : info.names) {
+                            writer.value(name);
+                        }
+                        writer.endArray();
+                    writer.key("path");
+                        writer.array();
+                        for (String id : info.path) {
+                            writer.value(id);
+                        }
+                        writer.endArray();
+                    writer.endObject();
+                }
+                writer.endArray();
+
+            writer.key("rows");
+                writer.array();
+                for (int r = 0; r < topicNames.size(); r++) {
+                    String id = topicIds.get(r);
+                    String topicName = topicNames.get(r);
+                    if (id != null && map.containsKey(id)) {
+                        DataExtension ext = map.get(id);
+                        boolean first = true;
+                        if (ext.data.length > 0) {
+                            for (Object[] row : ext.data) {
+                                writer.array();
+                                if (first) {
+                                    writer.value(topicName);
+                                    first = false;
+                                } else {
+                                    writer.value(null);
+                                }
+                                for (Object cell : row) {
+                                    if (cell instanceof ReconCandidate) {
+                                        ReconCandidate rc = (ReconCandidate) cell;
+                                        writer.object();
+                                        writer.key("id"); writer.value(rc.id);
+                                        writer.key("name"); writer.value(rc.name);
+                                        writer.endObject();
+                                    } else {
+                                        writer.value(cell);
+                                    }
+                                }
+                                writer.endArray();
+                            }
+                            continue;
+                        }
+                    }
+                    // no extension data for this row: emit a single-cell row
+                    writer.array();
+                    if (id != null) {
+                        writer.object();
+                        writer.key("id"); writer.value(id);
+                        writer.key("name"); writer.value(topicName);
+                        writer.endObject();
+                    } else {
+                        writer.value("");
+                    }
+                    writer.endArray();
+                }
+                writer.endArray();
+            writer.endObject();
+        } catch (Exception e) {
+            respondException(response, e);
+        }
+    }
+}
diff --git a/main/src/com/google/refine/model/changes/DataExtensionChange.java b/main/src/com/google/refine/model/changes/DataExtensionChange.java
new file mode
100644 index 000000000..c39d13ea6 --- /dev/null +++ b/main/src/com/google/refine/model/changes/DataExtensionChange.java @@ -0,0 +1,469 @@ +/* + +Copyright 2010, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+
+*/
+
+package com.google.refine.model.changes;
+
+import java.io.IOException;
+import java.io.LineNumberReader;
+import java.io.Serializable;
+import java.io.Writer;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+import org.json.JSONException;
+import org.json.JSONObject;
+import org.json.JSONWriter;
+
+// import com.google.refine.freebase.FreebaseType; // NOTE(review): FreebaseType is still used below
+import com.google.refine.model.recon.DataExtensionReconConfig;
+import com.google.refine.model.recon.ReconciledDataExtensionJob.DataExtension;
+import com.google.refine.history.Change;
+import com.google.refine.model.Cell;
+import com.google.refine.model.Column;
+import com.google.refine.model.ModelException;
+import com.google.refine.model.Project;
+import com.google.refine.model.Recon;
+import com.google.refine.model.Recon.Judgment;
+import com.google.refine.model.ReconCandidate;
+import com.google.refine.model.ReconStats;
+import com.google.refine.model.Row;
+import com.google.refine.util.ParsingUtilities;
+import com.google.refine.util.Pool;
+
+/**
+ * History change that adds the columns produced by a data extension and, where an
+ * extension holds several rows of values, the extra project rows needed to hold them.
+ */
+public class DataExtensionChange implements Change {
+    final protected String              _baseColumnName;
+    final protected int                 _columnInsertIndex;
+    // names and types of the new columns, in insertion order
+    final protected List<String>        _columnNames;
+    final protected List<FreebaseType>  _columnTypes;
+    // _dataExtensions.get(i) holds the data fetched for row _rowIndices.get(i)
+    final protected List<Integer>       _rowIndices;
+    final protected List<DataExtension> _dataExtensions;
+
+    protected long                      _historyEntryID;
+    // stays negative until the first apply() has computed _oldRows and _newRows
+    protected int                       _firstNewCellIndex = -1;
+    protected List<Row>                 _oldRows;
+    protected List<Row>                 _newRows;
+
+    /** Creates a change that has not been applied yet; its rows are computed by the first apply(). */
+    public DataExtensionChange(
+        String baseColumnName,
+        int columnInsertIndex,
+        List<String> columnNames,
+        List<FreebaseType> columnTypes,
+        List<Integer> rowIndices,
+        List<DataExtension> dataExtensions,
+        long historyEntryID
+    ) {
+        _baseColumnName = baseColumnName;
+        _columnInsertIndex = columnInsertIndex;
+        _columnNames = columnNames;
+        _columnTypes = columnTypes;
+        _rowIndices = rowIndices;
+        _dataExtensions = dataExtensions;
+        _historyEntryID = historyEntryID;
+    }
+
+    /** Restores a change from its saved state, as done by load(). */
+    protected DataExtensionChange(
+        String baseColumnName,
+        int columnInsertIndex,
+        List<String> columnNames,
+        List<FreebaseType> columnTypes,
+        List<Integer> rowIndices,
+        List<DataExtension> dataExtensions,
+        int firstNewCellIndex,
+        List<Row> oldRows,
+        List<Row> newRows
+    ) {
+        _baseColumnName = baseColumnName;
+        _columnInsertIndex = columnInsertIndex;
+        _columnNames = columnNames;
+        _columnTypes = columnTypes;
+        _rowIndices = rowIndices;
+        _dataExtensions = dataExtensions;
+        _firstNewCellIndex = firstNewCellIndex;
+        _oldRows = oldRows;
+        _newRows = newRows;
+    }
+
+    /**
+     * Inserts the new columns and, on the first call, merges the extension data into a copy
+     * of the project rows. Later calls reuse the rows computed (or loaded) earlier.
+     */
+    @Override
+    public void apply(Project project) {
+        synchronized (project) {
+            if (_firstNewCellIndex < 0) {
+                _firstNewCellIndex = project.columnModel.allocateNewCellIndex();
+                for (int i = 1; i < _columnNames.size(); i++) {
+                    project.columnModel.allocateNewCellIndex();
+                }
+                // keep the untouched rows so that revert() can put them back
+                _oldRows = new ArrayList<Row>(project.rows);
+
+                _newRows = new ArrayList<Row>(project.rows.size());
+
+                int cellIndex = project.columnModel.getColumnByName(_baseColumnName).getCellIndex();
+                int keyCellIndex = project.columnModel.columns.get(project.columnModel.getKeyColumnIndex()).getCellIndex();
+                int index = 0;
+                // (rowIndex, dataExtension) is the next pending extension; rowIndex == _oldRows.size() once none is left
+                int rowIndex = index < _rowIndices.size() ? _rowIndices.get(index) : _oldRows.size();
+                DataExtension dataExtension = index < _rowIndices.size() ? _dataExtensions.get(index) : null;
+
+                index++;
+
+                Map<String, Recon> reconMap = new HashMap<String, Recon>();
+
+                for (int r = 0; r < _oldRows.size(); r++) {
+                    Row oldRow = _oldRows.get(r);
+                    if (r < rowIndex) {
+                        _newRows.add(oldRow.dup());
+                        continue;
+                    }
+
+                    if (dataExtension == null || dataExtension.data.length == 0) {
+                        _newRows.add(oldRow);
+                    } else {
+                        Row firstNewRow = oldRow.dup();
+                        extendRow(firstNewRow, dataExtension, 0, reconMap);
+                        _newRows.add(firstNewRow);
+                        // further values reuse the following rows while those are blank in the base and key columns
+                        int r2 = r + 1;
+                        for (int subR = 1; subR < dataExtension.data.length; subR++) {
+                            if (r2 < project.rows.size()) {
+                                Row oldRow2 = project.rows.get(r2);
+                                if (oldRow2.isCellBlank(cellIndex) &&
+                                    oldRow2.isCellBlank(keyCellIndex)) {
+
+                                    Row newRow = oldRow2.dup();
+                                    extendRow(newRow, dataExtension, subR, reconMap);
+
+                                    _newRows.add(newRow);
+                                    r2++;
+
+                                    continue;
+                                }
+                            }
+                            // otherwise a fresh row is inserted for this value
+                            Row newRow = new Row(cellIndex + _columnNames.size());
+                            extendRow(newRow, dataExtension, subR, reconMap);
+
+                            _newRows.add(newRow);
+                        }
+
+                        r = r2 - 1; // r will be incremented by the for loop anyway
+                    }
+
+                    rowIndex = index < _rowIndices.size() ? _rowIndices.get(index) : _oldRows.size();
+                    dataExtension = index < _rowIndices.size() ? _dataExtensions.get(index) : null;
+                    index++;
+                }
+            }
+
+            project.rows.clear();
+            project.rows.addAll(_newRows);
+
+            for (int i = 0; i < _columnNames.size(); i++) {
+                String name = _columnNames.get(i);
+                int cellIndex = _firstNewCellIndex + i;
+
+                Column column = new Column(cellIndex, name);
+                column.setReconConfig(new DataExtensionReconConfig(_columnTypes.get(i)));
+                column.setReconStats(ReconStats.create(project, cellIndex));
+
+                try {
+                    project.columnModel.addColumn(_columnInsertIndex + i, column, true);
+
+                    // the column might have been renamed to avoid collision
+                    _columnNames.set(i, column.getName());
+                } catch (ModelException e) {
+                    // won't get here since we set the avoid collision flag
+                }
+            }
+
+            project.update();
+        }
+    }
+
+    /**
+     * Copies one row of extension values into the new cells of row. A reconciled value
+     * becomes a matched cell; cells with the same candidate id share one Recon via reconMap.
+     */
+    protected void extendRow(
+        Row row,
+        DataExtension dataExtension,
+        int extensionRowIndex,
+        Map<String, Recon> reconMap
+    ) {
+        Object[] values = dataExtension.data[extensionRowIndex];
+        for (int c = 0; c < values.length; c++) {
+            Object value = values[c];
+            Cell cell = null;
+
+            if (value instanceof ReconCandidate) {
+                ReconCandidate rc = (ReconCandidate) value;
+                Recon recon;
+                if (reconMap.containsKey(rc.id)) {
+                    recon = reconMap.get(rc.id);
+                } else {
+                    // NOTE(review): makeFreebaseRecon and "mql" look like Freebase leftovers; confirm
+                    recon = Recon.makeFreebaseRecon(_historyEntryID);
+                    recon.addCandidate(rc);
+                    recon.service = "mql";
+                    recon.match = rc;
+                    recon.matchRank = 0;
+                    recon.judgment = Judgment.Matched;
+                    recon.judgmentAction = "auto";
+                    recon.judgmentBatchSize = 1;
+                    reconMap.put(rc.id, recon);
+                }
+                cell = new Cell(rc.name, recon);
+            } else {
+                cell = new Cell((Serializable) value, null);
+            }
+            row.setCell(_firstNewCellIndex + c, cell);
+        }
+    }
+
+    /** Restores the rows saved by apply() and removes the columns it inserted. */
+    @Override
+    public void revert(Project project) {
+        synchronized (project) {
+            project.rows.clear();
+            project.rows.addAll(_oldRows);
+            // the new columns are adjacent, so removing at the insert index drops each in turn
+            for (int i = 0; i < _columnNames.size(); i++) {
+                project.columnModel.columns.remove(_columnInsertIndex);
+            }
+            project.update();
+        }
+    }
+
+    /**
+     * Writes the change as "key=value" lines, each count being followed by that many
+     * lines, and ends with the end-of-change marker.
+     */
+    @Override
+    public void save(Writer writer, Properties options) throws IOException {
+        writer.write("baseColumnName="); writer.write(_baseColumnName); writer.write('\n');
+        writer.write("columnInsertIndex="); writer.write(Integer.toString(_columnInsertIndex)); writer.write('\n');
+        writer.write("columnNameCount="); writer.write(Integer.toString(_columnNames.size())); writer.write('\n');
+        for (String name : _columnNames) {
+            writer.write(name); writer.write('\n');
+        }
+        writer.write("columnTypeCount="); writer.write(Integer.toString(_columnTypes.size())); writer.write('\n');
+        for (FreebaseType type : _columnTypes) {
+            try {
+                JSONWriter jsonWriter = new JSONWriter(writer);
+                type.write(jsonWriter, options);
+            } catch (JSONException e) {
+                throw new IOException(e); // do not leave a truncated record behind silently
+            }
+            writer.write('\n');
+        }
+        writer.write("rowIndexCount="); writer.write(Integer.toString(_rowIndices.size())); writer.write('\n');
+        for (Integer rowIndex : _rowIndices) {
+            writer.write(rowIndex.toString()); writer.write('\n');
+        }
+        writer.write("dataExtensionCount="); writer.write(Integer.toString(_dataExtensions.size())); writer.write('\n');
+        for (DataExtension dataExtension : _dataExtensions) {
+            if (dataExtension == null) {
+                writer.write('\n');
+                continue;
+            }
+            writer.write(Integer.toString(dataExtension.data.length)); writer.write('\n');
+            for (Object[] values : dataExtension.data) {
+                for (Object value : values) {
+                    if (value == null) {
+                        writer.write("null");
+                    } else if (value instanceof ReconCandidate) {
+                        try {
+                            JSONWriter jsonWriter = new JSONWriter(writer);
+                            ((ReconCandidate) value).write(jsonWriter, options);
+                        } catch (JSONException e) {
+                            throw new IOException(e); // do not leave a truncated record behind silently
+                        }
+                    } else if (value instanceof String) {
+                        writer.write(JSONObject.quote((String) value));
+                    } else {
+                        writer.write(value.toString());
+                    }
+                    writer.write('\n');
+                }
+            }
+        }
+
+        writer.write("firstNewCellIndex="); writer.write(Integer.toString(_firstNewCellIndex)); writer.write('\n');
+        writer.write("newRowCount="); writer.write(Integer.toString(_newRows.size())); writer.write('\n');
+        for (Row row : _newRows) {
+            row.save(writer, options);
+            writer.write('\n');
+        }
+        writer.write("oldRowCount="); writer.write(Integer.toString(_oldRows.size())); writer.write('\n');
+        for (Row row : _oldRows) {
+            row.save(writer, options);
+            writer.write('\n');
+        }
+        writer.write("/ec/\n"); // end of change marker
+    }
+
+    /**
+     * Reads back a change written by save(). Lines without '=' and lines with an
+     * unknown key are ignored.
+     */
+    static public Change load(LineNumberReader reader, Pool pool) throws Exception {
+        String baseColumnName = null;
+        int columnInsertIndex = -1;
+        List<String> columnNames = null;
+        List<FreebaseType> columnTypes = null;
+
+        List<Integer> rowIndices = null;
+        List<DataExtension> dataExtensions = null;
+
+        List<Row> oldRows = null;
+        List<Row> newRows = null;
+
+        int firstNewCellIndex = -1;
+
+        String line;
+        while ((line = reader.readLine()) != null && !"/ec/".equals(line)) {
+            int equal = line.indexOf('=');
+            if (equal < 0) {
+                continue; // not a "key=value" line; subSequence(0, -1) would throw
+            }
+            CharSequence field = line.subSequence(0, equal);
+            String value = line.substring(equal + 1);
+
+            if ("baseColumnName".equals(field)) {
+                baseColumnName = value;
+            } else if ("columnInsertIndex".equals(field)) {
+                columnInsertIndex = Integer.parseInt(value);
+            } else if ("firstNewCellIndex".equals(field)) {
+                firstNewCellIndex = Integer.parseInt(value);
+            } else if ("rowIndexCount".equals(field)) {
+                int count = Integer.parseInt(value);
+                rowIndices = new ArrayList<Integer>(count);
+                for (int i = 0; i < count; i++) {
+                    line = reader.readLine();
+                    if (line != null) {
+                        rowIndices.add(Integer.parseInt(line));
+                    }
+                }
+            } else if ("columnNameCount".equals(field)) {
+                int count = Integer.parseInt(value);
+                columnNames = new ArrayList<String>(count);
+                for (int i = 0; i < count; i++) {
+                    line = reader.readLine();
+                    if (line != null) {
+                        columnNames.add(line);
+                    }
+                }
+            } else if ("columnTypeCount".equals(field)) {
+                int count = Integer.parseInt(value);
+                columnTypes = new ArrayList<FreebaseType>(count);
+                for (int i = 0; i < count; i++) {
+                    line = reader.readLine();
+                    columnTypes.add(FreebaseType.load(ParsingUtilities.evaluateJsonStringToObject(line)));
+                }
+            } else if ("dataExtensionCount".equals(field)) {
+                int count = Integer.parseInt(value);
+                dataExtensions = new ArrayList<DataExtension>(count);
+                for (int i = 0; i < count; i++) {
+                    line = reader.readLine();
+                    if (line == null) {
+                        continue;
+                    }
+
+                    if (line.length() == 0) {
+                        dataExtensions.add(null);
+                        continue;
+                    }
+
+                    int rowCount = Integer.parseInt(line);
+                    Object[][] data = new Object[rowCount][];
+
+                    for (int r = 0; r < rowCount; r++) {
+                        Object[] row = new Object[columnNames.size()];
+                        for (int c = 0; c < columnNames.size(); c++) {
+                            line = reader.readLine();
+                            // NOTE(review): save() also writes plain strings, numbers and "null" here; confirm loadStreaming copes
+                            row[c] = ReconCandidate.loadStreaming(line);
+                        }
+                        data[r] = row;
+                    }
+
+                    dataExtensions.add(new DataExtension(data));
+                }
+            } else if ("oldRowCount".equals(field)) {
+                int count = Integer.parseInt(value);
+                oldRows = new ArrayList<Row>(count);
+                for (int i = 0; i < count; i++) {
+                    line = reader.readLine();
+                    if (line != null) {
+                        oldRows.add(Row.load(line, pool));
+                    }
+                }
+            } else if ("newRowCount".equals(field)) {
+                int count = Integer.parseInt(value);
+                newRows = new ArrayList<Row>(count);
+                for (int i = 0; i < count; i++) {
+                    line = reader.readLine();
+                    if (line != null) {
+                        newRows.add(Row.load(line, pool));
+                    }
+                }
+            }
+        }
+
+        return new DataExtensionChange(
+            baseColumnName,
+            columnInsertIndex,
+            columnNames,
+            columnTypes,
+            rowIndices,
+            dataExtensions,
+            firstNewCellIndex,
+            oldRows,
+            newRows
+        );
+    }
+}
diff --git a/main/src/com/google/refine/model/recon/ReconciledDataExtensionJob.java b/main/src/com/google/refine/model/recon/ReconciledDataExtensionJob.java
new file mode 100644
index 000000000..30f619c1e
--- /dev/null
+++
b/main/src/com/google/refine/model/recon/ReconciledDataExtensionJob.java @@ -0,0 +1,453 @@ +/* + +Copyright 2010, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+
+*/
+
+package com.google.refine.model.recon;
+
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Serializable;
+import java.io.StringWriter;
+import java.io.Writer;
+import java.net.URL;
+import java.net.URLConnection;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.json.JSONArray;
+import org.json.JSONException;
+import org.json.JSONObject;
+import org.json.JSONWriter;
+
+// import com.google.refine.freebase.FreebaseType;
+import com.google.refine.model.ReconCandidate;
+import com.google.refine.model.recon.StandardReconConfig;
+import com.google.refine.util.JSONUtilities;
+import com.google.refine.util.ParsingUtilities;
+
+/**
+ * Fetches additional properties for already reconciled identifiers by posting an
+ * "extend" query to a reconciliation service endpoint and tabulating the response.
+ */
+public class ReconciledDataExtensionJob {
+    /** The values fetched for one identifier, addressed as data[row][column]. */
+    static public class DataExtension {
+        final public Object[][] data;
+        public DataExtension(Object[][] data) {
+            this.data = data;
+        }
+    }
+
+    /** Describes one column of the extension by its names and its property path. */
+    static public class ColumnInfo {
+        final public List<String> names;
+        final public List<String> path;
+        // TODO: final public FreebaseType expectedType;
+
+        protected ColumnInfo(List<String> names, List<String> path /*, FreebaseType expectedType */) {
+            this.names = names;
+            this.path = path;
+            // this.expectedType = expectedType;
+        }
+    }
+
+    final public JSONObject         extension;
+    final public String             endpoint;
+    final public int                columnCount;
+    final public List<ColumnInfo>   columns = new ArrayList<ColumnInfo>();
+
+    /** Keeps the extension configuration and counts its columns; columnCount is 0 without "properties". */
+    public ReconciledDataExtensionJob(JSONObject obj, String endpoint) throws JSONException {
+        this.extension = obj;
+        this.endpoint = endpoint;
+        this.columnCount = (obj.has("properties") && !obj.isNull("properties")) ?
+                countColumns(obj.getJSONArray("properties"), columns, new ArrayList<String>(), new ArrayList<String>()) : 0;
+    }
+
+    /**
+     * Queries the endpoint for the given ids and returns the extension found for each id
+     * listed under "rows" in the response. Ids without a record are absent from the map.
+     */
+    public Map<String, ReconciledDataExtensionJob.DataExtension> extend(
+        Set<String> ids,
+        Map<String, ReconCandidate> reconCandidateMap
+    ) throws Exception {
+        StringWriter writer = new StringWriter();
+        formulateQuery(ids, extension, writer);
+        // Extract the order of properties
+        JSONArray origProperties = extension.getJSONArray("properties");
+        List<String> properties = new ArrayList<String>();
+        int l = origProperties.length();
+        for (int i = 0; i < l; i++) {
+            properties.add(origProperties.getJSONObject(i).getString("id"));
+        }
+
+        String query = writer.toString();
+        InputStream is = performQuery(this.endpoint, query);
+        try {
+            String s = ParsingUtilities.inputStreamToString(is);
+            JSONObject o = ParsingUtilities.evaluateJsonStringToObject(s);
+
+            Map<String, ReconciledDataExtensionJob.DataExtension> map = new HashMap<String, ReconciledDataExtensionJob.DataExtension>();
+            if (o.has("rows")) {
+                JSONObject records = o.getJSONObject("rows");
+                // for each identifier
+                for (String id : ids) {
+                    if (id != null && records.has(id)) {
+                        JSONObject record = records.getJSONObject(id);
+                        ReconciledDataExtensionJob.DataExtension ext = collectResult(record, properties, reconCandidateMap);
+                        if (ext != null) {
+                            map.put(id, ext);
+                        }
+                    }
+                }
+            }
+            return map;
+        } finally {
+            is.close();
+        }
+    }
+
+    /**
+     * POSTs the query, form-encoded as the "extend" parameter, to the endpoint and returns
+     * the response stream; the caller is expected to close it.
+     */
+    static protected InputStream performQuery(String endpoint, String query) throws IOException {
+        URL url = new URL(endpoint);
+        URLConnection connection = url.openConnection();
+        connection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
+        connection.setConnectTimeout(5000);
+        connection.setDoOutput(true);
+
+        DataOutputStream dos = new DataOutputStream(connection.getOutputStream());
+        try {
+            String body = "extend=" + ParsingUtilities.encode(query);
+            dos.writeBytes(body);
+        } finally {
+            dos.flush();
+            dos.close();
+        }
+
+        connection.connect();
+        return connection.getInputStream();
+    }
+
+    /**
+     * Lays out the values of one record as a table: one column per requested property, in
+     * the order of properties, and one row per value index.
+     */
+    protected ReconciledDataExtensionJob.DataExtension collectResult(
+        JSONObject record,
+        List<String> properties,
+        Map<String, ReconCandidate> reconCandidateMap
+    ) throws JSONException {
+        List<Object[]> rows = new ArrayList<Object[]>();
+        // for each property
+        int colindex = 0;
+        for (String pid : properties) {
+            // a property missing from the record still occupies its column, which stays empty
+            JSONArray values = record.optJSONArray(pid);
+            int valueCount = (values == null) ? 0 : values.length();
+            // for each value
+            for (int rowindex = 0; rowindex < valueCount; rowindex++) {
+                JSONObject val = values.getJSONObject(rowindex);
+                // store a reconciled value
+                if (val.has("id")) {
+                    storeCell(rows, rowindex, colindex, val, reconCandidateMap);
+                } else if (val.has("str")) {
+                    // store a bare string
+                    String str = val.getString("str");
+                    storeStr(rows, rowindex, colindex, str);
+                }
+                // TODO other cases for other types of values (dates, booleans, …)
+            }
+            colindex++;
+        }
+
+        Object[][] data = new Object[rows.size()][columnCount];
+        rows.toArray(data);
+
+        return new DataExtension(data);
+    }
+
+    /** Stores a plain string at (row, col), appending empty rows as needed. */
+    protected void storeStr(
+        List<Object[]> rows,
+        int row,
+        int col,
+        String str
+    ) throws JSONException {
+        while (row >= rows.size()) {
+            rows.add(new Object[columnCount]);
+        }
+        rows.get(row)[col] = str;
+    }
+
+    /** Stores value at (row, col), appending empty rows as needed; reconCandidateMap is not read here. */
+    protected void storeCell(
+        List<Object[]> rows,
+        int row,
+        int col,
+        Object value,
+        Map<String, ReconCandidate> reconCandidateMap
+    ) {
+        while (row >= rows.size()) {
+            rows.add(new Object[columnCount]);
+        }
+        rows.get(row)[col] = value;
+    }
+
+    /** Stores the candidate described by obj at (row, col), reusing the instance cached for its id. */
+    protected void storeCell(
+        List<Object[]> rows,
+        int row,
+        int col,
+        JSONObject obj,
+        Map<String, ReconCandidate> reconCandidateMap
+    ) throws JSONException {
+        String id = obj.getString("id");
+        ReconCandidate rc;
+        if (reconCandidateMap.containsKey(id)) {
+            rc = reconCandidateMap.get(id);
+        } else {
+            rc = new ReconCandidate(
+                    obj.getString("id"),
+                    obj.getString("name"),
+                    JSONUtilities.getStringArray(obj, "type"),
+                    100 // NOTE(review): presumably the candidate score; confirm
+                );
+            reconCandidateMap.put(id, rc);
+        }
+        storeCell(rows, row, col, rc, reconCandidateMap);
+    }
+    /*
+    protected int[] collectResult(
+        List rows,
+        JSONObject extNode,
+        JSONObject resultNode,
+        int startRowIndex,
+        int startColumnIndex,
+        Map reconCandidateMap
+ ) throws JSONException { + String propertyID = extNode.getString("id"); + // String expectedTypeID = extNode.getJSONObject("expected").getString("id"); + + JSONArray a = resultNode != null && resultNode.has(propertyID) && !resultNode.isNull(propertyID) ? + resultNode.getJSONArray(propertyID) : null; + + if ("/type/key".equals(expectedTypeID)) { + if (a != null) { + int l = a.length(); + for (int r = 0; r < l; r++) { + Object o = a.isNull(r) ? null : a.get(r); + if (o instanceof JSONObject) { + storeStr(rows, startRowIndex++, startColumnIndex, (JSONObject) o, reconCandidateMap); + } + } + } + + // note that we still take up a column even if we don't have any data + return new int[] { startRowIndex, startColumnIndex + 1 }; + } else if (expectedTypeID.startsWith("/type/")) { + if (a != null) { + int l = a.length(); + for (int r = 0; r < l; r++) { + Object o = a.isNull(r) ? null : a.get(r); + if (o instanceof Serializable) { + storeCell(rows, startRowIndex++, startColumnIndex, o, reconCandidateMap); + } + } + } + + // note that we still take up a column even if we don't have any data + return new int[] { startRowIndex, startColumnIndex + 1 }; + } else { + boolean hasSubProperties = (extNode.has("properties") && !extNode.isNull("properties")); + boolean isOwnColumn = !hasSubProperties || (extNode.has("included") && extNode.getBoolean("included")); + + if (a != null && a.length() > 0) { + int maxColIndex = startColumnIndex; + + int l = a.length(); + for (int r = 0; r < l; r++) { + Object v = a.isNull(r) ? null : a.get(r); + JSONObject o = v != null && v instanceof JSONObject ? 
(JSONObject) v : null; + + int startColumnIndex2 = startColumnIndex; + int startRowIndex2 = startRowIndex; + + if (isOwnColumn) { + if (o != null) { + storeCell(rows, startRowIndex2++, startColumnIndex2++, o, reconCandidateMap); + } else { + storeCell(rows, startRowIndex2++, startColumnIndex2++, v, reconCandidateMap); + } + } + + if (hasSubProperties && o != null) { + int[] rowcol = collectResult( + rows, + extNode.getJSONArray("properties"), + o, + startRowIndex, + startColumnIndex2, + reconCandidateMap + ); + + startRowIndex2 = rowcol[0]; + startColumnIndex2 = rowcol[1]; + } + + startRowIndex = startRowIndex2; + maxColIndex = Math.max(maxColIndex, startColumnIndex2); + } + + return new int[] { startRowIndex, maxColIndex }; + } else { + return new int[] { + startRowIndex, + startColumnIndex + countColumns(extNode, null, new ArrayList(), new ArrayList()) + }; + } + } + } + + protected int[] collectResult( + List rows, + JSONArray subProperties, + JSONObject resultNode, + int startRowIndex, + int startColumnIndex, + Map reconCandidateMap + ) throws JSONException { + int maxStartRowIndex = startRowIndex; + + int k = subProperties.length(); + for (int c = 0; c < k; c++) { + int[] rowcol = collectResult( + rows, + subProperties.getJSONObject(c), + resultNode, + startRowIndex, + startColumnIndex, + reconCandidateMap + ); + + maxStartRowIndex = Math.max(maxStartRowIndex, rowcol[0]); + startColumnIndex = rowcol[1]; + } + + return new int[] { maxStartRowIndex, startColumnIndex }; + }*/ + + + + static protected void formulateQuery(Set ids, JSONObject node, Writer writer) throws JSONException { + JSONWriter jsonWriter = new JSONWriter(writer); + + jsonWriter.object(); + + jsonWriter.key("ids"); + jsonWriter.array(); + for (String id : ids) { + if (id != null) { + jsonWriter.value(id); + } + } + jsonWriter.endArray(); + + jsonWriter.key("properties"); + jsonWriter.array(); + JSONArray properties = node.getJSONArray("properties"); + int l = properties.length(); + + for (int i 
= 0; i < l; i++) { + JSONObject property = properties.getJSONObject(i); + jsonWriter.object(); + jsonWriter.key("id"); + jsonWriter.value(property.getString("id")); + // TODO translate constraints as below + jsonWriter.endObject(); + } + jsonWriter.endArray(); + jsonWriter.endObject(); + } + + + static protected int countColumns(JSONObject obj, List columns, List names, List path) throws JSONException { + String name = obj.getString("name"); + + List names2 = null; + List path2 = null; + if (columns != null) { + names2 = new ArrayList(names); + names2.add(name); + + path2 = new ArrayList(path); + path2.add(obj.getString("id")); + } + + if (obj.has("properties") && !obj.isNull("properties")) { + boolean included = (obj.has("included") && obj.getBoolean("included")); + if (included && columns != null) { + // JSONObject expected = obj.getJSONObject("expected"); + + columns.add(new ColumnInfo(names2, path2 + /* new FreebaseType(expected.getString("id"), expected.getString("name")) */)); + } + + return (included ? 1 : 0) + + countColumns(obj.getJSONArray("properties"), columns, names2, path2); + } else { + if (columns != null) { + // JSONObject expected = obj.getJSONObject("expected"); + + columns.add(new ColumnInfo(names2, path2 + /* new FreebaseType(expected.getString("id"), expected.getString("name")) */ )); + } + return 1; + } + } + + static protected int countColumns(JSONArray a, List columns, List names, List path) throws JSONException { + int c = 0; + int l = a.length(); + for (int i = 0; i < l; i++) { + c += countColumns(a.getJSONObject(i), columns, names, path); + } + return c; + } +} diff --git a/main/src/com/google/refine/operations/recon/ExtendDataOperation.java b/main/src/com/google/refine/operations/recon/ExtendDataOperation.java new file mode 100644 index 000000000..12f24392a --- /dev/null +++ b/main/src/com/google/refine/operations/recon/ExtendDataOperation.java @@ -0,0 +1,314 @@ +/* + +Copyright 2010, Google Inc. +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +package com.google.refine.operations.recon; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.Set; + +import org.apache.commons.lang.StringUtils; +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.browsing.Engine; +import com.google.refine.browsing.FilteredRows; +import com.google.refine.browsing.RowVisitor; +import com.google.refine.model.changes.DataExtensionChange; +import com.google.refine.model.recon.DataExtensionJob; +import com.google.refine.model.recon.DataExtensionJob.ColumnInfo; +import com.google.refine.model.recon.DataExtensionJob.DataExtension; +import com.google.refine.history.HistoryEntry; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Cell; +import com.google.refine.model.Column; +import com.google.refine.model.Project; +import com.google.refine.model.ReconCandidate; +import com.google.refine.model.Row; +import com.google.refine.model.changes.CellAtRow; +import com.google.refine.operations.EngineDependentOperation; +import com.google.refine.operations.OperationRegistry; +import com.google.refine.process.LongRunningProcess; +import com.google.refine.process.Process; + +public class ExtendDataOperation extends EngineDependentOperation { + final protected String _baseColumnName; + final protected JSONObject _extension; + final protected int _columnInsertIndex; + + static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { + JSONObject engineConfig = obj.getJSONObject("engineConfig"); + + return new ExtendDataOperation( + engineConfig, + obj.getString("baseColumnName"), + obj.getJSONObject("extension"), + obj.getInt("columnInsertIndex") + ); + } + + public ExtendDataOperation( + JSONObject engineConfig, + String baseColumnName, + JSONObject extension, + int columnInsertIndex + ) 
{ + super(engineConfig); + + _baseColumnName = baseColumnName; + _extension = extension; + _columnInsertIndex = columnInsertIndex; + } + + @Override + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); + writer.key("description"); writer.value(getBriefDescription(null)); + writer.key("engineConfig"); writer.value(getEngineConfig()); + writer.key("columnInsertIndex"); writer.value(_columnInsertIndex); + writer.key("baseColumnName"); writer.value(_baseColumnName); + writer.key("extension"); writer.value(_extension); + writer.endObject(); + } + + @Override + protected String getBriefDescription(Project project) { + return "Extend data at index " + _columnInsertIndex + + " based on column " + _baseColumnName; + } + + protected String createDescription(Column column, List cellsAtRows) { + return "Extend data at index " + _columnInsertIndex + + " based on column " + column.getName() + + " by filling " + cellsAtRows.size(); + } + + @Override + public Process createProcess(Project project, Properties options) throws Exception { + return new ExtendDataProcess( + project, + getEngineConfig(), + getBriefDescription(null) + ); + } + + public class ExtendDataProcess extends LongRunningProcess implements Runnable { + final protected Project _project; + final protected JSONObject _engineConfig; + final protected long _historyEntryID; + protected int _cellIndex; + protected FreebaseDataExtensionJob _job; + + public ExtendDataProcess( + Project project, + JSONObject engineConfig, + String description + ) throws JSONException { + super(description); + _project = project; + _engineConfig = engineConfig; + _historyEntryID = HistoryEntry.allocateID(); + + _job = new FreebaseDataExtensionJob(_extension); + } + + @Override + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("id"); 
writer.value(hashCode()); + writer.key("description"); writer.value(_description); + writer.key("immediate"); writer.value(false); + writer.key("status"); writer.value(_thread == null ? "pending" : (_thread.isAlive() ? "running" : "done")); + writer.key("progress"); writer.value(_progress); + writer.endObject(); + } + + @Override + protected Runnable getRunnable() { + return this; + } + + protected void populateRowsWithMatches(List rowIndices) throws Exception { + Engine engine = new Engine(_project); + engine.initializeFromJSON(_engineConfig); + + Column column = _project.columnModel.getColumnByName(_baseColumnName); + if (column == null) { + throw new Exception("No column named " + _baseColumnName); + } + + _cellIndex = column.getCellIndex(); + + FilteredRows filteredRows = engine.getAllFilteredRows(); + filteredRows.accept(_project, new RowVisitor() { + List _rowIndices; + + public RowVisitor init(List rowIndices) { + _rowIndices = rowIndices; + return this; + } + + @Override + public void start(Project project) { + // nothing to do + } + + @Override + public void end(Project project) { + // nothing to do + } + + @Override + public boolean visit(Project project, int rowIndex, Row row) { + Cell cell = row.getCell(_cellIndex); + if (cell != null && cell.recon != null && cell.recon.match != null) { + _rowIndices.add(rowIndex); + } + + return false; + } + }.init(rowIndices)); + } + + protected int extendRows( + List rowIndices, + List dataExtensions, + int from, + int limit, + Map reconCandidateMap + ) { + Set ids = new HashSet(); + + int end; + for (end = from; end < limit && ids.size() < 10; end++) { + int index = rowIndices.get(end); + Row row = _project.rows.get(index); + Cell cell = row.getCell(_cellIndex); + + ids.add(cell.recon.match.id); + } + + Map map = null; + try { + map = _job.extend(ids, reconCandidateMap); + } catch (Exception e) { + map = new HashMap(); + } + + for (int i = from; i < end; i++) { + int index = rowIndices.get(i); + Row row = 
_project.rows.get(index); + Cell cell = row.getCell(_cellIndex); + String guid = cell.recon.match.id; + + if (map.containsKey(guid)) { + dataExtensions.add(map.get(guid)); + } else { + dataExtensions.add(null); + } + } + + return end; + } + + @Override + public void run() { + List rowIndices = new ArrayList(); + List dataExtensions = new ArrayList(); + + try { + populateRowsWithMatches(rowIndices); + } catch (Exception e2) { + // TODO : Not sure what to do here? + e2.printStackTrace(); + } + + int start = 0; + Map reconCandidateMap = new HashMap(); + + while (start < rowIndices.size()) { + int end = extendRows(rowIndices, dataExtensions, start, rowIndices.size(), reconCandidateMap); + start = end; + + _progress = end * 100 / rowIndices.size(); + try { + Thread.sleep(200); + } catch (InterruptedException e) { + if (_canceled) { + break; + } + } + } + + if (!_canceled) { + List columnNames = new ArrayList(); + for (ColumnInfo info : _job.columns) { + columnNames.add(StringUtils.join(info.names, " - ")); + } + + List columnTypes = new ArrayList(); + for (ColumnInfo info : _job.columns) { + columnTypes.add(info.expectedType); + } + + HistoryEntry historyEntry = new HistoryEntry( + _historyEntryID, + _project, + _description, + ExtendDataOperation.this, + new DataExtensionChange( + _baseColumnName, + _columnInsertIndex, + columnNames, + columnTypes, + rowIndices, + dataExtensions, + _historyEntryID) + ); + + _project.history.addEntry(historyEntry); + _project.processManager.onDoneProcess(this); + } + } + } +} diff --git a/main/webapp/modules/core/MOD-INF/controller.js b/main/webapp/modules/core/MOD-INF/controller.js index d7dde610b..404e55070 100644 --- a/main/webapp/modules/core/MOD-INF/controller.js +++ b/main/webapp/modules/core/MOD-INF/controller.js @@ -121,6 +121,8 @@ function registerCommands() { RS.registerCommand(module, "recon-clear-one-cell", new Packages.com.google.refine.commands.recon.ReconClearOneCellCommand()); RS.registerCommand(module, 
"recon-clear-similar-cells", new Packages.com.google.refine.commands.recon.ReconClearSimilarCellsCommand()); RS.registerCommand(module, "recon-copy-across-columns", new Packages.com.google.refine.commands.recon.ReconCopyAcrossColumnsCommand()); + RS.registerCommand(module, "preview-extend-data", new Packages.com.google.refine.commands.recon.PreviewExtendDataCommand()); + RS.registerCommand(module, "extend-data", new Packages.com.google.refine.commands.recon.ExtendDataCommand()); RS.registerCommand(module, "guess-types-of-column", new Packages.com.google.refine.commands.recon.GuessTypesOfColumnCommand()); @@ -367,6 +369,7 @@ function init() { "styles/index/default-importing-sources.less", "styles/views/data-table-view.less", // for the preview table's styles + "styles/views/extend-data-preview-dialog.less", "styles/index/fixed-width-parser-ui.less", "styles/index/xml-parser-ui.less", "styles/index/json-parser-ui.less" @@ -431,6 +434,7 @@ function init() { "scripts/reconciliation/standard-service-panel.js", "scripts/dialogs/expression-preview-dialog.js", + "scripts/dialogs/extend-data-preview-dialog.js", "scripts/dialogs/clustering-dialog.js", "scripts/dialogs/scatterplot-dialog.js", "scripts/dialogs/templating-exporter-dialog.js", diff --git a/main/webapp/modules/core/scripts/dialogs/extend-data-preview-dialog.js b/main/webapp/modules/core/scripts/dialogs/extend-data-preview-dialog.js new file mode 100644 index 000000000..ca27f02d9 --- /dev/null +++ b/main/webapp/modules/core/scripts/dialogs/extend-data-preview-dialog.js @@ -0,0 +1,424 @@ +/* + +Copyright 2010, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + */ + +function ExtendReconciledDataPreviewDialog(column, columnIndex, rowIndices, onDone) { + this._column = column; + this._columnIndex = columnIndex; + this._rowIndices = rowIndices; + this._onDone = onDone; + this._extension = { properties: [] }; + + var self = this; + this._dialog = $(DOM.loadHTML("core", "scripts/views/data-table/extend-data-preview-dialog.html")); + this._elmts = DOM.bind(this._dialog); + this._elmts.dialogHeader.html("Add columns by reconciled column " + column.name); + this._elmts.resetButton.click(function() { + self._extension.properties = []; + self._update(); + }); + + this._elmts.okButton.click(function() { + if (self._extension.properties.length === 0) { + alert("Please add some properties first."); + } else { + DialogSystem.dismissUntil(self._level - 1); + self._onDone(self._extension); + } + }); + this._elmts.cancelButton.click(function() { + DialogSystem.dismissUntil(self._level - 1); + }); + + var dismissBusy = DialogSystem.showBusy(); + var type = (column.reconConfig) && (column.reconConfig.type) ? 
column.reconConfig.type.id : ""; + + this._proposePropertiesUrl = null; + this._fetchColumnUrl = null; + this._serviceMetadata = null; + if ("reconConfig" in column) { + var service = column.reconConfig.service; + var serviceMetadata = ReconciliationManager.getServiceFromUrl(service); + this._serviceMetadata = serviceMetadata; + if ("extend" in serviceMetadata) { + var extend = serviceMetadata.extend; + if ("propose_properties" in extend) { + var endpoint = extend.propose_properties; + this._proposePropertiesUrl = endpoint.service_url + endpoint.service_path; + } + if ("fetch_column" in extend) { + var endpoint = extend.fetch_column; + this._fetchColumnUrl = endpoint.service_url + endpoint.service_path; + } + } + } + + ExtendReconciledDataPreviewDialog.getAllProperties(this._proposePropertiesUrl, type, function(properties) { + dismissBusy(); + self._show(properties); + }); +} + +ExtendReconciledDataPreviewDialog.getAllProperties = function(url, typeID, onDone) { + if(url == null) { + onDone([]); + } else { + var done = false; + $.getJSON( + url +"?type=" + typeID + "&callback=?", + null, + function(data) { + if (done) return; + done = true; + + var allProperties = []; + for (var i = 0; i < data.properties.length; i++) { + var property = data.properties[i]; + var property2 = { + id: property.id, + name: property.name + }; + /*if ("id2" in property) { + property2.expected = property.schema2; + property2.properties = [{ + id: property.id2, + name: property.name2, + expected: property.expects + }]; + } else { + property2.expected = property.expects; + } */ + allProperties.push(property2); + } + allProperties.sort(function(a, b) { return a.name.localeCompare(b.name); }); + + onDone(allProperties); + } + ); + + window.setTimeout(function() { + if (done) return; + + done = true; + onDone([]); + }, 7000); // time to give up? 
+ } +}; + +ExtendReconciledDataPreviewDialog.prototype._show = function(properties) { + this._level = DialogSystem.showDialog(this._dialog); + + var n = this._elmts.suggestedPropertyContainer.offset().top + + this._elmts.suggestedPropertyContainer.outerHeight(true) - + this._elmts.addPropertyInput.offset().top; + + this._elmts.previewContainer.height(Math.floor(n)); + + var self = this; + var container = this._elmts.suggestedPropertyContainer; + var renderSuggestedProperty = function(property) { + var label = ("properties" in property) ? (property.name + " » " + property.properties[0].name) : property.name; + var div = $('
').addClass("suggested-property").appendTo(container); + + $('') + .attr("href", "javascript:{}") + .html(label) + .appendTo(div) + .click(function() { + self._addProperty(property); + }); + }; + for (var i = 0; i < properties.length; i++) { + renderSuggestedProperty(properties[i]); + } + + var suggestConfig = $.extend({}, this._serviceMetadata.suggest.property); + suggestConfig.key = null; + suggestConfig.query_param_name = "prefix"; + /* var suggestConfig = { + filter: '(all type:/type/property)' + }; + if ((this._column.reconConfig) && (this._column.reconConfig.type)) { + suggestConfig.filter = '(all type:/type/property (any namespace:/type/object namespace:' + this._column.reconConfig.type.id + '))'; + } */ + + this._elmts.addPropertyInput.suggestP(suggestConfig).bind("fb-select", function(evt, data) { + var expected = data.expected_type; + self._addProperty({ + id : data.id, + name: data.name, + /* expected: { + id: expected.id, + name: expected.name + } */ + }); + }); +}; + +ExtendReconciledDataPreviewDialog.prototype._update = function() { + this._elmts.previewContainer.empty().text("Querying THE service..."); + + var self = this; + var params = { + project: theProject.id, + columnName: this._column.name + }; + + $.post( + "command/core/preview-extend-data?" 
+ $.param(params), + { + rowIndices: JSON.stringify(this._rowIndices), + extension: JSON.stringify(this._extension) + }, + function(data) { + self._renderPreview(data); + }, + "json" + ).fail(function(data) { + console.log(data); + }); +}; + +ExtendReconciledDataPreviewDialog.prototype._addProperty = function(p) { + var addSeveralToList = function(properties, oldProperties) { + for (var i = 0; i < properties.length; i++) { + addToList(properties[i], oldProperties); + } + }; + var addToList = function(property, oldProperties) { + for (var i = 0; i < oldProperties.length; i++) { + var oldProperty = oldProperties[i]; + if (oldProperty.id == property.id) { + if ("included" in property) { + oldProperty.included = "included" in oldProperty ? + (oldProperty.included || property.included) : + property.included; + } + + if ("properties" in property) { + if ("properties" in oldProperty) { + addSeveralToList(property.properties, oldProperty.properties); + } else { + oldProperty.properties = property.properties; + } + } + return; + } + } + + oldProperties.push(property); + }; + + addToList(p, this._extension.properties); + + this._update(); +}; + +ExtendReconciledDataPreviewDialog.prototype._renderPreview = function(data) { + var self = this; + var container = this._elmts.previewContainer.empty(); + if (data.code == "error") { + container.text("Error."); + return; + } + + var table = $('')[0]; + var trHead = table.insertRow(table.rows.length); + $(''); + + // If the service metadata specifies fields, we build a proper form to make it more user-friendly + var fields = self._serviceMetadata.extend.property_settings; + if (fields != null) { + form = ''; + for(var i = 0; i < fields.length; i++) { + var field = fields[i]; + var fieldHTML = ''; + var currentValue = field.default; + if (property.settings != null && property.settings[field.name] != null) { + currentValue = property.settings[field.name]; + } + if (field.type == 'select') { + fieldHTML += ''+field.label+':
'; + for(var j = 0; j < field.choices.length; j++) { + var choice = field.choices[j]; + fieldHTML += ''; + form += ''; + } + } + } + + body.html( + '
').appendTo(trHead).text(this._column.name); + + var renderColumnHeader = function(column) { + var th = $('').appendTo(trHead); + + $('').html(column.names.join(" » ")).appendTo(th); + $('
').appendTo(th); + + $('') + .text("remove") + .addClass("action") + .attr("title", "Remove this column") + .click(function() { + self._removeProperty(column.path); + }).appendTo(th); + + $('') + .text("constrain") + .addClass("action") + .attr("title", "Add constraints to this column") + .click(function() { + self._constrainProperty(column.path); + }).appendTo(th); + }; + for (var c = 0; c < data.columns.length; c++) { + renderColumnHeader(data.columns[c]); + } + + for (var r = 0; r < data.rows.length; r++) { + var tr = table.insertRow(table.rows.length); + var row = data.rows[r]; + + for (var c = 0; c < row.length; c++) { + var td = tr.insertCell(tr.cells.length); + var cell = row[c]; + if (cell !== null) { + if ($.isPlainObject(cell)) { + $('').attr("href", "http://www.freebase.com/view" + cell.id).text(cell.name).appendTo(td); + } else { + $('').text(cell).appendTo(td); + } + } + } + } + + container.append(table); +}; + +ExtendReconciledDataPreviewDialog.prototype._removeProperty = function(path) { + var removeFromList = function(path, index, properties) { + var id = path[index]; + + for (var i = properties.length - 1; i >= 0; i--) { + var property = properties[i]; + if (property.id == id) { + if (index === path.length - 1) { + if ("included" in property) { + delete property.included; + } + } else if ("properties" in property && property.properties.length > 0) { + removeFromList(path, index + 1, property.properties); + } + + if (!("properties" in property) || property.properties.length === 0) { + properties.splice(i, 1); + } + + return; + } + } + }; + + removeFromList(path, 0, this._extension.properties); + + this._update(); +}; + +ExtendReconciledDataPreviewDialog.prototype._findProperty = function(path) { + var find = function(path, index, properties) { + var id = path[index]; + + for (var i = properties.length - 1; i >= 0; i--) { + var property = properties[i]; + if (property.id == id) { + if (index === path.length - 1) { + return property; + } else if 
("properties" in property && property.properties.length > 0) { + return find(path, index + 1, property.properties); + } + break; + } + } + + return null; + }; + + return find(path, 0, this._extension.properties); +}; + +ExtendReconciledDataPreviewDialog.prototype._constrainProperty = function(path) { + var self = this; + var property = this._findProperty(path); + + var frame = DialogSystem.createDialog(); + frame.width("500px"); + + var header = $('
').addClass("dialog-header").text("Constrain " + path.join(" > ")).appendTo(frame); + var body = $('
').addClass("dialog-body").appendTo(frame); + var footer = $('
').addClass("dialog-footer").appendTo(frame); + + body.html( + '
' + + '' + + '' + + '
' + + 'Enter MQL query constraints as JSON' + + '
' + + '' + + '
' + ); + var bodyElmts = DOM.bind(body); + + if ("constraints" in property) { + bodyElmts.textarea[0].value = JSON.stringify(property.constraints, null, 2); + } else { + bodyElmts.textarea[0].value = JSON.stringify({ "limit" : 10 }, null, 2); + } + + footer.html( + '' + + '' + ); + var footerElmts = DOM.bind(footer); + + var level = DialogSystem.showDialog(frame); + var dismiss = function() { + DialogSystem.dismissUntil(level - 1); + }; + + footerElmts.cancelButton.click(dismiss); + footerElmts.okButton.click(function() { + try { + var o = JSON.parse(bodyElmts.textarea[0].value); + if (o === undefined) { + alert("Please ensure that the JSON you enter is valid."); + return; + } + + if ($.isArray(o) && o.length == 1) { + o = o[0]; + } + if (!$.isPlainObject(o)) { + alert("The JSON you enter must be an object, that is, it is of this form { ... }."); + return; + } + + property.constraints = o; + + dismiss(); + + self._update(); + } catch (e) { + //console.log(e); + } + }); + + bodyElmts.textarea.focus(); +}; + diff --git a/main/webapp/modules/core/scripts/views/data-table/add-column-by-reconciliation.html b/main/webapp/modules/core/scripts/views/data-table/add-column-by-reconciliation.html new file mode 100644 index 000000000..7695cd02d --- /dev/null +++ b/main/webapp/modules/core/scripts/views/data-table/add-column-by-reconciliation.html @@ -0,0 +1,27 @@ +
+
+
+
+ + + + + + + + + + + + + + + +
+
+ +
+ diff --git a/main/webapp/modules/core/scripts/views/data-table/extend-data-preview-dialog.html b/main/webapp/modules/core/scripts/views/data-table/extend-data-preview-dialog.html new file mode 100644 index 000000000..f92e09fb9 --- /dev/null +++ b/main/webapp/modules/core/scripts/views/data-table/extend-data-preview-dialog.html @@ -0,0 +1,26 @@ +
+
+
+
+ + + + + + + + + + + + + + + +
Add PropertyPreview
Suggested Properties
+
+ +
\ No newline at end of file diff --git a/main/webapp/modules/core/scripts/views/data-table/menu-edit-column.js b/main/webapp/modules/core/scripts/views/data-table/menu-edit-column.js index 6e06addc3..85878a045 100644 --- a/main/webapp/modules/core/scripts/views/data-table/menu-edit-column.js +++ b/main/webapp/modules/core/scripts/views/data-table/menu-edit-column.js @@ -146,6 +146,74 @@ DataTableColumnHeaderUI.extendMenu(function(column, columnHeaderUI, menu) { }); }; + var doAddColumnByReconciliation = function() { + var columnIndex = Refine.columnNameToColumnIndex(column.name); + var o = DataTableView.sampleVisibleRows(column); + new ExtendReconciledDataPreviewDialog( + column, + columnIndex, + o.rowIndices, + function(extension) { + Refine.postProcess( + "core", + "extend-data", + { + baseColumnName: column.name, + columnInsertIndex: columnIndex + 1 + }, + { + extension: JSON.stringify(extension) + }, + { rowsChanged: true, modelsChanged: true } + ); + } + ); */ + }; + +/* + var doAddColumnByReconciliation = function() { + var frame = $( + DOM.loadHTML("core", "scripts/views/data-table/add-column-by-reconciliation.html")); + + var elmts = DOM.bind(frame); + elmts.dialogHeader.text($.i18n._('core-views')["add-by-recon"]); + + elmts.suggestedPropertyHeader.html('Suggested properties'); + elmts.previewHeader.html('Preview'); + elmts.addPropertyHeader.html('Add property'); + elmts.okButton.html($.i18n._('core-buttons')["ok"]); + elmts.cancelButton.text($.i18n._('core-buttons')["cancel"]); + + var level = DialogSystem.showDialog(frame); + var dismiss = function() { DialogSystem.dismissUntil(level - 1); }; + + elmts.cancelButton.click(dismiss); + elmts.okButton.click(function() { + var columnName = $.trim(elmts.columnNameInput[0].value); + if (!columnName.length) { + alert($.i18n._('core-views')["warning-col-name"]); + return; + } + + Refine.postCoreProcess( + "add-column-by-fetching-urls", + { + baseColumnName: column.name, + urlExpression: 
previewWidget.getExpression(true), + newColumnName: columnName, + columnInsertIndex: columnIndex + 1, + delay: elmts.throttleDelayInput[0].value, + onError: $('input[name="dialog-onerror-choice"]:checked')[0].value, + cacheResponses: $('input[name="dialog-cache-responses"]')[0].checked, + }, + null, + { modelsChanged: true } + ); + dismiss(); + }); + }; +*/ + var doRemoveColumn = function() { Refine.postCoreProcess( "remove-column", @@ -298,6 +366,11 @@ DataTableColumnHeaderUI.extendMenu(function(column, columnHeaderUI, menu) { label: $.i18n._('core-views')["add-by-urls"]+"...", click: doAddColumnByFetchingURLs }, + { + id: "core/add-column-by-reconciliation", + label: $.i18n._('core-views')["add-by-recon"]+"...", + click: doAddColumnByReconciliation + }, {}, { id: "core/rename-column", diff --git a/main/webapp/modules/core/styles/views/extend-data-preview-dialog.less b/main/webapp/modules/core/styles/views/extend-data-preview-dialog.less new file mode 100644 index 000000000..006e87306 --- /dev/null +++ b/main/webapp/modules/core/styles/views/extend-data-preview-dialog.less @@ -0,0 +1,71 @@ +/* + +Copyright 2010, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +@import-less url("../theme.less"); + +.extend-data-preview-dialog .suggested-property-container { + border: 1px solid #aaa; + padding: 5px; + overflow: auto; + height: 375px; + } + +.extend-data-preview-dialog .suggested-property { + padding: 5px; + } + +.extend-data-preview-dialog input.property-suggest { + display: block; + padding: 2%; + width: 96%; + } + +.extend-data-preview-dialog .preview-container { + border: 1px solid #aaa; + overflow: auto; + } + +.extend-data-preview-dialog .preview-container table { + border-collapse: collapse; + } + +.extend-data-preview-dialog .preview-container td, .extend-data-preview-dialog .preview-container th { + padding: 3px 5px; + border-bottom: 1px solid #ddd; + border-right: 1px solid #ddd; + } + +.extend-data-preview-dialog .preview-container th img { + vertical-align: top; + margin-left: 5px; + } From d99128c3304e395195e16d3552cc50c57ca63e35 Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Thu, 6 Jul 2017 21:15:37 +0200 Subject: [PATCH 02/11] Retrieve types from the extend service --- .../commands/recon/ExtendDataCommand.java | 4 +- .../recon/PreviewExtendDataCommand.java | 18 +- .../com/google/refine/model/ReconType.java | 79 ++++ 
.../model/changes/DataExtensionChange.java | 47 ++- .../model/recon/DataExtensionReconConfig.java | 109 ++++++ .../recon/ReconciledDataExtensionJob.java | 354 +++++------------- .../operations/recon/ExtendDataOperation.java | 21 +- .../dialogs/extend-data-preview-dialog.js | 72 ++-- .../views/data-table/menu-edit-column.js | 5 +- 9 files changed, 366 insertions(+), 343 deletions(-) create mode 100644 main/src/com/google/refine/model/ReconType.java create mode 100644 main/src/com/google/refine/model/recon/DataExtensionReconConfig.java diff --git a/main/src/com/google/refine/commands/recon/ExtendDataCommand.java b/main/src/com/google/refine/commands/recon/ExtendDataCommand.java index 119e90442..52cd66476 100644 --- a/main/src/com/google/refine/commands/recon/ExtendDataCommand.java +++ b/main/src/com/google/refine/commands/recon/ExtendDataCommand.java @@ -38,7 +38,7 @@ import javax.servlet.http.HttpServletRequest; import org.json.JSONObject; import com.google.refine.commands.EngineDependentCommand; -import com.google.refine.freebase.operations.ExtendDataOperation; +import com.google.refine.operations.recon.ExtendDataOperation; import com.google.refine.model.AbstractOperation; import com.google.refine.model.Project; import com.google.refine.util.ParsingUtilities; @@ -50,6 +50,7 @@ public class ExtendDataCommand extends EngineDependentCommand { String baseColumnName = request.getParameter("baseColumnName"); int columnInsertIndex = Integer.parseInt(request.getParameter("columnInsertIndex")); + String endpoint = request.getParameter("endpoint"); String jsonString = request.getParameter("extension"); JSONObject extension = ParsingUtilities.evaluateJsonStringToObject(jsonString); @@ -57,6 +58,7 @@ public class ExtendDataCommand extends EngineDependentCommand { return new ExtendDataOperation( engineConfig, baseColumnName, + endpoint, extension, columnInsertIndex ); diff --git a/main/src/com/google/refine/commands/recon/PreviewExtendDataCommand.java 
b/main/src/com/google/refine/commands/recon/PreviewExtendDataCommand.java index ea655bb5d..037b1e87d 100644 --- a/main/src/com/google/refine/commands/recon/PreviewExtendDataCommand.java +++ b/main/src/com/google/refine/commands/recon/PreviewExtendDataCommand.java @@ -133,19 +133,11 @@ public class PreviewExtendDataCommand extends Command { writer.array(); for (ColumnInfo info : job.columns) { writer.object(); - writer.key("names"); - writer.array(); - for (String name : info.names) { - writer.value(name); - } - writer.endArray(); - writer.key("path"); - writer.array(); - for (String id : info.path) { - writer.value(id); - } - writer.endArray(); - writer.endObject(); + writer.key("name"); + writer.value(info.name); + writer.key("id"); + writer.value(info.id); + writer.endObject(); } writer.endArray(); diff --git a/main/src/com/google/refine/model/ReconType.java b/main/src/com/google/refine/model/ReconType.java new file mode 100644 index 000000000..088da69b5 --- /dev/null +++ b/main/src/com/google/refine/model/ReconType.java @@ -0,0 +1,79 @@ +/* + +Copyright 2010, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +package com.google.refine.model; + +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.Jsonizable; + +/** + * This represents a type from the reconciliation + * service. It is used when extending data to + * store the (expected) types of new columns. 
+ */ +public class ReconType implements Jsonizable { + public String id; + public String name; + + public ReconType(String id, String name) { + this.id = id; + this.name = name; + } + + @Override + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("id"); writer.value(id); + writer.key("name"); writer.value(name); + writer.endObject(); + } + + static public ReconType load(JSONObject obj) throws Exception { + if (obj == null) { + return null; + } + + ReconType type = new ReconType( + obj.getString("id"), + obj.getString("name") + ); + return type; + } +} diff --git a/main/src/com/google/refine/model/changes/DataExtensionChange.java b/main/src/com/google/refine/model/changes/DataExtensionChange.java index c39d13ea6..4a3463174 100644 --- a/main/src/com/google/refine/model/changes/DataExtensionChange.java +++ b/main/src/com/google/refine/model/changes/DataExtensionChange.java @@ -47,9 +47,9 @@ import org.json.JSONException; import org.json.JSONObject; import org.json.JSONWriter; -// import com.google.refine.freebase.FreebaseType; +import com.google.refine.model.ReconType; import com.google.refine.model.recon.DataExtensionReconConfig; -import com.google.refine.model.recon.FreebaseDataExtensionJob.DataExtension; +import com.google.refine.model.recon.ReconciledDataExtensionJob.DataExtension; import com.google.refine.history.Change; import com.google.refine.model.Cell; import com.google.refine.model.Column; @@ -65,10 +65,11 @@ import com.google.refine.util.Pool; public class DataExtensionChange implements Change { final protected String _baseColumnName; + final protected String _service; final protected int _columnInsertIndex; final protected List _columnNames; - final protected List _columnTypes; + final protected List _columnTypes; final protected List _rowIndices; final protected List _dataExtensions; @@ -79,15 +80,17 @@ public class DataExtensionChange implements Change { protected List _newRows; public 
DataExtensionChange( - String baseColumnName, + String baseColumnName, + String service, int columnInsertIndex, List columnNames, - List columnTypes, + List columnTypes, List rowIndices, List dataExtensions, long historyEntryID ) { _baseColumnName = baseColumnName; + _service = service; _columnInsertIndex = columnInsertIndex; _columnNames = columnNames; @@ -101,10 +104,11 @@ public class DataExtensionChange implements Change { protected DataExtensionChange( String baseColumnName, + String service, int columnInsertIndex, List columnNames, - List columnTypes, + List columnTypes, List rowIndices, List dataExtensions, @@ -113,6 +117,7 @@ public class DataExtensionChange implements Change { List newRows ) { _baseColumnName = baseColumnName; + _service = service; _columnInsertIndex = columnInsertIndex; _columnNames = columnNames; @@ -204,7 +209,11 @@ public class DataExtensionChange implements Change { int cellIndex = _firstNewCellIndex + i; Column column = new Column(cellIndex, name); - column.setReconConfig(new DataExtensionReconConfig(_columnTypes.get(i))); + column.setReconConfig(new DataExtensionReconConfig( + _service, + "", // TODO retrieve service by URL and fill this + "", + _columnTypes.get(i))); column.setReconStats(ReconStats.create(project, cellIndex)); try { @@ -275,17 +284,21 @@ public class DataExtensionChange implements Change { @Override public void save(Writer writer, Properties options) throws IOException { writer.write("baseColumnName="); writer.write(_baseColumnName); writer.write('\n'); + writer.write("service="); writer.write(_service); writer.write('\n'); writer.write("columnInsertIndex="); writer.write(Integer.toString(_columnInsertIndex)); writer.write('\n'); writer.write("columnNameCount="); writer.write(Integer.toString(_columnNames.size())); writer.write('\n'); for (String name : _columnNames) { writer.write(name); writer.write('\n'); } writer.write("columnTypeCount="); writer.write(Integer.toString(_columnTypes.size())); writer.write('\n'); 
- for (FreebaseType type : _columnTypes) { + for (ReconType type : _columnTypes) { try { - JSONWriter jsonWriter = new JSONWriter(writer); - - type.write(jsonWriter, options); + if(type == null) { + writer.write("null"); + } else { + JSONWriter jsonWriter = new JSONWriter(writer); + type.write(jsonWriter, options); + } } catch (JSONException e) { // ??? } @@ -342,10 +355,11 @@ public class DataExtensionChange implements Change { static public Change load(LineNumberReader reader, Pool pool) throws Exception { String baseColumnName = null; + String service = null; int columnInsertIndex = -1; List columnNames = null; - List columnTypes = null; + List columnTypes = null; List rowIndices = null; List dataExtensions = null; @@ -363,6 +377,8 @@ public class DataExtensionChange implements Change { if ("baseColumnName".equals(field)) { baseColumnName = value; + } else if ("service".equals(field)) { + service = value; } else if ("columnInsertIndex".equals(field)) { columnInsertIndex = Integer.parseInt(value); } else if ("firstNewCellIndex".equals(field)) { @@ -390,10 +406,12 @@ public class DataExtensionChange implements Change { } else if ("columnTypeCount".equals(field)) { int count = Integer.parseInt(value); - columnTypes = new ArrayList(count); + columnTypes = new ArrayList(count); for (int i = 0; i < count; i++) { line = reader.readLine(); - columnTypes.add(FreebaseType.load(ParsingUtilities.evaluateJsonStringToObject(line))); + if (line != null) { + columnTypes.add(ReconType.load(ParsingUtilities.evaluateJsonStringToObject(line))); + } } } else if ("dataExtensionCount".equals(field)) { int count = Integer.parseInt(value); @@ -453,6 +471,7 @@ public class DataExtensionChange implements Change { DataExtensionChange change = new DataExtensionChange( baseColumnName, + service, columnInsertIndex, columnNames, columnTypes, diff --git a/main/src/com/google/refine/model/recon/DataExtensionReconConfig.java b/main/src/com/google/refine/model/recon/DataExtensionReconConfig.java 
new file mode 100644 index 000000000..15e0156c0 --- /dev/null +++ b/main/src/com/google/refine/model/recon/DataExtensionReconConfig.java @@ -0,0 +1,109 @@ +/* + +Copyright 2010, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +package com.google.refine.model.recon; + +import java.util.List; +import java.util.Properties; +import java.util.ArrayList; + +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.model.ReconType; +import com.google.refine.model.Cell; +import com.google.refine.model.Project; +import com.google.refine.model.Recon; +import com.google.refine.model.Row; +import com.google.refine.model.recon.StandardReconConfig; +import com.google.refine.model.recon.ReconJob; + +public class DataExtensionReconConfig extends StandardReconConfig { + final public ReconType type; + + private final static String WARN = "Not implemented"; + + static public ReconConfig reconstruct(JSONObject obj) throws Exception { + JSONObject type = obj.getJSONObject("type"); + + ReconType typ = null; + if(obj.has("id")) { + typ = new ReconType(obj.getString("id"), + obj.has("name") ? obj.getString("name") : obj.getString("id")); + } + + return new DataExtensionReconConfig( + obj.getString("service"), + obj.has("identifierSpace") ? obj.getString("identifierSpace") : null, + obj.has("schemaSpace") ? obj.getString("schemaSpace") : null, + typ); + } + + public DataExtensionReconConfig( + String service, + String identifierSpace, + String schemaSpace, + ReconType type) { + super( + service, + identifierSpace, + schemaSpace, + type != null ? type.id : null, + type != null ? 
type.name : null, + true, + new ArrayList()); + this.type = type; + } + + @Override + public ReconJob createJob(Project project, int rowIndex, Row row, + String columnName, Cell cell) { + throw new RuntimeException(WARN); + } + + @Override + public int getBatchSize() { + throw new RuntimeException(WARN); + } + + @Override + public List batchRecon(List jobs, long historyEntryID) { + throw new RuntimeException(WARN); + } + + @Override + public String getBriefDescription(Project project, String columnName) { + throw new RuntimeException(WARN); + } +} diff --git a/main/src/com/google/refine/model/recon/ReconciledDataExtensionJob.java b/main/src/com/google/refine/model/recon/ReconciledDataExtensionJob.java index 30f619c1e..b0bffd8df 100644 --- a/main/src/com/google/refine/model/recon/ReconciledDataExtensionJob.java +++ b/main/src/com/google/refine/model/recon/ReconciledDataExtensionJob.java @@ -55,11 +55,12 @@ import org.json.JSONException; import org.json.JSONObject; import org.json.JSONWriter; -// import com.google.refine.freebase.FreebaseType; +import com.google.refine.model.ReconType; import com.google.refine.model.ReconCandidate; import com.google.refine.model.recon.StandardReconConfig; import com.google.refine.util.JSONUtilities; import com.google.refine.util.ParsingUtilities; +import com.google.refine.expr.functions.ToDate; public class ReconciledDataExtensionJob { static public class DataExtension { @@ -71,28 +72,24 @@ public class ReconciledDataExtensionJob { } static public class ColumnInfo { - final public List names; - final public List path; - // final public FreebaseType expectedType; - // TODO + final public String name; + final public String id; + final public ReconType expectedType; - protected ColumnInfo(List names, List path /*, FreebaseType expectedType */) { - this.names = names; - this.path = path; - // this.expectedType = expectedType; + protected ColumnInfo(String name, String id, ReconType expectedType) { + this.name = name; + this.id = id; + 
this.expectedType = expectedType; } } final public JSONObject extension; final public String endpoint; - final public int columnCount; final public List columns = new ArrayList(); public ReconciledDataExtensionJob(JSONObject obj, String endpoint) throws JSONException { this.extension = obj; this.endpoint = endpoint; - this.columnCount = (obj.has("properties") && !obj.isNull("properties")) ? - countColumns(obj.getJSONArray("properties"), columns, new ArrayList(), new ArrayList()) : 0; } public Map extend( @@ -102,35 +99,30 @@ public class ReconciledDataExtensionJob { StringWriter writer = new StringWriter(); formulateQuery(ids, extension, writer); - // Extract the order of properties - JSONArray origProperties = extension.getJSONArray("properties"); - List properties = new ArrayList(); - int l = origProperties.length(); - for (int i = 0; i < l; i++) { - properties.add(origProperties.getJSONObject(i).getString("id")); - } - String query = writer.toString(); InputStream is = performQuery(this.endpoint, query); try { String s = ParsingUtilities.inputStreamToString(is); JSONObject o = ParsingUtilities.evaluateJsonStringToObject(s); - + + // Extract the column metadata + gatherColumnInfo(o.getJSONArray("meta"), columns); + Map map = new HashMap(); if (o.has("rows")){ JSONObject records = o.getJSONObject("rows"); - // for each identifier + // for each identifier for (String id : ids) { - if (records.has(id)) { - JSONObject record = records.getJSONObject(id); - - ReconciledDataExtensionJob.DataExtension ext = collectResult(record, properties, reconCandidateMap); - - if (ext != null) { - map.put(id, ext); - } - } + if (records.has(id)) { + JSONObject record = records.getJSONObject(id); + + ReconciledDataExtensionJob.DataExtension ext = collectResult(record, reconCandidateMap); + + if (ext != null) { + map.put(id, ext); + } + } } } @@ -166,65 +158,65 @@ public class ReconciledDataExtensionJob { protected ReconciledDataExtensionJob.DataExtension collectResult( JSONObject 
record, - List properties, Map reconCandidateMap ) throws JSONException { List rows = new ArrayList(); - // for each property - int colindex = 0; - for(String pid : properties) { - JSONArray values = record.getJSONArray(pid); - if (values == null) { - continue; - } + // for each property + int colindex = 0; + for(ColumnInfo ci : columns) { + String pid = ci.id; + JSONArray values = record.getJSONArray(pid); + if (values == null) { + continue; + } - // for each value - for(int rowindex = 0; rowindex < values.length(); rowindex++) { - JSONObject val = values.getJSONObject(rowindex); - // store a reconciled value - if(val.has("id")) { - storeCell(rows, rowindex, colindex, val, reconCandidateMap); - } else if(val.has("str")) { - // store a bare string - String str = val.getString("str"); - storeStr(rows, rowindex, colindex, str); + // for each value + for(int rowindex = 0; rowindex < values.length(); rowindex++) { + JSONObject val = values.getJSONObject(rowindex); + // store a reconciled value + if (val.has("id")) { + storeCell(rows, rowindex, colindex, val, reconCandidateMap); + } else if (val.has("str")) { + // store a bare string + String str = val.getString("str"); + storeCell(rows, rowindex, colindex, str); + } else if (val.has("float")) { + float v = Float.parseFloat(val.getString("float")); + storeCell(rows, rowindex, colindex, v); + } else if (val.has("int")) { + int v = Integer.parseInt(val.getString("int")); + storeCell(rows, rowindex, colindex, v); + } else if (val.has("date")) { + ToDate td = new ToDate(); + String[] args = new String[1]; + args[0] = val.getString("date"); + Object v = td.call(null, args); + storeCell(rows, rowindex, colindex, v); + } else if(val.has("bool")) { + boolean v = val.getString("bool") == "true"; + storeCell(rows, rowindex, colindex, v); } - // TODO other cases for other types of values (dates, booleans, …) - } - colindex++; - } + } + colindex++; + } - // collectResult(rows, extension.getJSONArray("properties"), obj, 0, 0, 
reconCandidateMap); - Object[][] data = new Object[rows.size()][columnCount]; + Object[][] data = new Object[rows.size()][columns.size()]; rows.toArray(data); return new DataExtension(data); } - protected void storeStr( - List rows, - int row, - int col, - String str - ) throws JSONException { - while (row >= rows.size()) { - rows.add(new Object[columnCount]); - } - rows.get(row)[col] = str; - } - protected void storeCell( List rows, int row, int col, - Object value, - Map reconCandidateMap + Object value ) { while (row >= rows.size()) { - rows.add(new Object[columnCount]); + rows.add(new Object[columns.size()]); } rows.get(row)[col] = value; } @@ -251,128 +243,8 @@ public class ReconciledDataExtensionJob { reconCandidateMap.put(id, rc); } - storeCell(rows, row, col, rc, reconCandidateMap); + storeCell(rows, row, col, rc); } - /* - protected int[] collectResult( - List rows, - JSONObject extNode, - JSONObject resultNode, - int startRowIndex, - int startColumnIndex, - Map reconCandidateMap - ) throws JSONException { - String propertyID = extNode.getString("id"); - // String expectedTypeID = extNode.getJSONObject("expected").getString("id"); - - JSONArray a = resultNode != null && resultNode.has(propertyID) && !resultNode.isNull(propertyID) ? - resultNode.getJSONArray(propertyID) : null; - - if ("/type/key".equals(expectedTypeID)) { - if (a != null) { - int l = a.length(); - for (int r = 0; r < l; r++) { - Object o = a.isNull(r) ? null : a.get(r); - if (o instanceof JSONObject) { - storeStr(rows, startRowIndex++, startColumnIndex, (JSONObject) o, reconCandidateMap); - } - } - } - - // note that we still take up a column even if we don't have any data - return new int[] { startRowIndex, startColumnIndex + 1 }; - } else if (expectedTypeID.startsWith("/type/")) { - if (a != null) { - int l = a.length(); - for (int r = 0; r < l; r++) { - Object o = a.isNull(r) ? 
null : a.get(r); - if (o instanceof Serializable) { - storeCell(rows, startRowIndex++, startColumnIndex, o, reconCandidateMap); - } - } - } - - // note that we still take up a column even if we don't have any data - return new int[] { startRowIndex, startColumnIndex + 1 }; - } else { - boolean hasSubProperties = (extNode.has("properties") && !extNode.isNull("properties")); - boolean isOwnColumn = !hasSubProperties || (extNode.has("included") && extNode.getBoolean("included")); - - if (a != null && a.length() > 0) { - int maxColIndex = startColumnIndex; - - int l = a.length(); - for (int r = 0; r < l; r++) { - Object v = a.isNull(r) ? null : a.get(r); - JSONObject o = v != null && v instanceof JSONObject ? (JSONObject) v : null; - - int startColumnIndex2 = startColumnIndex; - int startRowIndex2 = startRowIndex; - - if (isOwnColumn) { - if (o != null) { - storeCell(rows, startRowIndex2++, startColumnIndex2++, o, reconCandidateMap); - } else { - storeCell(rows, startRowIndex2++, startColumnIndex2++, v, reconCandidateMap); - } - } - - if (hasSubProperties && o != null) { - int[] rowcol = collectResult( - rows, - extNode.getJSONArray("properties"), - o, - startRowIndex, - startColumnIndex2, - reconCandidateMap - ); - - startRowIndex2 = rowcol[0]; - startColumnIndex2 = rowcol[1]; - } - - startRowIndex = startRowIndex2; - maxColIndex = Math.max(maxColIndex, startColumnIndex2); - } - - return new int[] { startRowIndex, maxColIndex }; - } else { - return new int[] { - startRowIndex, - startColumnIndex + countColumns(extNode, null, new ArrayList(), new ArrayList()) - }; - } - } - } - - protected int[] collectResult( - List rows, - JSONArray subProperties, - JSONObject resultNode, - int startRowIndex, - int startColumnIndex, - Map reconCandidateMap - ) throws JSONException { - int maxStartRowIndex = startRowIndex; - - int k = subProperties.length(); - for (int c = 0; c < k; c++) { - int[] rowcol = collectResult( - rows, - subProperties.getJSONObject(c), - resultNode, - 
startRowIndex, - startColumnIndex, - reconCandidateMap - ); - - maxStartRowIndex = Math.max(maxStartRowIndex, rowcol[0]); - startColumnIndex = rowcol[1]; - } - - return new int[] { maxStartRowIndex, startColumnIndex }; - }*/ - static protected void formulateQuery(Set ids, JSONObject node, Writer writer) throws JSONException { @@ -380,74 +252,46 @@ public class ReconciledDataExtensionJob { jsonWriter.object(); - jsonWriter.key("ids"); - jsonWriter.array(); - for (String id : ids) { - if (id != null) { - jsonWriter.value(id); - } - } - jsonWriter.endArray(); + jsonWriter.key("ids"); + jsonWriter.array(); + for (String id : ids) { + if (id != null) { + jsonWriter.value(id); + } + } + jsonWriter.endArray(); jsonWriter.key("properties"); - jsonWriter.array(); - JSONArray properties = node.getJSONArray("properties"); - int l = properties.length(); + jsonWriter.array(); + JSONArray properties = node.getJSONArray("properties"); + int l = properties.length(); - for (int i = 0; i < l; i++) { - JSONObject property = properties.getJSONObject(i); - jsonWriter.object(); - jsonWriter.key("id"); - jsonWriter.value(property.getString("id")); - // TODO translate constraints as below - jsonWriter.endObject(); - } - jsonWriter.endArray(); + for (int i = 0; i < l; i++) { + JSONObject property = properties.getJSONObject(i); + jsonWriter.object(); + jsonWriter.key("id"); + jsonWriter.value(property.getString("id")); + // TODO translate constraints as below + jsonWriter.endObject(); + } + jsonWriter.endArray(); jsonWriter.endObject(); } - - static protected int countColumns(JSONObject obj, List columns, List names, List path) throws JSONException { - String name = obj.getString("name"); - - List names2 = null; - List path2 = null; - if (columns != null) { - names2 = new ArrayList(names); - names2.add(name); - - path2 = new ArrayList(path); - path2.add(obj.getString("id")); - } - - if (obj.has("properties") && !obj.isNull("properties")) { - boolean included = (obj.has("included") && 
obj.getBoolean("included")); - if (included && columns != null) { - // JSONObject expected = obj.getJSONObject("expected"); - - columns.add(new ColumnInfo(names2, path2 - /* new FreebaseType(expected.getString("id"), expected.getString("name")) */)); - } - - return (included ? 1 : 0) + - countColumns(obj.getJSONArray("properties"), columns, names2, path2); - } else { - if (columns != null) { - // JSONObject expected = obj.getJSONObject("expected"); - - columns.add(new ColumnInfo(names2, path2 - /* new FreebaseType(expected.getString("id"), expected.getString("name")) */ )); - } - return 1; - } - } - - static protected int countColumns(JSONArray a, List columns, List names, List path) throws JSONException { - int c = 0; - int l = a.length(); - for (int i = 0; i < l; i++) { - c += countColumns(a.getJSONObject(i), columns, names, path); - } - return c; - } + static protected void gatherColumnInfo(JSONArray meta, List columns) throws JSONException { + for(int i = 0; i < meta.length(); i++) { + JSONObject col = meta.getJSONObject(i); + + ReconType expectedType = null; + if(col.has("type")) { + JSONObject expectedObj = col.getJSONObject("type"); + expectedType = new ReconType(expectedObj.getString("id"), expectedObj.getString("name")); + } + + columns.add(new ColumnInfo( + col.getString("name"), + col.getString("id"), + expectedType)); + } + } } diff --git a/main/src/com/google/refine/operations/recon/ExtendDataOperation.java b/main/src/com/google/refine/operations/recon/ExtendDataOperation.java index 12f24392a..132e4da45 100644 --- a/main/src/com/google/refine/operations/recon/ExtendDataOperation.java +++ b/main/src/com/google/refine/operations/recon/ExtendDataOperation.java @@ -50,15 +50,16 @@ import com.google.refine.browsing.Engine; import com.google.refine.browsing.FilteredRows; import com.google.refine.browsing.RowVisitor; import com.google.refine.model.changes.DataExtensionChange; -import com.google.refine.model.recon.DataExtensionJob; -import 
com.google.refine.model.recon.DataExtensionJob.ColumnInfo; -import com.google.refine.model.recon.DataExtensionJob.DataExtension; +import com.google.refine.model.recon.ReconciledDataExtensionJob; +import com.google.refine.model.recon.ReconciledDataExtensionJob.ColumnInfo; +import com.google.refine.model.recon.ReconciledDataExtensionJob.DataExtension; import com.google.refine.history.HistoryEntry; import com.google.refine.model.AbstractOperation; import com.google.refine.model.Cell; import com.google.refine.model.Column; import com.google.refine.model.Project; import com.google.refine.model.ReconCandidate; +import com.google.refine.model.ReconType; import com.google.refine.model.Row; import com.google.refine.model.changes.CellAtRow; import com.google.refine.operations.EngineDependentOperation; @@ -68,6 +69,7 @@ import com.google.refine.process.Process; public class ExtendDataOperation extends EngineDependentOperation { final protected String _baseColumnName; + final protected String _endpoint; final protected JSONObject _extension; final protected int _columnInsertIndex; @@ -77,6 +79,7 @@ public class ExtendDataOperation extends EngineDependentOperation { return new ExtendDataOperation( engineConfig, obj.getString("baseColumnName"), + obj.getString("endpoint"), obj.getJSONObject("extension"), obj.getInt("columnInsertIndex") ); @@ -85,12 +88,14 @@ public class ExtendDataOperation extends EngineDependentOperation { public ExtendDataOperation( JSONObject engineConfig, String baseColumnName, + String endpoint, JSONObject extension, int columnInsertIndex ) { super(engineConfig); _baseColumnName = baseColumnName; + _endpoint = endpoint; _extension = extension; _columnInsertIndex = columnInsertIndex; } @@ -105,6 +110,7 @@ public class ExtendDataOperation extends EngineDependentOperation { writer.key("engineConfig"); writer.value(getEngineConfig()); writer.key("columnInsertIndex"); writer.value(_columnInsertIndex); writer.key("baseColumnName"); writer.value(_baseColumnName); 
+ writer.key("endpoint"); writer.value(_endpoint); writer.key("extension"); writer.value(_extension); writer.endObject(); } @@ -135,7 +141,7 @@ public class ExtendDataOperation extends EngineDependentOperation { final protected JSONObject _engineConfig; final protected long _historyEntryID; protected int _cellIndex; - protected FreebaseDataExtensionJob _job; + protected ReconciledDataExtensionJob _job; public ExtendDataProcess( Project project, @@ -147,7 +153,7 @@ public class ExtendDataOperation extends EngineDependentOperation { _engineConfig = engineConfig; _historyEntryID = HistoryEntry.allocateID(); - _job = new FreebaseDataExtensionJob(_extension); + _job = new ReconciledDataExtensionJob(_extension, _endpoint); } @Override @@ -283,10 +289,10 @@ public class ExtendDataOperation extends EngineDependentOperation { if (!_canceled) { List columnNames = new ArrayList(); for (ColumnInfo info : _job.columns) { - columnNames.add(StringUtils.join(info.names, " - ")); + columnNames.add(info.name); } - List columnTypes = new ArrayList(); + List columnTypes = new ArrayList(); for (ColumnInfo info : _job.columns) { columnTypes.add(info.expectedType); } @@ -298,6 +304,7 @@ public class ExtendDataOperation extends EngineDependentOperation { ExtendDataOperation.this, new DataExtensionChange( _baseColumnName, + _endpoint, _columnInsertIndex, columnNames, columnTypes, diff --git a/main/webapp/modules/core/scripts/dialogs/extend-data-preview-dialog.js b/main/webapp/modules/core/scripts/dialogs/extend-data-preview-dialog.js index ca27f02d9..b78e1a851 100644 --- a/main/webapp/modules/core/scripts/dialogs/extend-data-preview-dialog.js +++ b/main/webapp/modules/core/scripts/dialogs/extend-data-preview-dialog.js @@ -52,7 +52,7 @@ function ExtendReconciledDataPreviewDialog(column, columnIndex, rowIndices, onDo alert("Please add some properties first."); } else { DialogSystem.dismissUntil(self._level - 1); - self._onDone(self._extension); + self._onDone(self._extension, self._service); 
} }); this._elmts.cancelButton.click(function() { @@ -67,6 +67,7 @@ function ExtendReconciledDataPreviewDialog(column, columnIndex, rowIndices, onDo this._serviceMetadata = null; if ("reconConfig" in column) { var service = column.reconConfig.service; + this._service = service; var serviceMetadata = ReconciliationManager.getServiceFromUrl(service); this._serviceMetadata = serviceMetadata; if ("extend" in serviceMetadata) { @@ -258,7 +259,7 @@ ExtendReconciledDataPreviewDialog.prototype._renderPreview = function(data) { var renderColumnHeader = function(column) { var th = $('
').appendTo(trHead); - $('').html(column.names.join(" » ")).appendTo(th); + $('').html(column.name).appendTo(th); $('
').appendTo(th); $('') @@ -266,15 +267,15 @@ ExtendReconciledDataPreviewDialog.prototype._renderPreview = function(data) { .addClass("action") .attr("title", "Remove this column") .click(function() { - self._removeProperty(column.path); + self._removeProperty(column.id); }).appendTo(th); $('') - .text("constrain") + .text("configure") .addClass("action") - .attr("title", "Add constraints to this column") + .attr("title", "Configure this column") .click(function() { - self._constrainProperty(column.path); + self._constrainProperty(column.id); }).appendTo(th); }; for (var c = 0; c < data.columns.length; c++) { @@ -301,56 +302,25 @@ ExtendReconciledDataPreviewDialog.prototype._renderPreview = function(data) { container.append(table); }; -ExtendReconciledDataPreviewDialog.prototype._removeProperty = function(path) { - var removeFromList = function(path, index, properties) { - var id = path[index]; - - for (var i = properties.length - 1; i >= 0; i--) { - var property = properties[i]; - if (property.id == id) { - if (index === path.length - 1) { - if ("included" in property) { - delete property.included; - } - } else if ("properties" in property && property.properties.length > 0) { - removeFromList(path, index + 1, property.properties); - } - - if (!("properties" in property) || property.properties.length === 0) { - properties.splice(i, 1); - } - - return; - } +ExtendReconciledDataPreviewDialog.prototype._removeProperty = function(id) { + for(var i = this._extension.properties.length - 1; i >= 0; i--) { + var property = this._extension.properties[i]; + if (property.id == id) { + this._extension.properties.splice(i, 1); } - }; - - removeFromList(path, 0, this._extension.properties); - + } this._update(); }; -ExtendReconciledDataPreviewDialog.prototype._findProperty = function(path) { - var find = function(path, index, properties) { - var id = path[index]; - - for (var i = properties.length - 1; i >= 0; i--) { - var property = properties[i]; - if (property.id == id) { - if 
(index === path.length - 1) { - return property; - } else if ("properties" in property && property.properties.length > 0) { - return find(path, index + 1, property.properties); - } - break; - } +ExtendReconciledDataPreviewDialog.prototype._findProperty = function(id) { + var properties = this._extension.properties; + for(var i = properties.length - 1; i >= 0; i--) { + if (properties[i].id == path) { + return properties[i]; } - - return null; - }; - - return find(path, 0, this._extension.properties); -}; + } + return null; +} ExtendReconciledDataPreviewDialog.prototype._constrainProperty = function(path) { var self = this; diff --git a/main/webapp/modules/core/scripts/views/data-table/menu-edit-column.js b/main/webapp/modules/core/scripts/views/data-table/menu-edit-column.js index 85878a045..7ecf95074 100644 --- a/main/webapp/modules/core/scripts/views/data-table/menu-edit-column.js +++ b/main/webapp/modules/core/scripts/views/data-table/menu-edit-column.js @@ -153,12 +153,13 @@ DataTableColumnHeaderUI.extendMenu(function(column, columnHeaderUI, menu) { column, columnIndex, o.rowIndices, - function(extension) { + function(extension, endpoint) { Refine.postProcess( "core", "extend-data", { baseColumnName: column.name, + endpoint: endpoint, columnInsertIndex: columnIndex + 1 }, { @@ -167,7 +168,7 @@ DataTableColumnHeaderUI.extendMenu(function(column, columnHeaderUI, menu) { { rowsChanged: true, modelsChanged: true } ); } - ); */ + ); }; /* From cc991cab21ecf5f2debaa6195c3fdf3e4f3a4353 Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Fri, 14 Jul 2017 11:30:17 +0100 Subject: [PATCH 03/11] Add nicer spinning gif while preview is loading. Fix bug of multiple ColumnInfo being generated. 
--- .../model/changes/DataExtensionChange.java | 8 +++--- .../recon/ReconciledDataExtensionJob.java | 6 +++-- .../webapp/modules/core/MOD-INF/controller.js | 3 ++- .../dialogs/extend-data-preview-dialog.js | 26 +++++++------------ .../extend-data-preview-dialog.less | 9 +++++++ 5 files changed, 28 insertions(+), 24 deletions(-) rename main/webapp/modules/core/styles/{views => reconciliation}/extend-data-preview-dialog.less (94%) diff --git a/main/src/com/google/refine/model/changes/DataExtensionChange.java b/main/src/com/google/refine/model/changes/DataExtensionChange.java index 4a3463174..4deebf612 100644 --- a/main/src/com/google/refine/model/changes/DataExtensionChange.java +++ b/main/src/com/google/refine/model/changes/DataExtensionChange.java @@ -293,9 +293,7 @@ public class DataExtensionChange implements Change { writer.write("columnTypeCount="); writer.write(Integer.toString(_columnTypes.size())); writer.write('\n'); for (ReconType type : _columnTypes) { try { - if(type == null) { - writer.write("null"); - } else { + if(type != null) { JSONWriter jsonWriter = new JSONWriter(writer); type.write(jsonWriter, options); } @@ -409,7 +407,9 @@ public class DataExtensionChange implements Change { columnTypes = new ArrayList(count); for (int i = 0; i < count; i++) { line = reader.readLine(); - if (line != null) { + if (line == null || line.length() == 0) { + columnTypes.add(null); + } else { columnTypes.add(ReconType.load(ParsingUtilities.evaluateJsonStringToObject(line))); } } diff --git a/main/src/com/google/refine/model/recon/ReconciledDataExtensionJob.java b/main/src/com/google/refine/model/recon/ReconciledDataExtensionJob.java index b0bffd8df..c1441bd5a 100644 --- a/main/src/com/google/refine/model/recon/ReconciledDataExtensionJob.java +++ b/main/src/com/google/refine/model/recon/ReconciledDataExtensionJob.java @@ -105,8 +105,10 @@ public class ReconciledDataExtensionJob { String s = ParsingUtilities.inputStreamToString(is); JSONObject o = 
ParsingUtilities.evaluateJsonStringToObject(s); - // Extract the column metadata - gatherColumnInfo(o.getJSONArray("meta"), columns); + if(columns.size() == 0) { + // Extract the column metadata + gatherColumnInfo(o.getJSONArray("meta"), columns); + } Map map = new HashMap(); if (o.has("rows")){ diff --git a/main/webapp/modules/core/MOD-INF/controller.js b/main/webapp/modules/core/MOD-INF/controller.js index 404e55070..08b76e2fc 100644 --- a/main/webapp/modules/core/MOD-INF/controller.js +++ b/main/webapp/modules/core/MOD-INF/controller.js @@ -478,7 +478,8 @@ function init() { "styles/dialogs/custom-tabular-exporter-dialog.less", "styles/reconciliation/recon-dialog.less", - "styles/reconciliation/standard-service-panel.less" + "styles/reconciliation/standard-service-panel.less", + "styles/reconciliation/extend-data-preview-dialog.less", ] ); diff --git a/main/webapp/modules/core/scripts/dialogs/extend-data-preview-dialog.js b/main/webapp/modules/core/scripts/dialogs/extend-data-preview-dialog.js index b78e1a851..622cf194c 100644 --- a/main/webapp/modules/core/scripts/dialogs/extend-data-preview-dialog.js +++ b/main/webapp/modules/core/scripts/dialogs/extend-data-preview-dialog.js @@ -165,28 +165,18 @@ ExtendReconciledDataPreviewDialog.prototype._show = function(properties) { var suggestConfig = $.extend({}, this._serviceMetadata.suggest.property); suggestConfig.key = null; suggestConfig.query_param_name = "prefix"; - /* var suggestConfig = { - filter: '(all type:/type/property)' - }; - if ((this._column.reconConfig) && (this._column.reconConfig.type)) { - suggestConfig.filter = '(all type:/type/property (any namespace:/type/object namespace:' + this._column.reconConfig.type.id + '))'; - } */ this._elmts.addPropertyInput.suggestP(suggestConfig).bind("fb-select", function(evt, data) { - var expected = data.expected_type; self._addProperty({ id : data.id, name: data.name, - /* expected: { - id: expected.id, - name: expected.name - } */ }); }); }; 
ExtendReconciledDataPreviewDialog.prototype._update = function() { - this._elmts.previewContainer.empty().text("Querying THE service..."); + this._elmts.previewContainer.empty().html( + '
'); var self = this; var params = { @@ -291,7 +281,9 @@ ExtendReconciledDataPreviewDialog.prototype._renderPreview = function(data) { var cell = row[c]; if (cell !== null) { if ($.isPlainObject(cell)) { - $('').attr("href", "http://www.freebase.com/view" + cell.id).text(cell.name).appendTo(td); + $('').attr("href", + this._serviceMetadata.identifierSpace + cell.id + ).attr("target", "_blank").text(cell.name).appendTo(td); } else { $('').text(cell).appendTo(td); } @@ -315,21 +307,21 @@ ExtendReconciledDataPreviewDialog.prototype._removeProperty = function(id) { ExtendReconciledDataPreviewDialog.prototype._findProperty = function(id) { var properties = this._extension.properties; for(var i = properties.length - 1; i >= 0; i--) { - if (properties[i].id == path) { + if (properties[i].id == id) { return properties[i]; } } return null; } -ExtendReconciledDataPreviewDialog.prototype._constrainProperty = function(path) { +ExtendReconciledDataPreviewDialog.prototype._constrainProperty = function(id) { var self = this; - var property = this._findProperty(path); + var property = this._findProperty(id); var frame = DialogSystem.createDialog(); frame.width("500px"); - var header = $('
').addClass("dialog-header").text("Constrain " + path.join(" > ")).appendTo(frame); + var header = $('
').addClass("dialog-header").text("Constrain " + id).appendTo(frame); var body = $('
').addClass("dialog-body").appendTo(frame); var footer = $('
').addClass("dialog-footer").appendTo(frame); diff --git a/main/webapp/modules/core/styles/views/extend-data-preview-dialog.less b/main/webapp/modules/core/styles/reconciliation/extend-data-preview-dialog.less similarity index 94% rename from main/webapp/modules/core/styles/views/extend-data-preview-dialog.less rename to main/webapp/modules/core/styles/reconciliation/extend-data-preview-dialog.less index 006e87306..220a4610a 100644 --- a/main/webapp/modules/core/styles/views/extend-data-preview-dialog.less +++ b/main/webapp/modules/core/styles/reconciliation/extend-data-preview-dialog.less @@ -69,3 +69,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. vertical-align: top; margin-left: 5px; } + +.extend-data-preview-progress { + text-align: center; +} + +.extend-data-preview-progress img { + padding: 45%; + display: inline-block; +} From 6501c235e8c134d5fac20b506dbcc968e8879397 Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Fri, 14 Jul 2017 12:30:39 +0100 Subject: [PATCH 04/11] Pass the identifier and schema spaces along to create better ReconCandidates --- .../commands/recon/ExtendDataCommand.java | 4 +++ .../model/changes/DataExtensionChange.java | 28 ++++++++++++++++--- .../operations/recon/ExtendDataOperation.java | 12 ++++++++ .../dialogs/extend-data-preview-dialog.js | 5 +++- .../views/data-table/menu-edit-column.js | 4 ++- 5 files changed, 47 insertions(+), 6 deletions(-) diff --git a/main/src/com/google/refine/commands/recon/ExtendDataCommand.java b/main/src/com/google/refine/commands/recon/ExtendDataCommand.java index 52cd66476..bdf0748b8 100644 --- a/main/src/com/google/refine/commands/recon/ExtendDataCommand.java +++ b/main/src/com/google/refine/commands/recon/ExtendDataCommand.java @@ -51,6 +51,8 @@ public class ExtendDataCommand extends EngineDependentCommand { String baseColumnName = request.getParameter("baseColumnName"); int columnInsertIndex = Integer.parseInt(request.getParameter("columnInsertIndex")); String 
endpoint = request.getParameter("endpoint"); + String identifierSpace = request.getParameter("identifierSpace"); + String schemaSpace = request.getParameter("schemaSpace"); String jsonString = request.getParameter("extension"); JSONObject extension = ParsingUtilities.evaluateJsonStringToObject(jsonString); @@ -59,6 +61,8 @@ public class ExtendDataCommand extends EngineDependentCommand { engineConfig, baseColumnName, endpoint, + identifierSpace, + schemaSpace, extension, columnInsertIndex ); diff --git a/main/src/com/google/refine/model/changes/DataExtensionChange.java b/main/src/com/google/refine/model/changes/DataExtensionChange.java index 4deebf612..b5a478939 100644 --- a/main/src/com/google/refine/model/changes/DataExtensionChange.java +++ b/main/src/com/google/refine/model/changes/DataExtensionChange.java @@ -66,6 +66,8 @@ import com.google.refine.util.Pool; public class DataExtensionChange implements Change { final protected String _baseColumnName; final protected String _service; + final protected String _identifierSpace; + final protected String _schemaSpace; final protected int _columnInsertIndex; final protected List _columnNames; @@ -82,6 +84,8 @@ public class DataExtensionChange implements Change { public DataExtensionChange( String baseColumnName, String service, + String identifierSpace, + String schemaSpace, int columnInsertIndex, List columnNames, List columnTypes, @@ -91,6 +95,8 @@ public class DataExtensionChange implements Change { ) { _baseColumnName = baseColumnName; _service = service; + _identifierSpace = identifierSpace; + _schemaSpace = schemaSpace; _columnInsertIndex = columnInsertIndex; _columnNames = columnNames; @@ -105,6 +111,8 @@ public class DataExtensionChange implements Change { protected DataExtensionChange( String baseColumnName, String service, + String identifierSpace, + String schemaSpace, int columnInsertIndex, List columnNames, @@ -118,6 +126,8 @@ public class DataExtensionChange implements Change { ) { _baseColumnName = 
baseColumnName; _service = service; + _identifierSpace = identifierSpace; + _schemaSpace = schemaSpace; _columnInsertIndex = columnInsertIndex; _columnNames = columnNames; @@ -211,8 +221,8 @@ public class DataExtensionChange implements Change { Column column = new Column(cellIndex, name); column.setReconConfig(new DataExtensionReconConfig( _service, - "", // TODO retrieve service by URL and fill this - "", + _identifierSpace, + _schemaSpace, _columnTypes.get(i))); column.setReconStats(ReconStats.create(project, cellIndex)); @@ -247,9 +257,9 @@ public class DataExtensionChange implements Change { if (reconMap.containsKey(rc.id)) { recon = reconMap.get(rc.id); } else { - recon = Recon.makeFreebaseRecon(_historyEntryID); + recon = new Recon(_historyEntryID, _identifierSpace, _schemaSpace); recon.addCandidate(rc); - recon.service = "mql"; + recon.service = _service; recon.match = rc; recon.matchRank = 0; recon.judgment = Judgment.Matched; @@ -285,6 +295,8 @@ public class DataExtensionChange implements Change { public void save(Writer writer, Properties options) throws IOException { writer.write("baseColumnName="); writer.write(_baseColumnName); writer.write('\n'); writer.write("service="); writer.write(_service); writer.write('\n'); + writer.write("identifierSpace="); writer.write(_identifierSpace); writer.write('\n'); + writer.write("schemaSpace="); writer.write(_schemaSpace); writer.write('\n'); writer.write("columnInsertIndex="); writer.write(Integer.toString(_columnInsertIndex)); writer.write('\n'); writer.write("columnNameCount="); writer.write(Integer.toString(_columnNames.size())); writer.write('\n'); for (String name : _columnNames) { @@ -354,6 +366,8 @@ public class DataExtensionChange implements Change { static public Change load(LineNumberReader reader, Pool pool) throws Exception { String baseColumnName = null; String service = null; + String identifierSpace = null; + String schemaSpace = null; int columnInsertIndex = -1; List columnNames = null; @@ -377,6 
+391,10 @@ public class DataExtensionChange implements Change { baseColumnName = value; } else if ("service".equals(field)) { service = value; + } else if ("identifierSpace".equals(field)) { + identifierSpace = value; + } else if ("schemaSpace".equals(field)) { + schemaSpace = value; } else if ("columnInsertIndex".equals(field)) { columnInsertIndex = Integer.parseInt(value); } else if ("firstNewCellIndex".equals(field)) { @@ -472,6 +490,8 @@ public class DataExtensionChange implements Change { DataExtensionChange change = new DataExtensionChange( baseColumnName, service, + identifierSpace, + schemaSpace, columnInsertIndex, columnNames, columnTypes, diff --git a/main/src/com/google/refine/operations/recon/ExtendDataOperation.java b/main/src/com/google/refine/operations/recon/ExtendDataOperation.java index 132e4da45..eea06a711 100644 --- a/main/src/com/google/refine/operations/recon/ExtendDataOperation.java +++ b/main/src/com/google/refine/operations/recon/ExtendDataOperation.java @@ -70,6 +70,8 @@ import com.google.refine.process.Process; public class ExtendDataOperation extends EngineDependentOperation { final protected String _baseColumnName; final protected String _endpoint; + final protected String _identifierSpace; + final protected String _schemaSpace; final protected JSONObject _extension; final protected int _columnInsertIndex; @@ -80,6 +82,8 @@ public class ExtendDataOperation extends EngineDependentOperation { engineConfig, obj.getString("baseColumnName"), obj.getString("endpoint"), + obj.getString("identifierSpace"), + obj.getString("schemaSpace"), obj.getJSONObject("extension"), obj.getInt("columnInsertIndex") ); @@ -89,6 +93,8 @@ public class ExtendDataOperation extends EngineDependentOperation { JSONObject engineConfig, String baseColumnName, String endpoint, + String identifierSpace, + String schemaSpace, JSONObject extension, int columnInsertIndex ) { @@ -96,6 +102,8 @@ public class ExtendDataOperation extends EngineDependentOperation { 
_baseColumnName = baseColumnName; _endpoint = endpoint; + _identifierSpace = identifierSpace; + _schemaSpace = schemaSpace; _extension = extension; _columnInsertIndex = columnInsertIndex; } @@ -111,6 +119,8 @@ public class ExtendDataOperation extends EngineDependentOperation { writer.key("columnInsertIndex"); writer.value(_columnInsertIndex); writer.key("baseColumnName"); writer.value(_baseColumnName); writer.key("endpoint"); writer.value(_endpoint); + writer.key("identifierSpace"); writer.value(_identifierSpace); + writer.key("schemaSpace"); writer.value(_schemaSpace); writer.key("extension"); writer.value(_extension); writer.endObject(); } @@ -305,6 +315,8 @@ public class ExtendDataOperation extends EngineDependentOperation { new DataExtensionChange( _baseColumnName, _endpoint, + _identifierSpace, + _schemaSpace, _columnInsertIndex, columnNames, columnTypes, diff --git a/main/webapp/modules/core/scripts/dialogs/extend-data-preview-dialog.js b/main/webapp/modules/core/scripts/dialogs/extend-data-preview-dialog.js index 622cf194c..e98d0b890 100644 --- a/main/webapp/modules/core/scripts/dialogs/extend-data-preview-dialog.js +++ b/main/webapp/modules/core/scripts/dialogs/extend-data-preview-dialog.js @@ -52,7 +52,10 @@ function ExtendReconciledDataPreviewDialog(column, columnIndex, rowIndices, onDo alert("Please add some properties first."); } else { DialogSystem.dismissUntil(self._level - 1); - self._onDone(self._extension, self._service); + self._onDone(self._extension, + self._service, + self._serviceMetadata.identifierSpace, + self._serviceMetadata.schemaSpace); } }); this._elmts.cancelButton.click(function() { diff --git a/main/webapp/modules/core/scripts/views/data-table/menu-edit-column.js b/main/webapp/modules/core/scripts/views/data-table/menu-edit-column.js index 7ecf95074..0a327bc51 100644 --- a/main/webapp/modules/core/scripts/views/data-table/menu-edit-column.js +++ b/main/webapp/modules/core/scripts/views/data-table/menu-edit-column.js @@ -153,13 
+153,15 @@ DataTableColumnHeaderUI.extendMenu(function(column, columnHeaderUI, menu) { column, columnIndex, o.rowIndices, - function(extension, endpoint) { + function(extension, endpoint, identifierSpace, schemaSpace) { Refine.postProcess( "core", "extend-data", { baseColumnName: column.name, endpoint: endpoint, + identifierSpace: identifierSpace, + schemaSpace: schemaSpace, columnInsertIndex: columnIndex + 1 }, { From 3eadefe613b108c992af51e3ad4e618236d68411 Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Fri, 14 Jul 2017 12:53:54 +0100 Subject: [PATCH 05/11] Do not add reconciliation statistics on columns without types --- .../google/refine/model/changes/DataExtensionChange.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/main/src/com/google/refine/model/changes/DataExtensionChange.java b/main/src/com/google/refine/model/changes/DataExtensionChange.java index b5a478939..cffc193f4 100644 --- a/main/src/com/google/refine/model/changes/DataExtensionChange.java +++ b/main/src/com/google/refine/model/changes/DataExtensionChange.java @@ -219,12 +219,15 @@ public class DataExtensionChange implements Change { int cellIndex = _firstNewCellIndex + i; Column column = new Column(cellIndex, name); + ReconType columnType = _columnTypes.get(i); column.setReconConfig(new DataExtensionReconConfig( _service, _identifierSpace, _schemaSpace, - _columnTypes.get(i))); - column.setReconStats(ReconStats.create(project, cellIndex)); + columnType)); + if (columnType != null) { + column.setReconStats(ReconStats.create(project, cellIndex)); + } try { project.columnModel.addColumn(_columnInsertIndex + i, column, true); From 9e3b0f159918d876194231c2a600f9f421b63742 Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Fri, 14 Jul 2017 16:47:39 +0100 Subject: [PATCH 06/11] Remove separate endpoint, not used anymore --- .../core/scripts/dialogs/extend-data-preview-dialog.js | 4 ---- 1 file changed, 4 deletions(-) diff --git 
a/main/webapp/modules/core/scripts/dialogs/extend-data-preview-dialog.js b/main/webapp/modules/core/scripts/dialogs/extend-data-preview-dialog.js index e98d0b890..0651eeeac 100644 --- a/main/webapp/modules/core/scripts/dialogs/extend-data-preview-dialog.js +++ b/main/webapp/modules/core/scripts/dialogs/extend-data-preview-dialog.js @@ -79,10 +79,6 @@ function ExtendReconciledDataPreviewDialog(column, columnIndex, rowIndices, onDo var endpoint = extend.propose_properties; this._proposePropertiesUrl = endpoint.service_url + endpoint.service_path; } - if ("fetch_column" in extend) { - var endpoint = extend.fetch_column; - this._fetchColumnUrl = endpoint.service_url + endpoint.service_path; - } } } From 05873f283ddcb8bb840dd944139f42ac71bead15 Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Fri, 14 Jul 2017 22:17:40 +0100 Subject: [PATCH 07/11] Integration of constraints with service-defined forms --- .../recon/ReconciledDataExtensionJob.java | 6 +- .../dialogs/extend-data-preview-dialog.js | 107 ++++++++++++++---- .../extend-data-preview-dialog.less | 4 + 3 files changed, 92 insertions(+), 25 deletions(-) diff --git a/main/src/com/google/refine/model/recon/ReconciledDataExtensionJob.java b/main/src/com/google/refine/model/recon/ReconciledDataExtensionJob.java index c1441bd5a..541253a79 100644 --- a/main/src/com/google/refine/model/recon/ReconciledDataExtensionJob.java +++ b/main/src/com/google/refine/model/recon/ReconciledDataExtensionJob.java @@ -273,7 +273,11 @@ public class ReconciledDataExtensionJob { jsonWriter.object(); jsonWriter.key("id"); jsonWriter.value(property.getString("id")); - // TODO translate constraints as below + if (property.has("settings")) { + JSONObject settings = property.getJSONObject("settings"); + jsonWriter.key("settings"); + jsonWriter.value(settings); + } jsonWriter.endObject(); } jsonWriter.endArray(); diff --git a/main/webapp/modules/core/scripts/dialogs/extend-data-preview-dialog.js 
b/main/webapp/modules/core/scripts/dialogs/extend-data-preview-dialog.js index 0651eeeac..fe3702014 100644 --- a/main/webapp/modules/core/scripts/dialogs/extend-data-preview-dialog.js +++ b/main/webapp/modules/core/scripts/dialogs/extend-data-preview-dialog.js @@ -183,6 +183,7 @@ ExtendReconciledDataPreviewDialog.prototype._update = function() { columnName: this._column.name }; + console.log(this._extension); $.post( "command/core/preview-extend-data?" + $.param(params), { @@ -320,26 +321,72 @@ ExtendReconciledDataPreviewDialog.prototype._constrainProperty = function(id) { var frame = DialogSystem.createDialog(); frame.width("500px"); - var header = $('
').addClass("dialog-header").text("Constrain " + id).appendTo(frame); + var header = $('
').addClass("dialog-header").text("Settings for " + id).appendTo(frame); var body = $('
').addClass("dialog-body").appendTo(frame); var footer = $('
').addClass("dialog-footer").appendTo(frame); - body.html( - '
' + + // by default we display an area where the user can input JSON + var form = ( '' + '' + - '
' + - 'Enter MQL query constraints as JSON' + + 'Enter query settings as JSON' + '
' + '' + - '
' + '
'+fieldHTML+'
' + + form + + '
' ); var bodyElmts = DOM.bind(body); - if ("constraints" in property) { - bodyElmts.textarea[0].value = JSON.stringify(property.constraints, null, 2); - } else { - bodyElmts.textarea[0].value = JSON.stringify({ "limit" : 10 }, null, 2); + if (fields == null) { + if ("settings" in property) { + bodyElmts.textarea[0].value = JSON.stringify(property.settings, null, 2); + } else { + bodyElmts.textarea[0].value = JSON.stringify({ "limit" : 10 }, null, 2); + } } footer.html( @@ -356,21 +403,33 @@ ExtendReconciledDataPreviewDialog.prototype._constrainProperty = function(id) { footerElmts.cancelButton.click(dismiss); footerElmts.okButton.click(function() { try { - var o = JSON.parse(bodyElmts.textarea[0].value); - if (o === undefined) { - alert("Please ensure that the JSON you enter is valid."); - return; - } + if (fields == null) { + var o = JSON.parse(bodyElmts.textarea[0].value); + if (o === undefined) { + alert("Please ensure that the JSON you enter is valid."); + return; + } - if ($.isArray(o) && o.length == 1) { - o = o[0]; - } - if (!$.isPlainObject(o)) { - alert("The JSON you enter must be an object, that is, it is of this form { ... }."); - return; - } + if ($.isArray(o) && o.length == 1) { + o = o[0]; + } + if (!$.isPlainObject(o)) { + alert("The JSON you enter must be an object, that is, it is of this form { ... 
}."); + return; + } - property.constraints = o; + property.settings = o; + } else { + var elem = $(bodyElmts.form[0]); + var ar = elem.serializeArray(); + var settings = {}; + for(var i = 0; i < ar.length; i++) { + settings[ar[i].name] = ar[i].value; + } + console.log(ar); + property.settings = settings; + console.log(settings); + } dismiss(); @@ -380,6 +439,6 @@ ExtendReconciledDataPreviewDialog.prototype._constrainProperty = function(id) { } }); - bodyElmts.textarea.focus(); + //bodyElmts.textarea.focus(); }; diff --git a/main/webapp/modules/core/styles/reconciliation/extend-data-preview-dialog.less b/main/webapp/modules/core/styles/reconciliation/extend-data-preview-dialog.less index 220a4610a..069a70628 100644 --- a/main/webapp/modules/core/styles/reconciliation/extend-data-preview-dialog.less +++ b/main/webapp/modules/core/styles/reconciliation/extend-data-preview-dialog.less @@ -78,3 +78,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. padding: 45%; display: inline-block; } + +.data-extension-property-config td { + padding: 5px; +} From 9db113faa242a2ace1fe2598c3b9e15827174bf2 Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Sat, 15 Jul 2017 10:23:31 +0100 Subject: [PATCH 08/11] Localization --- .../webapp/modules/core/MOD-INF/controller.js | 1 + .../core/langs/translation-default.json | 7 +++ .../modules/core/langs/translation-en.json | 7 +++ .../modules/core/langs/translation-es.json | 7 +++ .../modules/core/langs/translation-fr.json | 7 +++ .../modules/core/langs/translation-it.json | 7 +++ .../dialogs/extend-data-preview-dialog.js | 20 ++++---- .../views/data-table/menu-edit-column.js | 46 +------------------ 8 files changed, 49 insertions(+), 53 deletions(-) diff --git a/main/webapp/modules/core/MOD-INF/controller.js b/main/webapp/modules/core/MOD-INF/controller.js index 08b76e2fc..4489d0c97 100644 --- a/main/webapp/modules/core/MOD-INF/controller.js +++ b/main/webapp/modules/core/MOD-INF/controller.js @@ -182,6 +182,7 @@ 
function registerOperations() { OR.registerOperation(module, "recon-judge-similar-cells", Packages.com.google.refine.operations.recon.ReconJudgeSimilarCellsOperation); OR.registerOperation(module, "recon-clear-similar-cells", Packages.com.google.refine.operations.recon.ReconClearSimilarCellsOperation); OR.registerOperation(module, "recon-copy-across-columns", Packages.com.google.refine.operations.recon.ReconCopyAcrossColumnsOperation); + OR.registerOperation(module, "extend-reconciled-data", Packages.com.google.refine.operations.recon.ExtendDataOperation); } function registerImporting() { diff --git a/main/webapp/modules/core/langs/translation-default.json b/main/webapp/modules/core/langs/translation-default.json index ceab109be..48cebf3be 100644 --- a/main/webapp/modules/core/langs/translation-default.json +++ b/main/webapp/modules/core/langs/translation-default.json @@ -503,6 +503,13 @@ "cache-responses": "Cache responses", "copy-val": "copy value from original column", "warning-col-name": "You must enter a column name.", + "add-col-recon-val": "Add columns from reconciled values", + "add-col-recon-col": "Add columns from reconciled column", + "warning-no-property": "Please select a property first.", + "configure-col": "Configure this column", + "remove-prop": "remove", + "configure-prop": "configure", + "no-settings": "No settings are available for this property.", "add-col-fetch": "Add column by fetching URLs based on column", "throttle-delay": "Throttle delay", "milli": "milliseconds", diff --git a/main/webapp/modules/core/langs/translation-en.json b/main/webapp/modules/core/langs/translation-en.json index 6f13a1bdb..ae836fb8d 100644 --- a/main/webapp/modules/core/langs/translation-en.json +++ b/main/webapp/modules/core/langs/translation-en.json @@ -503,6 +503,13 @@ "cache-responses": "Cache responses", "copy-val": "copy value from original column", "warning-col-name": "You must enter a column name.", + "add-col-recon-val": "Add columns from reconciled 
values", + "add-col-recon-col": "Add columns from reconciled column", + "warning-no-property": "Please select a property first.", + "configure-col": "Configure this column", + "remove-prop": "remove", + "configure-prop": "configure", + "no-settings": "No settings are available for this property.", "add-col-fetch": "Add column by fetching URLs based on column", "throttle-delay": "Throttle delay", "milli": "milliseconds", diff --git a/main/webapp/modules/core/langs/translation-es.json b/main/webapp/modules/core/langs/translation-es.json index 98a8d6598..ab4f88dae 100644 --- a/main/webapp/modules/core/langs/translation-es.json +++ b/main/webapp/modules/core/langs/translation-es.json @@ -502,6 +502,13 @@ "store-err": "guardar error", "copy-val": "copiar valor de la columna original", "warning-col-name": "Debe ingresar un nombre para la columna.", + "add-col-recon-val": "Añadir columnas de valores conciliados", + "add-col-recon-col": "Añadir columnas de la columna conciliada", + "warning-no-property": "Seleccione primero una propiedad.", + "configure-col": "Configurar esta columna", + "remove-prop": "retirar", + "configure-prop": "configurar", + "no-settings": "No hay configuraciones disponibles para esta propiedad.", "add-col-fetch": "Agregar columna accediendo a URls basada en la columna", "throttle-delay": "Tiempo de retraso", "milli": "milisegundos", diff --git a/main/webapp/modules/core/langs/translation-fr.json b/main/webapp/modules/core/langs/translation-fr.json index 525e50292..02153c3e6 100644 --- a/main/webapp/modules/core/langs/translation-fr.json +++ b/main/webapp/modules/core/langs/translation-fr.json @@ -504,6 +504,13 @@ "copy-val": "copier la valeur depuis la colonne originale", "warning-col-name": "Vous devez indiquer un nom de colonne.", "add-col-fetch": "Ajouter une colonne en moissonnant les données depuis les URL d’une colonne", + "add-col-recon-val": "Ajouter des colonnes à partir de valeurs réconciliées", + "add-col-recon-col": "Ajouter des 
colonnes à partir de la colonne", + "warning-no-property": "Veuillez d'abord sélectionner une propriété.", + "configure-col": "Configurer cette colonne", + "remove-prop": "supprimer", + "configure-prop": "configurer", + "no-settings": "Aucun paramètre n'est disponible pour cette propriété.", "throttle-delay": "Délai de récupération", "milli": "millisecondes", "url-fetch": "Indiquer les URL à moissonner :", diff --git a/main/webapp/modules/core/langs/translation-it.json b/main/webapp/modules/core/langs/translation-it.json index 022ab88a1..5cd320aff 100644 --- a/main/webapp/modules/core/langs/translation-it.json +++ b/main/webapp/modules/core/langs/translation-it.json @@ -502,6 +502,13 @@ "store-err": "salva l'errore", "copy-val": "copia il valore dalla colonna originale", "warning-col-name": "Inserisci un nome per la colonna.", + "add-col-recon-val": "Aggiungi colonne da valori riconciliati", + "add-col-recon-col": "Aggiungi colonne dalla colonna riconciliata", + "warning-no-property": "Per favore seleziona innanzitutto una proprietà.", + "configure-col": "Configurare questa colonna", + "remove-prop": "rimuovi", + "configure-prop": "configurare", + "no-settings": "Nessuna impostazione è disponibile per questa proprietà.", "add-col-fetch": "Aggiungi colonna con URL, basandoti su", "throttle-delay": "Durata Throttle", "milli": "millisecondi", diff --git a/main/webapp/modules/core/scripts/dialogs/extend-data-preview-dialog.js b/main/webapp/modules/core/scripts/dialogs/extend-data-preview-dialog.js index fe3702014..2aa6f9e66 100644 --- a/main/webapp/modules/core/scripts/dialogs/extend-data-preview-dialog.js +++ b/main/webapp/modules/core/scripts/dialogs/extend-data-preview-dialog.js @@ -41,7 +41,7 @@ function ExtendReconciledDataPreviewDialog(column, columnIndex, rowIndices, onDo var self = this; this._dialog = $(DOM.loadHTML("core", "scripts/views/data-table/extend-data-preview-dialog.html")); this._elmts = DOM.bind(this._dialog); - 
this._elmts.dialogHeader.html("Add columns by reconciled column " + column.name); + this._elmts.dialogHeader.html($.i18n._('core-views')["add-col-recon-col"]+" "+column.name); this._elmts.resetButton.click(function() { self._extension.properties = []; self._update(); @@ -49,7 +49,7 @@ function ExtendReconciledDataPreviewDialog(column, columnIndex, rowIndices, onDo this._elmts.okButton.click(function() { if (self._extension.properties.length === 0) { - alert("Please add some properties first."); + alert($.i18n._('core-views')["warning-no-property"]); } else { DialogSystem.dismissUntil(self._level - 1); self._onDone(self._extension, @@ -253,17 +253,17 @@ ExtendReconciledDataPreviewDialog.prototype._renderPreview = function(data) { $('
').appendTo(th); $('
') - .text("remove") + .text($.i18n("core-views")["remove-prop"]) .addClass("action") - .attr("title", "Remove this column") + .attr("title", $.i18n("core-views")["remove-col"]) .click(function() { self._removeProperty(column.id); }).appendTo(th); $('') - .text("configure") + .text($.i18n("core-views")["configure-prop"]) .addClass("action") - .attr("title", "Configure this column") + .attr("title", $.i18n("core-views")["configure-col"]) .click(function() { self._constrainProperty(column.id); }).appendTo(th); @@ -372,6 +372,10 @@ ExtendReconciledDataPreviewDialog.prototype._constrainProperty = function(id) { form += ''; } } + + if (form == '') { + form = ''+$.i18n('core-views')['no-settings']+'' + } } body.html( @@ -390,8 +394,8 @@ ExtendReconciledDataPreviewDialog.prototype._constrainProperty = function(id) { } footer.html( - '' + - '' + '' + + '' ); var footerElmts = DOM.bind(footer); diff --git a/main/webapp/modules/core/scripts/views/data-table/menu-edit-column.js b/main/webapp/modules/core/scripts/views/data-table/menu-edit-column.js index 0a327bc51..015bb84fd 100644 --- a/main/webapp/modules/core/scripts/views/data-table/menu-edit-column.js +++ b/main/webapp/modules/core/scripts/views/data-table/menu-edit-column.js @@ -173,50 +173,6 @@ DataTableColumnHeaderUI.extendMenu(function(column, columnHeaderUI, menu) { ); }; -/* - var doAddColumnByReconciliation = function() { - var frame = $( - DOM.loadHTML("core", "scripts/views/data-table/add-column-by-reconciliation.html")); - - var elmts = DOM.bind(frame); - elmts.dialogHeader.text($.i18n._('core-views')["add-by-recon"]); - - elmts.suggestedPropertyHeader.html('Suggested properties'); - elmts.previewHeader.html('Preview'); - elmts.addPropertyHeader.html('Add property'); - elmts.okButton.html($.i18n._('core-buttons')["ok"]); - elmts.cancelButton.text($.i18n._('core-buttons')["cancel"]); - - var level = DialogSystem.showDialog(frame); - var dismiss = function() { DialogSystem.dismissUntil(level - 1); }; - - 
elmts.cancelButton.click(dismiss); - elmts.okButton.click(function() { - var columnName = $.trim(elmts.columnNameInput[0].value); - if (!columnName.length) { - alert($.i18n._('core-views')["warning-col-name"]); - return; - } - - Refine.postCoreProcess( - "add-column-by-fetching-urls", - { - baseColumnName: column.name, - urlExpression: previewWidget.getExpression(true), - newColumnName: columnName, - columnInsertIndex: columnIndex + 1, - delay: elmts.throttleDelayInput[0].value, - onError: $('input[name="dialog-onerror-choice"]:checked')[0].value, - cacheResponses: $('input[name="dialog-cache-responses"]')[0].checked, - }, - null, - { modelsChanged: true } - ); - dismiss(); - }); - }; -*/ - var doRemoveColumn = function() { Refine.postCoreProcess( "remove-column", @@ -371,7 +327,7 @@ DataTableColumnHeaderUI.extendMenu(function(column, columnHeaderUI, menu) { }, { id: "core/add-column-by-reconciliation", - label: $.i18n._('core-views')["add-by-recon"]+"...", + label: $.i18n._('core-views')["add-col-recon-val"]+"...", click: doAddColumnByReconciliation }, {}, From 8437a9d245e2fedaa2c9063b2da2e5dffa2e3362 Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Sat, 15 Jul 2017 14:17:27 +0100 Subject: [PATCH 09/11] Cleaner HTML form generation --- .../dialogs/extend-data-preview-dialog.js | 128 +++++++----------- 1 file changed, 51 insertions(+), 77 deletions(-) diff --git a/main/webapp/modules/core/scripts/dialogs/extend-data-preview-dialog.js b/main/webapp/modules/core/scripts/dialogs/extend-data-preview-dialog.js index 2aa6f9e66..bdb5d5fdc 100644 --- a/main/webapp/modules/core/scripts/dialogs/extend-data-preview-dialog.js +++ b/main/webapp/modules/core/scripts/dialogs/extend-data-preview-dialog.js @@ -183,7 +183,6 @@ ExtendReconciledDataPreviewDialog.prototype._update = function() { columnName: this._column.name }; - console.log(this._extension); $.post( "command/core/preview-extend-data?" 
+ $.param(params), { @@ -253,17 +252,17 @@ ExtendReconciledDataPreviewDialog.prototype._renderPreview = function(data) { $('
').appendTo(th); $('') - .text($.i18n("core-views")["remove-prop"]) + .text($.i18n._("core-views")["remove-prop"]) .addClass("action") - .attr("title", $.i18n("core-views")["remove-col"]) + .attr("title", $.i18n._("core-views")["remove-col"]) .click(function() { self._removeProperty(column.id); }).appendTo(th); $('') - .text($.i18n("core-views")["configure-prop"]) + .text($.i18n._("core-views")["configure-prop"]) .addClass("action") - .attr("title", $.i18n("core-views")["configure-col"]) + .attr("title", $.i18n._("core-views")["configure-col"]) .click(function() { self._constrainProperty(column.id); }).appendTo(th); @@ -325,19 +324,9 @@ ExtendReconciledDataPreviewDialog.prototype._constrainProperty = function(id) { var body = $('
').addClass("dialog-body").appendTo(frame); var footer = $('
').addClass("dialog-footer").appendTo(frame); - // by default we display an area where the user can input JSON - var form = ( - '' + - 'Enter query settings as JSON' + - '' + - '' + - '' + - ''); - - // If the service metadata specifies fields, we build a proper form to make it more user-friendly var fields = self._serviceMetadata.extend.property_settings; + var table = $('
'); if (fields != null) { - form = ''; for(var i = 0; i < fields.length; i++) { var field = fields[i]; var fieldHTML = ''; @@ -345,57 +334,60 @@ ExtendReconciledDataPreviewDialog.prototype._constrainProperty = function(id) { if (property.settings != null && property.settings[field.name] != null) { currentValue = property.settings[field.name]; } + var tr = $(''); + var td = $('').attr('title', field.help_text).appendTo(tr); if (field.type == 'select') { - fieldHTML += ''+field.label+':
'; + var fieldLabel = $('').text(field.label+':').appendTo(td); + td.append($('
')); for(var j = 0; j < field.choices.length; j++) { var choice = field.choices[j]; - fieldHTML += '