diff --git a/main/src/com/google/refine/commands/recon/ExtendDataCommand.java b/main/src/com/google/refine/commands/recon/ExtendDataCommand.java new file mode 100644 index 000000000..1e4d4a98f --- /dev/null +++ b/main/src/com/google/refine/commands/recon/ExtendDataCommand.java @@ -0,0 +1,71 @@ +/* + +Copyright 2010, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +package com.google.refine.commands.recon; + +import javax.servlet.http.HttpServletRequest; + +import org.json.JSONObject; + +import com.google.refine.commands.EngineDependentCommand; +import com.google.refine.operations.recon.ExtendDataOperation; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Project; +import com.google.refine.util.ParsingUtilities; + +public class ExtendDataCommand extends EngineDependentCommand { + @Override + protected AbstractOperation createOperation(Project project, + HttpServletRequest request, JSONObject engineConfig) throws Exception { + + String baseColumnName = request.getParameter("baseColumnName"); + int columnInsertIndex = Integer.parseInt(request.getParameter("columnInsertIndex")); + String endpoint = request.getParameter("endpoint"); + String identifierSpace = request.getParameter("identifierSpace"); + String schemaSpace = request.getParameter("schemaSpace"); + + String jsonString = request.getParameter("extension"); + JSONObject extension = ParsingUtilities.evaluateJsonStringToObject(jsonString); + + return new ExtendDataOperation( + engineConfig, + baseColumnName, + endpoint, + identifierSpace, + schemaSpace, + extension, + columnInsertIndex + ); + } + +} diff --git a/main/src/com/google/refine/commands/recon/PreviewExtendDataCommand.java b/main/src/com/google/refine/commands/recon/PreviewExtendDataCommand.java new file mode 100644 index 000000000..037b1e87d --- /dev/null +++ b/main/src/com/google/refine/commands/recon/PreviewExtendDataCommand.java @@ -0,0 +1,200 @@ +/* + +Copyright 2010, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +package com.google.refine.commands.recon; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.json.JSONArray; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.commands.Command; +import com.google.refine.model.recon.ReconciledDataExtensionJob; +import com.google.refine.model.recon.ReconciledDataExtensionJob.ColumnInfo; +import com.google.refine.model.recon.ReconciledDataExtensionJob.DataExtension; +import com.google.refine.model.Cell; +import com.google.refine.model.Project; +import com.google.refine.model.ReconCandidate; +import com.google.refine.model.Row; +import com.google.refine.model.Column; +import com.google.refine.model.recon.ReconConfig; +import com.google.refine.model.recon.StandardReconConfig; +import com.google.refine.util.ParsingUtilities; + +public class PreviewExtendDataCommand extends Command { + + @Override + public void doPost(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + + try { + Project project = getProject(request); + String columnName = request.getParameter("columnName"); + + String rowIndicesString = request.getParameter("rowIndices"); + if (rowIndicesString == null) { + respond(response, "{ \"code\" : \"error\", \"message\" : \"No row indices specified\" }"); + return; + } + + String jsonString = request.getParameter("extension"); + JSONObject json = ParsingUtilities.evaluateJsonStringToObject(jsonString); + + JSONArray rowIndices = ParsingUtilities.evaluateJsonStringToArray(rowIndicesString); + int length = rowIndices.length(); + Column column = project.columnModel.getColumnByName(columnName); + int cellIndex = column.getCellIndex(); + + // get the endpoint to extract data from + String endpoint = null; + ReconConfig cfg = column.getReconConfig(); + if (cfg != null && + cfg instanceof StandardReconConfig) { + StandardReconConfig scfg = (StandardReconConfig)cfg; + endpoint = scfg.service; + } else { + respond(response, "{ \"code\" : \"error\", \"message\" : \"This column has not been reconciled with a standard service.\" }"); + return; + } + + + List topicNames = new ArrayList(); + List topicIds = new ArrayList(); + Set ids = new HashSet(); + for (int i = 0; i < length; i++) { + int rowIndex = rowIndices.getInt(i); + if (rowIndex >= 0 && rowIndex < project.rows.size()) { + Row row = project.rows.get(rowIndex); + Cell cell = row.getCell(cellIndex); + if (cell != null && cell.recon != null && cell.recon.match != null) { + topicNames.add(cell.recon.match.name); + topicIds.add(cell.recon.match.id); + ids.add(cell.recon.match.id); + } else { + topicNames.add(null); + topicIds.add(null); + ids.add(null); + } + } + } + + Map reconCandidateMap = new HashMap(); + ReconciledDataExtensionJob job = new ReconciledDataExtensionJob(json, endpoint); + Map map = job.extend(ids, reconCandidateMap); + + response.setCharacterEncoding("UTF-8"); + response.setHeader("Content-Type", "application/json"); + + JSONWriter writer = new JSONWriter(response.getWriter()); + writer.object(); + writer.key("code"); writer.value("ok"); + writer.key("columns"); + writer.array(); + for (ColumnInfo info : job.columns) { + writer.object(); + writer.key("name"); + writer.value(info.name); + writer.key("id"); + writer.value(info.id); + writer.endObject(); + } + writer.endArray(); + + writer.key("rows"); + writer.array(); + for (int r = 0; r < topicNames.size(); r++) { + String id = topicIds.get(r); + String topicName = topicNames.get(r); + + if (id != null && map.containsKey(id)) { + DataExtension ext = map.get(id); + boolean first = true; + + if (ext.data.length > 0) { + for (Object[] row : ext.data) { + writer.array(); + if (first) { + writer.value(topicName); + first = false; + } else { + writer.value(null); + } + + for (Object cell : row) { + if (cell != null && cell instanceof ReconCandidate) { + ReconCandidate rc = (ReconCandidate) cell; + writer.object(); + writer.key("id"); writer.value(rc.id); + writer.key("name"); writer.value(rc.name); + writer.endObject(); + } else { + writer.value(cell); + } + } + + writer.endArray(); + } + continue; + } + } + + writer.array(); + if (id != null) { + writer.object(); + writer.key("id"); writer.value(id); + writer.key("name"); writer.value(topicName); + writer.endObject(); + } else { + writer.value(""); + } + writer.endArray(); + } + writer.endArray(); + + writer.endObject(); + } catch (Exception e) { + respondException(response, e); + } + } +} diff --git a/main/src/com/google/refine/model/ReconType.java b/main/src/com/google/refine/model/ReconType.java new file mode 100644 index 000000000..088da69b5 --- /dev/null +++ b/main/src/com/google/refine/model/ReconType.java @@ -0,0 +1,79 @@ +/* + +Copyright 2010, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +package com.google.refine.model; + +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.Jsonizable; + +/** + * This represents a type from the reconciliation + * service. It is used when extending data to + * store the (expected) types of new columns. + */ +public class ReconType implements Jsonizable { + public String id; + public String name; + + public ReconType(String id, String name) { + this.id = id; + this.name = name; + } + + @Override + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("id"); writer.value(id); + writer.key("name"); writer.value(name); + writer.endObject(); + } + + static public ReconType load(JSONObject obj) throws Exception { + if (obj == null) { + return null; + } + + ReconType type = new ReconType( + obj.getString("id"), + obj.getString("name") + ); + return type; + } +} diff --git a/main/src/com/google/refine/model/changes/DataExtensionChange.java b/main/src/com/google/refine/model/changes/DataExtensionChange.java new file mode 100644 index 000000000..b7098a1e0 --- /dev/null +++ b/main/src/com/google/refine/model/changes/DataExtensionChange.java @@ -0,0 +1,511 @@ +/* + +Copyright 2010, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +package com.google.refine.model.changes; + +import java.io.IOException; +import java.io.LineNumberReader; +import java.io.Serializable; +import java.io.Writer; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.model.ReconType; +import com.google.refine.model.recon.DataExtensionReconConfig; +import com.google.refine.model.recon.ReconciledDataExtensionJob.DataExtension; +import com.google.refine.history.Change; +import com.google.refine.model.Cell; +import com.google.refine.model.Column; +import com.google.refine.model.ModelException; +import com.google.refine.model.Project; +import com.google.refine.model.Recon; +import com.google.refine.model.Recon.Judgment; +import com.google.refine.model.ReconCandidate; +import com.google.refine.model.ReconStats; +import com.google.refine.model.Row; +import com.google.refine.util.ParsingUtilities; +import com.google.refine.util.Pool; + +public class DataExtensionChange implements Change { + final protected String _baseColumnName; + final protected String _service; + final protected String _identifierSpace; + final protected String _schemaSpace; + final protected int _columnInsertIndex; + + final protected List _columnNames; + final protected List _columnTypes; + + final protected List _rowIndices; + final protected List _dataExtensions; + + protected long _historyEntryID; + protected int _firstNewCellIndex = -1; + protected List _oldRows; + protected List _newRows; + + public DataExtensionChange( + String baseColumnName, + String service, + String identifierSpace, + String schemaSpace, + int columnInsertIndex, + List columnNames, + List columnTypes, + List rowIndices, + List dataExtensions, + long historyEntryID + ) { + _baseColumnName = baseColumnName; + _service = service; + _identifierSpace = identifierSpace; + _schemaSpace = schemaSpace; + _columnInsertIndex = columnInsertIndex; + + _columnNames = columnNames; + _columnTypes = columnTypes; + + _rowIndices = rowIndices; + _dataExtensions = dataExtensions; + + _historyEntryID = historyEntryID; + } + + protected DataExtensionChange( + String baseColumnName, + String service, + String identifierSpace, + String schemaSpace, + int columnInsertIndex, + + List columnNames, + List columnTypes, + + List rowIndices, + List dataExtensions, + int firstNewCellIndex, + List oldRows, + List newRows + ) { + _baseColumnName = baseColumnName; + _service = service; + _identifierSpace = identifierSpace; + _schemaSpace = schemaSpace; + _columnInsertIndex = columnInsertIndex; + + _columnNames = columnNames; + _columnTypes = columnTypes; + + _rowIndices = rowIndices; + _dataExtensions = dataExtensions; + + _firstNewCellIndex = firstNewCellIndex; + _oldRows = oldRows; + _newRows = newRows; + } + + @Override + public void apply(Project project) { + synchronized (project) { + if (_firstNewCellIndex < 0) { + _firstNewCellIndex = project.columnModel.allocateNewCellIndex(); + for (int i = 1; i < _columnNames.size(); i++) { + project.columnModel.allocateNewCellIndex(); + } + + _oldRows = new ArrayList(project.rows); + + _newRows = new ArrayList(project.rows.size()); + + int cellIndex = project.columnModel.getColumnByName(_baseColumnName).getCellIndex(); + int keyCellIndex = project.columnModel.columns.get(project.columnModel.getKeyColumnIndex()).getCellIndex(); + int index = 0; + + int rowIndex = index < _rowIndices.size() ? _rowIndices.get(index) : _oldRows.size(); + DataExtension dataExtension = index < _rowIndices.size() ? _dataExtensions.get(index) : null; + + index++; + + Map reconMap = new HashMap(); + + for (int r = 0; r < _oldRows.size(); r++) { + Row oldRow = _oldRows.get(r); + if (r < rowIndex) { + _newRows.add(oldRow.dup()); + continue; + } + + if (dataExtension == null || dataExtension.data.length == 0) { + _newRows.add(oldRow); + } else { + Row firstNewRow = oldRow.dup(); + extendRow(firstNewRow, dataExtension, 0, reconMap); + _newRows.add(firstNewRow); + + int r2 = r + 1; + for (int subR = 1; subR < dataExtension.data.length; subR++) { + if (r2 < project.rows.size()) { + Row oldRow2 = project.rows.get(r2); + if (oldRow2.isCellBlank(cellIndex) && + oldRow2.isCellBlank(keyCellIndex)) { + + Row newRow = oldRow2.dup(); + extendRow(newRow, dataExtension, subR, reconMap); + + _newRows.add(newRow); + r2++; + + continue; + } + } + + Row newRow = new Row(cellIndex + _columnNames.size()); + extendRow(newRow, dataExtension, subR, reconMap); + + _newRows.add(newRow); + } + + r = r2 - 1; // r will be incremented by the for loop anyway + } + + rowIndex = index < _rowIndices.size() ? _rowIndices.get(index) : _oldRows.size(); + dataExtension = index < _rowIndices.size() ? _dataExtensions.get(index) : null; + index++; + } + } + + project.rows.clear(); + project.rows.addAll(_newRows); + + for (int i = 0; i < _columnNames.size(); i++) { + String name = _columnNames.get(i); + int cellIndex = _firstNewCellIndex + i; + + Column column = new Column(cellIndex, name); + ReconType columnType = _columnTypes.get(i); + column.setReconConfig(new DataExtensionReconConfig( + _service, + _identifierSpace, + _schemaSpace, + columnType)); + if (columnType != null) { + column.setReconStats(ReconStats.create(project, cellIndex)); + } + + try { + project.columnModel.addColumn(_columnInsertIndex + i, column, true); + + // the column might have been renamed to avoid collision + _columnNames.set(i, column.getName()); + } catch (ModelException e) { + // won't get here since we set the avoid collision flag + } + } + + project.update(); + } + } + + protected void extendRow( + Row row, + DataExtension dataExtension, + int extensionRowIndex, + Map reconMap + ) { + Object[] values = dataExtension.data[extensionRowIndex]; + for (int c = 0; c < values.length; c++) { + Object value = values[c]; + Cell cell = null; + + if (value instanceof ReconCandidate) { + ReconCandidate rc = (ReconCandidate) value; + Recon recon; + if (reconMap.containsKey(rc.id)) { + recon = reconMap.get(rc.id); + } else { + recon = new Recon(_historyEntryID, _identifierSpace, _schemaSpace); + recon.addCandidate(rc); + recon.service = _service; + recon.match = rc; + recon.matchRank = 0; + recon.judgment = Judgment.Matched; + recon.judgmentAction = "auto"; + recon.judgmentBatchSize = 1; + + reconMap.put(rc.id, recon); + } + cell = new Cell(rc.name, recon); + } else { + cell = new Cell((Serializable) value, null); + } + + row.setCell(_firstNewCellIndex + c, cell); + } + } + + @Override + public void revert(Project project) { + synchronized (project) { + project.rows.clear(); + project.rows.addAll(_oldRows); + + for (int i = 0; i < _columnNames.size(); i++) { + project.columnModel.columns.remove(_columnInsertIndex); + } + + project.update(); + } + } + + @Override + public void save(Writer writer, Properties options) throws IOException { + writer.write("baseColumnName="); writer.write(_baseColumnName); writer.write('\n'); + writer.write("service="); writer.write(_service); writer.write('\n'); + writer.write("identifierSpace="); writer.write(_identifierSpace); writer.write('\n'); + writer.write("schemaSpace="); writer.write(_schemaSpace); writer.write('\n'); + writer.write("columnInsertIndex="); writer.write(Integer.toString(_columnInsertIndex)); writer.write('\n'); + writer.write("columnNameCount="); writer.write(Integer.toString(_columnNames.size())); writer.write('\n'); + for (String name : _columnNames) { + writer.write(name); writer.write('\n'); + } + writer.write("columnTypeCount="); writer.write(Integer.toString(_columnTypes.size())); writer.write('\n'); + for (ReconType type : _columnTypes) { + try { + if(type != null) { + JSONWriter jsonWriter = new JSONWriter(writer); + type.write(jsonWriter, options); + } + } catch (JSONException e) { + // ??? + } + writer.write('\n'); + } + writer.write("rowIndexCount="); writer.write(Integer.toString(_rowIndices.size())); writer.write('\n'); + for (Integer rowIndex : _rowIndices) { + writer.write(rowIndex.toString()); writer.write('\n'); + } + writer.write("dataExtensionCount="); writer.write(Integer.toString(_dataExtensions.size())); writer.write('\n'); + for (DataExtension dataExtension : _dataExtensions) { + if (dataExtension == null) { + writer.write('\n'); + continue; + } + + writer.write(Integer.toString(dataExtension.data.length)); writer.write('\n'); + + for (Object[] values : dataExtension.data) { + for (Object value : values) { + if (value == null) { + writer.write("null"); + } else if (value instanceof ReconCandidate) { + try { + JSONWriter jsonWriter = new JSONWriter(writer); + ((ReconCandidate) value).write(jsonWriter, options); + } catch (JSONException e) { + // ??? + } + } else if (value instanceof String) { + writer.write(JSONObject.quote((String) value)); + } else { + writer.write(value.toString()); + } + writer.write('\n'); + } + } + } + + writer.write("firstNewCellIndex="); writer.write(Integer.toString(_firstNewCellIndex)); writer.write('\n'); + + writer.write("newRowCount="); writer.write(Integer.toString(_newRows.size())); writer.write('\n'); + for (Row row : _newRows) { + row.save(writer, options); + writer.write('\n'); + } + writer.write("oldRowCount="); writer.write(Integer.toString(_oldRows.size())); writer.write('\n'); + for (Row row : _oldRows) { + row.save(writer, options); + writer.write('\n'); + } + writer.write("/ec/\n"); // end of change marker + } + + static public Change load(LineNumberReader reader, Pool pool) throws Exception { + String baseColumnName = null; + String service = null; + String identifierSpace = null; + String schemaSpace = null; + int columnInsertIndex = -1; + + List columnNames = null; + List columnTypes = null; + + List rowIndices = null; + List dataExtensions = null; + + List oldRows = null; + List newRows = null; + + int firstNewCellIndex = -1; + + String line; + while ((line = reader.readLine()) != null && !"/ec/".equals(line)) { + int equal = line.indexOf('='); + CharSequence field = line.subSequence(0, equal); + String value = line.substring(equal + 1); + + if ("baseColumnName".equals(field)) { + baseColumnName = value; + } else if ("service".equals(field)) { + service = value; + } else if ("identifierSpace".equals(field)) { + identifierSpace = value; + } else if ("schemaSpace".equals(field)) { + schemaSpace = value; + } else if ("columnInsertIndex".equals(field)) { + columnInsertIndex = Integer.parseInt(value); + } else if ("firstNewCellIndex".equals(field)) { + firstNewCellIndex = Integer.parseInt(value); + } else if ("rowIndexCount".equals(field)) { + int count = Integer.parseInt(value); + + rowIndices = new ArrayList(count); + for (int i = 0; i < count; i++) { + line = reader.readLine(); + if (line != null) { + rowIndices.add(Integer.parseInt(line)); + } + } + } else if ("columnNameCount".equals(field)) { + int count = Integer.parseInt(value); + + columnNames = new ArrayList(count); + for (int i = 0; i < count; i++) { + line = reader.readLine(); + if (line != null) { + columnNames.add(line); + } + } + } else if ("columnTypeCount".equals(field)) { + int count = Integer.parseInt(value); + + columnTypes = new ArrayList(count); + for (int i = 0; i < count; i++) { + line = reader.readLine(); + if (line == null || line.length() == 0) { + columnTypes.add(null); + } else { + columnTypes.add(ReconType.load(ParsingUtilities.evaluateJsonStringToObject(line))); + } + } + } else if ("dataExtensionCount".equals(field)) { + int count = Integer.parseInt(value); + + dataExtensions = new ArrayList(count); + for (int i = 0; i < count; i++) { + line = reader.readLine(); + + if (line == null) { + continue; + } + + if (line.length() == 0) { + dataExtensions.add(null); + continue; + } + + int rowCount = Integer.parseInt(line); + Object[][] data = new Object[rowCount][]; + + for (int r = 0; r < rowCount; r++) { + Object[] row = new Object[columnNames.size()]; + for (int c = 0; c < columnNames.size(); c++) { + line = reader.readLine(); + + row[c] = ReconCandidate.loadStreaming(line); + } + + data[r] = row; + } + + dataExtensions.add(new DataExtension(data)); + } + } else if ("oldRowCount".equals(field)) { + int count = Integer.parseInt(value); + + oldRows = new ArrayList(count); + for (int i = 0; i < count; i++) { + line = reader.readLine(); + if (line != null) { + oldRows.add(Row.load(line, pool)); + } + } + } else if ("newRowCount".equals(field)) { + int count = Integer.parseInt(value); + + newRows = new ArrayList(count); + for (int i = 0; i < count; i++) { + line = reader.readLine(); + if (line != null) { + newRows.add(Row.load(line, pool)); + } + } + } + + } + + DataExtensionChange change = new DataExtensionChange( + baseColumnName, + service, + identifierSpace, + schemaSpace, + columnInsertIndex, + columnNames, + columnTypes, + rowIndices, + dataExtensions, + firstNewCellIndex, + oldRows, + newRows + ); + + + return change; + } +} diff --git a/main/src/com/google/refine/model/recon/DataExtensionReconConfig.java b/main/src/com/google/refine/model/recon/DataExtensionReconConfig.java new file mode 100644 index 000000000..9c1eede9b --- /dev/null +++ b/main/src/com/google/refine/model/recon/DataExtensionReconConfig.java @@ -0,0 +1,109 @@ +/* + +Copyright 2010, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +package com.google.refine.model.recon; + +import java.util.List; +import java.util.Properties; +import java.util.ArrayList; + +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.model.ReconType; +import com.google.refine.model.Cell; +import com.google.refine.model.Project; +import com.google.refine.model.Recon; +import com.google.refine.model.Row; +import com.google.refine.model.recon.StandardReconConfig; +import com.google.refine.model.recon.ReconJob; + +public class DataExtensionReconConfig extends StandardReconConfig { + final public ReconType type; + + private final static String WARN = "Not implemented"; + + static public ReconConfig reconstruct(JSONObject obj) throws Exception { + JSONObject type = obj.getJSONObject("type"); + + ReconType typ = null; + if(obj.has("id")) { + typ = new ReconType(obj.getString("id"), + obj.has("name") ? obj.getString("name") : obj.getString("id")); + } + + return new DataExtensionReconConfig( + obj.getString("service"), + obj.has("identifierSpace") ? obj.getString("identifierSpace") : null, + obj.has("schemaSpace") ? obj.getString("schemaSpace") : null, + typ); + } + + public DataExtensionReconConfig( + String service, + String identifierSpace, + String schemaSpace, + ReconType type) { + super( + service, + identifierSpace, + schemaSpace, + type != null ? type.id : null, + type != null ? type.name : null, + true, + new ArrayList()); + this.type = type; + } + + @Override + public ReconJob createJob(Project project, int rowIndex, Row row, + String columnName, Cell cell) { + throw new RuntimeException(WARN); + } + + @Override + public int getBatchSize() { + throw new RuntimeException(WARN); + } + + @Override + public List batchRecon(List jobs, long historyEntryID) { + throw new RuntimeException(WARN); + } + + @Override + public String getBriefDescription(Project project, String columnName) { + throw new RuntimeException(WARN); + } +} diff --git a/main/src/com/google/refine/model/recon/ReconciledDataExtensionJob.java b/main/src/com/google/refine/model/recon/ReconciledDataExtensionJob.java new file mode 100644 index 000000000..0ddce01d3 --- /dev/null +++ b/main/src/com/google/refine/model/recon/ReconciledDataExtensionJob.java @@ -0,0 +1,303 @@ +/* + +Copyright 2010, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +/** + * + */ +package com.google.refine.model.recon; + +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.Serializable; +import java.io.StringWriter; +import java.io.Writer; +import java.net.URL; +import java.net.URLConnection; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.model.ReconType; +import com.google.refine.model.ReconCandidate; +import com.google.refine.model.recon.StandardReconConfig; +import com.google.refine.util.JSONUtilities; +import com.google.refine.util.ParsingUtilities; +import com.google.refine.expr.functions.ToDate; + +public class ReconciledDataExtensionJob { + static public class DataExtension { + final public Object[][] data; + + public DataExtension(Object[][] data) { + this.data = data; + } + } + + static public class ColumnInfo { + final public String name; + final public String id; + final public ReconType expectedType; + + protected ColumnInfo(String name, String id, ReconType expectedType) { + this.name = name; + this.id = id; + this.expectedType = expectedType; + } + } + + final public JSONObject extension; + final public String endpoint; + final public List columns = new ArrayList(); + + public ReconciledDataExtensionJob(JSONObject obj, String endpoint) throws JSONException { + this.extension = obj; + this.endpoint = endpoint; + } + + public Map extend( + Set ids, + Map reconCandidateMap + ) throws Exception { + StringWriter writer = new StringWriter(); + formulateQuery(ids, extension, writer); + + String query = writer.toString(); + InputStream is = performQuery(this.endpoint, query); + try { + String s = ParsingUtilities.inputStreamToString(is); + JSONObject o = ParsingUtilities.evaluateJsonStringToObject(s); + + if(columns.size() == 0) { + // Extract the column metadata + gatherColumnInfo(o.getJSONArray("meta"), columns); + } + + Map map = new HashMap(); + if (o.has("rows")){ + JSONObject records = o.getJSONObject("rows"); + + // for each identifier + for (String id : ids) { + if (records.has(id)) { + JSONObject record = records.getJSONObject(id); + + ReconciledDataExtensionJob.DataExtension ext = collectResult(record, reconCandidateMap); + + if (ext != null) { + map.put(id, ext); + } + } + } + } + + return map; + } finally { + is.close(); + } + } + + static protected InputStream performQuery(String endpoint, String query) throws IOException { + URL url = new URL(endpoint); + + URLConnection connection = url.openConnection(); + connection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded"); + connection.setConnectTimeout(5000); + connection.setDoOutput(true); + + DataOutputStream dos = new DataOutputStream(connection.getOutputStream()); + try { + String body = "extend=" + ParsingUtilities.encode(query); + + dos.writeBytes(body); + } finally { + dos.flush(); + dos.close(); + } + + connection.connect(); + + return connection.getInputStream(); + } + + + protected ReconciledDataExtensionJob.DataExtension collectResult( + JSONObject record, + Map reconCandidateMap + ) throws JSONException { + List rows = new ArrayList(); + + // for each property + int colindex = 0; + for(ColumnInfo ci : columns) { + String pid = ci.id; + JSONArray values = record.getJSONArray(pid); + if (values == null) { + continue; + } + + // for each value + for(int rowindex = 0; rowindex < values.length(); rowindex++) { + JSONObject val = values.getJSONObject(rowindex); + // store a reconciled value + if (val.has("id")) { + storeCell(rows, rowindex, colindex, val, reconCandidateMap); + } else if (val.has("str")) { + // store a bare string + String str = val.getString("str"); + storeCell(rows, rowindex, colindex, str); + } else if (val.has("float")) { + float v = Float.parseFloat(val.getString("float")); + storeCell(rows, rowindex, colindex, v); + } else if (val.has("int")) { + int v = Integer.parseInt(val.getString("int")); + storeCell(rows, rowindex, colindex, v); + } else if (val.has("date")) { + ToDate td = new ToDate(); + String[] args = new String[1]; + args[0] = val.getString("date"); + Object v = td.call(null, args); + storeCell(rows, rowindex, colindex, v); + } else if(val.has("bool")) { + boolean v = val.getString("bool") == "true"; + storeCell(rows, rowindex, colindex, v); + } + } + colindex++; + } + + + + Object[][] data = new Object[rows.size()][columns.size()]; + rows.toArray(data); + + return new DataExtension(data); + } + + protected void storeCell( + List rows, + int row, + int col, + Object value + ) { + while (row >= rows.size()) { + rows.add(new Object[columns.size()]); + } + rows.get(row)[col] = value; + } + + protected void storeCell( + List rows, + int row, + int col, + JSONObject obj, + Map reconCandidateMap + ) throws JSONException { + String id = obj.getString("id"); + ReconCandidate rc; + if (reconCandidateMap.containsKey(id)) { + rc = reconCandidateMap.get(id); + } else { + rc = new ReconCandidate( + obj.getString("id"), + obj.getString("name"), + JSONUtilities.getStringArray(obj, "type"), + 100 + ); + + reconCandidateMap.put(id, rc); + } + + storeCell(rows, row, col, rc); + } + + + static protected void formulateQuery(Set ids, JSONObject node, Writer writer) throws JSONException { + JSONWriter jsonWriter = new JSONWriter(writer); + + jsonWriter.object(); + + jsonWriter.key("ids"); + jsonWriter.array(); + for (String id : ids) { + if (id != null) { + jsonWriter.value(id); + } + } + jsonWriter.endArray(); + + jsonWriter.key("properties"); + jsonWriter.array(); + JSONArray properties = node.getJSONArray("properties"); + int l = properties.length(); + + for (int i = 0; i < l; i++) { + JSONObject property = properties.getJSONObject(i); + jsonWriter.object(); + jsonWriter.key("id"); + jsonWriter.value(property.getString("id")); + if (property.has("settings")) { + JSONObject settings = property.getJSONObject("settings"); + jsonWriter.key("settings"); + jsonWriter.value(settings); + } + jsonWriter.endObject(); + } + jsonWriter.endArray(); + jsonWriter.endObject(); + } + + static protected void gatherColumnInfo(JSONArray meta, List columns) throws JSONException { + for(int i = 0; i < meta.length(); i++) { + JSONObject col = meta.getJSONObject(i); + + ReconType expectedType = null; + if(col.has("type")) { + JSONObject expectedObj = col.getJSONObject("type"); + expectedType = new ReconType(expectedObj.getString("id"), expectedObj.getString("name")); + } + + columns.add(new ColumnInfo( + col.getString("name"), + col.getString("id"), + expectedType)); + } + } +} diff --git a/main/src/com/google/refine/operations/recon/ExtendDataOperation.java b/main/src/com/google/refine/operations/recon/ExtendDataOperation.java new file mode 100644 index 000000000..9771e52d5 --- /dev/null +++ b/main/src/com/google/refine/operations/recon/ExtendDataOperation.java @@ -0,0 +1,333 @@ +/* + +Copyright 2010, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +package com.google.refine.operations.recon; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.Set; + +import org.apache.commons.lang.StringUtils; +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.browsing.Engine; +import com.google.refine.browsing.FilteredRows; +import com.google.refine.browsing.RowVisitor; +import com.google.refine.model.changes.DataExtensionChange; +import com.google.refine.model.recon.ReconciledDataExtensionJob; +import com.google.refine.model.recon.ReconciledDataExtensionJob.ColumnInfo; +import com.google.refine.model.recon.ReconciledDataExtensionJob.DataExtension; +import com.google.refine.history.HistoryEntry; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Cell; +import com.google.refine.model.Column; +import com.google.refine.model.Project; +import com.google.refine.model.ReconCandidate; +import com.google.refine.model.ReconType; +import com.google.refine.model.Row; +import com.google.refine.model.changes.CellAtRow; +import com.google.refine.operations.EngineDependentOperation; +import com.google.refine.operations.OperationRegistry; +import com.google.refine.process.LongRunningProcess; +import com.google.refine.process.Process; + +public class ExtendDataOperation extends EngineDependentOperation { + final protected String _baseColumnName; + final protected String _endpoint; + final protected String _identifierSpace; + final protected String _schemaSpace; + final protected JSONObject _extension; + final protected int _columnInsertIndex; + + static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { + JSONObject engineConfig = obj.getJSONObject("engineConfig"); + + return new ExtendDataOperation( + engineConfig, + obj.getString("baseColumnName"), + obj.getString("endpoint"), + obj.getString("identifierSpace"), + obj.getString("schemaSpace"), + obj.getJSONObject("extension"), + obj.getInt("columnInsertIndex") + ); + } + + public ExtendDataOperation( + JSONObject engineConfig, + String baseColumnName, + String endpoint, + String identifierSpace, + String schemaSpace, + JSONObject extension, + int columnInsertIndex + ) { + super(engineConfig); + + _baseColumnName = baseColumnName; + _endpoint = endpoint; + _identifierSpace = identifierSpace; + _schemaSpace = schemaSpace; + _extension = extension; + _columnInsertIndex = columnInsertIndex; + } + + @Override + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); + writer.key("description"); writer.value(getBriefDescription(null)); + writer.key("engineConfig"); writer.value(getEngineConfig()); + writer.key("columnInsertIndex"); writer.value(_columnInsertIndex); + writer.key("baseColumnName"); writer.value(_baseColumnName); + writer.key("endpoint"); writer.value(_endpoint); + writer.key("identifierSpace"); writer.value(_identifierSpace); + writer.key("schemaSpace"); writer.value(_schemaSpace); + writer.key("extension"); writer.value(_extension); + writer.endObject(); + } + + @Override + protected String getBriefDescription(Project project) { + return "Extend data at index " + _columnInsertIndex + + " based on column " + _baseColumnName; + } + + protected String createDescription(Column column, List cellsAtRows) { + return "Extend data at index " + _columnInsertIndex + + " based on column " + column.getName() + + " by filling " + cellsAtRows.size(); + } + + @Override + public Process createProcess(Project project, Properties options) throws Exception { + return new ExtendDataProcess( + project, + getEngineConfig(), + getBriefDescription(null) + ); + } + + public class ExtendDataProcess extends LongRunningProcess implements Runnable { + final protected Project _project; + final protected JSONObject _engineConfig; + final protected long _historyEntryID; + protected int _cellIndex; + protected ReconciledDataExtensionJob _job; + + public ExtendDataProcess( + Project project, + JSONObject engineConfig, + String description + ) throws JSONException { + super(description); + _project = project; + _engineConfig = engineConfig; + _historyEntryID = HistoryEntry.allocateID(); + + _job = new ReconciledDataExtensionJob(_extension, _endpoint); + } + + @Override + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("id"); writer.value(hashCode()); + writer.key("description"); writer.value(_description); + writer.key("immediate"); writer.value(false); + writer.key("status"); writer.value(_thread == null ? "pending" : (_thread.isAlive() ? "running" : "done")); + writer.key("progress"); writer.value(_progress); + writer.endObject(); + } + + @Override + protected Runnable getRunnable() { + return this; + } + + protected void populateRowsWithMatches(List rowIndices) throws Exception { + Engine engine = new Engine(_project); + engine.initializeFromJSON(_engineConfig); + + Column column = _project.columnModel.getColumnByName(_baseColumnName); + if (column == null) { + throw new Exception("No column named " + _baseColumnName); + } + + _cellIndex = column.getCellIndex(); + + FilteredRows filteredRows = engine.getAllFilteredRows(); + filteredRows.accept(_project, new RowVisitor() { + List _rowIndices; + + public RowVisitor init(List rowIndices) { + _rowIndices = rowIndices; + return this; + } + + @Override + public void start(Project project) { + // nothing to do + } + + @Override + public void end(Project project) { + // nothing to do + } + + @Override + public boolean visit(Project project, int rowIndex, Row row) { + Cell cell = row.getCell(_cellIndex); + if (cell != null && cell.recon != null && cell.recon.match != null) { + _rowIndices.add(rowIndex); + } + + return false; + } + }.init(rowIndices)); + } + + protected int extendRows( + List rowIndices, + List dataExtensions, + int from, + int limit, + Map reconCandidateMap + ) { + Set ids = new HashSet(); + + int end; + for (end = from; end < limit && ids.size() < 10; end++) { + int index = rowIndices.get(end); + Row row = _project.rows.get(index); + Cell cell = row.getCell(_cellIndex); + + ids.add(cell.recon.match.id); + } + + Map map = null; + try { + map = _job.extend(ids, reconCandidateMap); + } catch (Exception e) { + map = new HashMap(); + } + + for (int i = from; i < end; i++) { + int index = rowIndices.get(i); + Row row = _project.rows.get(index); + Cell cell = row.getCell(_cellIndex); + String guid = cell.recon.match.id; + + if (map.containsKey(guid)) { + dataExtensions.add(map.get(guid)); + } else { + dataExtensions.add(null); + } + } + + return end; + } + + @Override + public void run() { + List rowIndices = new ArrayList(); + List dataExtensions = new ArrayList(); + + try { + populateRowsWithMatches(rowIndices); + } catch (Exception e2) { + // TODO : Not sure what to do here? + e2.printStackTrace(); + } + + int start = 0; + Map reconCandidateMap = new HashMap(); + + while (start < rowIndices.size()) { + int end = extendRows(rowIndices, dataExtensions, start, rowIndices.size(), reconCandidateMap); + start = end; + + _progress = end * 100 / rowIndices.size(); + try { + Thread.sleep(200); + } catch (InterruptedException e) { + if (_canceled) { + break; + } + } + } + + if (!_canceled) { + List columnNames = new ArrayList(); + for (ColumnInfo info : _job.columns) { + columnNames.add(info.name); + } + + List columnTypes = new ArrayList(); + for (ColumnInfo info : _job.columns) { + columnTypes.add(info.expectedType); + } + + HistoryEntry historyEntry = new HistoryEntry( + _historyEntryID, + _project, + _description, + ExtendDataOperation.this, + new DataExtensionChange( + _baseColumnName, + _endpoint, + _identifierSpace, + _schemaSpace, + _columnInsertIndex, + columnNames, + columnTypes, + rowIndices, + dataExtensions, + _historyEntryID) + ); + + _project.history.addEntry(historyEntry); + _project.processManager.onDoneProcess(this); + } + } + } +} diff --git a/main/tests/server/src/com/google/refine/tests/recon/DataExtensionTests.java b/main/tests/server/src/com/google/refine/tests/recon/DataExtensionTests.java new file mode 100644 index 000000000..fff3eaaf1 --- /dev/null +++ b/main/tests/server/src/com/google/refine/tests/recon/DataExtensionTests.java @@ -0,0 +1,299 @@ +/* + +Copyright 2010, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +package com.google.refine.tests.recon; + +import static org.mockito.Mockito.mock; + +import java.io.File; +import java.io.IOException; +import java.util.Properties; +import java.util.List; +import java.util.ArrayList; + +import org.json.JSONException; +import org.json.JSONObject; +import org.slf4j.LoggerFactory; +import org.testng.Assert; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.BeforeTest; +import org.testng.annotations.Test; + +import com.google.refine.ProjectManager; +import com.google.refine.ProjectMetadata; +import com.google.refine.browsing.Engine; +import com.google.refine.browsing.RowVisitor; +import com.google.refine.grel.Function; +import com.google.refine.io.FileProjectManager; +import com.google.refine.model.Cell; +import com.google.refine.model.Column; +import com.google.refine.model.ModelException; +import com.google.refine.model.Project; +import com.google.refine.model.Row; +import com.google.refine.model.Recon; +import com.google.refine.model.ReconCandidate; +import com.google.refine.process.Process; +import com.google.refine.process.ProcessManager; +import com.google.refine.operations.OnError; +import com.google.refine.operations.EngineDependentOperation; +import com.google.refine.operations.recon.ExtendDataOperation; +import com.google.refine.tests.RefineTest; +import com.google.refine.tests.util.TestUtils; + + +public class DataExtensionTests extends RefineTest { + + static final String ENGINE_JSON_URLS = "{\"mode\":\"row-based\"}}"; + static final String RECON_SERVICE = "https://tools.wmflabs.org/openrefine-wikidata/en/api"; + static final String RECON_IDENTIFIER_SPACE = "http://www.wikidata.org/entity/"; + static final String RECON_SCHEMA_SPACE = "http://www.wikidata.org/prop/direct/"; + + @Override + @BeforeTest + public void init() { + logger = LoggerFactory.getLogger(this.getClass()); + } + + // dependencies + Project project; + Properties options; + JSONObject engine_config; + Engine engine; + Properties bindings; + + @BeforeMethod + public void SetUp() throws JSONException, IOException, ModelException { + File dir = TestUtils.createTempDirectory("openrefine-test-workspace-dir"); + FileProjectManager.initialize(dir); + project = new Project(); + ProjectMetadata pm = new ProjectMetadata(); + pm.setName("Data Extension Test Project"); + ProjectManager.singleton.registerProject(project, pm); + + int index = project.columnModel.allocateNewCellIndex(); + Column column = new Column(index,"country"); + project.columnModel.addColumn(index, column, true); + + options = mock(Properties.class); + engine = new Engine(project); + engine_config = new JSONObject(ENGINE_JSON_URLS); + engine.initializeFromJSON(engine_config); + engine.setMode(Engine.Mode.RowBased); + + bindings = new Properties(); + bindings.put("project", project); + + Row row = new Row(2); + row.setCell(0, reconciledCell("Iran", "Q794")); + project.rows.add(row); + row = new Row(2); + row.setCell(0, reconciledCell("Japan", "Q17")); + project.rows.add(row); + row = new Row(2); + row.setCell(0, reconciledCell("Tajikistan", "Q863")); + project.rows.add(row); + row = new Row(2); + row.setCell(0, reconciledCell("United States of America", "Q30")); + project.rows.add(row); + } + + @AfterMethod + public void TearDown() { + project = null; + options = null; + engine = null; + bindings = null; + } + + static public Cell reconciledCell(String name, String id) { + ReconCandidate r = new ReconCandidate(id, name, new String[0], 100); + List candidates = new ArrayList(); + candidates.add(r); + Recon rec = new Recon(0, RECON_IDENTIFIER_SPACE, RECON_SCHEMA_SPACE); + rec.service = RECON_SERVICE; + rec.candidates = candidates; + rec.match = r; + return new Cell(name, rec); + } + + /** + * Test to fetch simple strings + */ + + @Test + public void testFetchStrings() throws Exception { + JSONObject extension = new JSONObject("{\"properties\":[{\"id\":\"P297\",\"name\":\"ISO 3166-1 alpha-2 code\"}]}"); + + EngineDependentOperation op = new ExtendDataOperation(engine_config, + "country", + RECON_SERVICE, + RECON_IDENTIFIER_SPACE, + RECON_SCHEMA_SPACE, + extension, + 1); + ProcessManager pm = project.getProcessManager(); + Process process = op.createProcess(project, options); + process.startPerforming(pm); + Assert.assertTrue(process.isRunning()); + try { + // We have 4 rows so 4000 ms should be largely enough. + Thread.sleep(5000); + } catch (InterruptedException e) { + Assert.fail("Test interrupted"); + } + Assert.assertFalse(process.isRunning()); + + // Inspect rows + Assert.assertTrue("IR".equals(project.rows.get(0).getCellValue(1))); + Assert.assertTrue("JP".equals(project.rows.get(1).getCellValue(1))); + Assert.assertTrue("TJ".equals(project.rows.get(2).getCellValue(1))); + Assert.assertTrue("US".equals(project.rows.get(3).getCellValue(1))); + + // Make sure we did not create any recon stats for that column (no reconciled value) + Assert.assertTrue(project.columnModel.getColumnByName("ISO 3166-1 alpha-2 code").getReconStats() == null); + } + + /** + * Test to fetch counts of values + */ + + @Test + public void testFetchCounts() throws Exception { + JSONObject extension = new JSONObject("{\"properties\":[{\"id\":\"P38\",\"name\":\"currency\",\"settings\":{\"count\":\"on\"}}]}"); + + EngineDependentOperation op = new ExtendDataOperation(engine_config, + "country", + RECON_SERVICE, + RECON_IDENTIFIER_SPACE, + RECON_SCHEMA_SPACE, + extension, + 1); + ProcessManager pm = project.getProcessManager(); + Process process = op.createProcess(project, options); + process.startPerforming(pm); + Assert.assertTrue(process.isRunning()); + try { + Thread.sleep(5000); + } catch (InterruptedException e) { + Assert.fail("Test interrupted"); + } + Assert.assertFalse(process.isRunning()); + + // Test to be updated as countries change currencies! + Assert.assertTrue(Math.round((float)project.rows.get(2).getCellValue(1)) == 2); + Assert.assertTrue(Math.round((float)project.rows.get(3).getCellValue(1)) == 1); + + // Make sure we did not create any recon stats for that column (no reconciled value) + Assert.assertTrue(project.columnModel.getColumnByName("currency").getReconStats() == null); + } + + /** + * Test fetch only the best statements + */ + @Test + public void testFetchCurrent() throws Exception { + JSONObject extension = new JSONObject("{\"properties\":[{\"id\":\"P38\",\"name\":\"currency\",\"settings\":{\"rank\":\"best\"}}]}"); + + EngineDependentOperation op = new ExtendDataOperation(engine_config, + "country", + RECON_SERVICE, + RECON_IDENTIFIER_SPACE, + RECON_SCHEMA_SPACE, + extension, + 1); + ProcessManager pm = project.getProcessManager(); + Process process = op.createProcess(project, options); + process.startPerforming(pm); + Assert.assertTrue(process.isRunning()); + try { + Thread.sleep(5000); + } catch (InterruptedException e) { + Assert.fail("Test interrupted"); + } + Assert.assertFalse(process.isRunning()); + + /* + * Tajikistan has one "preferred" currency and one "normal" one + * (in terms of statement ranks). + * But thanks to our setting in the extension configuration, + * we only fetch the current one, so the one just after it is + * the one for the US (USD). + */ + Assert.assertTrue("Tajikistani somoni".equals(project.rows.get(2).getCellValue(1))); + Assert.assertTrue("United States dollar".equals(project.rows.get(3).getCellValue(1))); + + // Make sure all the values are reconciled + Assert.assertTrue(project.columnModel.getColumnByName("currency").getReconStats().matchedTopics == 4); + } + + /** + * Test fetch records (multiple values per reconciled cell) + */ + @Test + public void testFetchRecord() throws Exception { + JSONObject extension = new JSONObject("{\"properties\":[{\"id\":\"P38\",\"name\":\"currency\"}]}"); + + EngineDependentOperation op = new ExtendDataOperation(engine_config, + "country", + RECON_SERVICE, + RECON_IDENTIFIER_SPACE, + RECON_SCHEMA_SPACE, + extension, + 1); + ProcessManager pm = project.getProcessManager(); + Process process = op.createProcess(project, options); + process.startPerforming(pm); + Assert.assertTrue(process.isRunning()); + try { + Thread.sleep(5000); + } catch (InterruptedException e) { + Assert.fail("Test interrupted"); + } + Assert.assertFalse(process.isRunning()); + + /* + * Tajikistan has one "preferred" currency and one "normal" one + * (in terms of statement ranks). + * The second currency is fetched as well, which creates a record + * (the cell to the left of it is left blank). + */ + Assert.assertTrue("Tajikistani somoni".equals(project.rows.get(2).getCellValue(1))); + Assert.assertTrue("Tajikistani ruble".equals(project.rows.get(3).getCellValue(1))); + Assert.assertTrue(null == project.rows.get(3).getCellValue(0)); + + // Make sure all the values are reconciled + Assert.assertTrue(project.columnModel.getColumnByName("currency").getReconStats().matchedTopics == 5); + } + +} diff --git a/main/webapp/modules/core/MOD-INF/controller.js b/main/webapp/modules/core/MOD-INF/controller.js index d7dde610b..4489d0c97 100644 --- a/main/webapp/modules/core/MOD-INF/controller.js +++ b/main/webapp/modules/core/MOD-INF/controller.js @@ -121,6 +121,8 @@ function registerCommands() { RS.registerCommand(module, "recon-clear-one-cell", new Packages.com.google.refine.commands.recon.ReconClearOneCellCommand()); RS.registerCommand(module, "recon-clear-similar-cells", new Packages.com.google.refine.commands.recon.ReconClearSimilarCellsCommand()); RS.registerCommand(module, "recon-copy-across-columns", new Packages.com.google.refine.commands.recon.ReconCopyAcrossColumnsCommand()); + RS.registerCommand(module, "preview-extend-data", new Packages.com.google.refine.commands.recon.PreviewExtendDataCommand()); + RS.registerCommand(module, "extend-data", new Packages.com.google.refine.commands.recon.ExtendDataCommand()); RS.registerCommand(module, "guess-types-of-column", new Packages.com.google.refine.commands.recon.GuessTypesOfColumnCommand()); @@ -180,6 +182,7 @@ function registerOperations() { OR.registerOperation(module, "recon-judge-similar-cells", Packages.com.google.refine.operations.recon.ReconJudgeSimilarCellsOperation); OR.registerOperation(module, "recon-clear-similar-cells", Packages.com.google.refine.operations.recon.ReconClearSimilarCellsOperation); OR.registerOperation(module, "recon-copy-across-columns", Packages.com.google.refine.operations.recon.ReconCopyAcrossColumnsOperation); + OR.registerOperation(module, "extend-reconciled-data", Packages.com.google.refine.operations.recon.ExtendDataOperation); } function registerImporting() { @@ -367,6 +370,7 @@ function init() { "styles/index/default-importing-sources.less", "styles/views/data-table-view.less", // for the preview table's styles + "styles/views/extend-data-preview-dialog.less", "styles/index/fixed-width-parser-ui.less", "styles/index/xml-parser-ui.less", "styles/index/json-parser-ui.less" @@ -431,6 +435,7 @@ function init() { "scripts/reconciliation/standard-service-panel.js", "scripts/dialogs/expression-preview-dialog.js", + "scripts/dialogs/extend-data-preview-dialog.js", "scripts/dialogs/clustering-dialog.js", "scripts/dialogs/scatterplot-dialog.js", "scripts/dialogs/templating-exporter-dialog.js", @@ -474,7 +479,8 @@ function init() { "styles/dialogs/custom-tabular-exporter-dialog.less", "styles/reconciliation/recon-dialog.less", - "styles/reconciliation/standard-service-panel.less" + "styles/reconciliation/standard-service-panel.less", + "styles/reconciliation/extend-data-preview-dialog.less", ] ); diff --git a/main/webapp/modules/core/langs/translation-default.json b/main/webapp/modules/core/langs/translation-default.json index ceab109be..48cebf3be 100644 --- a/main/webapp/modules/core/langs/translation-default.json +++ b/main/webapp/modules/core/langs/translation-default.json @@ -503,6 +503,13 @@ "cache-responses": "Cache responses", "copy-val": "copy value from original column", "warning-col-name": "You must enter a column name.", + "add-col-recon-val": "Add columns from reconciled values", + "add-col-recon-col": "Add columns from reconciled column", + "warning-no-property": "Please select a property first.", + "configure-col": "Configure this column", + "remove-prop": "remove", + "configure-prop": "configure", + "no-settings": "No settings are available for this property.", "add-col-fetch": "Add column by fetching URLs based on column", "throttle-delay": "Throttle delay", "milli": "milliseconds", diff --git a/main/webapp/modules/core/langs/translation-en.json b/main/webapp/modules/core/langs/translation-en.json index 6f13a1bdb..ae836fb8d 100644 --- a/main/webapp/modules/core/langs/translation-en.json +++ b/main/webapp/modules/core/langs/translation-en.json @@ -503,6 +503,13 @@ "cache-responses": "Cache responses", "copy-val": "copy value from original column", "warning-col-name": "You must enter a column name.", + "add-col-recon-val": "Add columns from reconciled values", + "add-col-recon-col": "Add columns from reconciled column", + "warning-no-property": "Please select a property first.", + "configure-col": "Configure this column", + "remove-prop": "remove", + "configure-prop": "configure", + "no-settings": "No settings are available for this property.", "add-col-fetch": "Add column by fetching URLs based on column", "throttle-delay": "Throttle delay", "milli": "milliseconds", diff --git a/main/webapp/modules/core/langs/translation-es.json b/main/webapp/modules/core/langs/translation-es.json index 98a8d6598..ab4f88dae 100644 --- a/main/webapp/modules/core/langs/translation-es.json +++ b/main/webapp/modules/core/langs/translation-es.json @@ -502,6 +502,13 @@ "store-err": "guardar error", "copy-val": "copiar valor de la columna original", "warning-col-name": "Debe ingresar un nombre para la columna.", + "add-col-recon-val": "Añadir columnas de valores conciliados", + "add-col-recon-col": "Añadir columnas de la columna conciliada", + "warning-no-property": "Seleccione primero una propiedad.", + "configure-col": "Configurar esta columna", + "remove-prop": "retirar", + "configure-prop": "configurar", + "no-settings": "No hay configuraciones disponibles para esta propiedad.", "add-col-fetch": "Agregar columna accediendo a URls basada en la columna", "throttle-delay": "Tiempo de retraso", "milli": "milisegundos", diff --git a/main/webapp/modules/core/langs/translation-fr.json b/main/webapp/modules/core/langs/translation-fr.json index 525e50292..02153c3e6 100644 --- a/main/webapp/modules/core/langs/translation-fr.json +++ b/main/webapp/modules/core/langs/translation-fr.json @@ -504,6 +504,13 @@ "copy-val": "copier la valeur depuis la colonne originale", "warning-col-name": "Vous devez indiquer un nom de colonne.", "add-col-fetch": "Ajouter une colonne en moissonnant les données depuis les URL d’une colonne", + "add-col-recon-val": "Ajouter des colonnes à partir de valeurs réconciliées", + "add-col-recon-col": "Ajouter des colonnes à partir de la colonne", + "warning-no-property": "Veuillez d'abord sélectionner une propriété.", + "configure-col": "Configurer cette colonne", + "remove-prop": "supprimer", + "configure-prop": "configurer", + "no-settings": "Aucun paramètre n'est disponible pour cette propriété.", "throttle-delay": "Délai de récupération", "milli": "millisecondes", "url-fetch": "Indiquer les URL à moissonner :", diff --git a/main/webapp/modules/core/langs/translation-it.json b/main/webapp/modules/core/langs/translation-it.json index 022ab88a1..5cd320aff 100644 --- a/main/webapp/modules/core/langs/translation-it.json +++ b/main/webapp/modules/core/langs/translation-it.json @@ -502,6 +502,13 @@ "store-err": "salva l'errore", "copy-val": "copia il valore dalla colonna originale", "warning-col-name": "Inserisci un nome per la colonna.", + "add-col-recon-val": "Aggiungi colonne da valori riconciliati", + "add-col-recon-col": "Aggiungi colonne dalla colonna riconciliata", + "warning-no-property": "Per favore seleziona innanzitutto una proprietà.", + "configure-col": "Configurare questa colonna", + "remove-prop": "rimuovi", + "configure-prop": "configurare", + "no-settings": "Nessuna impostazione sono disponibili per questa proprietà.", "add-col-fetch": "Aggiungi colonna con URL, basandoti su", "throttle-delay": "Durata Throttle", "milli": "millisecondi", diff --git a/main/webapp/modules/core/scripts/dialogs/extend-data-preview-dialog.js b/main/webapp/modules/core/scripts/dialogs/extend-data-preview-dialog.js new file mode 100644 index 000000000..bdb5d5fdc --- /dev/null +++ b/main/webapp/modules/core/scripts/dialogs/extend-data-preview-dialog.js @@ -0,0 +1,422 @@ +/* + +Copyright 2010, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + */ + +function ExtendReconciledDataPreviewDialog(column, columnIndex, rowIndices, onDone) { + this._column = column; + this._columnIndex = columnIndex; + this._rowIndices = rowIndices; + this._onDone = onDone; + this._extension = { properties: [] }; + + var self = this; + this._dialog = $(DOM.loadHTML("core", "scripts/views/data-table/extend-data-preview-dialog.html")); + this._elmts = DOM.bind(this._dialog); + this._elmts.dialogHeader.html($.i18n._('core-views')["add-col-recon-col"]+" "+column.name); + this._elmts.resetButton.click(function() { + self._extension.properties = []; + self._update(); + }); + + this._elmts.okButton.click(function() { + if (self._extension.properties.length === 0) { + alert($.i18n._('core-views')["warning-no-property"]); + } else { + DialogSystem.dismissUntil(self._level - 1); + self._onDone(self._extension, + self._service, + self._serviceMetadata.identifierSpace, + self._serviceMetadata.schemaSpace); + } + }); + this._elmts.cancelButton.click(function() { + DialogSystem.dismissUntil(self._level - 1); + }); + + var dismissBusy = DialogSystem.showBusy(); + var type = (column.reconConfig) && (column.reconConfig.type) ? column.reconConfig.type.id : ""; + + this._proposePropertiesUrl = null; + this._fetchColumnUrl = null; + this._serviceMetadata = null; + if ("reconConfig" in column) { + var service = column.reconConfig.service; + this._service = service; + var serviceMetadata = ReconciliationManager.getServiceFromUrl(service); + this._serviceMetadata = serviceMetadata; + if ("extend" in serviceMetadata) { + var extend = serviceMetadata.extend; + if ("propose_properties" in extend) { + var endpoint = extend.propose_properties; + this._proposePropertiesUrl = endpoint.service_url + endpoint.service_path; + } + } + } + + ExtendReconciledDataPreviewDialog.getAllProperties(this._proposePropertiesUrl, type, function(properties) { + dismissBusy(); + self._show(properties); + }); +} + +ExtendReconciledDataPreviewDialog.getAllProperties = function(url, typeID, onDone) { + if(url == null) { + onDone([]); + } else { + var done = false; + $.getJSON( + url +"?type=" + typeID + "&callback=?", + null, + function(data) { + if (done) return; + done = true; + + var allProperties = []; + for (var i = 0; i < data.properties.length; i++) { + var property = data.properties[i]; + var property2 = { + id: property.id, + name: property.name + }; + /*if ("id2" in property) { + property2.expected = property.schema2; + property2.properties = [{ + id: property.id2, + name: property.name2, + expected: property.expects + }]; + } else { + property2.expected = property.expects; + } */ + allProperties.push(property2); + } + allProperties.sort(function(a, b) { return a.name.localeCompare(b.name); }); + + onDone(allProperties); + } + ); + + window.setTimeout(function() { + if (done) return; + + done = true; + onDone([]); + }, 7000); // time to give up? + } +}; + +ExtendReconciledDataPreviewDialog.prototype._show = function(properties) { + this._level = DialogSystem.showDialog(this._dialog); + + var n = this._elmts.suggestedPropertyContainer.offset().top + + this._elmts.suggestedPropertyContainer.outerHeight(true) - + this._elmts.addPropertyInput.offset().top; + + this._elmts.previewContainer.height(Math.floor(n)); + + var self = this; + var container = this._elmts.suggestedPropertyContainer; + var renderSuggestedProperty = function(property) { + var label = ("properties" in property) ? (property.name + " » " + property.properties[0].name) : property.name; + var div = $('
').addClass("suggested-property").appendTo(container); + + $('') + .attr("href", "javascript:{}") + .html(label) + .appendTo(div) + .click(function() { + self._addProperty(property); + }); + }; + for (var i = 0; i < properties.length; i++) { + renderSuggestedProperty(properties[i]); + } + + var suggestConfig = $.extend({}, this._serviceMetadata.suggest.property); + suggestConfig.key = null; + suggestConfig.query_param_name = "prefix"; + + this._elmts.addPropertyInput.suggestP(suggestConfig).bind("fb-select", function(evt, data) { + self._addProperty({ + id : data.id, + name: data.name, + }); + }); +}; + +ExtendReconciledDataPreviewDialog.prototype._update = function() { + this._elmts.previewContainer.empty().html( + '
'); + + var self = this; + var params = { + project: theProject.id, + columnName: this._column.name + }; + + $.post( + "command/core/preview-extend-data?" + $.param(params), + { + rowIndices: JSON.stringify(this._rowIndices), + extension: JSON.stringify(this._extension) + }, + function(data) { + self._renderPreview(data); + }, + "json" + ).fail(function(data) { + console.log(data); + }); +}; + +ExtendReconciledDataPreviewDialog.prototype._addProperty = function(p) { + var addSeveralToList = function(properties, oldProperties) { + for (var i = 0; i < properties.length; i++) { + addToList(properties[i], oldProperties); + } + }; + var addToList = function(property, oldProperties) { + for (var i = 0; i < oldProperties.length; i++) { + var oldProperty = oldProperties[i]; + if (oldProperty.id == property.id) { + if ("included" in property) { + oldProperty.included = "included" in oldProperty ? + (oldProperty.included || property.included) : + property.included; + } + + if ("properties" in property) { + if ("properties" in oldProperty) { + addSeveralToList(property.properties, oldProperty.properties); + } else { + oldProperty.properties = property.properties; + } + } + return; + } + } + + oldProperties.push(property); + }; + + addToList(p, this._extension.properties); + + this._update(); +}; + +ExtendReconciledDataPreviewDialog.prototype._renderPreview = function(data) { + var self = this; + var container = this._elmts.previewContainer.empty(); + if (data.code == "error") { + container.text("Error."); + return; + } + + var table = $('')[0]; + var trHead = table.insertRow(table.rows.length); + $(''); + var td = $('').attr('title', field.help_text).appendTo(tr); + if (field.type == 'select') { + var fieldLabel = $('').text(field.label+':').appendTo(td); + td.append($('
')); + for(var j = 0; j < field.choices.length; j++) { + var choice = field.choices[j]; + var labelElem = $('').attr('for', field.name+'_'+choice.value).appendTo(td); + var inputElem = $('').attr( + 'id', field.name+'_'+choice.value).attr( + 'value', choice.value).attr( + 'name', field.name).appendTo(labelElem); + + if (choice.value == currentValue) { + inputElem.attr('checked', 'checked'); + } + labelElem.append(' '+choice.name); + td.append('
'); + } + td.append(fieldHTML); + } else if (field.type == 'checkbox') { + var label = $('').attr('for', field.name).appendTo(td); + var input = $('').attr('name', field.name).appendTo(label); + if (currentValue == 'on') { + input.attr('checked','checked'); + } + label.append(' '+field.label); + } else if (field.type == 'number' || field.type == 'text') { + var label = $('').attr('for', field.name).appendTo(td); + label.append(field.label+': '); + var input = $('').attr( + 'name', field.name).attr( + 'type', field.type).attr( + 'value', currentValue).appendTo(label); + } + if (tr.children().length > 0) { + table.append(tr); + } + } + } + + if (table.children().length == 0) { + var tr = $('').appendTo(table); + $('').text($.i18n._('core-views')['no-settings']).appendTo(tr); + } + + var form = $('').append(table); + var gridLayout = $('
').append(form); + body.append(gridLayout); + var bodyElmts = DOM.bind(body); + + footer.html( + '' + + '' + ); + var footerElmts = DOM.bind(footer); + + var level = DialogSystem.showDialog(frame); + var dismiss = function() { + DialogSystem.dismissUntil(level - 1); + }; + + footerElmts.cancelButton.click(dismiss); + footerElmts.okButton.click(function() { + try { + if (fields != null) { + var elem = $(bodyElmts.form[0]); + var ar = elem.serializeArray(); + var settings = {}; + for(var i = 0; i < ar.length; i++) { + settings[ar[i].name] = ar[i].value; + } + property.settings = settings; + } + + dismiss(); + + self._update(); + } catch (e) { + //console.log(e); + } + }); + + //bodyElmts.textarea.focus(); +}; + diff --git a/main/webapp/modules/core/scripts/views/data-table/add-column-by-reconciliation.html b/main/webapp/modules/core/scripts/views/data-table/add-column-by-reconciliation.html new file mode 100644 index 000000000..7695cd02d --- /dev/null +++ b/main/webapp/modules/core/scripts/views/data-table/add-column-by-reconciliation.html @@ -0,0 +1,27 @@ +
+
+
+
').appendTo(trHead).text(this._column.name); + + var renderColumnHeader = function(column) { + var th = $('').appendTo(trHead); + + $('').html(column.name).appendTo(th); + $('
').appendTo(th); + + $('') + .text($.i18n._("core-views")["remove-prop"]) + .addClass("action") + .attr("title", $.i18n._("core-views")["remove-col"]) + .click(function() { + self._removeProperty(column.id); + }).appendTo(th); + + $('') + .text($.i18n._("core-views")["configure-prop"]) + .addClass("action") + .attr("title", $.i18n._("core-views")["configure-col"]) + .click(function() { + self._constrainProperty(column.id); + }).appendTo(th); + }; + for (var c = 0; c < data.columns.length; c++) { + renderColumnHeader(data.columns[c]); + } + + for (var r = 0; r < data.rows.length; r++) { + var tr = table.insertRow(table.rows.length); + var row = data.rows[r]; + + for (var c = 0; c < row.length; c++) { + var td = tr.insertCell(tr.cells.length); + var cell = row[c]; + if (cell !== null) { + if ($.isPlainObject(cell)) { + $('').attr("href", + this._serviceMetadata.identifierSpace + cell.id + ).attr("target", "_blank").text(cell.name).appendTo(td); + } else { + $('').text(cell).appendTo(td); + } + } + } + } + + container.append(table); +}; + +ExtendReconciledDataPreviewDialog.prototype._removeProperty = function(id) { + for(var i = this._extension.properties.length - 1; i >= 0; i--) { + var property = this._extension.properties[i]; + if (property.id == id) { + this._extension.properties.splice(i, 1); + } + } + this._update(); +}; + +ExtendReconciledDataPreviewDialog.prototype._findProperty = function(id) { + var properties = this._extension.properties; + for(var i = properties.length - 1; i >= 0; i--) { + if (properties[i].id == id) { + return properties[i]; + } + } + return null; +} + +ExtendReconciledDataPreviewDialog.prototype._constrainProperty = function(id) { + var self = this; + var property = this._findProperty(id); + + var frame = DialogSystem.createDialog(); + frame.width("500px"); + + var header = $('
').addClass("dialog-header").text("Settings for " + id).appendTo(frame); + var body = $('
').addClass("dialog-body").appendTo(frame); + var footer = $('
').addClass("dialog-footer").appendTo(frame); + + var fields = self._serviceMetadata.extend.property_settings; + var table = $('
'); + if (fields != null) { + for(var i = 0; i < fields.length; i++) { + var field = fields[i]; + var fieldHTML = ''; + var currentValue = field.default; + if (property.settings != null && property.settings[field.name] != null) { + currentValue = property.settings[field.name]; + } + var tr = $('
+ + + + + + + + + + + + + + + +
+ + + + diff --git a/main/webapp/modules/core/scripts/views/data-table/extend-data-preview-dialog.html b/main/webapp/modules/core/scripts/views/data-table/extend-data-preview-dialog.html new file mode 100644 index 000000000..f92e09fb9 --- /dev/null +++ b/main/webapp/modules/core/scripts/views/data-table/extend-data-preview-dialog.html @@ -0,0 +1,26 @@ +
+
+
+
+ + + + + + + + + + + + + + + +
Add PropertyPreview
Suggested Properties
+
+ +
\ No newline at end of file diff --git a/main/webapp/modules/core/scripts/views/data-table/menu-edit-column.js b/main/webapp/modules/core/scripts/views/data-table/menu-edit-column.js index 6e06addc3..015bb84fd 100644 --- a/main/webapp/modules/core/scripts/views/data-table/menu-edit-column.js +++ b/main/webapp/modules/core/scripts/views/data-table/menu-edit-column.js @@ -146,6 +146,33 @@ DataTableColumnHeaderUI.extendMenu(function(column, columnHeaderUI, menu) { }); }; + var doAddColumnByReconciliation = function() { + var columnIndex = Refine.columnNameToColumnIndex(column.name); + var o = DataTableView.sampleVisibleRows(column); + new ExtendReconciledDataPreviewDialog( + column, + columnIndex, + o.rowIndices, + function(extension, endpoint, identifierSpace, schemaSpace) { + Refine.postProcess( + "core", + "extend-data", + { + baseColumnName: column.name, + endpoint: endpoint, + identifierSpace: identifierSpace, + schemaSpace: schemaSpace, + columnInsertIndex: columnIndex + 1 + }, + { + extension: JSON.stringify(extension) + }, + { rowsChanged: true, modelsChanged: true } + ); + } + ); + }; + var doRemoveColumn = function() { Refine.postCoreProcess( "remove-column", @@ -298,6 +325,11 @@ DataTableColumnHeaderUI.extendMenu(function(column, columnHeaderUI, menu) { label: $.i18n._('core-views')["add-by-urls"]+"...", click: doAddColumnByFetchingURLs }, + { + id: "core/add-column-by-reconciliation", + label: $.i18n._('core-views')["add-col-recon-val"]+"...", + click: doAddColumnByReconciliation + }, {}, { id: "core/rename-column", diff --git a/main/webapp/modules/core/styles/reconciliation/extend-data-preview-dialog.less b/main/webapp/modules/core/styles/reconciliation/extend-data-preview-dialog.less new file mode 100644 index 000000000..069a70628 --- /dev/null +++ b/main/webapp/modules/core/styles/reconciliation/extend-data-preview-dialog.less @@ -0,0 +1,84 @@ +/* + +Copyright 2010, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +@import-less url("../theme.less"); + +.extend-data-preview-dialog .suggested-property-container { + border: 1px solid #aaa; + padding: 5px; + overflow: auto; + height: 375px; + } + +.extend-data-preview-dialog .suggested-property { + padding: 5px; + } + +.extend-data-preview-dialog input.property-suggest { + display: block; + padding: 2%; + width: 96%; + } + +.extend-data-preview-dialog .preview-container { + border: 1px solid #aaa; + overflow: auto; + } + +.extend-data-preview-dialog .preview-container table { + border-collapse: collapse; + } + +.extend-data-preview-dialog .preview-container td, .extend-data-preview-dialog .preview-container th { + padding: 3px 5px; + border-bottom: 1px solid #ddd; + border-right: 1px solid #ddd; + } + +.extend-data-preview-dialog .preview-container th img { + vertical-align: top; + margin-left: 5px; + } + +.extend-data-preview-progress { + text-align: center; +} + +.extend-data-preview-progress img { + padding: 45%; + display: inline-block; +} + +.data-extension-property-config td { + padding: 5px; +}