Merge pull request #1210 from wetneb/extend

Add data extension capabilities to the reconciliation API
This commit is contained in:
Thad Guidry 2017-07-17 18:01:37 -05:00 committed by GitHub
commit 7f92251ed1
19 changed files with 2538 additions and 1 deletions

View File

@ -0,0 +1,71 @@
/*
Copyright 2010, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.commands.recon;
import javax.servlet.http.HttpServletRequest;
import org.json.JSONObject;
import com.google.refine.commands.EngineDependentCommand;
import com.google.refine.operations.recon.ExtendDataOperation;
import com.google.refine.model.AbstractOperation;
import com.google.refine.model.Project;
import com.google.refine.util.ParsingUtilities;
/**
 * Command that turns the parameters of an HTTP request into an
 * {@link ExtendDataOperation}.
 */
public class ExtendDataCommand extends EngineDependentCommand {

    /**
     * Builds the data extension operation described by the request.
     * <p>
     * The request parameters read are {@code baseColumnName},
     * {@code columnInsertIndex}, {@code endpoint}, {@code identifierSpace},
     * {@code schemaSpace} and {@code extension} (a JSON object as a string).
     *
     * @param project      the project the operation is created for (not read here)
     * @param request      the HTTP request carrying the parameters
     * @param engineConfig the engine configuration, passed on to the operation
     * @return the configured operation
     * @throws Exception in particular {@link NumberFormatException} when
     *         {@code columnInsertIndex} is not an integer, and whatever the
     *         JSON parser raises for {@code extension}
     */
    @Override
    protected AbstractOperation createOperation(Project project,
            HttpServletRequest request, JSONObject engineConfig) throws Exception {

        final String baseColumnName = request.getParameter("baseColumnName");

        final String insertIndexParam = request.getParameter("columnInsertIndex");
        final int columnInsertIndex = Integer.parseInt(insertIndexParam);

        final String endpoint = request.getParameter("endpoint");
        final String identifierSpace = request.getParameter("identifierSpace");
        final String schemaSpace = request.getParameter("schemaSpace");

        final JSONObject extension =
                ParsingUtilities.evaluateJsonStringToObject(request.getParameter("extension"));

        return new ExtendDataOperation(
                engineConfig, baseColumnName, endpoint, identifierSpace,
                schemaSpace, extension, columnInsertIndex);
    }
}

View File

@ -0,0 +1,200 @@
/*
Copyright 2010, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.commands.recon;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.json.JSONArray;
import org.json.JSONObject;
import org.json.JSONWriter;
import com.google.refine.commands.Command;
import com.google.refine.model.recon.ReconciledDataExtensionJob;
import com.google.refine.model.recon.ReconciledDataExtensionJob.ColumnInfo;
import com.google.refine.model.recon.ReconciledDataExtensionJob.DataExtension;
import com.google.refine.model.Cell;
import com.google.refine.model.Project;
import com.google.refine.model.ReconCandidate;
import com.google.refine.model.Row;
import com.google.refine.model.Column;
import com.google.refine.model.recon.ReconConfig;
import com.google.refine.model.recon.StandardReconConfig;
import com.google.refine.util.ParsingUtilities;
/**
 * Command that previews a data extension: for a handful of rows it fetches
 * the extension data of the matched cells in a reconciled column and writes
 * the resulting columns and rows back as JSON.
 */
public class PreviewExtendDataCommand extends Command {

    /**
     * Handles the preview request.
     * <p>
     * Request parameters read: {@code columnName}, {@code rowIndices} (JSON
     * array of row indices) and {@code extension} (JSON object). On success
     * the response is a JSON object with {@code code}, {@code columns} and
     * {@code rows}; a JSON error object is returned when the row indices are
     * missing, the column does not exist, or the column has not been
     * reconciled with a standard service. Any exception is reported through
     * {@code respondException}.
     *
     * @param request  the HTTP request
     * @param response the HTTP response the JSON is written to
     */
    @Override
    public void doPost(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {

        try {
            Project project = getProject(request);
            String columnName = request.getParameter("columnName");

            String rowIndicesString = request.getParameter("rowIndices");
            if (rowIndicesString == null) {
                respond(response, "{ \"code\" : \"error\", \"message\" : \"No row indices specified\" }");
                return;
            }

            String jsonString = request.getParameter("extension");
            JSONObject json = ParsingUtilities.evaluateJsonStringToObject(jsonString);

            JSONArray rowIndices = ParsingUtilities.evaluateJsonStringToArray(rowIndicesString);
            int length = rowIndices.length();

            Column column = project.columnModel.getColumnByName(columnName);
            if (column == null) {
                // Unknown (or missing) column name: answer with a proper error
                // instead of failing on the dereference below.
                respond(response, "{ \"code\" : \"error\", \"message\" : \"The specified column does not exist.\" }");
                return;
            }
            int cellIndex = column.getCellIndex();

            // get the endpoint to extract data from
            String endpoint = null;
            ReconConfig cfg = column.getReconConfig();
            if (cfg instanceof StandardReconConfig) {
                StandardReconConfig scfg = (StandardReconConfig) cfg;
                endpoint = scfg.service;
            } else {
                respond(response, "{ \"code\" : \"error\", \"message\" : \"This column has not been reconciled with a standard service.\" }");
                return;
            }

            // topicNames/topicIds keep one entry per valid requested row (in
            // request order); ids is the de-duplicated set sent to the service.
            List<String> topicNames = new ArrayList<String>();
            List<String> topicIds = new ArrayList<String>();
            Set<String> ids = new HashSet<String>();
            for (int i = 0; i < length; i++) {
                int rowIndex = rowIndices.getInt(i);
                if (rowIndex >= 0 && rowIndex < project.rows.size()) {
                    Row row = project.rows.get(rowIndex);
                    Cell cell = row.getCell(cellIndex);
                    if (cell != null && cell.recon != null && cell.recon.match != null) {
                        topicNames.add(cell.recon.match.name);
                        topicIds.add(cell.recon.match.id);
                        ids.add(cell.recon.match.id);
                    } else {
                        topicNames.add(null);
                        topicIds.add(null);
                        // NOTE(review): null also ends up in the id set handed to
                        // the job; confirm the query builder tolerates it.
                        ids.add(null);
                    }
                }
            }

            Map<String, ReconCandidate> reconCandidateMap = new HashMap<String, ReconCandidate>();
            ReconciledDataExtensionJob job = new ReconciledDataExtensionJob(json, endpoint);
            Map<String, DataExtension> map = job.extend(ids, reconCandidateMap);

            response.setCharacterEncoding("UTF-8");
            response.setHeader("Content-Type", "application/json");

            JSONWriter writer = new JSONWriter(response.getWriter());
            writer.object();
            writer.key("code"); writer.value("ok");

            writer.key("columns");
            writer.array();
            for (ColumnInfo info : job.columns) {
                writer.object();
                writer.key("name");
                writer.value(info.name);
                writer.key("id");
                writer.value(info.id);
                writer.endObject();
            }
            writer.endArray();

            writer.key("rows");
            writer.array();
            for (int r = 0; r < topicNames.size(); r++) {
                String id = topicIds.get(r);
                String topicName = topicNames.get(r);

                if (id != null && map.containsKey(id)) {
                    DataExtension ext = map.get(id);
                    boolean first = true;

                    if (ext.data.length > 0) {
                        for (Object[] row : ext.data) {
                            writer.array();
                            // only the first row of an extension repeats the topic name
                            if (first) {
                                writer.value(topicName);
                                first = false;
                            } else {
                                writer.value(null);
                            }

                            for (Object cell : row) {
                                if (cell instanceof ReconCandidate) {
                                    ReconCandidate rc = (ReconCandidate) cell;
                                    writer.object();
                                    writer.key("id"); writer.value(rc.id);
                                    writer.key("name"); writer.value(rc.name);
                                    writer.endObject();
                                } else {
                                    writer.value(cell);
                                }
                            }
                            writer.endArray();
                        }
                        continue;
                    }
                }

                // no extension data for this row: emit a single-cell row
                writer.array();
                if (id != null) {
                    writer.object();
                    writer.key("id"); writer.value(id);
                    writer.key("name"); writer.value(topicName);
                    writer.endObject();
                } else {
                    writer.value("<not reconciled>");
                }
                writer.endArray();
            }
            writer.endArray();

            writer.endObject();
        } catch (Exception e) {
            respondException(response, e);
        }
    }
}

View File

@ -0,0 +1,79 @@
/*
Copyright 2010, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.model;
import java.util.Properties;
import org.json.JSONException;
import org.json.JSONObject;
import org.json.JSONWriter;
import com.google.refine.Jsonizable;
/**
* This represents a type from the reconciliation
* service. It is used when extending data to
* store the (expected) types of new columns.
*/
public class ReconType implements Jsonizable {
    /** Identifier of the type. */
    public String id;
    /** Display name of the type. */
    public String name;

    /**
     * @param id   identifier of the type
     * @param name display name of the type
     */
    public ReconType(String id, String name) {
        this.id = id;
        this.name = name;
    }

    /**
     * Writes this type as a JSON object with the keys {@code id} and
     * {@code name}.
     *
     * @param writer  the JSON writer to emit to
     * @param options serialization options (not used here)
     */
    @Override
    public void write(JSONWriter writer, Properties options)
            throws JSONException {
        writer.object()
              .key("id").value(id)
              .key("name").value(name)
              .endObject();
    }

    /**
     * Reads a type back from a JSON object holding {@code id} and
     * {@code name}.
     *
     * @param obj the JSON object, may be {@code null}
     * @return the type, or {@code null} when {@code obj} is {@code null}
     * @throws Exception when a key is missing
     */
    static public ReconType load(JSONObject obj) throws Exception {
        return obj == null
                ? null
                : new ReconType(obj.getString("id"), obj.getString("name"));
    }
}

View File

@ -0,0 +1,511 @@
/*
Copyright 2010, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.model.changes;
import java.io.IOException;
import java.io.LineNumberReader;
import java.io.Serializable;
import java.io.Writer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import org.json.JSONException;
import org.json.JSONObject;
import org.json.JSONWriter;
import com.google.refine.model.ReconType;
import com.google.refine.model.recon.DataExtensionReconConfig;
import com.google.refine.model.recon.ReconciledDataExtensionJob.DataExtension;
import com.google.refine.history.Change;
import com.google.refine.model.Cell;
import com.google.refine.model.Column;
import com.google.refine.model.ModelException;
import com.google.refine.model.Project;
import com.google.refine.model.Recon;
import com.google.refine.model.Recon.Judgment;
import com.google.refine.model.ReconCandidate;
import com.google.refine.model.ReconStats;
import com.google.refine.model.Row;
import com.google.refine.util.ParsingUtilities;
import com.google.refine.util.Pool;
/**
 * Change that inserts new columns into a project and fills them with
 * pre-fetched {@link DataExtension} values for a base column.
 * <p>
 * The first time the change is applied it computes the new list of rows
 * (a data extension with several rows either reuses following rows whose
 * base and key cells are blank, or inserts fresh rows) and remembers both
 * the old and the new rows. Later applications and reverts only swap the
 * stored row lists and add or remove the columns.
 */
public class DataExtensionChange implements Change {
    final protected String              _baseColumnName;
    final protected String              _service;
    final protected String              _identifierSpace;
    final protected String              _schemaSpace;
    final protected int                 _columnInsertIndex;

    final protected List<String>        _columnNames;
    final protected List<ReconType>     _columnTypes;

    final protected List<Integer>       _rowIndices;
    final protected List<DataExtension> _dataExtensions;

    protected long                      _historyEntryID;
    // negative until apply() has allocated cell indices for the new columns
    protected int                       _firstNewCellIndex = -1;
    protected List<Row>                 _oldRows;
    protected List<Row>                 _newRows;

    /**
     * Creates a change that has not been applied yet.
     *
     * @param baseColumnName    name of the column the extension is based on
     * @param service           service recorded on the created recons and columns
     * @param identifierSpace   identifier space recorded on the created recons
     * @param schemaSpace       schema space recorded on the created recons
     * @param columnInsertIndex position at which the first new column is inserted
     * @param columnNames       names of the new columns
     * @param columnTypes       type of each new column, entries may be {@code null}
     * @param rowIndices        indices of the rows that have an extension, ascending
     * @param dataExtensions    extension for each entry of {@code rowIndices}
     * @param historyEntryID    history entry id stored on created recons
     */
    public DataExtensionChange(
        String baseColumnName,
        String service,
        String identifierSpace,
        String schemaSpace,
        int columnInsertIndex,
        List<String> columnNames,
        List<ReconType> columnTypes,
        List<Integer> rowIndices,
        List<DataExtension> dataExtensions,
        long historyEntryID
    ) {
        _baseColumnName = baseColumnName;
        _service = service;
        _identifierSpace = identifierSpace;
        _schemaSpace = schemaSpace;
        _columnInsertIndex = columnInsertIndex;

        _columnNames = columnNames;
        _columnTypes = columnTypes;

        _rowIndices = rowIndices;
        _dataExtensions = dataExtensions;

        _historyEntryID = historyEntryID;
    }

    /**
     * Restores a change together with the state computed by a previous
     * {@link #apply(Project)}; used by {@link #load(LineNumberReader, Pool)}.
     * The history entry id is not restored by this constructor.
     */
    protected DataExtensionChange(
        String baseColumnName,
        String service,
        String identifierSpace,
        String schemaSpace,
        int columnInsertIndex,

        List<String> columnNames,
        List<ReconType> columnTypes,

        List<Integer> rowIndices,
        List<DataExtension> dataExtensions,
        int firstNewCellIndex,
        List<Row> oldRows,
        List<Row> newRows
    ) {
        _baseColumnName = baseColumnName;
        _service = service;
        _identifierSpace = identifierSpace;
        _schemaSpace = schemaSpace;
        _columnInsertIndex = columnInsertIndex;

        _columnNames = columnNames;
        _columnTypes = columnTypes;

        _rowIndices = rowIndices;
        _dataExtensions = dataExtensions;

        _firstNewCellIndex = firstNewCellIndex;
        _oldRows = oldRows;
        _newRows = newRows;
    }

    /**
     * Applies the change: computes the extended rows on first use, replaces
     * the project rows with them and inserts the new columns.
     *
     * @param project the project to modify; it is locked for the duration
     */
    @Override
    public void apply(Project project) {
        synchronized (project) {
            if (_firstNewCellIndex < 0) {
                // first application: reserve one cell index per new column
                _firstNewCellIndex = project.columnModel.allocateNewCellIndex();
                for (int i = 1; i < _columnNames.size(); i++) {
                    project.columnModel.allocateNewCellIndex();
                }

                _oldRows = new ArrayList<Row>(project.rows);

                _newRows = new ArrayList<Row>(project.rows.size());

                int cellIndex = project.columnModel.getColumnByName(_baseColumnName).getCellIndex();
                int keyCellIndex = project.columnModel.columns.get(project.columnModel.getKeyColumnIndex()).getCellIndex();
                int index = 0;

                // rowIndex / dataExtension always describe the next row to extend
                int rowIndex = index < _rowIndices.size() ? _rowIndices.get(index) : _oldRows.size();
                DataExtension dataExtension = index < _rowIndices.size() ? _dataExtensions.get(index) : null;

                index++;

                // recons are shared between cells that refer to the same candidate id
                Map<String, Recon> reconMap = new HashMap<String, Recon>();

                for (int r = 0; r < _oldRows.size(); r++) {
                    Row oldRow = _oldRows.get(r);
                    if (r < rowIndex) {
                        _newRows.add(oldRow.dup());
                        continue;
                    }

                    if (dataExtension == null || dataExtension.data.length == 0) {
                        _newRows.add(oldRow);
                    } else {
                        Row firstNewRow = oldRow.dup();
                        extendRow(firstNewRow, dataExtension, 0, reconMap);
                        _newRows.add(firstNewRow);

                        int r2 = r + 1;
                        for (int subR = 1; subR < dataExtension.data.length; subR++) {
                            if (r2 < project.rows.size()) {
                                Row oldRow2 = project.rows.get(r2);
                                // reuse the following row when both its base cell
                                // and its key cell are blank
                                if (oldRow2.isCellBlank(cellIndex) &&
                                    oldRow2.isCellBlank(keyCellIndex)) {

                                    Row newRow = oldRow2.dup();
                                    extendRow(newRow, dataExtension, subR, reconMap);

                                    _newRows.add(newRow);
                                    r2++;

                                    continue;
                                }
                            }

                            // otherwise insert a fresh row for this extension row
                            Row newRow = new Row(cellIndex + _columnNames.size());
                            extendRow(newRow, dataExtension, subR, reconMap);

                            _newRows.add(newRow);
                        }

                        r = r2 - 1; // r will be incremented by the for loop anyway
                    }

                    rowIndex = index < _rowIndices.size() ? _rowIndices.get(index) : _oldRows.size();
                    dataExtension = index < _rowIndices.size() ? _dataExtensions.get(index) : null;
                    index++;
                }
            }

            project.rows.clear();
            project.rows.addAll(_newRows);

            for (int i = 0; i < _columnNames.size(); i++) {
                String name = _columnNames.get(i);
                int cellIndex = _firstNewCellIndex + i;

                Column column = new Column(cellIndex, name);
                ReconType columnType = _columnTypes.get(i);
                column.setReconConfig(new DataExtensionReconConfig(
                    _service,
                    _identifierSpace,
                    _schemaSpace,
                    columnType));
                if (columnType != null) {
                    column.setReconStats(ReconStats.create(project, cellIndex));
                }

                try {
                    project.columnModel.addColumn(_columnInsertIndex + i, column, true);

                    // the column might have been renamed to avoid collision
                    _columnNames.set(i, column.getName());
                } catch (ModelException e) {
                    // won't get here since we set the avoid collision flag
                }
            }

            project.update();
        }
    }

    /**
     * Copies one row of a data extension into the new cells of a project row.
     * Recon candidates become matched cells; other values become plain cells.
     *
     * @param row               the row to fill
     * @param dataExtension     the extension the values come from
     * @param extensionRowIndex index of the extension row to copy
     * @param reconMap          recons already created, keyed by candidate id;
     *                          updated with newly created recons
     */
    protected void extendRow(
        Row row,
        DataExtension dataExtension,
        int extensionRowIndex,
        Map<String, Recon> reconMap
    ) {
        Object[] values = dataExtension.data[extensionRowIndex];
        for (int c = 0; c < values.length; c++) {
            Object value = values[c];
            Cell cell = null;

            if (value instanceof ReconCandidate) {
                ReconCandidate rc = (ReconCandidate) value;
                Recon recon;
                if (reconMap.containsKey(rc.id)) {
                    recon = reconMap.get(rc.id);
                } else {
                    recon = new Recon(_historyEntryID, _identifierSpace, _schemaSpace);
                    recon.addCandidate(rc);
                    recon.service = _service;
                    recon.match = rc;
                    recon.matchRank = 0;
                    recon.judgment = Judgment.Matched;
                    recon.judgmentAction = "auto";
                    recon.judgmentBatchSize = 1;

                    reconMap.put(rc.id, recon);
                }
                cell = new Cell(rc.name, recon);
            } else {
                cell = new Cell((Serializable) value, null);
            }

            row.setCell(_firstNewCellIndex + c, cell);
        }
    }

    /**
     * Reverts the change: restores the old rows and removes the columns
     * that {@link #apply(Project)} inserted.
     *
     * @param project the project to modify; it is locked for the duration
     */
    @Override
    public void revert(Project project) {
        synchronized (project) {
            project.rows.clear();
            project.rows.addAll(_oldRows);

            // the new columns are contiguous, so removing at the same index
            // repeatedly drops all of them
            for (int i = 0; i < _columnNames.size(); i++) {
                project.columnModel.columns.remove(_columnInsertIndex);
            }

            project.update();
        }
    }

    /**
     * Writes this change as {@code key=value} lines followed by the
     * {@code /ec/} end marker. A {@code null} string field is written with an
     * empty value.
     *
     * @param writer  destination
     * @param options options handed to the nested serializers
     * @throws IOException when writing fails or a nested value cannot be
     *         serialized as JSON
     */
    @Override
    public void save(Writer writer, Properties options) throws IOException {
        writeField(writer, "baseColumnName", _baseColumnName);
        writeField(writer, "service", _service);
        writeField(writer, "identifierSpace", _identifierSpace);
        writeField(writer, "schemaSpace", _schemaSpace);
        writeField(writer, "columnInsertIndex", Integer.toString(_columnInsertIndex));

        writeField(writer, "columnNameCount", Integer.toString(_columnNames.size()));
        for (String name : _columnNames) {
            writer.write(name); writer.write('\n');
        }

        writeField(writer, "columnTypeCount", Integer.toString(_columnTypes.size()));
        for (ReconType type : _columnTypes) {
            // a missing type is stored as an empty line
            if (type != null) {
                try {
                    type.write(new JSONWriter(writer), options);
                } catch (JSONException e) {
                    // a half-written record would corrupt the history file
                    throw new IOException("Failed to serialize column type " + type.id, e);
                }
            }
            writer.write('\n');
        }

        writeField(writer, "rowIndexCount", Integer.toString(_rowIndices.size()));
        for (Integer rowIndex : _rowIndices) {
            writer.write(rowIndex.toString()); writer.write('\n');
        }

        writeField(writer, "dataExtensionCount", Integer.toString(_dataExtensions.size()));
        for (DataExtension dataExtension : _dataExtensions) {
            if (dataExtension == null) {
                writer.write('\n');
                continue;
            }

            writer.write(Integer.toString(dataExtension.data.length)); writer.write('\n');

            for (Object[] values : dataExtension.data) {
                for (Object value : values) {
                    if (value == null) {
                        writer.write("null");
                    } else if (value instanceof ReconCandidate) {
                        try {
                            ((ReconCandidate) value).write(new JSONWriter(writer), options);
                        } catch (JSONException e) {
                            throw new IOException("Failed to serialize recon candidate", e);
                        }
                    } else if (value instanceof String) {
                        writer.write(JSONObject.quote((String) value));
                    } else {
                        writer.write(value.toString());
                    }
                    writer.write('\n');
                }
            }
        }

        writeField(writer, "firstNewCellIndex", Integer.toString(_firstNewCellIndex));

        writeField(writer, "newRowCount", Integer.toString(_newRows.size()));
        for (Row row : _newRows) {
            row.save(writer, options);
            writer.write('\n');
        }

        writeField(writer, "oldRowCount", Integer.toString(_oldRows.size()));
        for (Row row : _oldRows) {
            row.save(writer, options);
            writer.write('\n');
        }

        writer.write("/ec/\n"); // end of change marker
    }

    /** Writes one {@code key=value} line; a {@code null} value is written as empty. */
    private static void writeField(Writer writer, String key, String value) throws IOException {
        writer.write(key);
        writer.write('=');
        if (value != null) {
            writer.write(value);
        }
        writer.write('\n');
    }

    /**
     * Reads a change written by {@link #save(Writer, Properties)}, up to the
     * {@code /ec/} end marker. Lines without an {@code =} and unknown keys
     * are ignored.
     *
     * @param reader source positioned at the first line of the change
     * @param pool   pool handed to the row loader
     * @return the restored change
     * @throws Exception when a count or nested value cannot be parsed
     */
    static public Change load(LineNumberReader reader, Pool pool) throws Exception {
        String baseColumnName = null;
        String service = null;
        String identifierSpace = null;
        String schemaSpace = null;
        int columnInsertIndex = -1;

        List<String> columnNames = null;
        List<ReconType> columnTypes = null;

        List<Integer> rowIndices = null;
        List<DataExtension> dataExtensions = null;

        List<Row> oldRows = null;
        List<Row> newRows = null;

        int firstNewCellIndex = -1;

        String line;
        while ((line = reader.readLine()) != null && !"/ec/".equals(line)) {
            int equal = line.indexOf('=');
            if (equal < 0) {
                // not a key=value line, nothing to interpret
                continue;
            }
            String field = line.substring(0, equal);
            String value = line.substring(equal + 1);

            if ("baseColumnName".equals(field)) {
                baseColumnName = value;
            } else if ("service".equals(field)) {
                service = value;
            } else if ("identifierSpace".equals(field)) {
                identifierSpace = value;
            } else if ("schemaSpace".equals(field)) {
                schemaSpace = value;
            } else if ("columnInsertIndex".equals(field)) {
                columnInsertIndex = Integer.parseInt(value);
            } else if ("firstNewCellIndex".equals(field)) {
                firstNewCellIndex = Integer.parseInt(value);
            } else if ("rowIndexCount".equals(field)) {
                int count = Integer.parseInt(value);

                rowIndices = new ArrayList<Integer>(count);
                for (int i = 0; i < count; i++) {
                    line = reader.readLine();
                    if (line != null) {
                        rowIndices.add(Integer.parseInt(line));
                    }
                }
            } else if ("columnNameCount".equals(field)) {
                int count = Integer.parseInt(value);

                columnNames = new ArrayList<String>(count);
                for (int i = 0; i < count; i++) {
                    line = reader.readLine();
                    if (line != null) {
                        columnNames.add(line);
                    }
                }
            } else if ("columnTypeCount".equals(field)) {
                int count = Integer.parseInt(value);

                columnTypes = new ArrayList<ReconType>(count);
                for (int i = 0; i < count; i++) {
                    line = reader.readLine();
                    // an empty line stands for a column without a type
                    if (line == null || line.length() == 0) {
                        columnTypes.add(null);
                    } else {
                        columnTypes.add(ReconType.load(ParsingUtilities.evaluateJsonStringToObject(line)));
                    }
                }
            } else if ("dataExtensionCount".equals(field)) {
                int count = Integer.parseInt(value);

                dataExtensions = new ArrayList<DataExtension>(count);
                for (int i = 0; i < count; i++) {
                    line = reader.readLine();

                    if (line == null) {
                        continue;
                    }

                    // an empty line stands for a null data extension
                    if (line.length() == 0) {
                        dataExtensions.add(null);
                        continue;
                    }

                    int rowCount = Integer.parseInt(line);
                    Object[][] data = new Object[rowCount][];

                    for (int r = 0; r < rowCount; r++) {
                        Object[] row = new Object[columnNames.size()];
                        for (int c = 0; c < columnNames.size(); c++) {
                            line = reader.readLine();

                            // NOTE(review): every value is parsed as a recon
                            // candidate although save() also writes strings and
                            // other values; confirm loadStreaming copes with them.
                            row[c] = ReconCandidate.loadStreaming(line);
                        }

                        data[r] = row;
                    }

                    dataExtensions.add(new DataExtension(data));
                }
            } else if ("oldRowCount".equals(field)) {
                int count = Integer.parseInt(value);

                oldRows = new ArrayList<Row>(count);
                for (int i = 0; i < count; i++) {
                    line = reader.readLine();
                    if (line != null) {
                        oldRows.add(Row.load(line, pool));
                    }
                }
            } else if ("newRowCount".equals(field)) {
                int count = Integer.parseInt(value);

                newRows = new ArrayList<Row>(count);
                for (int i = 0; i < count; i++) {
                    line = reader.readLine();
                    if (line != null) {
                        newRows.add(Row.load(line, pool));
                    }
                }
            }
        }

        DataExtensionChange change = new DataExtensionChange(
            baseColumnName,
            service,
            identifierSpace,
            schemaSpace,
            columnInsertIndex,
            columnNames,
            columnTypes,
            rowIndices,
            dataExtensions,
            firstNewCellIndex,
            oldRows,
            newRows
        );

        return change;
    }
}

View File

@ -0,0 +1,109 @@
/*
Copyright 2010, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.model.recon;
import java.util.List;
import java.util.Properties;
import java.util.ArrayList;
import org.json.JSONException;
import org.json.JSONObject;
import org.json.JSONWriter;
import com.google.refine.model.ReconType;
import com.google.refine.model.Cell;
import com.google.refine.model.Project;
import com.google.refine.model.Recon;
import com.google.refine.model.Row;
import com.google.refine.model.recon.StandardReconConfig;
import com.google.refine.model.recon.ReconJob;
/**
 * Recon configuration attached to columns created by a data extension. It
 * records the service, the identifier and schema spaces and the type of the
 * column; the reconciliation methods themselves are not implemented and
 * throw a {@link RuntimeException}.
 */
public class DataExtensionReconConfig extends StandardReconConfig {
    /** Type of the column, {@code null} when none is known. */
    final public ReconType type;

    private final static String WARN = "Not implemented";

    /**
     * Rebuilds a configuration from its JSON form.
     * <p>
     * The type is read from the nested {@code "type"} object ({@code id},
     * and {@code name} falling back to the id). A missing or null
     * {@code "type"}, or one without an id, yields a configuration whose
     * type is {@code null}.
     *
     * @param obj JSON object with {@code service} and optionally
     *            {@code identifierSpace}, {@code schemaSpace} and {@code type}
     * @return the rebuilt configuration
     * @throws Exception when {@code service} is missing
     */
    static public ReconConfig reconstruct(JSONObject obj) throws Exception {
        // optJSONObject returns null when the key is absent or not an object
        JSONObject type = obj.optJSONObject("type");

        ReconType typ = null;
        if (type != null && type.has("id") && !type.isNull("id")) {
            String id = type.getString("id");
            String name = (type.has("name") && !type.isNull("name"))
                    ? type.getString("name")
                    : id;
            typ = new ReconType(id, name);
        }

        return new DataExtensionReconConfig(
            obj.getString("service"),
            obj.has("identifierSpace") ? obj.getString("identifierSpace") : null,
            obj.has("schemaSpace") ? obj.getString("schemaSpace") : null,
            typ);
    }

    /**
     * @param service         the service the data was fetched from
     * @param identifierSpace identifier space, may be {@code null}
     * @param schemaSpace     schema space, may be {@code null}
     * @param type            type of the column, may be {@code null}
     */
    public DataExtensionReconConfig(
        String service,
        String identifierSpace,
        String schemaSpace,
        ReconType type) {
        super(
            service,
            identifierSpace,
            schemaSpace,
            type != null ? type.id : null,
            type != null ? type.name : null,
            true,
            new ArrayList<ColumnDetail>());
        this.type = type;
    }

    /** Not implemented; always throws. */
    @Override
    public ReconJob createJob(Project project, int rowIndex, Row row,
            String columnName, Cell cell) {
        throw new RuntimeException(WARN);
    }

    /** Not implemented; always throws. */
    @Override
    public int getBatchSize() {
        throw new RuntimeException(WARN);
    }

    /** Not implemented; always throws. */
    @Override
    public List<Recon> batchRecon(List<ReconJob> jobs, long historyEntryID) {
        throw new RuntimeException(WARN);
    }

    /** Not implemented; always throws. */
    @Override
    public String getBriefDescription(Project project, String columnName) {
        throw new RuntimeException(WARN);
    }
}

View File

@ -0,0 +1,303 @@
/*
Copyright 2010, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
*
*/
package com.google.refine.model.recon;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.io.StringWriter;
import java.io.Writer;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.json.JSONWriter;
import com.google.refine.model.ReconType;
import com.google.refine.model.ReconCandidate;
import com.google.refine.model.recon.StandardReconConfig;
import com.google.refine.util.JSONUtilities;
import com.google.refine.util.ParsingUtilities;
import com.google.refine.expr.functions.ToDate;
public class ReconciledDataExtensionJob {
/**
 * Holder for the values fetched for one identifier, as an array of rows
 * where each row is an array of cell values.
 */
public static class DataExtension {
    /** The fetched values; the array passed to the constructor is kept as is. */
    public final Object[][] data;

    /**
     * @param data the rows of values to hold (not copied)
     */
    public DataExtension(Object[][] data) {
        this.data = data;
    }
}
/**
 * Description of one column of an extension: its display name, its
 * identifier and the type expected for its values.
 */
public static class ColumnInfo {
    public final String name;
    public final String id;
    public final ReconType expectedType;

    /**
     * @param name         display name of the column
     * @param id           identifier of the column
     * @param expectedType type expected for the values of the column
     */
    protected ColumnInfo(String name, String id, ReconType expectedType) {
        this.expectedType = expectedType;
        this.id = id;
        this.name = name;
    }
}
final public JSONObject extension;
final public String endpoint;
final public List<ColumnInfo> columns = new ArrayList<ColumnInfo>();
/**
 * Creates a job for the given extension configuration and service endpoint.
 * Both arguments are only stored; no request is made here.
 *
 * @param obj      the extension configuration
 * @param endpoint URL of the service to query
 * @throws JSONException declared for callers; not raised by this constructor
 */
public ReconciledDataExtensionJob(JSONObject obj, String endpoint) throws JSONException {
    this.endpoint = endpoint;
    this.extension = obj;
}
/**
 * Queries the endpoint for the extension data of the given identifiers.
 * <p>
 * The column metadata is taken from the {@code meta} array of the first
 * response that is processed while {@code columns} is still empty.
 *
 * @param ids               identifiers to fetch data for
 * @param reconCandidateMap map handed on to the result collector
 * @return the extension found for each identifier present under
 *         {@code rows} in the response; identifiers without data are absent
 * @throws Exception when the query fails or the response cannot be parsed
 */
public Map<String, ReconciledDataExtensionJob.DataExtension> extend(
        Set<String> ids,
        Map<String, ReconCandidate> reconCandidateMap
) throws Exception {
    StringWriter queryBuffer = new StringWriter();
    formulateQuery(ids, extension, queryBuffer);

    InputStream responseStream = performQuery(this.endpoint, queryBuffer.toString());
    try {
        JSONObject reply = ParsingUtilities.evaluateJsonStringToObject(
                ParsingUtilities.inputStreamToString(responseStream));

        if (columns.size() == 0) {
            // Extract the column metadata
            gatherColumnInfo(reply.getJSONArray("meta"), columns);
        }

        Map<String, ReconciledDataExtensionJob.DataExtension> extensionsById =
                new HashMap<String, ReconciledDataExtensionJob.DataExtension>();
        if (!reply.has("rows")) {
            return extensionsById;
        }

        JSONObject records = reply.getJSONObject("rows");
        // for each identifier
        for (String id : ids) {
            if (!records.has(id)) {
                continue;
            }
            ReconciledDataExtensionJob.DataExtension ext =
                    collectResult(records.getJSONObject(id), reconCandidateMap);
            if (ext != null) {
                extensionsById.put(id, ext);
            }
        }
        return extensionsById;
    } finally {
        responseStream.close();
    }
}
/**
 * Sends the query to the endpoint as a form-encoded POST body
 * ({@code extend=<encoded query>}) and returns the response stream.
 * The caller is responsible for closing the returned stream.
 *
 * @param endpoint URL of the service
 * @param query    the query, as produced by {@code formulateQuery}
 * @return the stream from which the response can be read
 * @throws IOException if the connection or the transfer fails
 */
static protected InputStream performQuery(String endpoint, String query) throws IOException {
    URLConnection conn = new URL(endpoint).openConnection();
    conn.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
    conn.setConnectTimeout(5000);
    conn.setDoOutput(true);

    DataOutputStream out = new DataOutputStream(conn.getOutputStream());
    try {
        out.writeBytes("extend=" + ParsingUtilities.encode(query));
    } finally {
        out.flush();
        out.close();
    }

    conn.connect();
    return conn.getInputStream();
}
/**
 * Converts the service's answer for one entity into a grid of cell values.
 * <p>
 * Each entry of {@code columns} occupies one column of the grid, and the
 * n-th value of a property is stored in the n-th row, so a property with
 * several values yields several rows. Value objects are interpreted by the
 * key they carry: {@code id} (a reconciled value), {@code str},
 * {@code float}, {@code int}, {@code date} or {@code bool}. Value objects
 * with none of these keys leave the cell empty.
 *
 * @param record            JSON object mapping property ids to arrays of value objects
 * @param reconCandidateMap cache of candidates by id; reconciled values not yet
 *                          in the cache are added to it
 * @return the collected values; never {@code null}
 * @throws JSONException if a value object does not have the expected shape
 */
protected ReconciledDataExtensionJob.DataExtension collectResult(
    JSONObject record,
    Map<String, ReconCandidate> reconCandidateMap
) throws JSONException {
    List<Object[]> rows = new ArrayList<Object[]>();

    // for each property; the column index is tied to the position in `columns`
    // so that a skipped property can never shift the following columns
    for (int colindex = 0; colindex < columns.size(); colindex++) {
        String pid = columns.get(colindex).id;

        // optJSONArray returns null for a missing property instead of throwing,
        // so a record lacking one property leaves that column blank rather than
        // failing the whole response
        JSONArray values = record.optJSONArray(pid);
        if (values == null) {
            continue;
        }

        // for each value
        for (int rowindex = 0; rowindex < values.length(); rowindex++) {
            JSONObject val = values.getJSONObject(rowindex);
            if (val.has("id")) {
                // store a reconciled value
                storeCell(rows, rowindex, colindex, val, reconCandidateMap);
            } else if (val.has("str")) {
                // store a bare string
                String str = val.getString("str");
                storeCell(rows, rowindex, colindex, str);
            } else if (val.has("float")) {
                float v = Float.parseFloat(val.getString("float"));
                storeCell(rows, rowindex, colindex, v);
            } else if (val.has("int")) {
                int v = Integer.parseInt(val.getString("int"));
                storeCell(rows, rowindex, colindex, v);
            } else if (val.has("date")) {
                ToDate td = new ToDate();
                String[] args = new String[1];
                args[0] = val.getString("date");
                Object v = td.call(null, args);
                storeCell(rows, rowindex, colindex, v);
            } else if (val.has("bool")) {
                // compare contents, not references: `==` on strings is only true
                // for the very same String instance
                boolean v = "true".equals(val.getString("bool"));
                storeCell(rows, rowindex, colindex, v);
            }
        }
    }

    Object[][] data = rows.toArray(new Object[rows.size()][]);
    return new DataExtension(data);
}
/**
 * Stores a value at the given position, appending empty rows (as wide as
 * {@code columns}) until the requested row exists.
 *
 * @param rows  the rows collected so far; grown as needed
 * @param row   index of the row to write to
 * @param col   index of the column to write to
 * @param value the value to store
 */
protected void storeCell(
    List<Object[]> rows,
    int row,
    int col,
    Object value
) {
    for (int missing = row - rows.size(); missing >= 0; missing--) {
        rows.add(new Object[columns.size()]);
    }
    Object[] target = rows.get(row);
    target[col] = value;
}
/**
 * Stores a reconciled value at the given position. The candidate is taken
 * from the cache when its id is already known; otherwise it is built from
 * the JSON object (with a score of 100) and added to the cache.
 *
 * @param rows              the rows collected so far; grown as needed
 * @param row               index of the row to write to
 * @param col               index of the column to write to
 * @param obj               JSON description of the value ("id", "name", "type")
 * @param reconCandidateMap cache of candidates by id
 * @throws JSONException if {@code obj} lacks a required key
 */
protected void storeCell(
    List<Object[]> rows,
    int row,
    int col,
    JSONObject obj,
    Map<String, ReconCandidate> reconCandidateMap
) throws JSONException {
    String id = obj.getString("id");
    if (!reconCandidateMap.containsKey(id)) {
        reconCandidateMap.put(id, new ReconCandidate(
            obj.getString("id"),
            obj.getString("name"),
            JSONUtilities.getStringArray(obj, "type"),
            100
        ));
    }
    ReconCandidate candidate = reconCandidateMap.get(id);
    storeCell(rows, row, col, candidate);
}
/**
 * Writes the JSON query sent to the service: an object with an "ids" array
 * (null ids are left out) and a "properties" array holding, for each
 * configured property, its "id" and its "settings" when present.
 *
 * @param ids    identifiers of the entities to fetch values for
 * @param node   extension configuration; must contain a "properties" array
 * @param writer destination of the serialized query
 * @throws JSONException if the configuration does not have the expected shape
 */
static protected void formulateQuery(Set<String> ids, JSONObject node, Writer writer) throws JSONException {
    JSONWriter out = new JSONWriter(writer);
    out.object();

    out.key("ids").array();
    for (String id : ids) {
        if (id == null) {
            continue;
        }
        out.value(id);
    }
    out.endArray();

    out.key("properties").array();
    JSONArray properties = node.getJSONArray("properties");
    for (int i = 0, count = properties.length(); i < count; i++) {
        JSONObject property = properties.getJSONObject(i);
        out.object();
        out.key("id").value(property.getString("id"));
        if (property.has("settings")) {
            JSONObject settings = property.getJSONObject("settings");
            out.key("settings").value(settings);
        }
        out.endObject();
    }
    out.endArray();

    out.endObject();
}
/**
 * Reads the "meta" section of a response and appends one {@code ColumnInfo}
 * per entry to {@code columns}. The expected type is left {@code null} for
 * entries without a "type" object.
 *
 * @param meta    array of column descriptions ("name", "id", optional "type")
 * @param columns list the column descriptions are appended to
 * @throws JSONException if an entry lacks a required key
 */
static protected void gatherColumnInfo(JSONArray meta, List<ColumnInfo> columns) throws JSONException {
    int i = 0;
    while (i < meta.length()) {
        JSONObject entry = meta.getJSONObject(i);

        JSONObject typeObj = entry.has("type") ? entry.getJSONObject("type") : null;
        ReconType expectedType = (typeObj == null)
            ? null
            : new ReconType(typeObj.getString("id"), typeObj.getString("name"));

        String name = entry.getString("name");
        String id = entry.getString("id");
        columns.add(new ColumnInfo(name, id, expectedType));
        i++;
    }
}
}

View File

@ -0,0 +1,333 @@
/*
Copyright 2010, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.operations.recon;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import org.apache.commons.lang.StringUtils;
import org.json.JSONException;
import org.json.JSONObject;
import org.json.JSONWriter;
import com.google.refine.browsing.Engine;
import com.google.refine.browsing.FilteredRows;
import com.google.refine.browsing.RowVisitor;
import com.google.refine.model.changes.DataExtensionChange;
import com.google.refine.model.recon.ReconciledDataExtensionJob;
import com.google.refine.model.recon.ReconciledDataExtensionJob.ColumnInfo;
import com.google.refine.model.recon.ReconciledDataExtensionJob.DataExtension;
import com.google.refine.history.HistoryEntry;
import com.google.refine.model.AbstractOperation;
import com.google.refine.model.Cell;
import com.google.refine.model.Column;
import com.google.refine.model.Project;
import com.google.refine.model.ReconCandidate;
import com.google.refine.model.ReconType;
import com.google.refine.model.Row;
import com.google.refine.model.changes.CellAtRow;
import com.google.refine.operations.EngineDependentOperation;
import com.google.refine.operations.OperationRegistry;
import com.google.refine.process.LongRunningProcess;
import com.google.refine.process.Process;
public class ExtendDataOperation extends EngineDependentOperation {
// Name of the column whose matched cells provide the ids to extend.
final protected String _baseColumnName;
// URL of the service queried for the additional data.
final protected String _endpoint;
// Passed through unchanged to the DataExtensionChange.
final protected String _identifierSpace;
// Passed through unchanged to the DataExtensionChange.
final protected String _schemaSpace;
// Extension configuration handed to the ReconciledDataExtensionJob.
final protected JSONObject _extension;
// Index named in the description ("Extend data at index ...") and passed to the change.
final protected int _columnInsertIndex;
/**
 * Rebuilds the operation from the JSON written by {@code write}.
 *
 * @param project the project (not used by this method)
 * @param obj     serialized form of the operation
 * @return the reconstructed operation
 * @throws Exception if a required key is missing or has the wrong type
 */
static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception {
    JSONObject engineConfig = obj.getJSONObject("engineConfig");
    String baseColumnName = obj.getString("baseColumnName");
    String endpoint = obj.getString("endpoint");
    String identifierSpace = obj.getString("identifierSpace");
    String schemaSpace = obj.getString("schemaSpace");
    JSONObject extension = obj.getJSONObject("extension");
    int columnInsertIndex = obj.getInt("columnInsertIndex");

    return new ExtendDataOperation(
        engineConfig,
        baseColumnName,
        endpoint,
        identifierSpace,
        schemaSpace,
        extension,
        columnInsertIndex
    );
}
/**
 * Creates the operation; all arguments are stored as given.
 *
 * @param engineConfig      engine configuration handed to the superclass
 * @param baseColumnName    name of the reconciled column to extend from
 * @param endpoint          URL of the service to query
 * @param identifierSpace   identifier space recorded with the change
 * @param schemaSpace       schema space recorded with the change
 * @param extension         extension configuration
 * @param columnInsertIndex index recorded with the change
 */
public ExtendDataOperation(
    JSONObject engineConfig,
    String baseColumnName,
    String endpoint,
    String identifierSpace,
    String schemaSpace,
    JSONObject extension,
    int columnInsertIndex
) {
    super(engineConfig);

    this._baseColumnName = baseColumnName;
    this._columnInsertIndex = columnInsertIndex;

    this._endpoint = endpoint;
    this._identifierSpace = identifierSpace;
    this._schemaSpace = schemaSpace;

    this._extension = extension;
}
/**
 * Serializes the operation as a JSON object with the keys read back by
 * {@code reconstruct}, plus "op" and "description".
 */
@Override
public void write(JSONWriter writer, Properties options)
        throws JSONException {
    writer.object()
        .key("op").value(OperationRegistry.s_opClassToName.get(this.getClass()))
        .key("description").value(getBriefDescription(null))
        .key("engineConfig").value(getEngineConfig())
        .key("columnInsertIndex").value(_columnInsertIndex)
        .key("baseColumnName").value(_baseColumnName)
        .key("endpoint").value(_endpoint)
        .key("identifierSpace").value(_identifierSpace)
        .key("schemaSpace").value(_schemaSpace)
        .key("extension").value(_extension)
        .endObject();
}
/**
 * Builds the short description of the operation from the insert index and
 * the base column name. The {@code project} argument is not used.
 */
@Override
protected String getBriefDescription(Project project) {
    StringBuilder description = new StringBuilder("Extend data at index ");
    description.append(_columnInsertIndex);
    description.append(" based on column ");
    description.append(_baseColumnName);
    return description.toString();
}
/**
 * Builds a description that also reports how many cells are filled.
 *
 * @param column      the base column; its name appears in the description
 * @param cellsAtRows the filled cells; only their count is used
 */
protected String createDescription(Column column, List<CellAtRow> cellsAtRows) {
    StringBuilder description = new StringBuilder("Extend data at index ");
    description.append(_columnInsertIndex);
    description.append(" based on column ");
    description.append(column.getName());
    description.append(" by filling ");
    description.append(cellsAtRows.size());
    return description.toString();
}
/**
 * Creates the long-running process that performs the extension on the
 * given project. The {@code options} argument is not used.
 */
@Override
public Process createProcess(Project project, Properties options) throws Exception {
    JSONObject engineConfig = getEngineConfig();
    String description = getBriefDescription(null);
    return new ExtendDataProcess(project, engineConfig, description);
}
public class ExtendDataProcess extends LongRunningProcess implements Runnable {
/** Project whose rows are extended. */
protected final Project _project;
/** Engine configuration selecting the rows to consider. */
protected final JSONObject _engineConfig;
/** Id allocated up front for the history entry created when the process finishes. */
protected final long _historyEntryID;
/** Cell index of the base column; set by {@code populateRowsWithMatches}. */
protected int _cellIndex;
/** Job that talks to the service. */
protected ReconciledDataExtensionJob _job;

/**
 * @param project      project to extend
 * @param engineConfig engine configuration used to filter the rows
 * @param description  description shown for the process and its history entry
 */
public ExtendDataProcess(
    Project project,
    JSONObject engineConfig,
    String description
) throws JSONException {
    super(description);
    this._project = project;
    this._engineConfig = engineConfig;
    this._historyEntryID = HistoryEntry.allocateID();
    this._job = new ReconciledDataExtensionJob(_extension, _endpoint);
}
/**
 * Serializes the state of the process: id, description, status
 * ("pending" before a thread exists, then "running" or "done") and progress.
 */
@Override
public void write(JSONWriter writer, Properties options)
        throws JSONException {
    writer.object();
    writer.key("id").value(hashCode());
    writer.key("description").value(_description);
    writer.key("immediate").value(false);

    writer.key("status");
    if (_thread == null) {
        writer.value("pending");
    } else if (_thread.isAlive()) {
        writer.value("running");
    } else {
        writer.value("done");
    }

    writer.key("progress").value(_progress);
    writer.endObject();
}
/**
 * The process is its own runnable: the work is done in {@code run()}.
 */
@Override
protected Runnable getRunnable() {
return this;
}
/**
 * Collects the indices of the filtered rows whose cell in the base column
 * has a reconciliation match, and records the cell index of that column
 * in {@code _cellIndex}.
 *
 * @param rowIndices list the matching row indices are appended to
 * @throws Exception if the base column does not exist or the engine
 *                   configuration cannot be applied
 */
protected void populateRowsWithMatches(List<Integer> rowIndices) throws Exception {
    Engine engine = new Engine(_project);
    engine.initializeFromJSON(_engineConfig);

    Column baseColumn = _project.columnModel.getColumnByName(_baseColumnName);
    if (baseColumn == null) {
        throw new Exception("No column named " + _baseColumnName);
    }
    _cellIndex = baseColumn.getCellIndex();

    // final alias so the anonymous visitor below can capture the list
    final List<Integer> matchedRows = rowIndices;

    FilteredRows filteredRows = engine.getAllFilteredRows();
    filteredRows.accept(_project, new RowVisitor() {
        @Override
        public void start(Project project) {
            // nothing to do
        }

        @Override
        public void end(Project project) {
            // nothing to do
        }

        @Override
        public boolean visit(Project project, int rowIndex, Row row) {
            Cell cell = row.getCell(_cellIndex);
            boolean matched = cell != null && cell.recon != null && cell.recon.match != null;
            if (matched) {
                matchedRows.add(rowIndex);
            }
            return false;
        }
    });
}
/**
 * Fetches the extension data for one batch of rows, starting at position
 * {@code from} of {@code rowIndices}. The batch ends once 10 distinct ids
 * have been gathered or {@code limit} is reached. One entry is appended to
 * {@code dataExtensions} per row of the batch ({@code null} when nothing
 * was fetched for its id).
 * <p>
 * Any failure of the query is swallowed: the whole batch then gets
 * {@code null} entries.
 *
 * @param rowIndices        indices of the rows to extend
 * @param dataExtensions    list the fetched data is appended to
 * @param from              first position of {@code rowIndices} to process
 * @param limit             position at which to stop at the latest (exclusive)
 * @param reconCandidateMap cache of candidates by id, shared across batches
 * @return the position following the last processed row
 */
protected int extendRows(
    List<Integer> rowIndices,
    List<DataExtension> dataExtensions,
    int from,
    int limit,
    Map<String, ReconCandidate> reconCandidateMap
) {
    Set<String> ids = new HashSet<String>();
    int end = from;
    while (end < limit && ids.size() < 10) {
        Row row = _project.rows.get(rowIndices.get(end));
        ids.add(row.getCell(_cellIndex).recon.match.id);
        end++;
    }

    Map<String, DataExtension> fetched;
    try {
        fetched = _job.extend(ids, reconCandidateMap);
    } catch (Exception e) {
        fetched = new HashMap<String, DataExtension>();
    }

    for (int position = from; position < end; position++) {
        Row row = _project.rows.get(rowIndices.get(position));
        String guid = row.getCell(_cellIndex).recon.match.id;
        // get() yields null for ids without fetched data
        dataExtensions.add(fetched.get(guid));
    }
    return end;
}
/**
 * Runs the extension: gathers the matched rows, fetches their data batch
 * by batch (pausing 200 ms between batches and updating the progress), and,
 * unless the process was canceled, records the result as a history entry.
 */
@Override
public void run() {
    List<Integer> rowIndices = new ArrayList<Integer>();
    List<DataExtension> dataExtensions = new ArrayList<DataExtension>();

    try {
        populateRowsWithMatches(rowIndices);
    } catch (Exception e2) {
        // TODO : Not sure what to do here?
        e2.printStackTrace();
    }

    Map<String, ReconCandidate> reconCandidateMap = new HashMap<String, ReconCandidate>();
    int total = rowIndices.size();
    for (int start = 0; start < total; ) {
        start = extendRows(rowIndices, dataExtensions, start, total, reconCandidateMap);
        _progress = start * 100 / total;

        try {
            Thread.sleep(200);
        } catch (InterruptedException e) {
            if (_canceled) {
                break;
            }
        }
    }

    if (_canceled) {
        return;
    }

    List<String> columnNames = new ArrayList<String>();
    List<ReconType> columnTypes = new ArrayList<ReconType>();
    for (ColumnInfo info : _job.columns) {
        columnNames.add(info.name);
        columnTypes.add(info.expectedType);
    }

    DataExtensionChange change = new DataExtensionChange(
        _baseColumnName,
        _endpoint,
        _identifierSpace,
        _schemaSpace,
        _columnInsertIndex,
        columnNames,
        columnTypes,
        rowIndices,
        dataExtensions,
        _historyEntryID);
    HistoryEntry historyEntry = new HistoryEntry(
        _historyEntryID,
        _project,
        _description,
        ExtendDataOperation.this,
        change
    );

    _project.history.addEntry(historyEntry);
    _project.processManager.onDoneProcess(this);
}
}
}

View File

@ -0,0 +1,299 @@
/*
Copyright 2010, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.tests.recon;
import static org.mockito.Mockito.mock;
import java.io.File;
import java.io.IOException;
import java.util.Properties;
import java.util.List;
import java.util.ArrayList;
import org.json.JSONException;
import org.json.JSONObject;
import org.slf4j.LoggerFactory;
import org.testng.Assert;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.Test;
import com.google.refine.ProjectManager;
import com.google.refine.ProjectMetadata;
import com.google.refine.browsing.Engine;
import com.google.refine.browsing.RowVisitor;
import com.google.refine.grel.Function;
import com.google.refine.io.FileProjectManager;
import com.google.refine.model.Cell;
import com.google.refine.model.Column;
import com.google.refine.model.ModelException;
import com.google.refine.model.Project;
import com.google.refine.model.Row;
import com.google.refine.model.Recon;
import com.google.refine.model.ReconCandidate;
import com.google.refine.process.Process;
import com.google.refine.process.ProcessManager;
import com.google.refine.operations.OnError;
import com.google.refine.operations.EngineDependentOperation;
import com.google.refine.operations.recon.ExtendDataOperation;
import com.google.refine.tests.RefineTest;
import com.google.refine.tests.util.TestUtils;
public class DataExtensionTests extends RefineTest {
// Engine configuration used by every test; only the mode is set.
// (The literal used to end with a stray extra '}' after the object.)
static final String ENGINE_JSON_URLS = "{\"mode\":\"row-based\"}";
// Live service the tests send their queries to.
static final String RECON_SERVICE = "https://tools.wmflabs.org/openrefine-wikidata/en/api";
// Identifier and schema spaces attached to the reconciled cells of the fixture.
static final String RECON_IDENTIFIER_SPACE = "http://www.wikidata.org/entity/";
static final String RECON_SCHEMA_SPACE = "http://www.wikidata.org/prop/direct/";
/** Sets up the logger for this test class. */
@Override
@BeforeTest
public void init() {
    Class<?> testClass = getClass();
    logger = LoggerFactory.getLogger(testClass);
}
// dependencies (re-created by SetUp before each test method)
// Project holding the reconciled "country" column.
Project project;
// Mocked options handed to createProcess.
Properties options;
// Parsed form of ENGINE_JSON_URLS, passed to the operations under test.
JSONObject engine_config;
// Engine initialized from engine_config in row-based mode.
Engine engine;
// Bindings holding the project under the key "project".
Properties bindings;
/**
 * Builds a fresh project with a single "country" column containing four
 * cells matched to their entity ids, plus the engine and option
 * objects used by the tests.
 */
@BeforeMethod
public void SetUp() throws JSONException, IOException, ModelException {
    File workspace = TestUtils.createTempDirectory("openrefine-test-workspace-dir");
    FileProjectManager.initialize(workspace);

    project = new Project();
    ProjectMetadata metadata = new ProjectMetadata();
    metadata.setName("Data Extension Test Project");
    ProjectManager.singleton.registerProject(project, metadata);

    int cellIndex = project.columnModel.allocateNewCellIndex();
    Column countryColumn = new Column(cellIndex, "country");
    project.columnModel.addColumn(cellIndex, countryColumn, true);

    options = mock(Properties.class);
    engine = new Engine(project);
    engine_config = new JSONObject(ENGINE_JSON_URLS);
    engine.initializeFromJSON(engine_config);
    engine.setMode(Engine.Mode.RowBased);

    bindings = new Properties();
    bindings.put("project", project);

    // { cell value, matched entity id }
    String[][] countries = {
        { "Iran", "Q794" },
        { "Japan", "Q17" },
        { "Tajikistan", "Q863" },
        { "United States of America", "Q30" },
    };
    for (String[] country : countries) {
        Row row = new Row(2);
        row.setCell(0, reconciledCell(country[0], country[1]));
        project.rows.add(row);
    }
}
/** Drops the references to the per-test fixture objects. */
@AfterMethod
public void TearDown() {
    bindings = null;
    engine = null;
    options = null;
    project = null;
}
/**
 * Creates a cell whose value is {@code name} and which is matched to a
 * single candidate with the given id (score 100, no types).
 *
 * @param name value of the cell and name of the matched candidate
 * @param id   identifier of the matched candidate
 */
static public Cell reconciledCell(String name, String id) {
    ReconCandidate match = new ReconCandidate(id, name, new String[0], 100);

    List<ReconCandidate> candidates = new ArrayList<ReconCandidate>();
    candidates.add(match);

    Recon recon = new Recon(0, RECON_IDENTIFIER_SPACE, RECON_SCHEMA_SPACE);
    recon.match = match;
    recon.candidates = candidates;
    recon.service = RECON_SERVICE;

    return new Cell(name, recon);
}
/**
 * Test to fetch simple strings.
 * Queries the live service configured in RECON_SERVICE.
 */
@Test
public void testFetchStrings() throws Exception {
    JSONObject extension = new JSONObject("{\"properties\":[{\"id\":\"P297\",\"name\":\"ISO 3166-1 alpha-2 code\"}]}");

    EngineDependentOperation op = new ExtendDataOperation(engine_config,
            "country",
            RECON_SERVICE,
            RECON_IDENTIFIER_SPACE,
            RECON_SCHEMA_SPACE,
            extension,
            1);
    ProcessManager pm = project.getProcessManager();
    Process process = op.createProcess(project, options);
    process.startPerforming(pm);
    Assert.assertTrue(process.isRunning());
    try {
        // We only have 4 rows, so 5000 ms should be more than enough.
        Thread.sleep(5000);
    } catch (InterruptedException e) {
        Assert.fail("Test interrupted");
    }
    Assert.assertFalse(process.isRunning());

    // Inspect rows (assertEquals reports the actual value on failure)
    Assert.assertEquals(project.rows.get(0).getCellValue(1), "IR");
    Assert.assertEquals(project.rows.get(1).getCellValue(1), "JP");
    Assert.assertEquals(project.rows.get(2).getCellValue(1), "TJ");
    Assert.assertEquals(project.rows.get(3).getCellValue(1), "US");

    // Make sure we did not create any recon stats for that column (no reconciled value)
    Assert.assertNull(project.columnModel.getColumnByName("ISO 3166-1 alpha-2 code").getReconStats());
}
/**
 * Test to fetch counts of values.
 * Queries the live service configured in RECON_SERVICE.
 */
@Test
public void testFetchCounts() throws Exception {
    JSONObject extension = new JSONObject("{\"properties\":[{\"id\":\"P38\",\"name\":\"currency\",\"settings\":{\"count\":\"on\"}}]}");

    EngineDependentOperation op = new ExtendDataOperation(engine_config,
            "country",
            RECON_SERVICE,
            RECON_IDENTIFIER_SPACE,
            RECON_SCHEMA_SPACE,
            extension,
            1);
    ProcessManager pm = project.getProcessManager();
    Process process = op.createProcess(project, options);
    process.startPerforming(pm);
    Assert.assertTrue(process.isRunning());
    try {
        Thread.sleep(5000);
    } catch (InterruptedException e) {
        Assert.fail("Test interrupted");
    }
    Assert.assertFalse(process.isRunning());

    // Test to be updated as countries change currencies!
    Assert.assertEquals(Math.round((float)project.rows.get(2).getCellValue(1)), 2);
    Assert.assertEquals(Math.round((float)project.rows.get(3).getCellValue(1)), 1);

    // Make sure we did not create any recon stats for that column (no reconciled value)
    Assert.assertNull(project.columnModel.getColumnByName("currency").getReconStats());
}
/**
 * Test fetch only the best statements.
 * Queries the live service configured in RECON_SERVICE.
 */
@Test
public void testFetchCurrent() throws Exception {
    JSONObject extension = new JSONObject("{\"properties\":[{\"id\":\"P38\",\"name\":\"currency\",\"settings\":{\"rank\":\"best\"}}]}");

    EngineDependentOperation op = new ExtendDataOperation(engine_config,
            "country",
            RECON_SERVICE,
            RECON_IDENTIFIER_SPACE,
            RECON_SCHEMA_SPACE,
            extension,
            1);
    ProcessManager pm = project.getProcessManager();
    Process process = op.createProcess(project, options);
    process.startPerforming(pm);
    Assert.assertTrue(process.isRunning());
    try {
        Thread.sleep(5000);
    } catch (InterruptedException e) {
        Assert.fail("Test interrupted");
    }
    Assert.assertFalse(process.isRunning());

    /*
     * Tajikistan has one "preferred" currency and one "normal" one
     * (in terms of statement ranks).
     * But thanks to our setting in the extension configuration,
     * we only fetch the current one, so the one just after it is
     * the one for the US (USD).
     */
    Assert.assertEquals(project.rows.get(2).getCellValue(1), "Tajikistani somoni");
    Assert.assertEquals(project.rows.get(3).getCellValue(1), "United States dollar");

    // Make sure all the values are reconciled
    Assert.assertEquals(project.columnModel.getColumnByName("currency").getReconStats().matchedTopics, 4);
}
/**
 * Test fetch records (multiple values per reconciled cell).
 * Queries the live service configured in RECON_SERVICE.
 */
@Test
public void testFetchRecord() throws Exception {
    JSONObject extension = new JSONObject("{\"properties\":[{\"id\":\"P38\",\"name\":\"currency\"}]}");

    EngineDependentOperation op = new ExtendDataOperation(engine_config,
            "country",
            RECON_SERVICE,
            RECON_IDENTIFIER_SPACE,
            RECON_SCHEMA_SPACE,
            extension,
            1);
    ProcessManager pm = project.getProcessManager();
    Process process = op.createProcess(project, options);
    process.startPerforming(pm);
    Assert.assertTrue(process.isRunning());
    try {
        Thread.sleep(5000);
    } catch (InterruptedException e) {
        Assert.fail("Test interrupted");
    }
    Assert.assertFalse(process.isRunning());

    /*
     * Tajikistan has one "preferred" currency and one "normal" one
     * (in terms of statement ranks).
     * The second currency is fetched as well, which creates a record
     * (the cell to the left of it is left blank).
     */
    Assert.assertEquals(project.rows.get(2).getCellValue(1), "Tajikistani somoni");
    Assert.assertEquals(project.rows.get(3).getCellValue(1), "Tajikistani ruble");
    Assert.assertNull(project.rows.get(3).getCellValue(0));

    // Make sure all the values are reconciled
    Assert.assertEquals(project.columnModel.getColumnByName("currency").getReconStats().matchedTopics, 5);
}
}

View File

@ -121,6 +121,8 @@ function registerCommands() {
RS.registerCommand(module, "recon-clear-one-cell", new Packages.com.google.refine.commands.recon.ReconClearOneCellCommand()); RS.registerCommand(module, "recon-clear-one-cell", new Packages.com.google.refine.commands.recon.ReconClearOneCellCommand());
RS.registerCommand(module, "recon-clear-similar-cells", new Packages.com.google.refine.commands.recon.ReconClearSimilarCellsCommand()); RS.registerCommand(module, "recon-clear-similar-cells", new Packages.com.google.refine.commands.recon.ReconClearSimilarCellsCommand());
RS.registerCommand(module, "recon-copy-across-columns", new Packages.com.google.refine.commands.recon.ReconCopyAcrossColumnsCommand()); RS.registerCommand(module, "recon-copy-across-columns", new Packages.com.google.refine.commands.recon.ReconCopyAcrossColumnsCommand());
RS.registerCommand(module, "preview-extend-data", new Packages.com.google.refine.commands.recon.PreviewExtendDataCommand());
RS.registerCommand(module, "extend-data", new Packages.com.google.refine.commands.recon.ExtendDataCommand());
RS.registerCommand(module, "guess-types-of-column", new Packages.com.google.refine.commands.recon.GuessTypesOfColumnCommand()); RS.registerCommand(module, "guess-types-of-column", new Packages.com.google.refine.commands.recon.GuessTypesOfColumnCommand());
@ -180,6 +182,7 @@ function registerOperations() {
OR.registerOperation(module, "recon-judge-similar-cells", Packages.com.google.refine.operations.recon.ReconJudgeSimilarCellsOperation); OR.registerOperation(module, "recon-judge-similar-cells", Packages.com.google.refine.operations.recon.ReconJudgeSimilarCellsOperation);
OR.registerOperation(module, "recon-clear-similar-cells", Packages.com.google.refine.operations.recon.ReconClearSimilarCellsOperation); OR.registerOperation(module, "recon-clear-similar-cells", Packages.com.google.refine.operations.recon.ReconClearSimilarCellsOperation);
OR.registerOperation(module, "recon-copy-across-columns", Packages.com.google.refine.operations.recon.ReconCopyAcrossColumnsOperation); OR.registerOperation(module, "recon-copy-across-columns", Packages.com.google.refine.operations.recon.ReconCopyAcrossColumnsOperation);
OR.registerOperation(module, "extend-reconciled-data", Packages.com.google.refine.operations.recon.ExtendDataOperation);
} }
function registerImporting() { function registerImporting() {
@ -367,6 +370,7 @@ function init() {
"styles/index/default-importing-sources.less", "styles/index/default-importing-sources.less",
"styles/views/data-table-view.less", // for the preview table's styles "styles/views/data-table-view.less", // for the preview table's styles
"styles/views/extend-data-preview-dialog.less",
"styles/index/fixed-width-parser-ui.less", "styles/index/fixed-width-parser-ui.less",
"styles/index/xml-parser-ui.less", "styles/index/xml-parser-ui.less",
"styles/index/json-parser-ui.less" "styles/index/json-parser-ui.less"
@ -431,6 +435,7 @@ function init() {
"scripts/reconciliation/standard-service-panel.js", "scripts/reconciliation/standard-service-panel.js",
"scripts/dialogs/expression-preview-dialog.js", "scripts/dialogs/expression-preview-dialog.js",
"scripts/dialogs/extend-data-preview-dialog.js",
"scripts/dialogs/clustering-dialog.js", "scripts/dialogs/clustering-dialog.js",
"scripts/dialogs/scatterplot-dialog.js", "scripts/dialogs/scatterplot-dialog.js",
"scripts/dialogs/templating-exporter-dialog.js", "scripts/dialogs/templating-exporter-dialog.js",
@ -474,7 +479,8 @@ function init() {
"styles/dialogs/custom-tabular-exporter-dialog.less", "styles/dialogs/custom-tabular-exporter-dialog.less",
"styles/reconciliation/recon-dialog.less", "styles/reconciliation/recon-dialog.less",
"styles/reconciliation/standard-service-panel.less" "styles/reconciliation/standard-service-panel.less",
"styles/reconciliation/extend-data-preview-dialog.less",
] ]
); );

View File

@ -503,6 +503,13 @@
"cache-responses": "Cache responses", "cache-responses": "Cache responses",
"copy-val": "copy value from original column", "copy-val": "copy value from original column",
"warning-col-name": "You must enter a column name.", "warning-col-name": "You must enter a column name.",
"add-col-recon-val": "Add columns from reconciled values",
"add-col-recon-col": "Add columns from reconciled column",
"warning-no-property": "Please select a property first.",
"configure-col": "Configure this column",
"remove-prop": "remove",
"configure-prop": "configure",
"no-settings": "No settings are available for this property.",
"add-col-fetch": "Add column by fetching URLs based on column", "add-col-fetch": "Add column by fetching URLs based on column",
"throttle-delay": "Throttle delay", "throttle-delay": "Throttle delay",
"milli": "milliseconds", "milli": "milliseconds",

View File

@ -503,6 +503,13 @@
"cache-responses": "Cache responses", "cache-responses": "Cache responses",
"copy-val": "copy value from original column", "copy-val": "copy value from original column",
"warning-col-name": "You must enter a column name.", "warning-col-name": "You must enter a column name.",
"add-col-recon-val": "Add columns from reconciled values",
"add-col-recon-col": "Add columns from reconciled column",
"warning-no-property": "Please select a property first.",
"configure-col": "Configure this column",
"remove-prop": "remove",
"configure-prop": "configure",
"no-settings": "No settings are available for this property.",
"add-col-fetch": "Add column by fetching URLs based on column", "add-col-fetch": "Add column by fetching URLs based on column",
"throttle-delay": "Throttle delay", "throttle-delay": "Throttle delay",
"milli": "milliseconds", "milli": "milliseconds",

View File

@ -502,6 +502,13 @@
"store-err": "guardar error", "store-err": "guardar error",
"copy-val": "copiar valor de la columna original", "copy-val": "copiar valor de la columna original",
"warning-col-name": "Debe ingresar un nombre para la columna.", "warning-col-name": "Debe ingresar un nombre para la columna.",
"add-col-recon-val": "Añadir columnas de valores conciliados",
"add-col-recon-col": "Añadir columnas de la columna conciliada",
"warning-no-property": "Seleccione primero una propiedad.",
"configure-col": "Configurar esta columna",
"remove-prop": "retirar",
"configure-prop": "configurar",
"no-settings": "No hay configuraciones disponibles para esta propiedad.",
"add-col-fetch": "Agregar columna accediendo a URls basada en la columna", "add-col-fetch": "Agregar columna accediendo a URls basada en la columna",
"throttle-delay": "Tiempo de retraso", "throttle-delay": "Tiempo de retraso",
"milli": "milisegundos", "milli": "milisegundos",

View File

@ -504,6 +504,13 @@
"copy-val": "copier la valeur depuis la colonne originale", "copy-val": "copier la valeur depuis la colonne originale",
"warning-col-name": "Vous devez indiquer un nom de colonne.", "warning-col-name": "Vous devez indiquer un nom de colonne.",
"add-col-fetch": "Ajouter une colonne en moissonnant les données depuis les URL dune colonne", "add-col-fetch": "Ajouter une colonne en moissonnant les données depuis les URL dune colonne",
"add-col-recon-val": "Ajouter des colonnes à partir de valeurs réconciliées",
"add-col-recon-col": "Ajouter des colonnes à partir de la colonne",
"warning-no-property": "Veuillez d'abord sélectionner une propriété.",
"configure-col": "Configurer cette colonne",
"remove-prop": "supprimer",
"configure-prop": "configurer",
"no-settings": "Aucun paramètre n'est disponible pour cette propriété.",
"throttle-delay": "Délai de récupération", "throttle-delay": "Délai de récupération",
"milli": "millisecondes", "milli": "millisecondes",
"url-fetch": "Indiquer les URL à moissonner :", "url-fetch": "Indiquer les URL à moissonner :",

View File

@ -502,6 +502,13 @@
"store-err": "salva l'errore", "store-err": "salva l'errore",
"copy-val": "copia il valore dalla colonna originale", "copy-val": "copia il valore dalla colonna originale",
"warning-col-name": "Inserisci un nome per la colonna.", "warning-col-name": "Inserisci un nome per la colonna.",
"add-col-recon-val": "Aggiungi colonne da valori riconciliati",
"add-col-recon-col": "Aggiungi colonne dalla colonna riconciliata",
"warning-no-property": "Per favore seleziona innanzitutto una proprietà.",
"configure-col": "Configurare questa colonna",
"remove-prop": "rimuovi",
"configure-prop": "configurare",
"no-settings": "Nessuna impostazione sono disponibili per questa proprietà.",
"add-col-fetch": "Aggiungi colonna con URL, basandoti su", "add-col-fetch": "Aggiungi colonna con URL, basandoti su",
"throttle-delay": "Durata Throttle", "throttle-delay": "Durata Throttle",
"milli": "millisecondi", "milli": "millisecondi",

View File

@ -0,0 +1,422 @@
/*
Copyright 2010, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
 * Dialog that lets the user choose properties to fetch for the reconciled
 * cells of a column, with a live preview of the resulting new columns.
 *
 * The constructor loads the dialog markup, wires the Reset / OK / Cancel
 * buttons, resolves the "propose properties" endpoint from the metadata of
 * the reconciliation service the column was reconciled against, then
 * fetches the suggested properties and finally shows the dialog.
 *
 * @param column       column model object; `name` and the optional
 *                     `reconConfig` (`service`, `type.id`) are read
 * @param columnIndex  index of the column (stored on the instance)
 * @param rowIndices   row indices sent to the preview command
 * @param onDone       called on OK as
 *                     onDone(extension, serviceUrl, identifierSpace, schemaSpace)
 */
function ExtendReconciledDataPreviewDialog(column, columnIndex, rowIndices, onDone) {
  this._column = column;
  this._columnIndex = columnIndex;
  this._rowIndices = rowIndices;
  this._onDone = onDone;
  this._extension = { properties: [] };

  var self = this;
  this._dialog = $(DOM.loadHTML("core", "scripts/views/data-table/extend-data-preview-dialog.html"));
  this._elmts = DOM.bind(this._dialog);

  // The column name is user data: append it as a text node so that markup
  // in a column name is displayed literally instead of being interpreted.
  this._elmts.dialogHeader
    .html($.i18n._('core-views')["add-col-recon-col"] + " ")
    .append(document.createTextNode(column.name));

  this._elmts.resetButton.click(function() {
    self._extension.properties = [];
    self._update();
  });

  this._elmts.okButton.click(function() {
    if (self._extension.properties.length === 0) {
      alert($.i18n._('core-views')["warning-no-property"]);
    } else {
      DialogSystem.dismissUntil(self._level - 1);
      self._onDone(self._extension,
        self._service,
        self._serviceMetadata.identifierSpace,
        self._serviceMetadata.schemaSpace);
    }
  });

  this._elmts.cancelButton.click(function() {
    DialogSystem.dismissUntil(self._level - 1);
  });

  var dismissBusy = DialogSystem.showBusy();
  var type = (column.reconConfig) && (column.reconConfig.type) ? column.reconConfig.type.id : "";

  this._proposePropertiesUrl = null;
  this._fetchColumnUrl = null;
  this._serviceMetadata = null;

  // Same truthiness test as the `type` lookup above, so a present-but-empty
  // reconConfig is treated like a missing one.
  if (column.reconConfig) {
    var service = column.reconConfig.service;
    this._service = service;

    var serviceMetadata = ReconciliationManager.getServiceFromUrl(service);
    this._serviceMetadata = serviceMetadata;

    // The service may not be known to the manager; the `in` operator throws
    // on a non-object, which would leave the busy indicator up forever.
    if (serviceMetadata && serviceMetadata.extend) {
      var endpoint = serviceMetadata.extend.propose_properties;
      if (endpoint) {
        this._proposePropertiesUrl = endpoint.service_url + endpoint.service_path;
      }
    }
  }

  ExtendReconciledDataPreviewDialog.getAllProperties(this._proposePropertiesUrl, type, function(properties) {
    dismissBusy();
    self._show(properties);
  });
}
/**
 * Fetches the properties a service proposes for a given type (JSONP).
 *
 * @param url     "propose properties" endpoint, or null when the service has
 *                none; in that case onDone([]) is called synchronously
 * @param typeID  type identifier sent as the `type` query parameter
 * @param onDone  called exactly once with an array of {id, name} objects
 *                sorted by name, or with [] if nothing usable arrives
 *                within 7 seconds
 */
ExtendReconciledDataPreviewDialog.getAllProperties = function(url, typeID, onDone) {
  if (url == null) {
    onDone([]);
    return;
  }

  var done = false;
  // Single exit point so that the response and the timeout cannot both fire.
  var finish = function(properties) {
    if (done) return;
    done = true;
    onDone(properties);
  };

  $.getJSON(
    // Encode the type so identifiers containing '&', '#', '/' etc. cannot
    // corrupt the query string.
    url + "?type=" + encodeURIComponent(typeID) + "&callback=?",
    null,
    function(data) {
      if (done) return;

      // Tolerate a response without a `properties` member. `done` is only
      // set by finish(), so if anything below throws, the timeout still
      // reports [] instead of leaving the caller waiting forever.
      var proposed = (data && data.properties) || [];
      var allProperties = [];
      for (var i = 0; i < proposed.length; i++) {
        allProperties.push({
          id: proposed[i].id,
          name: proposed[i].name
        });
      }
      allProperties.sort(function(a, b) { return a.name.localeCompare(b.name); });

      finish(allProperties);
    }
  );

  // Give up after 7 seconds and report no suggestions.
  window.setTimeout(function() {
    finish([]);
  }, 7000);
};
/**
 * Displays the dialog, sizes the preview pane, lists the suggested
 * properties as clickable links and attaches the property suggest widget
 * to the "add property" input.
 *
 * @param properties  array of suggested properties ({id, name} objects,
 *                    optionally with a nested `properties` array)
 */
ExtendReconciledDataPreviewDialog.prototype._show = function(properties) {
  this._level = DialogSystem.showDialog(this._dialog);

  // Stretch the preview pane from the top of the input down to the bottom
  // of the suggested-property list.
  var n = this._elmts.suggestedPropertyContainer.offset().top +
    this._elmts.suggestedPropertyContainer.outerHeight(true) -
    this._elmts.addPropertyInput.offset().top;
  this._elmts.previewContainer.height(Math.floor(n));

  var self = this;
  var container = this._elmts.suggestedPropertyContainer;

  var renderSuggestedProperty = function(property) {
    // \u00BB is the right-pointing double angle quote previously written as
    // the &raquo; entity. Names come from a remote service, so they are set
    // as text rather than parsed as HTML.
    var label = ("properties" in property) ?
      (property.name + " \u00BB " + property.properties[0].name) :
      property.name;
    var div = $('<div>').addClass("suggested-property").appendTo(container);

    $('<a>')
      .attr("href", "javascript:{}")
      .text(label)
      .appendTo(div)
      .click(function() {
        self._addProperty(property);
      });
  };

  for (var i = 0; i < properties.length; i++) {
    renderSuggestedProperty(properties[i]);
  }

  // Only attach the suggest widget when the service metadata describes one;
  // dereferencing a missing `suggest` section would throw.
  var metadata = this._serviceMetadata;
  if (metadata && metadata.suggest) {
    var suggestConfig = $.extend({}, metadata.suggest.property);
    suggestConfig.key = null;
    suggestConfig.query_param_name = "prefix";

    this._elmts.addPropertyInput.suggestP(suggestConfig).bind("fb-select", function(evt, data) {
      self._addProperty({
        id: data.id,
        name: data.name
      });
    });
  }
};
/**
 * Re-requests the preview for the current extension: replaces the preview
 * pane with a spinner, posts the sampled row indices and the extension
 * configuration to the preview-extend-data command, and renders the
 * response.
 */
ExtendReconciledDataPreviewDialog.prototype._update = function() {
  this._elmts.previewContainer.empty().html(
    '<div bind="progressPanel" class="extend-data-preview-progress"><img src="images/large-spinner.gif" /></div>');

  var self = this;
  var params = {
    project: theProject.id,
    columnName: this._column.name
  };

  $.post(
    "command/core/preview-extend-data?" + $.param(params),
    {
      rowIndices: JSON.stringify(this._rowIndices),
      extension: JSON.stringify(this._extension)
    },
    function(data) {
      self._renderPreview(data);
    },
    "json"
  ).fail(function(data) {
    console.log(data);
    // Replace the spinner so a failed request does not look like an
    // endless load; same message _renderPreview shows for error responses.
    self._elmts.previewContainer.empty().text("Error.");
  });
};
/**
 * Adds a property to the extension and refreshes the preview.
 *
 * If a property with the same id is already selected, the two are merged
 * instead of duplicated: `included` flags are OR-ed together and nested
 * `properties` are merged recursively by the same rule.
 *
 * @param p  property object ({id, name}, optionally `included` and a nested
 *           `properties` array)
 */
ExtendReconciledDataPreviewDialog.prototype._addProperty = function(p) {
  function mergeInto(existingList, incoming) {
    // Look for the first already-selected property with the same id.
    var match = null;
    for (var k = 0; k < existingList.length; k++) {
      if (existingList[k].id == incoming.id) {
        match = existingList[k];
        break;
      }
    }

    if (match === null) {
      existingList.push(incoming);
      return;
    }

    if ("included" in incoming) {
      if ("included" in match) {
        match.included = match.included || incoming.included;
      } else {
        match.included = incoming.included;
      }
    }

    if ("properties" in incoming) {
      if ("properties" in match) {
        var children = incoming.properties;
        for (var c = 0; c < children.length; c++) {
          mergeInto(match.properties, children[c]);
        }
      } else {
        match.properties = incoming.properties;
      }
    }
  }

  mergeInto(this._extension.properties, p);
  this._update();
};
/**
 * Renders the preview table returned by the preview-extend-data command.
 *
 * The first header cell is the base column's name; every entry of
 * `data.columns` adds a header with "remove" and "configure" links. Each
 * entry of `data.rows` is an array of cells: null cells are left empty,
 * plain objects ({id, name}) become links into the service's identifier
 * space, and anything else is shown as text.
 *
 * @param data  parsed JSON response; when `data.code` is "error" only an
 *              error message is shown
 */
ExtendReconciledDataPreviewDialog.prototype._renderPreview = function(data) {
  var self = this;
  var container = this._elmts.previewContainer.empty();
  if (data.code == "error") {
    container.text("Error.");
    return;
  }

  var table = $('<table>')[0];
  var trHead = table.insertRow(table.rows.length);
  $('<th>').appendTo(trHead).text(this._column.name);

  var renderColumnHeader = function(column) {
    var th = $('<th>').appendTo(trHead);

    // Column names come from the server response: set them as text so that
    // markup in a name is not interpreted.
    $('<span>').text(column.name).appendTo(th);
    $('<br>').appendTo(th);

    $('<a href="javascript:{}"></a>')
      .text($.i18n._("core-views")["remove-prop"])
      .addClass("action")
      .attr("title", $.i18n._("core-views")["remove-col"])
      .click(function() {
        self._removeProperty(column.id);
      }).appendTo(th);

    $('<a href="javascript:{}"></a>')
      .text($.i18n._("core-views")["configure-prop"])
      .addClass("action")
      .attr("title", $.i18n._("core-views")["configure-col"])
      .click(function() {
        self._constrainProperty(column.id);
      }).appendTo(th);
  };

  for (var h = 0; h < data.columns.length; h++) {
    renderColumnHeader(data.columns[h]);
  }

  for (var r = 0; r < data.rows.length; r++) {
    var tr = table.insertRow(table.rows.length);
    var row = data.rows[r];

    for (var c = 0; c < row.length; c++) {
      var td = tr.insertCell(tr.cells.length);
      var cell = row[c];
      if (cell !== null) {
        if ($.isPlainObject(cell)) {
          $('<a>').attr("href",
            this._serviceMetadata.identifierSpace + cell.id
          ).attr("target", "_blank").text(cell.name).appendTo(td);
        } else {
          $('<span>').text(cell).appendTo(td);
        }
      }
    }
  }

  container.append(table);
};
/**
 * Removes every top-level selected property with the given id, then
 * refreshes the preview.
 *
 * @param id  id of the property to drop
 */
ExtendReconciledDataPreviewDialog.prototype._removeProperty = function(id) {
  var selected = this._extension.properties;

  // Walk backwards so in-place splicing does not skip any element.
  var idx = selected.length;
  while (idx-- > 0) {
    if (selected[idx].id == id) {
      selected.splice(idx, 1);
    }
  }

  this._update();
};
/**
 * Looks up a top-level selected property by id.
 *
 * The list is scanned from the end, so if several entries share the id the
 * last one is returned.
 *
 * @param id  property id to look for
 * @return the matching property object, or null if none is selected
 */
ExtendReconciledDataPreviewDialog.prototype._findProperty = function(id) {
  var candidates = this._extension.properties;
  var idx = candidates.length;
  while (idx-- > 0) {
    var candidate = candidates[idx];
    if (candidate.id == id) {
      return candidate;
    }
  }
  return null;
};
/**
 * Opens a nested dialog for editing the settings of one selected property.
 *
 * The form is generated from the `extend.property_settings` list in the
 * service metadata. Each entry is read for `name`, `label`, `type`
 * ('select', 'checkbox', 'number' or 'text'), `default`, `help_text` and,
 * for selects, `choices` ({value, name}). On OK the serialized form values
 * are stored in `property.settings` and the preview is refreshed.
 *
 * @param id  id of the property to configure; nothing happens if no
 *            selected property has this id
 */
ExtendReconciledDataPreviewDialog.prototype._constrainProperty = function(id) {
  var self = this;
  var property = this._findProperty(id);
  if (property === null) {
    // _findProperty signals "not selected" with null; nothing to configure.
    return;
  }

  var frame = DialogSystem.createDialog();
  frame.width("500px");

  $('<div></div>').addClass("dialog-header").text("Settings for " + id).appendTo(frame);
  var body = $('<div></div>').addClass("dialog-body").appendTo(frame);
  var footer = $('<div></div>').addClass("dialog-footer").appendTo(frame);

  // The service may declare no `extend` section at all; treat that like a
  // service with no configurable settings.
  var extendMetadata = self._serviceMetadata ? self._serviceMetadata.extend : null;
  var fields = extendMetadata ? extendMetadata.property_settings : null;

  var table = $('<table></table>');
  if (fields != null) {
    for (var i = 0; i < fields.length; i++) {
      var field = fields[i];

      // A previously saved value wins over the service-declared default.
      var currentValue = field.default;
      if (property.settings != null && property.settings[field.name] != null) {
        currentValue = property.settings[field.name];
      }

      var tr = $('<tr></tr>');
      var td = $('<td></td>').attr('title', field.help_text).appendTo(tr);

      // Labels and choice names come from remote service metadata, so they
      // are inserted as text nodes: jQuery's append() parses strings that
      // look like markup as HTML.
      if (field.type == 'select') {
        $('<span></span>').text(field.label + ':').appendTo(td);
        td.append($('<br/>'));
        for (var j = 0; j < field.choices.length; j++) {
          var choice = field.choices[j];
          var labelElem = $('<label></label>').attr('for', field.name + '_' + choice.value).appendTo(td);
          var inputElem = $('<input type="radio" />').attr(
            'id', field.name + '_' + choice.value).attr(
            'value', choice.value).attr(
            'name', field.name).appendTo(labelElem);

          if (choice.value == currentValue) {
            inputElem.attr('checked', 'checked');
          }
          labelElem.append(document.createTextNode(' ' + choice.name));
          td.append('<br/>');
        }
      } else if (field.type == 'checkbox') {
        // NOTE(review): serializeArray() omits unchecked checkboxes, so an
        // unchecked box is stored as "no value" and falls back to
        // field.default the next time this dialog opens. Confirm intended.
        var checkboxLabel = $('<label></label>').attr('for', field.name).appendTo(td);
        var checkboxInput = $('<input type="checkbox" />').attr('name', field.name).appendTo(checkboxLabel);

        if (currentValue == 'on') {
          checkboxInput.attr('checked', 'checked');
        }
        checkboxLabel.append(document.createTextNode(' ' + field.label));
      } else if (field.type == 'number' || field.type == 'text') {
        var inputLabel = $('<label></label>').attr('for', field.name).appendTo(td);
        inputLabel.append(document.createTextNode(field.label + ': '));
        $('<input />').attr(
          'name', field.name).attr(
          'type', field.type).attr(
          'value', currentValue).appendTo(inputLabel);
      }
      if (tr.children().length > 0) {
        table.append(tr);
      }
    }
  }

  if (table.children().length == 0) {
    var emptyRow = $('<tr></tr>').appendTo(table);
    $('<td></td>').text($.i18n._('core-views')['no-settings']).appendTo(emptyRow);
  }

  var form = $('<form class="data-extension-property-config" bind="form"></form>').append(table);
  var gridLayout = $('<div class="grid-layout layout-normal layout-full"></div>').append(form);
  body.append(gridLayout);
  var bodyElmts = DOM.bind(body);

  footer.html(
    '<button class="button" bind="okButton">' + $.i18n._('core-buttons')['ok'] + '</button>' +
    '<button class="button" bind="cancelButton">' + $.i18n._('core-buttons')['cancel'] + '</button>'
  );
  var footerElmts = DOM.bind(footer);

  var level = DialogSystem.showDialog(frame);
  var dismiss = function() {
    DialogSystem.dismissUntil(level - 1);
  };

  footerElmts.cancelButton.click(dismiss);
  footerElmts.okButton.click(function() {
    try {
      if (fields != null) {
        var elem = $(bodyElmts.form[0]);
        var ar = elem.serializeArray();
        var settings = {};
        for (var k = 0; k < ar.length; k++) {
          settings[ar[k].name] = ar[k].value;
        }
        property.settings = settings;
      }

      dismiss();

      self._update();
    } catch (e) {
      // Keep the dialog usable, but leave a trace instead of failing
      // silently.
      console.log(e);
    }
  });
};

View File

@ -0,0 +1,27 @@
<div class="dialog-frame extend-data-preview-dialog" style="width: 800px;">
<!--
  Markup for the data-extension preview dialog. Elements carrying a bind
  attribute are addressed by name from script. The three header cells and
  the OK / Cancel buttons contain no text in this template; presumably
  script fills them in (the code doing so is not part of this fragment).
-->
<div class="dialog-header" bind="dialogHeader"></div>
<div class="dialog-body" bind="dialogBody">
<div class="grid-layout layout-normal layout-full"><table rows="4">
<!-- Row 1: column headings plus the Reset button. -->
<tr>
<td width="300" height="1" bind="addPropertyHeader"></td>
<td height="1" bind="previewHeader"></td>
<td height="1" width="1%"><button class="button" bind="resetButton">Reset</button></td>
</tr>
<!-- Row 2: property input on the left; the preview pane spans the remaining rows on the right. -->
<tr>
<td style="vertical-align: top;" height="1"><div class="input-container"><input bind="addPropertyInput" /></div></td>
<td style="vertical-align: top;" rowspan="3" colspan="2"><div class="preview-container" bind="previewContainer"></div></td>
</tr>
<!-- Rows 3 and 4: heading and list container for the suggested properties. -->
<tr>
<td height="1" bind="suggestedPropertyHeader"></td>
</tr>
<tr>
<td><div class="suggested-property-container" bind="suggestedPropertyContainer"></div></td>
</tr>
</table></div>
</div>
<div class="dialog-footer" bind="dialogFooter">
<button class="button" bind="okButton"></button>
<button class="button" bind="cancelButton"></button>
</div>
</div>

View File

@ -0,0 +1,26 @@
<div class="dialog-frame extend-data-preview-dialog" style="width: 800px;">
<!--
  Markup for the data-extension preview dialog with hard-coded English
  labels. Elements carrying a bind attribute are addressed by name from
  script; the header (dialogHeader) is empty here and is set by script.
-->
<div class="dialog-header" bind="dialogHeader"></div>
<div class="dialog-body" bind="dialogBody">
<div class="grid-layout layout-normal layout-full"><table rows="4">
<!-- Row 1: column headings plus the Reset button. -->
<tr>
<td width="300" height="1">Add Property</td>
<td height="1">Preview</td>
<td height="1" width="1%"><button class="button" bind="resetButton">Reset</button></td>
</tr>
<!-- Row 2: property input on the left; the preview pane spans the remaining rows on the right. -->
<tr>
<td style="vertical-align: top;" height="1"><div class="input-container"><input bind="addPropertyInput" /></div></td>
<td style="vertical-align: top;" rowspan="3" colspan="2"><div class="preview-container" bind="previewContainer"></div></td>
</tr>
<!-- Rows 3 and 4: heading and list container for the suggested properties. -->
<tr>
<td height="1">Suggested Properties</td>
</tr>
<tr>
<td><div class="suggested-property-container" bind="suggestedPropertyContainer"></div></td>
</tr>
</table></div>
</div>
<div class="dialog-footer" bind="dialogFooter">
<button class="button" bind="okButton">&nbsp;&nbsp;OK&nbsp;&nbsp;</button>
<button class="button" bind="cancelButton">Cancel</button>
</div>
</div>

View File

@ -146,6 +146,33 @@ DataTableColumnHeaderUI.extendMenu(function(column, columnHeaderUI, menu) {
}); });
}; };
// Menu action: opens the data-extension preview dialog for this column on a
// sample of the visible rows and, once the user confirms, posts the
// "extend-data" process so the new columns are inserted right after it.
var doAddColumnByReconciliation = function() {
  var baseColumnIndex = Refine.columnNameToColumnIndex(column.name);
  var sample = DataTableView.sampleVisibleRows(column);

  // Invoked by the dialog on OK with the chosen extension and the details
  // of the reconciliation service.
  var submitExtension = function(extension, endpoint, identifierSpace, schemaSpace) {
    var urlParams = {
      baseColumnName: column.name,
      endpoint: endpoint,
      identifierSpace: identifierSpace,
      schemaSpace: schemaSpace,
      columnInsertIndex: baseColumnIndex + 1
    };
    var bodyParams = {
      extension: JSON.stringify(extension)
    };
    var updateOptions = { rowsChanged: true, modelsChanged: true };

    Refine.postProcess("core", "extend-data", urlParams, bodyParams, updateOptions);
  };

  new ExtendReconciledDataPreviewDialog(
    column,
    baseColumnIndex,
    sample.rowIndices,
    submitExtension
  );
};
var doRemoveColumn = function() { var doRemoveColumn = function() {
Refine.postCoreProcess( Refine.postCoreProcess(
"remove-column", "remove-column",
@ -298,6 +325,11 @@ DataTableColumnHeaderUI.extendMenu(function(column, columnHeaderUI, menu) {
label: $.i18n._('core-views')["add-by-urls"]+"...", label: $.i18n._('core-views')["add-by-urls"]+"...",
click: doAddColumnByFetchingURLs click: doAddColumnByFetchingURLs
}, },
{
id: "core/add-column-by-reconciliation",
label: $.i18n._('core-views')["add-col-recon-val"]+"...",
click: doAddColumnByReconciliation
},
{}, {},
{ {
id: "core/rename-column", id: "core/rename-column",

View File

@ -0,0 +1,84 @@
/*
Copyright 2010, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
@import-less url("../theme.less");
/* Styles for the data-extension preview dialog (.extend-data-preview-dialog). */

/* Bordered list of suggested properties: fixed height, scrolls on overflow. */
.extend-data-preview-dialog .suggested-property-container {
border: 1px solid #aaa;
padding: 5px;
overflow: auto;
height: 375px;
}
/* One entry in the suggested-property list. */
.extend-data-preview-dialog .suggested-property {
padding: 5px;
}
/* Inputs with the property-suggest class fill their cell: 96% width + 2% padding on each side. */
.extend-data-preview-dialog input.property-suggest {
display: block;
padding: 2%;
width: 96%;
}
/* Bordered preview pane; scrolls when the preview table overflows. */
.extend-data-preview-dialog .preview-container {
border: 1px solid #aaa;
overflow: auto;
}
.extend-data-preview-dialog .preview-container table {
border-collapse: collapse;
}
/* Light grid lines on the bottom and right edge of every preview cell. */
.extend-data-preview-dialog .preview-container td, .extend-data-preview-dialog .preview-container th {
padding: 3px 5px;
border-bottom: 1px solid #ddd;
border-right: 1px solid #ddd;
}
.extend-data-preview-dialog .preview-container th img {
vertical-align: top;
margin-left: 5px;
}
/* Progress panel: centers its image, with percentage padding around it. */
.extend-data-preview-progress {
text-align: center;
}
.extend-data-preview-progress img {
padding: 45%;
display: inline-block;
}
/* Cell spacing inside the per-property settings form. */
.data-extension-property-config td {
padding: 5px;
}