Data extension tests

This commit is contained in:
Antonin Delpeuch 2017-07-16 11:47:12 +01:00
parent 8437a9d245
commit 84c06821ee
6 changed files with 367 additions and 68 deletions

View File

@ -52,7 +52,7 @@ public class ExtendDataCommand extends EngineDependentCommand {
int columnInsertIndex = Integer.parseInt(request.getParameter("columnInsertIndex")); int columnInsertIndex = Integer.parseInt(request.getParameter("columnInsertIndex"));
String endpoint = request.getParameter("endpoint"); String endpoint = request.getParameter("endpoint");
String identifierSpace = request.getParameter("identifierSpace"); String identifierSpace = request.getParameter("identifierSpace");
String schemaSpace = request.getParameter("schemaSpace"); String schemaSpace = request.getParameter("schemaSpace");
String jsonString = request.getParameter("extension"); String jsonString = request.getParameter("extension");
JSONObject extension = ParsingUtilities.evaluateJsonStringToObject(jsonString); JSONObject extension = ParsingUtilities.evaluateJsonStringToObject(jsonString);
@ -60,8 +60,8 @@ public class ExtendDataCommand extends EngineDependentCommand {
return new ExtendDataOperation( return new ExtendDataOperation(
engineConfig, engineConfig,
baseColumnName, baseColumnName,
endpoint, endpoint,
identifierSpace, identifierSpace,
schemaSpace, schemaSpace,
extension, extension,
columnInsertIndex columnInsertIndex

View File

@ -71,7 +71,7 @@ public class DataExtensionChange implements Change {
final protected int _columnInsertIndex; final protected int _columnInsertIndex;
final protected List<String> _columnNames; final protected List<String> _columnNames;
final protected List<ReconType> _columnTypes; final protected List<ReconType> _columnTypes;
final protected List<Integer> _rowIndices; final protected List<Integer> _rowIndices;
final protected List<DataExtension> _dataExtensions; final protected List<DataExtension> _dataExtensions;
@ -221,13 +221,13 @@ public class DataExtensionChange implements Change {
Column column = new Column(cellIndex, name); Column column = new Column(cellIndex, name);
ReconType columnType = _columnTypes.get(i); ReconType columnType = _columnTypes.get(i);
column.setReconConfig(new DataExtensionReconConfig( column.setReconConfig(new DataExtensionReconConfig(
_service, _service,
_identifierSpace, _identifierSpace,
_schemaSpace, _schemaSpace,
columnType)); columnType));
if (columnType != null) { if (columnType != null) {
column.setReconStats(ReconStats.create(project, cellIndex)); column.setReconStats(ReconStats.create(project, cellIndex));
} }
try { try {
project.columnModel.addColumn(_columnInsertIndex + i, column, true); project.columnModel.addColumn(_columnInsertIndex + i, column, true);
@ -311,7 +311,7 @@ public class DataExtensionChange implements Change {
if(type != null) { if(type != null) {
JSONWriter jsonWriter = new JSONWriter(writer); JSONWriter jsonWriter = new JSONWriter(writer);
type.write(jsonWriter, options); type.write(jsonWriter, options);
} }
} catch (JSONException e) { } catch (JSONException e) {
// ??? // ???
} }
@ -368,9 +368,9 @@ public class DataExtensionChange implements Change {
static public Change load(LineNumberReader reader, Pool pool) throws Exception { static public Change load(LineNumberReader reader, Pool pool) throws Exception {
String baseColumnName = null; String baseColumnName = null;
String service = null; String service = null;
String identifierSpace = null; String identifierSpace = null;
String schemaSpace = null; String schemaSpace = null;
int columnInsertIndex = -1; int columnInsertIndex = -1;
List<String> columnNames = null; List<String> columnNames = null;
@ -428,11 +428,11 @@ public class DataExtensionChange implements Change {
columnTypes = new ArrayList<ReconType>(count); columnTypes = new ArrayList<ReconType>(count);
for (int i = 0; i < count; i++) { for (int i = 0; i < count; i++) {
line = reader.readLine(); line = reader.readLine();
if (line == null || line.length() == 0) { if (line == null || line.length() == 0) {
columnTypes.add(null); columnTypes.add(null);
} else { } else {
columnTypes.add(ReconType.load(ParsingUtilities.evaluateJsonStringToObject(line))); columnTypes.add(ReconType.load(ParsingUtilities.evaluateJsonStringToObject(line)));
} }
} }
} else if ("dataExtensionCount".equals(field)) { } else if ("dataExtensionCount".equals(field)) {
int count = Integer.parseInt(value); int count = Integer.parseInt(value);
@ -492,9 +492,9 @@ public class DataExtensionChange implements Change {
DataExtensionChange change = new DataExtensionChange( DataExtensionChange change = new DataExtensionChange(
baseColumnName, baseColumnName,
service, service,
identifierSpace, identifierSpace,
schemaSpace, schemaSpace,
columnInsertIndex, columnInsertIndex,
columnNames, columnNames,
columnTypes, columnTypes,

View File

@ -56,31 +56,31 @@ public class DataExtensionReconConfig extends StandardReconConfig {
static public ReconConfig reconstruct(JSONObject obj) throws Exception { static public ReconConfig reconstruct(JSONObject obj) throws Exception {
JSONObject type = obj.getJSONObject("type"); JSONObject type = obj.getJSONObject("type");
ReconType typ = null; ReconType typ = null;
if(obj.has("id")) { if(obj.has("id")) {
typ = new ReconType(obj.getString("id"), typ = new ReconType(obj.getString("id"),
obj.has("name") ? obj.getString("name") : obj.getString("id")); obj.has("name") ? obj.getString("name") : obj.getString("id"));
} }
return new DataExtensionReconConfig( return new DataExtensionReconConfig(
obj.getString("service"), obj.getString("service"),
obj.has("identifierSpace") ? obj.getString("identifierSpace") : null, obj.has("identifierSpace") ? obj.getString("identifierSpace") : null,
obj.has("schemaSpace") ? obj.getString("schemaSpace") : null, obj.has("schemaSpace") ? obj.getString("schemaSpace") : null,
typ); typ);
} }
public DataExtensionReconConfig( public DataExtensionReconConfig(
String service, String service,
String identifierSpace, String identifierSpace,
String schemaSpace, String schemaSpace,
ReconType type) { ReconType type) {
super( super(
service, service,
identifierSpace, identifierSpace,
schemaSpace, schemaSpace,
type != null ? type.id : null, type != null ? type.id : null,
type != null ? type.name : null, type != null ? type.name : null,
true, true,
new ArrayList<ColumnDetail>()); new ArrayList<ColumnDetail>());
this.type = type; this.type = type;

View File

@ -105,11 +105,11 @@ public class ReconciledDataExtensionJob {
String s = ParsingUtilities.inputStreamToString(is); String s = ParsingUtilities.inputStreamToString(is);
JSONObject o = ParsingUtilities.evaluateJsonStringToObject(s); JSONObject o = ParsingUtilities.evaluateJsonStringToObject(s);
if(columns.size() == 0) { if(columns.size() == 0) {
// Extract the column metadata // Extract the column metadata
gatherColumnInfo(o.getJSONArray("meta"), columns); gatherColumnInfo(o.getJSONArray("meta"), columns);
} }
Map<String, ReconciledDataExtensionJob.DataExtension> map = new HashMap<String, ReconciledDataExtensionJob.DataExtension>(); Map<String, ReconciledDataExtensionJob.DataExtension> map = new HashMap<String, ReconciledDataExtensionJob.DataExtension>();
if (o.has("rows")){ if (o.has("rows")){
JSONObject records = o.getJSONObject("rows"); JSONObject records = o.getJSONObject("rows");
@ -167,7 +167,7 @@ public class ReconciledDataExtensionJob {
// for each property // for each property
int colindex = 0; int colindex = 0;
for(ColumnInfo ci : columns) { for(ColumnInfo ci : columns) {
String pid = ci.id; String pid = ci.id;
JSONArray values = record.getJSONArray(pid); JSONArray values = record.getJSONArray(pid);
if (values == null) { if (values == null) {
continue; continue;
@ -273,11 +273,11 @@ public class ReconciledDataExtensionJob {
jsonWriter.object(); jsonWriter.object();
jsonWriter.key("id"); jsonWriter.key("id");
jsonWriter.value(property.getString("id")); jsonWriter.value(property.getString("id"));
if (property.has("settings")) { if (property.has("settings")) {
JSONObject settings = property.getJSONObject("settings"); JSONObject settings = property.getJSONObject("settings");
jsonWriter.key("settings"); jsonWriter.key("settings");
jsonWriter.value(settings); jsonWriter.value(settings);
} }
jsonWriter.endObject(); jsonWriter.endObject();
} }
jsonWriter.endArray(); jsonWriter.endArray();
@ -285,19 +285,19 @@ public class ReconciledDataExtensionJob {
} }
static protected void gatherColumnInfo(JSONArray meta, List<ColumnInfo> columns) throws JSONException { static protected void gatherColumnInfo(JSONArray meta, List<ColumnInfo> columns) throws JSONException {
for(int i = 0; i < meta.length(); i++) { for(int i = 0; i < meta.length(); i++) {
JSONObject col = meta.getJSONObject(i); JSONObject col = meta.getJSONObject(i);
ReconType expectedType = null; ReconType expectedType = null;
if(col.has("type")) { if(col.has("type")) {
JSONObject expectedObj = col.getJSONObject("type"); JSONObject expectedObj = col.getJSONObject("type");
expectedType = new ReconType(expectedObj.getString("id"), expectedObj.getString("name")); expectedType = new ReconType(expectedObj.getString("id"), expectedObj.getString("name"));
} }
columns.add(new ColumnInfo( columns.add(new ColumnInfo(
col.getString("name"), col.getString("name"),
col.getString("id"), col.getString("id"),
expectedType)); expectedType));
} }
} }
} }

View File

@ -92,7 +92,7 @@ public class ExtendDataOperation extends EngineDependentOperation {
public ExtendDataOperation( public ExtendDataOperation(
JSONObject engineConfig, JSONObject engineConfig,
String baseColumnName, String baseColumnName,
String endpoint, String endpoint,
String identifierSpace, String identifierSpace,
String schemaSpace, String schemaSpace,
JSONObject extension, JSONObject extension,
@ -118,9 +118,9 @@ public class ExtendDataOperation extends EngineDependentOperation {
writer.key("engineConfig"); writer.value(getEngineConfig()); writer.key("engineConfig"); writer.value(getEngineConfig());
writer.key("columnInsertIndex"); writer.value(_columnInsertIndex); writer.key("columnInsertIndex"); writer.value(_columnInsertIndex);
writer.key("baseColumnName"); writer.value(_baseColumnName); writer.key("baseColumnName"); writer.value(_baseColumnName);
writer.key("endpoint"); writer.value(_endpoint); writer.key("endpoint"); writer.value(_endpoint);
writer.key("identifierSpace"); writer.value(_identifierSpace); writer.key("identifierSpace"); writer.value(_identifierSpace);
writer.key("schemaSpace"); writer.value(_schemaSpace); writer.key("schemaSpace"); writer.value(_schemaSpace);
writer.key("extension"); writer.value(_extension); writer.key("extension"); writer.value(_extension);
writer.endObject(); writer.endObject();
} }
@ -314,9 +314,9 @@ public class ExtendDataOperation extends EngineDependentOperation {
ExtendDataOperation.this, ExtendDataOperation.this,
new DataExtensionChange( new DataExtensionChange(
_baseColumnName, _baseColumnName,
_endpoint, _endpoint,
_identifierSpace, _identifierSpace,
_schemaSpace, _schemaSpace,
_columnInsertIndex, _columnInsertIndex,
columnNames, columnNames,
columnTypes, columnTypes,

View File

@ -0,0 +1,299 @@
/*
Copyright 2010, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.tests.recon;
import static org.mockito.Mockito.mock;
import java.io.File;
import java.io.IOException;
import java.util.Properties;
import java.util.List;
import java.util.ArrayList;
import org.json.JSONException;
import org.json.JSONObject;
import org.slf4j.LoggerFactory;
import org.testng.Assert;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.Test;
import com.google.refine.ProjectManager;
import com.google.refine.ProjectMetadata;
import com.google.refine.browsing.Engine;
import com.google.refine.browsing.RowVisitor;
import com.google.refine.grel.Function;
import com.google.refine.io.FileProjectManager;
import com.google.refine.model.Cell;
import com.google.refine.model.Column;
import com.google.refine.model.ModelException;
import com.google.refine.model.Project;
import com.google.refine.model.Row;
import com.google.refine.model.Recon;
import com.google.refine.model.ReconCandidate;
import com.google.refine.process.Process;
import com.google.refine.process.ProcessManager;
import com.google.refine.operations.OnError;
import com.google.refine.operations.EngineDependentOperation;
import com.google.refine.operations.recon.ExtendDataOperation;
import com.google.refine.tests.RefineTest;
import com.google.refine.tests.util.TestUtils;
/**
 * Tests for {@link ExtendDataOperation}: each test extends a small project
 * (one reconciled "country" column, four rows) with one property fetched from
 * the reconciliation service at {@link #RECON_SERVICE} and inspects the
 * column inserted at index 1.
 *
 * NOTE(review): {@link #RECON_SERVICE} points at localhost:8000, so these
 * tests presumably need a Wikidata reconciliation service listening there
 * (the public endpoint is kept in the trailing comment) — confirm before
 * running them in CI.
 */
public class DataExtensionTests extends RefineTest {

    static final String ENGINE_JSON_URLS = "{\"mode\":\"row-based\"}";
    static final String RECON_SERVICE = "http://localhost:8000/en/api"; //"https://tools.wmflabs.org/openrefine-wikidata/en/api";
    static final String RECON_IDENTIFIER_SPACE = "http://www.wikidata.org/entity/";
    static final String RECON_SCHEMA_SPACE = "http://www.wikidata.org/prop/direct/";

    /** Name of the reconciled column that every test extends. */
    private static final String BASE_COLUMN_NAME = "country";

    /** Index at which the fetched column is inserted (and later read back). */
    private static final int EXTENSION_COLUMN_INDEX = 1;

    /** Upper bound on how long a test waits for the extension process to stop. */
    private static final long PROCESS_TIMEOUT_MS = 5000;

    /** Delay between two checks of the process state while waiting. */
    private static final long POLL_INTERVAL_MS = 50;

    @Override
    @BeforeTest
    public void init() {
        logger = LoggerFactory.getLogger(this.getClass());
    }

    // dependencies
    Project project;
    Properties options;
    JSONObject engine_config;
    Engine engine;
    Properties bindings;

    /**
     * Builds a fresh project with a single "country" column holding four
     * reconciled cells (Iran, Japan, Tajikistan, USA), plus a row-based engine.
     */
    @BeforeMethod
    public void SetUp() throws JSONException, IOException, ModelException {
        File dir = TestUtils.createTempDirectory("openrefine-test-workspace-dir");
        FileProjectManager.initialize(dir);
        project = new Project();
        ProjectMetadata pm = new ProjectMetadata();

        pm.setName("Data Extension Test Project");
        ProjectManager.singleton.registerProject(project, pm);

        int index = project.columnModel.allocateNewCellIndex();
        Column column = new Column(index, BASE_COLUMN_NAME);
        project.columnModel.addColumn(index, column, true);

        options = mock(Properties.class);
        engine = new Engine(project);
        engine_config = new JSONObject(ENGINE_JSON_URLS);
        engine.initializeFromJSON(engine_config);
        engine.setMode(Engine.Mode.RowBased);

        bindings = new Properties();
        bindings.put("project", project);

        addReconciledRow("Iran", "Q794");
        addReconciledRow("Japan", "Q17");
        addReconciledRow("Tajikistan", "Q863");
        addReconciledRow("United States of America", "Q30");
    }

    /** Drops every fixture reference so that no state leaks between tests. */
    @AfterMethod
    public void TearDown() {
        project = null;
        options = null;
        engine_config = null;
        engine = null;
        bindings = null;
    }

    /**
     * Creates a cell whose recon has a single candidate, which is also its match.
     *
     * @param name the cell value, also used as the candidate's name
     * @param id the identifier of the matched candidate
     * @return a cell carrying the value and its recon
     */
    static public Cell reconciledCell(String name, String id) {
        ReconCandidate r = new ReconCandidate(id, name, new String[0], 100);
        List<ReconCandidate> candidates = new ArrayList<ReconCandidate>();
        candidates.add(r);
        Recon rec = new Recon(0, RECON_IDENTIFIER_SPACE, RECON_SCHEMA_SPACE);
        rec.service = RECON_SERVICE;
        rec.candidates = candidates;
        rec.match = r;
        return new Cell(name, rec);
    }

    /**
     * Test to fetch simple strings
     */
    @Test
    public void testFetchStrings() throws Exception {
        runExtension("{\"properties\":[{\"id\":\"P297\",\"name\":\"ISO 3166-1 alpha-2 code\"}]}");

        // Inspect rows
        Assert.assertEquals(extensionValue(0), "IR");
        Assert.assertEquals(extensionValue(1), "JP");
        Assert.assertEquals(extensionValue(2), "TJ");
        Assert.assertEquals(extensionValue(3), "US");

        // Make sure we did not create any recon stats for that column (no reconciled value)
        Assert.assertNull(project.columnModel.getColumnByName("ISO 3166-1 alpha-2 code").getReconStats());
    }

    /**
     * Test to fetch counts of values
     */
    @Test
    public void testFetchCounts() throws Exception {
        runExtension("{\"properties\":[{\"id\":\"P38\",\"name\":\"currency\",\"settings\":{\"count\":\"on\"}}]}");

        // Test to be updated as countries change currencies!
        assertCount(2, 2);
        assertCount(3, 1);

        // Make sure we did not create any recon stats for that column (no reconciled value)
        Assert.assertNull(project.columnModel.getColumnByName("currency").getReconStats());
    }

    /**
     * Test fetch only the best statements
     */
    @Test
    public void testFetchCurrent() throws Exception {
        runExtension("{\"properties\":[{\"id\":\"P38\",\"name\":\"currency\",\"settings\":{\"rank\":\"best\"}}]}");

        /*
         * Tajikistan has one "preferred" currency and one "normal" one
         * (in terms of statement ranks).
         * With "rank":"best" only the preferred one is fetched, so no extra
         * row is inserted and the row right after Tajikistan is still the
         * one for the US (USD).
         */
        Assert.assertEquals(extensionValue(2), "Tajikistani somoni");
        Assert.assertEquals(extensionValue(3), "United States dollar");

        // Make sure all the values are reconciled
        Assert.assertEquals(project.columnModel.getColumnByName("currency").getReconStats().matchedTopics, 4);
    }

    /**
     * Test fetch records (multiple values per reconciled cell)
     */
    @Test
    public void testFetchRecord() throws Exception {
        runExtension("{\"properties\":[{\"id\":\"P38\",\"name\":\"currency\"}]}");

        /*
         * Tajikistan has one "preferred" currency and one "normal" one
         * (in terms of statement ranks).
         * The second currency is fetched as well, which creates a record
         * (the cell to the left of it is left blank).
         */
        Assert.assertEquals(extensionValue(2), "Tajikistani somoni");
        Assert.assertEquals(extensionValue(3), "Tajikistani ruble");
        Assert.assertNull(project.rows.get(3).getCellValue(0));

        // Make sure all the values are reconciled
        Assert.assertEquals(project.columnModel.getColumnByName("currency").getReconStats().matchedTopics, 5);
    }

    /** Appends a two-cell row whose first cell is reconciled to the given entity. */
    private void addReconciledRow(String name, String id) {
        Row row = new Row(2);
        row.setCell(0, reconciledCell(name, id));
        project.rows.add(row);
    }

    /**
     * Runs an {@link ExtendDataOperation} on the "country" column with the given
     * extension configuration and waits for its process to stop.
     *
     * @param extensionJson the extension configuration, as a JSON object literal
     * @throws Exception if the configuration cannot be parsed or the process
     *         cannot be created
     */
    private void runExtension(String extensionJson) throws Exception {
        JSONObject extension = new JSONObject(extensionJson);
        EngineDependentOperation op = new ExtendDataOperation(engine_config,
                BASE_COLUMN_NAME,
                RECON_SERVICE,
                RECON_IDENTIFIER_SPACE,
                RECON_SCHEMA_SPACE,
                extension,
                EXTENSION_COLUMN_INDEX);
        ProcessManager pm = project.getProcessManager();
        Process process = op.createProcess(project, options);
        process.startPerforming(pm);
        Assert.assertTrue(process.isRunning(), "The extension process should be running once started");

        awaitCompletion(process);
        Assert.assertFalse(process.isRunning(),
                "The extension process was still running after " + PROCESS_TIMEOUT_MS + " ms");
    }

    /**
     * Waits until the process reports that it is no longer running, or until
     * {@link #PROCESS_TIMEOUT_MS} has elapsed, whichever comes first.
     *
     * NOTE(review): this assumes isRunning() only turns false once the
     * operation's results have been applied to the project — the same signal
     * the tests previously checked after an unconditional 5 s sleep. Confirm
     * against the Process implementation.
     */
    private static void awaitCompletion(Process process) {
        long deadline = System.currentTimeMillis() + PROCESS_TIMEOUT_MS;
        try {
            while (process.isRunning() && System.currentTimeMillis() < deadline) {
                Thread.sleep(POLL_INTERVAL_MS);
            }
        } catch (InterruptedException e) {
            // Restore the flag so the test runner can still observe the interruption.
            Thread.currentThread().interrupt();
            Assert.fail("Test interrupted");
        }
    }

    /** Returns the value stored in the fetched column for the given row. */
    private Object extensionValue(int rowIndex) {
        return project.rows.get(rowIndex).getCellValue(EXTENSION_COLUMN_INDEX);
    }

    /**
     * Asserts that the fetched column holds the expected count in the given row.
     * The value is read through {@link Number} so that the check does not depend
     * on which numeric type the operation stores, and a missing or non-numeric
     * cell yields an assertion failure rather than a cast exception.
     */
    private void assertCount(int rowIndex, int expected) {
        Object value = extensionValue(rowIndex);
        Assert.assertTrue(value instanceof Number,
                "Expected a numeric count in row " + rowIndex + " but found: " + value);
        Assert.assertEquals(Math.round(((Number) value).floatValue()), expected);
    }
}