Data extension tests

This commit is contained in:
Antonin Delpeuch 2017-07-16 11:47:12 +01:00
parent 8437a9d245
commit 84c06821ee
6 changed files with 367 additions and 68 deletions

View File

@ -0,0 +1,299 @@
/*
Copyright 2010, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.tests.recon;
import static org.mockito.Mockito.mock;
import java.io.File;
import java.io.IOException;
import java.util.Properties;
import java.util.List;
import java.util.ArrayList;
import org.json.JSONException;
import org.json.JSONObject;
import org.slf4j.LoggerFactory;
import org.testng.Assert;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.Test;
import com.google.refine.ProjectManager;
import com.google.refine.ProjectMetadata;
import com.google.refine.browsing.Engine;
import com.google.refine.browsing.RowVisitor;
import com.google.refine.grel.Function;
import com.google.refine.io.FileProjectManager;
import com.google.refine.model.Cell;
import com.google.refine.model.Column;
import com.google.refine.model.ModelException;
import com.google.refine.model.Project;
import com.google.refine.model.Row;
import com.google.refine.model.Recon;
import com.google.refine.model.ReconCandidate;
import com.google.refine.process.Process;
import com.google.refine.process.ProcessManager;
import com.google.refine.operations.OnError;
import com.google.refine.operations.EngineDependentOperation;
import com.google.refine.operations.recon.ExtendDataOperation;
import com.google.refine.tests.RefineTest;
import com.google.refine.tests.util.TestUtils;
/**
 * Tests for {@link ExtendDataOperation}, run against a small project made of a
 * single "country" column holding four reconciled cells.
 *
 * NOTE(review): the operation is configured with {@link #RECON_SERVICE}, which
 * points at localhost; these tests presumably need a reconciliation service
 * listening there and will fail without it - confirm before running in CI.
 */
public class DataExtensionTests extends RefineTest {

    // Engine configuration selecting row-based mode (must be well-formed JSON).
    static final String ENGINE_JSON_URLS = "{\"mode\":\"row-based\"}";
    // Alternative endpoint kept from the original file:
    // "https://tools.wmflabs.org/openrefine-wikidata/en/api"
    static final String RECON_SERVICE = "http://localhost:8000/en/api";
    static final String RECON_IDENTIFIER_SPACE = "http://www.wikidata.org/entity/";
    static final String RECON_SCHEMA_SPACE = "http://www.wikidata.org/prop/direct/";

    // Upper bound on how long a test waits for the extension process to finish.
    private static final long PROCESS_TIMEOUT_MS = 5000;
    // Delay between two checks of the process state while waiting.
    private static final long PROCESS_POLL_INTERVAL_MS = 50;

    @Override
    @BeforeTest
    public void init() {
        logger = LoggerFactory.getLogger(this.getClass());
    }

    // dependencies
    Project project;
    Properties options;
    JSONObject engine_config;
    Engine engine;
    Properties bindings;

    /**
     * Builds a fresh project in a temporary workspace, with one "country"
     * column and four rows of reconciled country names, plus a row-based engine.
     */
    @BeforeMethod
    public void SetUp() throws JSONException, IOException, ModelException {
        File dir = TestUtils.createTempDirectory("openrefine-test-workspace-dir");
        FileProjectManager.initialize(dir);

        project = new Project();
        ProjectMetadata pm = new ProjectMetadata();
        pm.setName("Data Extension Test Project");
        ProjectManager.singleton.registerProject(project, pm);

        int index = project.columnModel.allocateNewCellIndex();
        Column column = new Column(index, "country");
        project.columnModel.addColumn(index, column, true);

        options = mock(Properties.class);
        engine = new Engine(project);
        engine_config = new JSONObject(ENGINE_JSON_URLS);
        engine.initializeFromJSON(engine_config);
        engine.setMode(Engine.Mode.RowBased);

        bindings = new Properties();
        bindings.put("project", project);

        addReconciledRow("Iran", "Q794");
        addReconciledRow("Japan", "Q17");
        addReconciledRow("Tajikistan", "Q863");
        addReconciledRow("United States of America", "Q30");
    }

    /**
     * Drops the references to every per-test fixture object.
     */
    @AfterMethod
    public void TearDown() {
        project = null;
        options = null;
        engine_config = null;
        engine = null;
        bindings = null;
    }

    /**
     * Creates a cell whose recon object has a single candidate, which is also
     * its match.
     *
     * @param name the cell value, also used as the candidate's name
     * @param id the candidate's identifier
     * @return a cell holding {@code name} and a recon bound to {@link #RECON_SERVICE}
     */
    public static Cell reconciledCell(String name, String id) {
        // The last two arguments are presumably the candidate's types and score - TODO confirm.
        ReconCandidate candidate = new ReconCandidate(id, name, new String[0], 100);
        List<ReconCandidate> candidates = new ArrayList<ReconCandidate>();
        candidates.add(candidate);

        Recon recon = new Recon(0, RECON_IDENTIFIER_SPACE, RECON_SCHEMA_SPACE);
        recon.service = RECON_SERVICE;
        recon.candidates = candidates;
        recon.match = candidate;
        return new Cell(name, recon);
    }

    /**
     * Appends to the project a row whose first cell is reconciled to the given entity.
     */
    private void addReconciledRow(String name, String id) {
        Row row = new Row(2);
        row.setCell(0, reconciledCell(name, id));
        project.rows.add(row);
    }

    /**
     * Runs a data extension on the "country" column and waits for it to end.
     *
     * The process state is polled instead of sleeping for a fixed duration, so
     * the test resumes as soon as the process stops and fails with a clear
     * message if it is still running after {@link #PROCESS_TIMEOUT_MS}.
     *
     * @param extension the extension configuration passed to the operation
     * @throws Exception if the process cannot be created or started
     */
    private void runExtension(JSONObject extension) throws Exception {
        EngineDependentOperation op = new ExtendDataOperation(engine_config,
                "country",
                RECON_SERVICE,
                RECON_IDENTIFIER_SPACE,
                RECON_SCHEMA_SPACE,
                extension,
                1);
        ProcessManager pm = project.getProcessManager();
        Process process = op.createProcess(project, options);
        process.startPerforming(pm);
        Assert.assertTrue(process.isRunning());

        long deadline = System.currentTimeMillis() + PROCESS_TIMEOUT_MS;
        try {
            while (process.isRunning() && System.currentTimeMillis() < deadline) {
                Thread.sleep(PROCESS_POLL_INTERVAL_MS);
            }
        } catch (InterruptedException e) {
            // Restore the flag so the test runner can still observe the interruption.
            Thread.currentThread().interrupt();
            Assert.fail("Test interrupted");
        }
        Assert.assertFalse(process.isRunning(),
                "Data extension process still running after " + PROCESS_TIMEOUT_MS + " ms");
    }

    /**
     * Reads a numeric cell value and rounds it to the nearest integer,
     * whatever concrete {@link Number} type was stored in the cell.
     */
    private int roundedCellValue(int rowIndex, int cellIndex) {
        Object value = project.rows.get(rowIndex).getCellValue(cellIndex);
        Assert.assertTrue(value instanceof Number,
                "Expected a number in row " + rowIndex + " but found: " + value);
        return Math.round(((Number) value).floatValue());
    }

    /**
     * Test to fetch simple strings
     */
    @Test
    public void testFetchStrings() throws Exception {
        JSONObject extension = new JSONObject("{\"properties\":[{\"id\":\"P297\",\"name\":\"ISO 3166-1 alpha-2 code\"}]}");
        runExtension(extension);

        // Inspect rows
        Assert.assertEquals(project.rows.get(0).getCellValue(1), "IR");
        Assert.assertEquals(project.rows.get(1).getCellValue(1), "JP");
        Assert.assertEquals(project.rows.get(2).getCellValue(1), "TJ");
        Assert.assertEquals(project.rows.get(3).getCellValue(1), "US");

        // Make sure we did not create any recon stats for that column (no reconciled value)
        Assert.assertNull(project.columnModel.getColumnByName("ISO 3166-1 alpha-2 code").getReconStats());
    }

    /**
     * Test to fetch counts of values
     */
    @Test
    public void testFetchCounts() throws Exception {
        JSONObject extension = new JSONObject("{\"properties\":[{\"id\":\"P38\",\"name\":\"currency\",\"settings\":{\"count\":\"on\"}}]}");
        runExtension(extension);

        // Test to be updated as countries change currencies!
        Assert.assertEquals(roundedCellValue(2, 1), 2);
        Assert.assertEquals(roundedCellValue(3, 1), 1);

        // Make sure we did not create any recon stats for that column (no reconciled value)
        Assert.assertNull(project.columnModel.getColumnByName("currency").getReconStats());
    }

    /**
     * Test fetch only the best statements
     */
    @Test
    public void testFetchCurrent() throws Exception {
        JSONObject extension = new JSONObject("{\"properties\":[{\"id\":\"P38\",\"name\":\"currency\",\"settings\":{\"rank\":\"best\"}}]}");
        runExtension(extension);

        /*
         * Tajikistan has one "preferred" currency and one "normal" one
         * (in terms of statement ranks).
         * But thanks to our setting in the extension configuration,
         * we only fetch the current one, so the one just after it is
         * the one for the US (USD).
         */
        Assert.assertEquals(project.rows.get(2).getCellValue(1), "Tajikistani somoni");
        Assert.assertEquals(project.rows.get(3).getCellValue(1), "United States dollar");

        // Make sure all the values are reconciled
        Assert.assertEquals(project.columnModel.getColumnByName("currency").getReconStats().matchedTopics, 4);
    }

    /**
     * Test fetch records (multiple values per reconciled cell)
     */
    @Test
    public void testFetchRecord() throws Exception {
        JSONObject extension = new JSONObject("{\"properties\":[{\"id\":\"P38\",\"name\":\"currency\"}]}");
        runExtension(extension);

        /*
         * Tajikistan has one "preferred" currency and one "normal" one
         * (in terms of statement ranks).
         * The second currency is fetched as well, which creates a record
         * (the cell to the left of it is left blank).
         */
        Assert.assertEquals(project.rows.get(2).getCellValue(1), "Tajikistani somoni");
        Assert.assertEquals(project.rows.get(3).getCellValue(1), "Tajikistani ruble");
        Assert.assertNull(project.rows.get(3).getCellValue(0));

        // Make sure all the values are reconciled
        Assert.assertEquals(project.columnModel.getColumnByName("currency").getReconStats().matchedTopics, 5);
    }
}