Data extension tests
This commit is contained in:
parent
8437a9d245
commit
84c06821ee
@ -0,0 +1,299 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright 2010, Google Inc.
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above
|
||||||
|
copyright notice, this list of conditions and the following disclaimer
|
||||||
|
in the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Google Inc. nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
package com.google.refine.tests.recon;
|
||||||
|
|
||||||
|
import static org.mockito.Mockito.mock;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Properties;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
|
||||||
|
import org.json.JSONException;
|
||||||
|
import org.json.JSONObject;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
import org.testng.Assert;
|
||||||
|
import org.testng.annotations.AfterMethod;
|
||||||
|
import org.testng.annotations.BeforeMethod;
|
||||||
|
import org.testng.annotations.BeforeTest;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import com.google.refine.ProjectManager;
|
||||||
|
import com.google.refine.ProjectMetadata;
|
||||||
|
import com.google.refine.browsing.Engine;
|
||||||
|
import com.google.refine.browsing.RowVisitor;
|
||||||
|
import com.google.refine.grel.Function;
|
||||||
|
import com.google.refine.io.FileProjectManager;
|
||||||
|
import com.google.refine.model.Cell;
|
||||||
|
import com.google.refine.model.Column;
|
||||||
|
import com.google.refine.model.ModelException;
|
||||||
|
import com.google.refine.model.Project;
|
||||||
|
import com.google.refine.model.Row;
|
||||||
|
import com.google.refine.model.Recon;
|
||||||
|
import com.google.refine.model.ReconCandidate;
|
||||||
|
import com.google.refine.process.Process;
|
||||||
|
import com.google.refine.process.ProcessManager;
|
||||||
|
import com.google.refine.operations.OnError;
|
||||||
|
import com.google.refine.operations.EngineDependentOperation;
|
||||||
|
import com.google.refine.operations.recon.ExtendDataOperation;
|
||||||
|
import com.google.refine.tests.RefineTest;
|
||||||
|
import com.google.refine.tests.util.TestUtils;
|
||||||
|
|
||||||
|
|
||||||
|
public class DataExtensionTests extends RefineTest {
|
||||||
|
|
||||||
|
static final String ENGINE_JSON_URLS = "{\"mode\":\"row-based\"}}";
|
||||||
|
static final String RECON_SERVICE = "http://localhost:8000/en/api"; //"https://tools.wmflabs.org/openrefine-wikidata/en/api";
|
||||||
|
static final String RECON_IDENTIFIER_SPACE = "http://www.wikidata.org/entity/";
|
||||||
|
static final String RECON_SCHEMA_SPACE = "http://www.wikidata.org/prop/direct/";
|
||||||
|
|
||||||
|
@Override
|
||||||
|
@BeforeTest
|
||||||
|
public void init() {
|
||||||
|
logger = LoggerFactory.getLogger(this.getClass());
|
||||||
|
}
|
||||||
|
|
||||||
|
// dependencies
|
||||||
|
Project project;
|
||||||
|
Properties options;
|
||||||
|
JSONObject engine_config;
|
||||||
|
Engine engine;
|
||||||
|
Properties bindings;
|
||||||
|
|
||||||
|
@BeforeMethod
|
||||||
|
public void SetUp() throws JSONException, IOException, ModelException {
|
||||||
|
File dir = TestUtils.createTempDirectory("openrefine-test-workspace-dir");
|
||||||
|
FileProjectManager.initialize(dir);
|
||||||
|
project = new Project();
|
||||||
|
ProjectMetadata pm = new ProjectMetadata();
|
||||||
|
pm.setName("Data Extension Test Project");
|
||||||
|
ProjectManager.singleton.registerProject(project, pm);
|
||||||
|
|
||||||
|
int index = project.columnModel.allocateNewCellIndex();
|
||||||
|
Column column = new Column(index,"country");
|
||||||
|
project.columnModel.addColumn(index, column, true);
|
||||||
|
|
||||||
|
options = mock(Properties.class);
|
||||||
|
engine = new Engine(project);
|
||||||
|
engine_config = new JSONObject(ENGINE_JSON_URLS);
|
||||||
|
engine.initializeFromJSON(engine_config);
|
||||||
|
engine.setMode(Engine.Mode.RowBased);
|
||||||
|
|
||||||
|
bindings = new Properties();
|
||||||
|
bindings.put("project", project);
|
||||||
|
|
||||||
|
Row row = new Row(2);
|
||||||
|
row.setCell(0, reconciledCell("Iran", "Q794"));
|
||||||
|
project.rows.add(row);
|
||||||
|
row = new Row(2);
|
||||||
|
row.setCell(0, reconciledCell("Japan", "Q17"));
|
||||||
|
project.rows.add(row);
|
||||||
|
row = new Row(2);
|
||||||
|
row.setCell(0, reconciledCell("Tajikistan", "Q863"));
|
||||||
|
project.rows.add(row);
|
||||||
|
row = new Row(2);
|
||||||
|
row.setCell(0, reconciledCell("United States of America", "Q30"));
|
||||||
|
project.rows.add(row);
|
||||||
|
}
|
||||||
|
|
||||||
|
@AfterMethod
|
||||||
|
public void TearDown() {
|
||||||
|
project = null;
|
||||||
|
options = null;
|
||||||
|
engine = null;
|
||||||
|
bindings = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
static public Cell reconciledCell(String name, String id) {
|
||||||
|
ReconCandidate r = new ReconCandidate(id, name, new String[0], 100);
|
||||||
|
List<ReconCandidate> candidates = new ArrayList<ReconCandidate>();
|
||||||
|
candidates.add(r);
|
||||||
|
Recon rec = new Recon(0, RECON_IDENTIFIER_SPACE, RECON_SCHEMA_SPACE);
|
||||||
|
rec.service = RECON_SERVICE;
|
||||||
|
rec.candidates = candidates;
|
||||||
|
rec.match = r;
|
||||||
|
return new Cell(name, rec);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test to fetch simple strings
|
||||||
|
*/
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testFetchStrings() throws Exception {
|
||||||
|
JSONObject extension = new JSONObject("{\"properties\":[{\"id\":\"P297\",\"name\":\"ISO 3166-1 alpha-2 code\"}]}");
|
||||||
|
|
||||||
|
EngineDependentOperation op = new ExtendDataOperation(engine_config,
|
||||||
|
"country",
|
||||||
|
RECON_SERVICE,
|
||||||
|
RECON_IDENTIFIER_SPACE,
|
||||||
|
RECON_SCHEMA_SPACE,
|
||||||
|
extension,
|
||||||
|
1);
|
||||||
|
ProcessManager pm = project.getProcessManager();
|
||||||
|
Process process = op.createProcess(project, options);
|
||||||
|
process.startPerforming(pm);
|
||||||
|
Assert.assertTrue(process.isRunning());
|
||||||
|
try {
|
||||||
|
// We have 4 rows so 4000 ms should be largely enough.
|
||||||
|
Thread.sleep(5000);
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
Assert.fail("Test interrupted");
|
||||||
|
}
|
||||||
|
Assert.assertFalse(process.isRunning());
|
||||||
|
|
||||||
|
// Inspect rows
|
||||||
|
Assert.assertTrue("IR".equals(project.rows.get(0).getCellValue(1)));
|
||||||
|
Assert.assertTrue("JP".equals(project.rows.get(1).getCellValue(1)));
|
||||||
|
Assert.assertTrue("TJ".equals(project.rows.get(2).getCellValue(1)));
|
||||||
|
Assert.assertTrue("US".equals(project.rows.get(3).getCellValue(1)));
|
||||||
|
|
||||||
|
// Make sure we did not create any recon stats for that column (no reconciled value)
|
||||||
|
Assert.assertTrue(project.columnModel.getColumnByName("ISO 3166-1 alpha-2 code").getReconStats() == null);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test to fetch counts of values
|
||||||
|
*/
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testFetchCounts() throws Exception {
|
||||||
|
JSONObject extension = new JSONObject("{\"properties\":[{\"id\":\"P38\",\"name\":\"currency\",\"settings\":{\"count\":\"on\"}}]}");
|
||||||
|
|
||||||
|
EngineDependentOperation op = new ExtendDataOperation(engine_config,
|
||||||
|
"country",
|
||||||
|
RECON_SERVICE,
|
||||||
|
RECON_IDENTIFIER_SPACE,
|
||||||
|
RECON_SCHEMA_SPACE,
|
||||||
|
extension,
|
||||||
|
1);
|
||||||
|
ProcessManager pm = project.getProcessManager();
|
||||||
|
Process process = op.createProcess(project, options);
|
||||||
|
process.startPerforming(pm);
|
||||||
|
Assert.assertTrue(process.isRunning());
|
||||||
|
try {
|
||||||
|
Thread.sleep(5000);
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
Assert.fail("Test interrupted");
|
||||||
|
}
|
||||||
|
Assert.assertFalse(process.isRunning());
|
||||||
|
|
||||||
|
// Test to be updated as countries change currencies!
|
||||||
|
Assert.assertTrue(Math.round((float)project.rows.get(2).getCellValue(1)) == 2);
|
||||||
|
Assert.assertTrue(Math.round((float)project.rows.get(3).getCellValue(1)) == 1);
|
||||||
|
|
||||||
|
// Make sure we did not create any recon stats for that column (no reconciled value)
|
||||||
|
Assert.assertTrue(project.columnModel.getColumnByName("currency").getReconStats() == null);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test fetch only the best statements
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testFetchCurrent() throws Exception {
|
||||||
|
JSONObject extension = new JSONObject("{\"properties\":[{\"id\":\"P38\",\"name\":\"currency\",\"settings\":{\"rank\":\"best\"}}]}");
|
||||||
|
|
||||||
|
EngineDependentOperation op = new ExtendDataOperation(engine_config,
|
||||||
|
"country",
|
||||||
|
RECON_SERVICE,
|
||||||
|
RECON_IDENTIFIER_SPACE,
|
||||||
|
RECON_SCHEMA_SPACE,
|
||||||
|
extension,
|
||||||
|
1);
|
||||||
|
ProcessManager pm = project.getProcessManager();
|
||||||
|
Process process = op.createProcess(project, options);
|
||||||
|
process.startPerforming(pm);
|
||||||
|
Assert.assertTrue(process.isRunning());
|
||||||
|
try {
|
||||||
|
Thread.sleep(5000);
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
Assert.fail("Test interrupted");
|
||||||
|
}
|
||||||
|
Assert.assertFalse(process.isRunning());
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Tajikistan has one "preferred" currency and one "normal" one
|
||||||
|
* (in terms of statement ranks).
|
||||||
|
* But thanks to our setting in the extension configuration,
|
||||||
|
* we only fetch the current one, so the one just after it is
|
||||||
|
* the one for the US (USD).
|
||||||
|
*/
|
||||||
|
Assert.assertTrue("Tajikistani somoni".equals(project.rows.get(2).getCellValue(1)));
|
||||||
|
Assert.assertTrue("United States dollar".equals(project.rows.get(3).getCellValue(1)));
|
||||||
|
|
||||||
|
// Make sure all the values are reconciled
|
||||||
|
Assert.assertTrue(project.columnModel.getColumnByName("currency").getReconStats().matchedTopics == 4);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test fetch records (multiple values per reconciled cell)
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testFetchRecord() throws Exception {
|
||||||
|
JSONObject extension = new JSONObject("{\"properties\":[{\"id\":\"P38\",\"name\":\"currency\"}]}");
|
||||||
|
|
||||||
|
EngineDependentOperation op = new ExtendDataOperation(engine_config,
|
||||||
|
"country",
|
||||||
|
RECON_SERVICE,
|
||||||
|
RECON_IDENTIFIER_SPACE,
|
||||||
|
RECON_SCHEMA_SPACE,
|
||||||
|
extension,
|
||||||
|
1);
|
||||||
|
ProcessManager pm = project.getProcessManager();
|
||||||
|
Process process = op.createProcess(project, options);
|
||||||
|
process.startPerforming(pm);
|
||||||
|
Assert.assertTrue(process.isRunning());
|
||||||
|
try {
|
||||||
|
Thread.sleep(5000);
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
Assert.fail("Test interrupted");
|
||||||
|
}
|
||||||
|
Assert.assertFalse(process.isRunning());
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Tajikistan has one "preferred" currency and one "normal" one
|
||||||
|
* (in terms of statement ranks).
|
||||||
|
* The second currency is fetched as well, which creates a record
|
||||||
|
* (the cell to the left of it is left blank).
|
||||||
|
*/
|
||||||
|
Assert.assertTrue("Tajikistani somoni".equals(project.rows.get(2).getCellValue(1)));
|
||||||
|
Assert.assertTrue("Tajikistani ruble".equals(project.rows.get(3).getCellValue(1)));
|
||||||
|
Assert.assertTrue(null == project.rows.get(3).getCellValue(0));
|
||||||
|
|
||||||
|
// Make sure all the values are reconciled
|
||||||
|
Assert.assertTrue(project.columnModel.getColumnByName("currency").getReconStats().matchedTopics == 5);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user