diff --git a/main/src/com/google/refine/commands/recon/ExtendDataCommand.java b/main/src/com/google/refine/commands/recon/ExtendDataCommand.java index bdf0748b8..1e4d4a98f 100644 --- a/main/src/com/google/refine/commands/recon/ExtendDataCommand.java +++ b/main/src/com/google/refine/commands/recon/ExtendDataCommand.java @@ -52,7 +52,7 @@ public class ExtendDataCommand extends EngineDependentCommand { int columnInsertIndex = Integer.parseInt(request.getParameter("columnInsertIndex")); String endpoint = request.getParameter("endpoint"); String identifierSpace = request.getParameter("identifierSpace"); - String schemaSpace = request.getParameter("schemaSpace"); + String schemaSpace = request.getParameter("schemaSpace"); String jsonString = request.getParameter("extension"); JSONObject extension = ParsingUtilities.evaluateJsonStringToObject(jsonString); @@ -60,8 +60,8 @@ public class ExtendDataCommand extends EngineDependentCommand { return new ExtendDataOperation( engineConfig, baseColumnName, - endpoint, - identifierSpace, + endpoint, + identifierSpace, schemaSpace, extension, columnInsertIndex diff --git a/main/src/com/google/refine/model/changes/DataExtensionChange.java b/main/src/com/google/refine/model/changes/DataExtensionChange.java index cffc193f4..b7098a1e0 100644 --- a/main/src/com/google/refine/model/changes/DataExtensionChange.java +++ b/main/src/com/google/refine/model/changes/DataExtensionChange.java @@ -71,7 +71,7 @@ public class DataExtensionChange implements Change { final protected int _columnInsertIndex; final protected List _columnNames; - final protected List _columnTypes; + final protected List _columnTypes; final protected List _rowIndices; final protected List _dataExtensions; @@ -221,13 +221,13 @@ public class DataExtensionChange implements Change { Column column = new Column(cellIndex, name); ReconType columnType = _columnTypes.get(i); column.setReconConfig(new DataExtensionReconConfig( - _service, - _identifierSpace, - _schemaSpace, - 
columnType)); - if (columnType != null) { - column.setReconStats(ReconStats.create(project, cellIndex)); - } + _service, + _identifierSpace, + _schemaSpace, + columnType)); + if (columnType != null) { + column.setReconStats(ReconStats.create(project, cellIndex)); + } try { project.columnModel.addColumn(_columnInsertIndex + i, column, true); @@ -311,7 +311,7 @@ public class DataExtensionChange implements Change { if(type != null) { JSONWriter jsonWriter = new JSONWriter(writer); type.write(jsonWriter, options); - } + } } catch (JSONException e) { // ??? } @@ -368,9 +368,9 @@ public class DataExtensionChange implements Change { static public Change load(LineNumberReader reader, Pool pool) throws Exception { String baseColumnName = null; - String service = null; - String identifierSpace = null; - String schemaSpace = null; + String service = null; + String identifierSpace = null; + String schemaSpace = null; int columnInsertIndex = -1; List columnNames = null; @@ -428,11 +428,11 @@ public class DataExtensionChange implements Change { columnTypes = new ArrayList(count); for (int i = 0; i < count; i++) { line = reader.readLine(); - if (line == null || line.length() == 0) { - columnTypes.add(null); - } else { - columnTypes.add(ReconType.load(ParsingUtilities.evaluateJsonStringToObject(line))); - } + if (line == null || line.length() == 0) { + columnTypes.add(null); + } else { + columnTypes.add(ReconType.load(ParsingUtilities.evaluateJsonStringToObject(line))); + } } } else if ("dataExtensionCount".equals(field)) { int count = Integer.parseInt(value); @@ -492,9 +492,9 @@ public class DataExtensionChange implements Change { DataExtensionChange change = new DataExtensionChange( baseColumnName, - service, - identifierSpace, - schemaSpace, + service, + identifierSpace, + schemaSpace, columnInsertIndex, columnNames, columnTypes, diff --git a/main/src/com/google/refine/model/recon/DataExtensionReconConfig.java 
b/main/src/com/google/refine/model/recon/DataExtensionReconConfig.java index 15e0156c0..9c1eede9b 100644 --- a/main/src/com/google/refine/model/recon/DataExtensionReconConfig.java +++ b/main/src/com/google/refine/model/recon/DataExtensionReconConfig.java @@ -56,31 +56,31 @@ public class DataExtensionReconConfig extends StandardReconConfig { static public ReconConfig reconstruct(JSONObject obj) throws Exception { JSONObject type = obj.getJSONObject("type"); - - ReconType typ = null; - if(obj.has("id")) { - typ = new ReconType(obj.getString("id"), - obj.has("name") ? obj.getString("name") : obj.getString("id")); - } + + ReconType typ = null; + if(obj.has("id")) { + typ = new ReconType(obj.getString("id"), + obj.has("name") ? obj.getString("name") : obj.getString("id")); + } return new DataExtensionReconConfig( obj.getString("service"), obj.has("identifierSpace") ? obj.getString("identifierSpace") : null, obj.has("schemaSpace") ? obj.getString("schemaSpace") : null, - typ); + typ); } public DataExtensionReconConfig( - String service, + String service, String identifierSpace, - String schemaSpace, - ReconType type) { - super( - service, - identifierSpace, + String schemaSpace, + ReconType type) { + super( + service, + identifierSpace, schemaSpace, type != null ? type.id : null, - type != null ? type.name : null, + type != null ? 
type.name : null, true, new ArrayList()); this.type = type; diff --git a/main/src/com/google/refine/model/recon/ReconciledDataExtensionJob.java b/main/src/com/google/refine/model/recon/ReconciledDataExtensionJob.java index 541253a79..0ddce01d3 100644 --- a/main/src/com/google/refine/model/recon/ReconciledDataExtensionJob.java +++ b/main/src/com/google/refine/model/recon/ReconciledDataExtensionJob.java @@ -105,11 +105,11 @@ public class ReconciledDataExtensionJob { String s = ParsingUtilities.inputStreamToString(is); JSONObject o = ParsingUtilities.evaluateJsonStringToObject(s); - if(columns.size() == 0) { - // Extract the column metadata - gatherColumnInfo(o.getJSONArray("meta"), columns); - } - + if(columns.size() == 0) { + // Extract the column metadata + gatherColumnInfo(o.getJSONArray("meta"), columns); + } + Map map = new HashMap(); if (o.has("rows")){ JSONObject records = o.getJSONObject("rows"); @@ -167,7 +167,7 @@ public class ReconciledDataExtensionJob { // for each property int colindex = 0; for(ColumnInfo ci : columns) { - String pid = ci.id; + String pid = ci.id; JSONArray values = record.getJSONArray(pid); if (values == null) { continue; @@ -273,11 +273,11 @@ public class ReconciledDataExtensionJob { jsonWriter.object(); jsonWriter.key("id"); jsonWriter.value(property.getString("id")); - if (property.has("settings")) { - JSONObject settings = property.getJSONObject("settings"); - jsonWriter.key("settings"); - jsonWriter.value(settings); - } + if (property.has("settings")) { + JSONObject settings = property.getJSONObject("settings"); + jsonWriter.key("settings"); + jsonWriter.value(settings); + } jsonWriter.endObject(); } jsonWriter.endArray(); @@ -285,19 +285,19 @@ public class ReconciledDataExtensionJob { } static protected void gatherColumnInfo(JSONArray meta, List columns) throws JSONException { - for(int i = 0; i < meta.length(); i++) { - JSONObject col = meta.getJSONObject(i); + for(int i = 0; i < meta.length(); i++) { + JSONObject col = 
meta.getJSONObject(i); - ReconType expectedType = null; - if(col.has("type")) { - JSONObject expectedObj = col.getJSONObject("type"); - expectedType = new ReconType(expectedObj.getString("id"), expectedObj.getString("name")); - } - - columns.add(new ColumnInfo( - col.getString("name"), - col.getString("id"), - expectedType)); - } + ReconType expectedType = null; + if(col.has("type")) { + JSONObject expectedObj = col.getJSONObject("type"); + expectedType = new ReconType(expectedObj.getString("id"), expectedObj.getString("name")); + } + + columns.add(new ColumnInfo( + col.getString("name"), + col.getString("id"), + expectedType)); + } } } diff --git a/main/src/com/google/refine/operations/recon/ExtendDataOperation.java b/main/src/com/google/refine/operations/recon/ExtendDataOperation.java index eea06a711..9771e52d5 100644 --- a/main/src/com/google/refine/operations/recon/ExtendDataOperation.java +++ b/main/src/com/google/refine/operations/recon/ExtendDataOperation.java @@ -92,7 +92,7 @@ public class ExtendDataOperation extends EngineDependentOperation { public ExtendDataOperation( JSONObject engineConfig, String baseColumnName, - String endpoint, + String endpoint, String identifierSpace, String schemaSpace, JSONObject extension, @@ -118,9 +118,9 @@ public class ExtendDataOperation extends EngineDependentOperation { writer.key("engineConfig"); writer.value(getEngineConfig()); writer.key("columnInsertIndex"); writer.value(_columnInsertIndex); writer.key("baseColumnName"); writer.value(_baseColumnName); - writer.key("endpoint"); writer.value(_endpoint); - writer.key("identifierSpace"); writer.value(_identifierSpace); - writer.key("schemaSpace"); writer.value(_schemaSpace); + writer.key("endpoint"); writer.value(_endpoint); + writer.key("identifierSpace"); writer.value(_identifierSpace); + writer.key("schemaSpace"); writer.value(_schemaSpace); writer.key("extension"); writer.value(_extension); writer.endObject(); } @@ -314,9 +314,9 @@ public class ExtendDataOperation 
extends EngineDependentOperation { ExtendDataOperation.this, new DataExtensionChange( _baseColumnName, - _endpoint, - _identifierSpace, - _schemaSpace, + _endpoint, + _identifierSpace, + _schemaSpace, _columnInsertIndex, columnNames, columnTypes, diff --git a/main/tests/server/src/com/google/refine/tests/recon/DataExtensionTests.java b/main/tests/server/src/com/google/refine/tests/recon/DataExtensionTests.java new file mode 100644 index 000000000..fd992a2f2 --- /dev/null +++ b/main/tests/server/src/com/google/refine/tests/recon/DataExtensionTests.java @@ -0,0 +1,299 @@ +/* + +Copyright 2010, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +package com.google.refine.tests.recon; + +import static org.mockito.Mockito.mock; + +import java.io.File; +import java.io.IOException; +import java.util.Properties; +import java.util.List; +import java.util.ArrayList; + +import org.json.JSONException; +import org.json.JSONObject; +import org.slf4j.LoggerFactory; +import org.testng.Assert; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.BeforeTest; +import org.testng.annotations.Test; + +import com.google.refine.ProjectManager; +import com.google.refine.ProjectMetadata; +import com.google.refine.browsing.Engine; +import com.google.refine.browsing.RowVisitor; +import com.google.refine.grel.Function; +import com.google.refine.io.FileProjectManager; +import com.google.refine.model.Cell; +import com.google.refine.model.Column; +import com.google.refine.model.ModelException; +import com.google.refine.model.Project; +import com.google.refine.model.Row; +import com.google.refine.model.Recon; +import com.google.refine.model.ReconCandidate; +import com.google.refine.process.Process; +import com.google.refine.process.ProcessManager; +import com.google.refine.operations.OnError; +import com.google.refine.operations.EngineDependentOperation; +import com.google.refine.operations.recon.ExtendDataOperation; +import com.google.refine.tests.RefineTest; +import com.google.refine.tests.util.TestUtils; + + +public class DataExtensionTests 
extends RefineTest { + + static final String ENGINE_JSON_URLS = "{\"mode\":\"row-based\"}}"; + static final String RECON_SERVICE = "http://localhost:8000/en/api"; //"https://tools.wmflabs.org/openrefine-wikidata/en/api"; + static final String RECON_IDENTIFIER_SPACE = "http://www.wikidata.org/entity/"; + static final String RECON_SCHEMA_SPACE = "http://www.wikidata.org/prop/direct/"; + + @Override + @BeforeTest + public void init() { + logger = LoggerFactory.getLogger(this.getClass()); + } + + // dependencies + Project project; + Properties options; + JSONObject engine_config; + Engine engine; + Properties bindings; + + @BeforeMethod + public void SetUp() throws JSONException, IOException, ModelException { + File dir = TestUtils.createTempDirectory("openrefine-test-workspace-dir"); + FileProjectManager.initialize(dir); + project = new Project(); + ProjectMetadata pm = new ProjectMetadata(); + pm.setName("Data Extension Test Project"); + ProjectManager.singleton.registerProject(project, pm); + + int index = project.columnModel.allocateNewCellIndex(); + Column column = new Column(index,"country"); + project.columnModel.addColumn(index, column, true); + + options = mock(Properties.class); + engine = new Engine(project); + engine_config = new JSONObject(ENGINE_JSON_URLS); + engine.initializeFromJSON(engine_config); + engine.setMode(Engine.Mode.RowBased); + + bindings = new Properties(); + bindings.put("project", project); + + Row row = new Row(2); + row.setCell(0, reconciledCell("Iran", "Q794")); + project.rows.add(row); + row = new Row(2); + row.setCell(0, reconciledCell("Japan", "Q17")); + project.rows.add(row); + row = new Row(2); + row.setCell(0, reconciledCell("Tajikistan", "Q863")); + project.rows.add(row); + row = new Row(2); + row.setCell(0, reconciledCell("United States of America", "Q30")); + project.rows.add(row); + } + + @AfterMethod + public void TearDown() { + project = null; + options = null; + engine = null; + bindings = null; + } + + static public Cell 
reconciledCell(String name, String id) { + ReconCandidate r = new ReconCandidate(id, name, new String[0], 100); + List candidates = new ArrayList(); + candidates.add(r); + Recon rec = new Recon(0, RECON_IDENTIFIER_SPACE, RECON_SCHEMA_SPACE); + rec.service = RECON_SERVICE; + rec.candidates = candidates; + rec.match = r; + return new Cell(name, rec); + } + + /** + * Test to fetch simple strings + */ + + @Test + public void testFetchStrings() throws Exception { + JSONObject extension = new JSONObject("{\"properties\":[{\"id\":\"P297\",\"name\":\"ISO 3166-1 alpha-2 code\"}]}"); + + EngineDependentOperation op = new ExtendDataOperation(engine_config, + "country", + RECON_SERVICE, + RECON_IDENTIFIER_SPACE, + RECON_SCHEMA_SPACE, + extension, + 1); + ProcessManager pm = project.getProcessManager(); + Process process = op.createProcess(project, options); + process.startPerforming(pm); + Assert.assertTrue(process.isRunning()); + try { + // We only have 4 rows, so 5000 ms should be more than enough. + Thread.sleep(5000); + } catch (InterruptedException e) { + Assert.fail("Test interrupted"); + } + Assert.assertFalse(process.isRunning()); + + // Inspect rows + Assert.assertTrue("IR".equals(project.rows.get(0).getCellValue(1))); + Assert.assertTrue("JP".equals(project.rows.get(1).getCellValue(1))); + Assert.assertTrue("TJ".equals(project.rows.get(2).getCellValue(1))); + Assert.assertTrue("US".equals(project.rows.get(3).getCellValue(1))); + + // Make sure we did not create any recon stats for that column (no reconciled value) + Assert.assertTrue(project.columnModel.getColumnByName("ISO 3166-1 alpha-2 code").getReconStats() == null); + } + + /** + * Test to fetch counts of values + */ + + @Test + public void testFetchCounts() throws Exception { + JSONObject extension = new JSONObject("{\"properties\":[{\"id\":\"P38\",\"name\":\"currency\",\"settings\":{\"count\":\"on\"}}]}"); + + EngineDependentOperation op = new ExtendDataOperation(engine_config, + "country", + RECON_SERVICE, + 
RECON_IDENTIFIER_SPACE, + RECON_SCHEMA_SPACE, + extension, + 1); + ProcessManager pm = project.getProcessManager(); + Process process = op.createProcess(project, options); + process.startPerforming(pm); + Assert.assertTrue(process.isRunning()); + try { + Thread.sleep(5000); + } catch (InterruptedException e) { + Assert.fail("Test interrupted"); + } + Assert.assertFalse(process.isRunning()); + + // Test to be updated as countries change currencies! + Assert.assertTrue(Math.round((float)project.rows.get(2).getCellValue(1)) == 2); + Assert.assertTrue(Math.round((float)project.rows.get(3).getCellValue(1)) == 1); + + // Make sure we did not create any recon stats for that column (no reconciled value) + Assert.assertTrue(project.columnModel.getColumnByName("currency").getReconStats() == null); + } + + /** + * Test fetching only the best-ranked statements + */ + @Test + public void testFetchCurrent() throws Exception { + JSONObject extension = new JSONObject("{\"properties\":[{\"id\":\"P38\",\"name\":\"currency\",\"settings\":{\"rank\":\"best\"}}]}"); + + EngineDependentOperation op = new ExtendDataOperation(engine_config, + "country", + RECON_SERVICE, + RECON_IDENTIFIER_SPACE, + RECON_SCHEMA_SPACE, + extension, + 1); + ProcessManager pm = project.getProcessManager(); + Process process = op.createProcess(project, options); + process.startPerforming(pm); + Assert.assertTrue(process.isRunning()); + try { + Thread.sleep(5000); + } catch (InterruptedException e) { + Assert.fail("Test interrupted"); + } + Assert.assertFalse(process.isRunning()); + + /* + * Tajikistan has one "preferred" currency and one "normal" one + * (in terms of statement ranks). + * But thanks to our setting in the extension configuration, + * we only fetch the best-ranked one, so the row just after it + * holds the currency of the US (USD). 
+ */ + Assert.assertTrue("Tajikistani somoni".equals(project.rows.get(2).getCellValue(1))); + Assert.assertTrue("United States dollar".equals(project.rows.get(3).getCellValue(1))); + + // Make sure all the values are reconciled + Assert.assertTrue(project.columnModel.getColumnByName("currency").getReconStats().matchedTopics == 4); + } + + /** + * Test fetching records (multiple values per reconciled cell) + */ + @Test + public void testFetchRecord() throws Exception { + JSONObject extension = new JSONObject("{\"properties\":[{\"id\":\"P38\",\"name\":\"currency\"}]}"); + + EngineDependentOperation op = new ExtendDataOperation(engine_config, + "country", + RECON_SERVICE, + RECON_IDENTIFIER_SPACE, + RECON_SCHEMA_SPACE, + extension, + 1); + ProcessManager pm = project.getProcessManager(); + Process process = op.createProcess(project, options); + process.startPerforming(pm); + Assert.assertTrue(process.isRunning()); + try { + Thread.sleep(5000); + } catch (InterruptedException e) { + Assert.fail("Test interrupted"); + } + Assert.assertFalse(process.isRunning()); + + /* + * Tajikistan has one "preferred" currency and one "normal" one + * (in terms of statement ranks). + * The second currency is fetched as well, which creates a record + * (the cell to the left of it is left blank). + */ + Assert.assertTrue("Tajikistani somoni".equals(project.rows.get(2).getCellValue(1))); + Assert.assertTrue("Tajikistani ruble".equals(project.rows.get(3).getCellValue(1))); + Assert.assertTrue(null == project.rows.get(3).getCellValue(0)); + + // Make sure all the values are reconciled + Assert.assertTrue(project.columnModel.getColumnByName("currency").getReconStats().matchedTopics == 5); + } + +}