From 4b3e5c02a14c2ef261cc0ca5498b207bf685a75e Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Mon, 31 Dec 2018 16:19:28 +0100 Subject: [PATCH 1/2] Refactor feature computation in StandardReconConfig --- .../model/recon/StandardReconConfig.java | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/main/src/com/google/refine/model/recon/StandardReconConfig.java b/main/src/com/google/refine/model/recon/StandardReconConfig.java index 34981f0a5..ffbfedf2c 100644 --- a/main/src/com/google/refine/model/recon/StandardReconConfig.java +++ b/main/src/com/google/refine/model/recon/StandardReconConfig.java @@ -542,8 +542,20 @@ public class StandardReconConfig extends ReconConfig { recon.addCandidate(candidate); count++; } - - if (count > 0) { + + computeFeatures(recon, text); + return recon; + } + + /** + * Recomputes the features associated with this reconciliation + * object (only if we have at least one candidate). + * + * @param text + * the cell value to compare the reconciliation data to + */ + public void computeFeatures(Recon recon, String text) { + if (!recon.candidates.isEmpty()) { ReconCandidate candidate = recon.candidates.get(0); recon.setFeature(Recon.Feature_nameMatch, text.equalsIgnoreCase(candidate.name)); @@ -561,8 +573,7 @@ public class StandardReconConfig extends ReconConfig { } } } - return recon; - } + } static protected double wordDistance(String s1, String s2) { Set words1 = breakWords(s1); From b1a84da1da59aee44094f99e7286a055f29e6da6 Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Mon, 31 Dec 2018 17:38:19 +0100 Subject: [PATCH 2/2] Recompute features when creating new items. Closes #1887. --- .../wikidata/editing/NewItemLibrary.java | 23 +++++++++++++++++-- .../wikidata/editing/NewItemLibraryTest.java | 13 ++++++++++- .../model/recon/StandardReconConfig.java | 2 ++ 3 files changed, 35 insertions(+), 3 deletions(-) diff --git a/extensions/wikidata/src/org/openrefine/wikidata/editing/NewItemLibrary.java b/extensions/wikidata/src/org/openrefine/wikidata/editing/NewItemLibrary.java index abb9fa0f1..ba8c0ee03 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/editing/NewItemLibrary.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/editing/NewItemLibrary.java @@ -37,6 +37,8 @@ import com.google.refine.model.Recon; import com.google.refine.model.ReconCandidate; import com.google.refine.model.ReconStats; import com.google.refine.model.Row; +import com.google.refine.model.recon.ReconConfig; +import com.google.refine.model.recon.StandardReconConfig; /** * This keeps track of the new items that we have created for each internal @@ -106,17 +108,34 @@ public class NewItemLibrary { continue; } Recon recon = cell.recon; + boolean changed = false; if (Recon.Judgment.New.equals(recon.judgment) && !reset && map.containsKey(recon.id)) { recon.judgment = Recon.Judgment.Matched; recon.match = new ReconCandidate(map.get(recon.id), cell.value.toString(), new String[0], 100); - impactedColumns.add(i); + recon.addCandidate(recon.match); + changed = true; + } else if (Recon.Judgment.Matched.equals(recon.judgment) && reset && map.containsKey(recon.id)) { recon.judgment = Recon.Judgment.New; + if(recon.candidates != null) { + recon.candidates.remove(recon.candidates.size()-1); + } recon.match = null; - impactedColumns.add(i); + changed = true; + } + + if (changed) { + impactedColumns.add(i); + // Compute features + Column column = project.columnModel.getColumnByCellIndex(i); + ReconConfig config = column.getReconConfig(); + if (config instanceof StandardReconConfig) { + StandardReconConfig stdConfig = (StandardReconConfig)config; + stdConfig.computeFeatures(recon, cell.getValueAsString()); + } } } } diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/editing/NewItemLibraryTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/editing/NewItemLibraryTest.java index 3cb2063a3..2a4ae71e4 100644 --- a/extensions/wikidata/tests/src/org/openrefine/wikidata/editing/NewItemLibraryTest.java +++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/editing/NewItemLibraryTest.java @@ -24,6 +24,9 @@ package org.openrefine.wikidata.editing; import static org.junit.Assert.assertEquals; +import static org.testng.Assert.assertTrue; + +import java.util.Collections; import org.openrefine.wikidata.testing.JacksonSerializationTest; import org.openrefine.wikidata.testing.TestingData; @@ -33,6 +36,7 @@ import org.testng.annotations.Test; import com.google.refine.model.Cell; import com.google.refine.model.Project; import com.google.refine.model.Recon; +import com.google.refine.model.recon.StandardReconConfig; import com.google.refine.tests.RefineTest; public class NewItemLibraryTest extends RefineTest { @@ -54,6 +58,10 @@ public class NewItemLibraryTest extends RefineTest { @Test public void testUpdateReconciledCells() { Project project = createCSVProject(TestingData.inceptionWithNewCsv); + StandardReconConfig config = new StandardReconConfig("http://my.endpoint", + "http://my.schema", "http://my.schema", "Q5", "human", true, Collections.emptyList()); + project.columnModel.columns.get(0).setReconConfig(config); + project.rows.get(0).cells.set(0, TestingData.makeNewItemCell(3289L, "University of Ljubljana")); project.rows.get(1).cells.set(0, TestingData.makeMatchedCell("Q865528", "University of Warwick")); project.rows.get(2).cells.set(0, TestingData.makeNewItemCell(1234L, "new uni")); @@ -61,9 +69,12 @@ public class NewItemLibraryTest extends RefineTest { isMatchedTo("Q865528", project.rows.get(1).cells.get(0)); isNewTo(1234L, project.rows.get(2).cells.get(0)); library.updateReconciledCells(project, false); - isMatchedTo("Q384", project.rows.get(0).cells.get(0)); + Cell firstCell = project.rows.get(0).cells.get(0); + isMatchedTo("Q384", firstCell); + assertTrue((Boolean) firstCell.recon.getFeature(Recon.Feature_nameMatch)); isMatchedTo("Q865528", project.rows.get(1).cells.get(0)); isMatchedTo("Q345", project.rows.get(2).cells.get(0)); + assertTrue(project.rows.get(2).cells.get(0).recon.getFeature(Recon.Feature_nameLevenshtein).equals(0)); library.updateReconciledCells(project, true); isNewTo(3289L, project.rows.get(0).cells.get(0)); isMatchedTo("Q865528", project.rows.get(1).cells.get(0)); diff --git a/main/src/com/google/refine/model/recon/StandardReconConfig.java b/main/src/com/google/refine/model/recon/StandardReconConfig.java index ffbfedf2c..89ea21680 100644 --- a/main/src/com/google/refine/model/recon/StandardReconConfig.java +++ b/main/src/com/google/refine/model/recon/StandardReconConfig.java @@ -572,6 +572,8 @@ public class StandardReconConfig extends ReconConfig { } } } + } else { + recon.features = new Object[Recon.Feature_max]; } }