Merge pull request #1925 from OpenRefine/issue1887
Compute reconciliation features when creating new items
This commit is contained in:
commit
6dd9f41639
@ -37,6 +37,8 @@ import com.google.refine.model.Recon;
|
|||||||
import com.google.refine.model.ReconCandidate;
|
import com.google.refine.model.ReconCandidate;
|
||||||
import com.google.refine.model.ReconStats;
|
import com.google.refine.model.ReconStats;
|
||||||
import com.google.refine.model.Row;
|
import com.google.refine.model.Row;
|
||||||
|
import com.google.refine.model.recon.ReconConfig;
|
||||||
|
import com.google.refine.model.recon.StandardReconConfig;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This keeps track of the new items that we have created for each internal
|
* This keeps track of the new items that we have created for each internal
|
||||||
@ -106,17 +108,34 @@ public class NewItemLibrary {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
Recon recon = cell.recon;
|
Recon recon = cell.recon;
|
||||||
|
boolean changed = false;
|
||||||
if (Recon.Judgment.New.equals(recon.judgment) && !reset
|
if (Recon.Judgment.New.equals(recon.judgment) && !reset
|
||||||
&& map.containsKey(recon.id)) {
|
&& map.containsKey(recon.id)) {
|
||||||
recon.judgment = Recon.Judgment.Matched;
|
recon.judgment = Recon.Judgment.Matched;
|
||||||
recon.match = new ReconCandidate(map.get(recon.id), cell.value.toString(),
|
recon.match = new ReconCandidate(map.get(recon.id), cell.value.toString(),
|
||||||
new String[0], 100);
|
new String[0], 100);
|
||||||
impactedColumns.add(i);
|
recon.addCandidate(recon.match);
|
||||||
|
changed = true;
|
||||||
|
|
||||||
} else if (Recon.Judgment.Matched.equals(recon.judgment) && reset
|
} else if (Recon.Judgment.Matched.equals(recon.judgment) && reset
|
||||||
&& map.containsKey(recon.id)) {
|
&& map.containsKey(recon.id)) {
|
||||||
recon.judgment = Recon.Judgment.New;
|
recon.judgment = Recon.Judgment.New;
|
||||||
|
if(recon.candidates != null) {
|
||||||
|
recon.candidates.remove(recon.candidates.size()-1);
|
||||||
|
}
|
||||||
recon.match = null;
|
recon.match = null;
|
||||||
impactedColumns.add(i);
|
changed = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (changed) {
|
||||||
|
impactedColumns.add(i);
|
||||||
|
// Compute features
|
||||||
|
Column column = project.columnModel.getColumnByCellIndex(i);
|
||||||
|
ReconConfig config = column.getReconConfig();
|
||||||
|
if (config instanceof StandardReconConfig) {
|
||||||
|
StandardReconConfig stdConfig = (StandardReconConfig)config;
|
||||||
|
stdConfig.computeFeatures(recon, cell.getValueAsString());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -24,6 +24,9 @@
|
|||||||
package org.openrefine.wikidata.editing;
|
package org.openrefine.wikidata.editing;
|
||||||
|
|
||||||
import static org.junit.Assert.assertEquals;
|
import static org.junit.Assert.assertEquals;
|
||||||
|
import static org.testng.Assert.assertTrue;
|
||||||
|
|
||||||
|
import java.util.Collections;
|
||||||
|
|
||||||
import org.openrefine.wikidata.testing.JacksonSerializationTest;
|
import org.openrefine.wikidata.testing.JacksonSerializationTest;
|
||||||
import org.openrefine.wikidata.testing.TestingData;
|
import org.openrefine.wikidata.testing.TestingData;
|
||||||
@ -33,6 +36,7 @@ import org.testng.annotations.Test;
|
|||||||
import com.google.refine.model.Cell;
|
import com.google.refine.model.Cell;
|
||||||
import com.google.refine.model.Project;
|
import com.google.refine.model.Project;
|
||||||
import com.google.refine.model.Recon;
|
import com.google.refine.model.Recon;
|
||||||
|
import com.google.refine.model.recon.StandardReconConfig;
|
||||||
import com.google.refine.tests.RefineTest;
|
import com.google.refine.tests.RefineTest;
|
||||||
|
|
||||||
public class NewItemLibraryTest extends RefineTest {
|
public class NewItemLibraryTest extends RefineTest {
|
||||||
@ -54,6 +58,10 @@ public class NewItemLibraryTest extends RefineTest {
|
|||||||
@Test
|
@Test
|
||||||
public void testUpdateReconciledCells() {
|
public void testUpdateReconciledCells() {
|
||||||
Project project = createCSVProject(TestingData.inceptionWithNewCsv);
|
Project project = createCSVProject(TestingData.inceptionWithNewCsv);
|
||||||
|
StandardReconConfig config = new StandardReconConfig("http://my.endpoint",
|
||||||
|
"http://my.schema", "http://my.schema", "Q5", "human", true, Collections.emptyList());
|
||||||
|
project.columnModel.columns.get(0).setReconConfig(config);
|
||||||
|
|
||||||
project.rows.get(0).cells.set(0, TestingData.makeNewItemCell(3289L, "University of Ljubljana"));
|
project.rows.get(0).cells.set(0, TestingData.makeNewItemCell(3289L, "University of Ljubljana"));
|
||||||
project.rows.get(1).cells.set(0, TestingData.makeMatchedCell("Q865528", "University of Warwick"));
|
project.rows.get(1).cells.set(0, TestingData.makeMatchedCell("Q865528", "University of Warwick"));
|
||||||
project.rows.get(2).cells.set(0, TestingData.makeNewItemCell(1234L, "new uni"));
|
project.rows.get(2).cells.set(0, TestingData.makeNewItemCell(1234L, "new uni"));
|
||||||
@ -61,9 +69,12 @@ public class NewItemLibraryTest extends RefineTest {
|
|||||||
isMatchedTo("Q865528", project.rows.get(1).cells.get(0));
|
isMatchedTo("Q865528", project.rows.get(1).cells.get(0));
|
||||||
isNewTo(1234L, project.rows.get(2).cells.get(0));
|
isNewTo(1234L, project.rows.get(2).cells.get(0));
|
||||||
library.updateReconciledCells(project, false);
|
library.updateReconciledCells(project, false);
|
||||||
isMatchedTo("Q384", project.rows.get(0).cells.get(0));
|
Cell firstCell = project.rows.get(0).cells.get(0);
|
||||||
|
isMatchedTo("Q384", firstCell);
|
||||||
|
assertTrue((Boolean) firstCell.recon.getFeature(Recon.Feature_nameMatch));
|
||||||
isMatchedTo("Q865528", project.rows.get(1).cells.get(0));
|
isMatchedTo("Q865528", project.rows.get(1).cells.get(0));
|
||||||
isMatchedTo("Q345", project.rows.get(2).cells.get(0));
|
isMatchedTo("Q345", project.rows.get(2).cells.get(0));
|
||||||
|
assertTrue(project.rows.get(2).cells.get(0).recon.getFeature(Recon.Feature_nameLevenshtein).equals(0));
|
||||||
library.updateReconciledCells(project, true);
|
library.updateReconciledCells(project, true);
|
||||||
isNewTo(3289L, project.rows.get(0).cells.get(0));
|
isNewTo(3289L, project.rows.get(0).cells.get(0));
|
||||||
isMatchedTo("Q865528", project.rows.get(1).cells.get(0));
|
isMatchedTo("Q865528", project.rows.get(1).cells.get(0));
|
||||||
|
@ -551,8 +551,20 @@ public class StandardReconConfig extends ReconConfig {
|
|||||||
recon.addCandidate(candidate);
|
recon.addCandidate(candidate);
|
||||||
count++;
|
count++;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (count > 0) {
|
computeFeatures(recon, text);
|
||||||
|
return recon;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Recomputes the features associated with this reconciliation
|
||||||
|
* object, based on its first candidate. If there are no candidates,
* the features are reset to a fresh, empty array instead.
|
||||||
|
*
|
||||||
|
* @param recon
*            the reconciliation object whose features are updated
* @param text
|
||||||
|
* the cell value to compare the reconciliation data to
|
||||||
|
*/
|
||||||
|
public void computeFeatures(Recon recon, String text) {
|
||||||
|
if (!recon.candidates.isEmpty()) {
|
||||||
ReconCandidate candidate = recon.candidates.get(0);
|
ReconCandidate candidate = recon.candidates.get(0);
|
||||||
|
|
||||||
recon.setFeature(Recon.Feature_nameMatch, text.equalsIgnoreCase(candidate.name));
|
recon.setFeature(Recon.Feature_nameMatch, text.equalsIgnoreCase(candidate.name));
|
||||||
@ -569,9 +581,10 @@ public class StandardReconConfig extends ReconConfig {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
recon.features = new Object[Recon.Feature_max];
|
||||||
}
|
}
|
||||||
return recon;
|
}
|
||||||
}
|
|
||||||
|
|
||||||
static protected double wordDistance(String s1, String s2) {
|
static protected double wordDistance(String s1, String s2) {
|
||||||
Set<String> words1 = breakWords(s1);
|
Set<String> words1 = breakWords(s1);
|
||||||
|
Loading…
Reference in New Issue
Block a user