Added more metadata into recon objects.
Tried to minimize the number of unique recon objects created when calling Recon.dup(). git-svn-id: http://google-refine.googlecode.com/svn/trunk@560 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
parent
e77b99e58b
commit
15c188ad7a
@ -134,17 +134,32 @@ public class ReconJudgeOneCellCommand extends Command {
|
||||
", containing \"" + cell.value + "\"";
|
||||
|
||||
String description = null;
|
||||
|
||||
newCell.recon.matchRank = -1;
|
||||
newCell.recon.judgmentAction = "single";
|
||||
newCell.recon.judgmentBatchSize = 1;
|
||||
|
||||
if (judgment == Judgment.None) {
|
||||
newCell.recon.judgment = Recon.Judgment.None;
|
||||
newCell.recon.match = null;
|
||||
|
||||
description = "Discard recon judgment for " + cellDescription;
|
||||
} else if (judgment == Judgment.New) {
|
||||
newCell.recon.judgment = Recon.Judgment.New;
|
||||
newCell.recon.match = null;
|
||||
|
||||
description = "Mark to create new topic for " + cellDescription;
|
||||
} else {
|
||||
newCell.recon.judgment = Recon.Judgment.Matched;
|
||||
newCell.recon.match = this.match;
|
||||
|
||||
for (int m = 0; m < newCell.recon.candidates.size(); m++) {
|
||||
if (newCell.recon.candidates.get(m).topicGUID.equals(this.match.topicGUID)) {
|
||||
newCell.recon.matchRank = m;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
description = "Match " + this.match.topicName +
|
||||
" (" + match.topicID + ") to " +
|
||||
cellDescription;
|
||||
|
@ -132,6 +132,8 @@ public class ExcelImporter implements Importer {
|
||||
* Now process the data rows
|
||||
*/
|
||||
int rowsWithData = 0;
|
||||
Map<String, Recon> reconMap = new HashMap<String, Recon>();
|
||||
|
||||
for (; r <= lastRow; r++) {
|
||||
org.apache.poi.ss.usermodel.Row row = sheet.getRow(r);
|
||||
if (row == null) {
|
||||
@ -206,10 +208,22 @@ public class ExcelImporter implements Importer {
|
||||
id = id.substring(0, h);
|
||||
}
|
||||
|
||||
if (reconMap.containsKey(id)) {
|
||||
recon = reconMap.get(id);
|
||||
recon.judgmentBatchSize++;
|
||||
} else {
|
||||
recon = new Recon();
|
||||
recon.judgment = Judgment.Matched;
|
||||
recon.service = "import";
|
||||
recon.match = new ReconCandidate(id, "", value.toString(), new String[0], 100);
|
||||
recon.matchRank = 0;
|
||||
recon.judgment = Judgment.Matched;
|
||||
recon.judgmentAction = "auto";
|
||||
recon.judgmentBatchSize = 1;
|
||||
recon.addCandidate(recon.match);
|
||||
|
||||
reconMap.put(id, recon);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -60,9 +60,16 @@ public class Recon implements HasFields, Jsonizable {
|
||||
|
||||
final public long id;
|
||||
public Object[] features = new Object[Feature_max];
|
||||
public String service = "unknown";
|
||||
public List<ReconCandidate> candidates;
|
||||
|
||||
public Judgment judgment = Judgment.None;
|
||||
public String judgmentAction = "unknown";
|
||||
public long judgmentHistoryEntry = -1;
|
||||
public int judgmentBatchSize = 0;
|
||||
|
||||
public ReconCandidate match = null;
|
||||
public int matchRank = -1;
|
||||
|
||||
public Recon() {
|
||||
id = System.currentTimeMillis() * 1000000 + Math.round(Math.random() * 1000000);
|
||||
@ -81,8 +88,16 @@ public class Recon implements HasFields, Jsonizable {
|
||||
r.candidates = new ArrayList<ReconCandidate>(candidates);
|
||||
}
|
||||
|
||||
r.service = service;
|
||||
|
||||
r.judgment = judgment;
|
||||
|
||||
r.judgmentAction = judgmentAction;
|
||||
r.judgmentHistoryEntry = judgmentHistoryEntry;
|
||||
r.judgmentBatchSize = judgmentBatchSize;
|
||||
|
||||
r.match = match;
|
||||
r.matchRank = matchRank;
|
||||
|
||||
return r;
|
||||
}
|
||||
@ -190,6 +205,15 @@ public class Recon implements HasFields, Jsonizable {
|
||||
writer.value(o);
|
||||
}
|
||||
writer.endArray();
|
||||
|
||||
writer.key("service"); writer.value(service);
|
||||
writer.key("judgmentAction"); writer.value(judgmentAction);
|
||||
writer.key("judgmentHistoryEntry"); writer.value(judgmentHistoryEntry);
|
||||
writer.key("judgmentBatchSize"); writer.value(judgmentBatchSize);
|
||||
|
||||
if (match != null) {
|
||||
writer.key("matchRank"); writer.value(matchRank);
|
||||
}
|
||||
}
|
||||
|
||||
writer.endObject();
|
||||
@ -268,6 +292,16 @@ public class Recon implements HasFields, Jsonizable {
|
||||
recon.addCandidate(ReconCandidate.loadStreaming(jp));
|
||||
}
|
||||
}
|
||||
} else if ("service".equals(fieldName)) {
|
||||
recon.service = jp.getText();
|
||||
} else if ("judgmentAction".equals(fieldName)) {
|
||||
recon.judgmentAction = jp.getText();
|
||||
} else if ("judgmentHistoryEntry".equals(fieldName)) {
|
||||
recon.judgmentHistoryEntry = jp.getLongValue();
|
||||
} else if ("judgmentBatchSize".equals(fieldName)) {
|
||||
recon.judgmentBatchSize = jp.getIntValue();
|
||||
} else if ("matchRank".equals(fieldName)) {
|
||||
recon.matchRank = jp.getIntValue();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -181,8 +181,12 @@ public class DataExtensionChange implements Change {
|
||||
ReconCandidate rc = (ReconCandidate) value;
|
||||
Recon recon = new Recon();
|
||||
recon.addCandidate(rc);
|
||||
recon.service = "mql";
|
||||
recon.match = rc;
|
||||
recon.matchRank = 0;
|
||||
recon.judgment = Judgment.Matched;
|
||||
recon.judgmentAction = "auto";
|
||||
recon.judgmentBatchSize = 1;
|
||||
|
||||
cell = new Cell(rc.topicName, recon);
|
||||
} else {
|
||||
|
@ -149,8 +149,11 @@ public class GuidBasedReconConfig extends StrictReconConfig {
|
||||
|
||||
Recon recon = new Recon();
|
||||
recon.addCandidate(candidate);
|
||||
recon.match = candidate;
|
||||
recon.service = "mql";
|
||||
recon.judgment = Judgment.Matched;
|
||||
recon.judgmentAction = "auto";
|
||||
recon.match = candidate;
|
||||
recon.matchRank = 0;
|
||||
|
||||
guidToRecon.put(guid, recon);
|
||||
}
|
||||
|
@ -256,6 +256,7 @@ public class HeuristicReconConfig extends ReconConfig {
|
||||
recon = new Recon();
|
||||
}
|
||||
|
||||
recon.service = "recon";
|
||||
recons.add(recon);
|
||||
}
|
||||
} finally {
|
||||
@ -314,7 +315,9 @@ public class HeuristicReconConfig extends ReconConfig {
|
||||
candidate.score / recon.candidates.get(1).score >= 1.5) {
|
||||
|
||||
recon.match = candidate;
|
||||
recon.matchRank = 0;
|
||||
recon.judgment = Judgment.Matched;
|
||||
recon.judgmentAction = "auto";
|
||||
}
|
||||
}
|
||||
break;
|
||||
@ -361,6 +364,7 @@ public class HeuristicReconConfig extends ReconConfig {
|
||||
if (recon == null) {
|
||||
recon = new Recon();
|
||||
}
|
||||
recon.service = "recon";
|
||||
recons.add(recon);
|
||||
}
|
||||
|
||||
@ -408,7 +412,9 @@ public class HeuristicReconConfig extends ReconConfig {
|
||||
recon.setFeature(Recon.Feature_typeMatch, true);
|
||||
if (autoMatch && result.has("match") && result.getBoolean("match")) {
|
||||
recon.match = candidate;
|
||||
recon.matchRank = 0;
|
||||
recon.judgment = Judgment.Matched;
|
||||
recon.judgmentAction = "auto";
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -153,8 +153,11 @@ public class IdBasedReconConfig extends StrictReconConfig {
|
||||
|
||||
Recon recon = new Recon();
|
||||
recon.addCandidate(candidate);
|
||||
recon.match = candidate;
|
||||
recon.service = "mql";
|
||||
recon.judgment = Judgment.Matched;
|
||||
recon.judgmentAction = "auto";
|
||||
recon.match = candidate;
|
||||
recon.matchRank = 0;
|
||||
|
||||
idToRecon.put(id, recon);
|
||||
}
|
||||
|
@ -167,8 +167,11 @@ public class KeyBasedReconConfig extends StrictReconConfig {
|
||||
|
||||
Recon recon = new Recon();
|
||||
recon.addCandidate(candidate);
|
||||
recon.match = candidate;
|
||||
recon.service = "mql";
|
||||
recon.judgment = Judgment.Matched;
|
||||
recon.judgmentAction = "auto";
|
||||
recon.match = candidate;
|
||||
recon.matchRank = 0;
|
||||
|
||||
keyToRecon.put(key, recon);
|
||||
}
|
||||
|
@ -37,7 +37,11 @@ abstract public class EngineDependentMassCellOperation extends EngineDependentOp
|
||||
List<CellChange> cellChanges = new ArrayList<CellChange>(project.rows.size());
|
||||
|
||||
FilteredRows filteredRows = engine.getAllFilteredRows(false);
|
||||
try {
|
||||
filteredRows.accept(project, createRowVisitor(project, cellChanges));
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
String description = createDescription(column, cellChanges);
|
||||
|
||||
|
@ -1,6 +1,8 @@
|
||||
package com.metaweb.gridworks.operations;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
|
||||
import org.json.JSONException;
|
||||
@ -62,6 +64,7 @@ public class ReconDiscardJudgmentsOperation extends EngineDependentMassCellOpera
|
||||
return new RowVisitor() {
|
||||
int cellIndex;
|
||||
List<CellChange> cellChanges;
|
||||
Map<Long, Recon> dupReconMap = new HashMap<Long, Recon>();
|
||||
|
||||
public RowVisitor init(int cellIndex, List<CellChange> cellChanges) {
|
||||
this.cellIndex = cellIndex;
|
||||
@ -72,11 +75,22 @@ public class ReconDiscardJudgmentsOperation extends EngineDependentMassCellOpera
|
||||
public boolean visit(Project project, int rowIndex, Row row, boolean includeContextual, boolean includeDependent) {
|
||||
Cell cell = row.getCell(cellIndex);
|
||||
if (cell != null && cell.recon != null) {
|
||||
Recon recon = cell.recon.dup();
|
||||
recon.judgment = Judgment.None;
|
||||
recon.match = null;
|
||||
Recon newRecon;
|
||||
if (dupReconMap.containsKey(cell.recon.id)) {
|
||||
newRecon = dupReconMap.get(cell.recon.id);
|
||||
newRecon.judgmentBatchSize++;
|
||||
} else {
|
||||
newRecon = cell.recon.dup();
|
||||
newRecon.match = null;
|
||||
newRecon.matchRank = -1;
|
||||
newRecon.judgment = Judgment.None;
|
||||
newRecon.judgmentAction = "mass";
|
||||
newRecon.judgmentBatchSize = 1;
|
||||
|
||||
Cell newCell = new Cell(cell.value, recon);
|
||||
dupReconMap.put(cell.recon.id, newRecon);
|
||||
}
|
||||
|
||||
Cell newCell = new Cell(cell.value, newRecon);
|
||||
|
||||
CellChange cellChange = new CellChange(rowIndex, cellIndex, cell, newCell);
|
||||
cellChanges.add(cellChange);
|
||||
|
@ -151,7 +151,8 @@ public class ReconJudgeSimilarCellsOperation extends EngineDependentMassCellOper
|
||||
return new RowVisitor() {
|
||||
int _cellIndex;
|
||||
List<CellChange> _cellChanges;
|
||||
Map<String, Recon> _sharedRecons = new HashMap<String, Recon>();
|
||||
Recon _sharedNewRecon = null;
|
||||
Map<Long, Recon> _dupReconMap = new HashMap<Long, Recon>();
|
||||
|
||||
public RowVisitor init(int cellIndex, List<CellChange> cellChanges) {
|
||||
_cellIndex = cellIndex;
|
||||
@ -167,20 +168,37 @@ public class ReconJudgeSimilarCellsOperation extends EngineDependentMassCellOper
|
||||
|
||||
Recon recon = null;
|
||||
if (_judgment == Judgment.New && _shareNewTopics) {
|
||||
String s = cell.value.toString();
|
||||
if (_sharedRecons.containsKey(s)) {
|
||||
recon = _sharedRecons.get(s);
|
||||
} else {
|
||||
recon = new Recon();
|
||||
recon.judgment = Judgment.New;
|
||||
|
||||
_sharedRecons.put(s, recon);
|
||||
if (_sharedNewRecon == null) {
|
||||
_sharedNewRecon = new Recon();
|
||||
_sharedNewRecon.judgment = Judgment.New;
|
||||
_sharedNewRecon.judgmentBatchSize = 0;
|
||||
_sharedNewRecon.judgmentAction = "similar";
|
||||
}
|
||||
_sharedNewRecon.judgmentBatchSize++;
|
||||
|
||||
recon = _sharedNewRecon;
|
||||
} else {
|
||||
recon = cell.recon == null ? new Recon() : cell.recon.dup();
|
||||
if (_dupReconMap.containsKey(cell.recon.id)) {
|
||||
recon = _dupReconMap.get(cell.recon.id);
|
||||
recon.judgmentBatchSize++;
|
||||
} else {
|
||||
recon = cell.recon.dup();
|
||||
recon.judgmentBatchSize = 1;
|
||||
recon.matchRank = -1;
|
||||
recon.judgmentAction = "similar";
|
||||
|
||||
if (_judgment == Judgment.Matched) {
|
||||
recon.judgment = Recon.Judgment.Matched;
|
||||
recon.match = _match;
|
||||
|
||||
if (recon.candidates != null) {
|
||||
for (int m = 0; m < recon.candidates.size(); m++) {
|
||||
if (recon.candidates.get(m).topicGUID.equals(_match.topicGUID)) {
|
||||
recon.matchRank = m;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (_judgment == Judgment.New) {
|
||||
recon.judgment = Recon.Judgment.New;
|
||||
recon.match = null;
|
||||
@ -188,6 +206,9 @@ public class ReconJudgeSimilarCellsOperation extends EngineDependentMassCellOper
|
||||
recon.judgment = Recon.Judgment.None;
|
||||
recon.match = null;
|
||||
}
|
||||
|
||||
_dupReconMap.put(cell.recon.id, recon);
|
||||
}
|
||||
}
|
||||
|
||||
Cell newCell = new Cell(cell.value, recon);
|
||||
|
@ -74,7 +74,8 @@ public class ReconMarkNewTopicsOperation extends EngineDependentMassCellOperatio
|
||||
return new RowVisitor() {
|
||||
int cellIndex;
|
||||
List<CellChange> cellChanges;
|
||||
Map<String, Recon> _sharedRecons = new HashMap<String, Recon>();
|
||||
Map<String, Recon> sharedRecons = new HashMap<String, Recon>();
|
||||
Map<Long, Recon> dupReconMap = new HashMap<Long, Recon>();
|
||||
|
||||
public RowVisitor init(int cellIndex, List<CellChange> cellChanges) {
|
||||
this.cellIndex = cellIndex;
|
||||
@ -88,18 +89,32 @@ public class ReconMarkNewTopicsOperation extends EngineDependentMassCellOperatio
|
||||
Recon recon = null;
|
||||
if (_shareNewTopics) {
|
||||
String s = cell.value == null ? "" : cell.value.toString();
|
||||
if (_sharedRecons.containsKey(s)) {
|
||||
recon = _sharedRecons.get(s);
|
||||
if (sharedRecons.containsKey(s)) {
|
||||
recon = sharedRecons.get(s);
|
||||
recon.judgmentBatchSize++;
|
||||
} else {
|
||||
recon = new Recon();
|
||||
recon.judgment = Judgment.New;
|
||||
recon.judgmentBatchSize = 1;
|
||||
recon.judgmentAction = "mass";
|
||||
|
||||
_sharedRecons.put(s, recon);
|
||||
sharedRecons.put(s, recon);
|
||||
}
|
||||
} else {
|
||||
long reconID = cell.recon == null ? 0 : cell.recon.id;
|
||||
if (dupReconMap.containsKey(reconID)) {
|
||||
recon = dupReconMap.get(reconID);
|
||||
recon.judgmentBatchSize++;
|
||||
} else {
|
||||
recon = cell.recon == null ? new Recon() : cell.recon.dup();
|
||||
recon.match = null;
|
||||
recon.matchRank = -1;
|
||||
recon.judgment = Judgment.New;
|
||||
recon.judgmentBatchSize = 1;
|
||||
recon.judgmentAction = "mass";
|
||||
|
||||
dupReconMap.put(reconID, recon);
|
||||
}
|
||||
}
|
||||
|
||||
Cell newCell = new Cell(cell.value, recon);
|
||||
|
@ -1,6 +1,8 @@
|
||||
package com.metaweb.gridworks.operations;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
|
||||
import org.json.JSONException;
|
||||
@ -13,6 +15,7 @@ import com.metaweb.gridworks.model.AbstractOperation;
|
||||
import com.metaweb.gridworks.model.Cell;
|
||||
import com.metaweb.gridworks.model.Column;
|
||||
import com.metaweb.gridworks.model.Project;
|
||||
import com.metaweb.gridworks.model.Recon;
|
||||
import com.metaweb.gridworks.model.ReconCandidate;
|
||||
import com.metaweb.gridworks.model.Row;
|
||||
import com.metaweb.gridworks.model.Recon.Judgment;
|
||||
@ -62,6 +65,7 @@ public class ReconMatchBestCandidatesOperation extends EngineDependentMassCellOp
|
||||
return new RowVisitor() {
|
||||
int cellIndex;
|
||||
List<CellChange> cellChanges;
|
||||
Map<Long, Recon> dupReconMap = new HashMap<Long, Recon>();
|
||||
|
||||
public RowVisitor init(int cellIndex, List<CellChange> cellChanges) {
|
||||
this.cellIndex = cellIndex;
|
||||
@ -75,12 +79,24 @@ public class ReconMatchBestCandidatesOperation extends EngineDependentMassCellOp
|
||||
if (cell != null && cell.recon != null) {
|
||||
ReconCandidate candidate = cell.recon.getBestCandidate();
|
||||
if (candidate != null) {
|
||||
Recon newRecon;
|
||||
if (dupReconMap.containsKey(cell.recon.id)) {
|
||||
newRecon = dupReconMap.get(cell.recon.id);
|
||||
newRecon.judgmentBatchSize++;
|
||||
} else {
|
||||
newRecon = cell.recon.dup();
|
||||
newRecon.judgmentBatchSize = 1;
|
||||
newRecon.match = candidate;
|
||||
newRecon.matchRank = 0;
|
||||
newRecon.judgment = Judgment.Matched;
|
||||
newRecon.judgmentAction = "mass";
|
||||
|
||||
dupReconMap.put(cell.recon.id, newRecon);
|
||||
}
|
||||
Cell newCell = new Cell(
|
||||
cell.value,
|
||||
cell.recon.dup()
|
||||
newRecon
|
||||
);
|
||||
newCell.recon.match = candidate;
|
||||
newCell.recon.judgment = Judgment.Matched;
|
||||
|
||||
CellChange cellChange = new CellChange(rowIndex, cellIndex, cell, newCell);
|
||||
cellChanges.add(cellChange);
|
||||
|
@ -1,6 +1,8 @@
|
||||
package com.metaweb.gridworks.operations;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
|
||||
import org.json.JSONArray;
|
||||
@ -96,6 +98,7 @@ public class ReconMatchSpecificTopicOperation extends EngineDependentMassCellOpe
|
||||
return new RowVisitor() {
|
||||
int cellIndex;
|
||||
List<CellChange> cellChanges;
|
||||
Map<Long, Recon> dupReconMap = new HashMap<Long, Recon>();
|
||||
|
||||
public RowVisitor init(int cellIndex, List<CellChange> cellChanges) {
|
||||
this.cellIndex = cellIndex;
|
||||
@ -104,15 +107,29 @@ public class ReconMatchSpecificTopicOperation extends EngineDependentMassCellOpe
|
||||
}
|
||||
|
||||
public boolean visit(Project project, int rowIndex, Row row, boolean includeContextual, boolean includeDependent) {
|
||||
if (cellIndex < row.cells.size()) {
|
||||
Cell cell = row.cells.get(cellIndex);
|
||||
Cell cell = row.getCell(cellIndex);
|
||||
if (cell != null) {
|
||||
long reconID = cell.recon != null ? cell.recon.id : 0;
|
||||
|
||||
Recon newRecon;
|
||||
if (dupReconMap.containsKey(reconID)) {
|
||||
newRecon = dupReconMap.get(reconID);
|
||||
newRecon.judgmentBatchSize++;
|
||||
} else {
|
||||
newRecon = cell.recon != null ? cell.recon.dup() : new Recon();
|
||||
newRecon.match = match;
|
||||
newRecon.matchRank = -1;
|
||||
newRecon.judgment = Judgment.Matched;
|
||||
newRecon.judgmentAction = "mass";
|
||||
newRecon.judgmentBatchSize = 1;
|
||||
|
||||
dupReconMap.put(reconID, newRecon);
|
||||
}
|
||||
|
||||
Cell newCell = new Cell(
|
||||
cell.value,
|
||||
cell.recon != null ? cell.recon.dup() : new Recon()
|
||||
newRecon
|
||||
);
|
||||
newCell.recon.match = match;
|
||||
newCell.recon.judgment = Judgment.Matched;
|
||||
|
||||
CellChange cellChange = new CellChange(rowIndex, cellIndex, cell, newCell);
|
||||
cellChanges.add(cellChange);
|
||||
|
@ -229,11 +229,13 @@ public class ReconOperation extends EngineDependentOperation {
|
||||
List<Recon> recons = _reconConfig.batchRecon(jobs);
|
||||
for (int j = i; j < to; j++) {
|
||||
Recon recon = recons.get(j - i);
|
||||
if (recon == null) {
|
||||
recon = new Recon();
|
||||
List<ReconEntry> entries = groups.get(j).entries;
|
||||
|
||||
if (recon != null) {
|
||||
recon.judgmentBatchSize = entries.size();
|
||||
}
|
||||
|
||||
for (ReconEntry entry : groups.get(j).entries) {
|
||||
for (ReconEntry entry : entries) {
|
||||
Cell oldCell = entry.cell;
|
||||
Cell newCell = new Cell(oldCell.value, recon);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user