From 699ec160821785271db1cbf41961f9d81d522281 Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Fri, 12 Jan 2018 19:32:45 +0000 Subject: [PATCH] Report duplicate items in DistinctValuesScrutinizer --- .../wikidata/module/langs/translation-en.json | 2 +- .../qa/scrutinizers/DistinctValuesScrutinizer.java | 14 ++++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/extensions/wikidata/module/langs/translation-en.json b/extensions/wikidata/module/langs/translation-en.json index e51280508..c54bcc76b 100644 --- a/extensions/wikidata/module/langs/translation-en.json +++ b/extensions/wikidata/module/langs/translation-en.json @@ -95,7 +95,7 @@ }, "identical-values-for-distinct-valued-property": { "title": "Identical values for {property_entity}", - "body": "This property should have distinct values." + "body": "This property should have distinct values, but the same value was found on {item1_entity} and {item2_entity} for instance." }, "no-edit-generated": { "title": "No edit was generated.", diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/DistinctValuesScrutinizer.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/DistinctValuesScrutinizer.java index 27a26080d..336ccacb7 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/DistinctValuesScrutinizer.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/DistinctValuesScrutinizer.java @@ -20,7 +20,7 @@ import org.wikidata.wdtk.datamodel.interfaces.Value; */ public class DistinctValuesScrutinizer extends StatementScrutinizer { - private Map<PropertyIdValue, Set<Value>> _seenValues; + private Map<PropertyIdValue, Map<Value, EntityIdValue>> _seenValues; public DistinctValuesScrutinizer() { _seenValues = new HashMap<>(); @@ -31,22 +31,24 @@ public class DistinctValuesScrutinizer extends StatementScrutinizer { PropertyIdValue pid = statement.getClaim().getMainSnak().getPropertyId(); if (_fetcher.hasDistinctValues(pid)) { Value mainSnakValue = 
statement.getClaim().getMainSnak().getValue(); - Set<Value> seen = _seenValues.get(pid); + Map<Value, EntityIdValue> seen = _seenValues.get(pid); if (seen == null) { - seen = new HashSet<Value>(); + seen = new HashMap<Value, EntityIdValue>(); _seenValues.put(pid, seen); } - if (seen.contains(mainSnakValue)) { + if (seen.containsKey(mainSnakValue)) { + EntityIdValue otherId = seen.get(mainSnakValue); QAWarning issue = new QAWarning( "identical-values-for-distinct-valued-property", pid.getId(), QAWarning.Severity.IMPORTANT, 1); issue.setProperty("property_entity", pid); - // TODO also report the items on which the property is duplicated + issue.setProperty("item1_entity", entityId); + issue.setProperty("item2_entity", otherId); addIssue(issue); } else { - seen.add(mainSnakValue); + seen.put(mainSnakValue, entityId); } } }