Merge pull request #1919 from OpenRefine/issue1917

Index terms by language code in ItemUpdate.
This commit is contained in:
Antonin Delpeuch 2019-01-06 15:55:39 +01:00 committed by GitHub
commit 9a0ee0f568
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 106 additions and 37 deletions

View File

@ -24,12 +24,15 @@
package org.openrefine.wikidata.updates; package org.openrefine.wikidata.updates;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections; import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
import java.util.LinkedList;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Set; import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import org.jsoup.helper.Validate; import org.jsoup.helper.Validate;
@ -57,9 +60,9 @@ public class ItemUpdate {
private final ItemIdValue qid; private final ItemIdValue qid;
private final List<Statement> addedStatements; private final List<Statement> addedStatements;
private final Set<Statement> deletedStatements; private final Set<Statement> deletedStatements;
private final Set<MonolingualTextValue> labels; private final Map<String, MonolingualTextValue> labels;
private final Set<MonolingualTextValue> descriptions; private final Map<String, MonolingualTextValue> descriptions;
private final Set<MonolingualTextValue> aliases; private final Map<String, List<MonolingualTextValue>> aliases;
/** /**
* Constructor. * Constructor.
@ -69,7 +72,7 @@ public class ItemUpdate {
* new items. * new items.
* @param addedStatements * @param addedStatements
* the statements to add on the item. They should be distinct. They * the statements to add on the item. They should be distinct. They
* are modelled as a list because their insertion order matters. * are modeled as a list because their insertion order matters.
* @param deletedStatements * @param deletedStatements
* the statements to remove from the item * the statements to remove from the item
* @param labels * @param labels
@ -98,17 +101,42 @@ public class ItemUpdate {
deletedStatements = Collections.emptySet(); deletedStatements = Collections.emptySet();
} }
this.deletedStatements = deletedStatements; this.deletedStatements = deletedStatements;
if (labels == null) { this.labels = constructTermMap(labels != null ? labels : Collections.emptyList());
labels = Collections.emptySet(); this.descriptions = constructTermMap(descriptions != null ? descriptions : Collections.emptyList());
this.aliases = constructTermListMap(aliases != null ? aliases : Collections.emptyList());
} }
/**
 * Private constructor for callers that already hold term maps indexed by
 * language code (for instance when merging two item updates), so that the
 * maps do not have to be built again.
 *
 * The arguments are stored as they are: nothing is validated or copied,
 * so none of them may be null.
 *
 * @param qid
 *            the subject of the update
 * @param addedStatements
 *            the statements to add
 * @param deletedStatements
 *            the statements to delete
 * @param labels
 *            the labels to add, indexed by language code
 * @param descriptions
 *            the descriptions to add, indexed by language code
 * @param aliases
 *            the aliases to add, grouped by language code
 */
private ItemUpdate(
        ItemIdValue qid,
        List<Statement> addedStatements,
        Set<Statement> deletedStatements,
        Map<String, MonolingualTextValue> labels,
        Map<String, MonolingualTextValue> descriptions,
        Map<String, List<MonolingualTextValue>> aliases) {
    // Terms first, then statements, then the subject: the assignments are independent.
    this.labels = labels;
    this.descriptions = descriptions;
    this.aliases = aliases;
    this.addedStatements = addedStatements;
    this.deletedStatements = deletedStatements;
    this.qid = qid;
}
@ -144,7 +172,7 @@ public class ItemUpdate {
*/ */
@JsonProperty("labels") @JsonProperty("labels")
public Set<MonolingualTextValue> getLabels() { public Set<MonolingualTextValue> getLabels() {
return labels; return labels.values().stream().collect(Collectors.toSet());
} }
/** /**
@ -152,7 +180,7 @@ public class ItemUpdate {
*/ */
@JsonProperty("descriptions") @JsonProperty("descriptions")
public Set<MonolingualTextValue> getDescriptions() { public Set<MonolingualTextValue> getDescriptions() {
return descriptions; return descriptions.values().stream().collect(Collectors.toSet());
} }
/** /**
@ -160,7 +188,7 @@ public class ItemUpdate {
*/ */
@JsonProperty("addedAliases") @JsonProperty("addedAliases")
public Set<MonolingualTextValue> getAliases() { public Set<MonolingualTextValue> getAliases() {
return aliases; return aliases.values().stream().flatMap(List::stream).collect(Collectors.toSet());
} }
/** /**
@ -181,8 +209,10 @@ public class ItemUpdate {
} }
/** /**
* Merges all the changes in other into this instance. Both updates should have * Merges all the changes in other with this instance. Both updates should have
* the same subject. * the same subject. Changes coming from `other` have priority over changes
* from this instance. This instance is not modified, the merged update is returned
* instead.
* *
* @param other * @param other
* the other change that should be merged * the other change that should be merged
@ -197,12 +227,25 @@ public class ItemUpdate {
} }
Set<Statement> newDeletedStatements = new HashSet<>(deletedStatements); Set<Statement> newDeletedStatements = new HashSet<>(deletedStatements);
newDeletedStatements.addAll(other.getDeletedStatements()); newDeletedStatements.addAll(other.getDeletedStatements());
Set<MonolingualTextValue> newLabels = new HashSet<>(labels); Map<String,MonolingualTextValue> newLabels = new HashMap<>(labels);
newLabels.addAll(other.getLabels()); for(MonolingualTextValue otherLabel : other.getLabels()) {
Set<MonolingualTextValue> newDescriptions = new HashSet<>(descriptions); newLabels.put(otherLabel.getLanguageCode(), otherLabel);
newDescriptions.addAll(other.getDescriptions()); }
Set<MonolingualTextValue> newAliases = new HashSet<>(aliases); Map<String,MonolingualTextValue> newDescriptions = new HashMap<>(descriptions);
newAliases.addAll(other.getAliases()); for(MonolingualTextValue otherDescription : other.getDescriptions()) {
newDescriptions.put(otherDescription.getLanguageCode(), otherDescription);
}
Map<String,List<MonolingualTextValue>> newAliases = new HashMap<>(aliases);
for(MonolingualTextValue alias : other.getAliases()) {
List<MonolingualTextValue> aliases = newAliases.get(alias.getLanguageCode());
if(aliases == null) {
aliases = new LinkedList<>();
newAliases.put(alias.getLanguageCode(), aliases);
}
if(!aliases.contains(alias)) {
aliases.add(alias);
}
}
return new ItemUpdate(qid, newAddedStatements, newDeletedStatements, newLabels, newDescriptions, newAliases); return new ItemUpdate(qid, newAddedStatements, newDeletedStatements, newLabels, newDescriptions, newAliases);
} }
@ -265,19 +308,17 @@ public class ItemUpdate {
*/ */
public ItemUpdate normalizeLabelsAndAliases() { public ItemUpdate normalizeLabelsAndAliases() {
// Ensure that we are only adding aliases with labels // Ensure that we are only adding aliases with labels
Set<String> labelLanguages = labels.stream().map(l -> l.getLanguageCode()).collect(Collectors.toSet());
Set<MonolingualTextValue> filteredAliases = new HashSet<>(); Set<MonolingualTextValue> filteredAliases = new HashSet<>();
Set<MonolingualTextValue> newLabels = new HashSet<>(labels); Map<String, MonolingualTextValue> newLabels = new HashMap<>(labels);
for (MonolingualTextValue alias : aliases) { for (MonolingualTextValue alias : getAliases()) {
if (!labelLanguages.contains(alias.getLanguageCode())) { if (!labels.containsKey(alias.getLanguageCode())) {
labelLanguages.add(alias.getLanguageCode()); newLabels.put(alias.getLanguageCode(), alias);
newLabels.add(alias);
} else { } else {
filteredAliases.add(alias); filteredAliases.add(alias);
} }
} }
return new ItemUpdate(qid, addedStatements, deletedStatements, newLabels, descriptions, filteredAliases); return new ItemUpdate(qid, addedStatements, deletedStatements,
newLabels, descriptions, constructTermListMap(filteredAliases));
} }
@Override @Override
@ -288,8 +329,9 @@ public class ItemUpdate {
ItemUpdate otherUpdate = (ItemUpdate) other; ItemUpdate otherUpdate = (ItemUpdate) other;
return qid.equals(otherUpdate.getItemId()) && addedStatements.equals(otherUpdate.getAddedStatements()) return qid.equals(otherUpdate.getItemId()) && addedStatements.equals(otherUpdate.getAddedStatements())
&& deletedStatements.equals(otherUpdate.getDeletedStatements()) && deletedStatements.equals(otherUpdate.getDeletedStatements())
&& labels.equals(otherUpdate.getLabels()) && descriptions.equals(otherUpdate.getDescriptions()) && getLabels().equals(otherUpdate.getLabels())
&& aliases.equals(otherUpdate.getAliases()); && getDescriptions().equals(otherUpdate.getDescriptions())
&& getAliases().equals(otherUpdate.getAliases());
} }
@Override @Override
@ -330,4 +372,21 @@ public class ItemUpdate {
return builder.toString(); return builder.toString();
} }
/**
 * Indexes terms by their language code, keeping one term per language.
 *
 * If several terms share a language code, the one encountered last in the
 * iteration order of the collection is kept, mirroring the way a later
 * term replaces an earlier one for the same language when updates are merged.
 *
 * @param mltvs
 *            the terms to index, must not be null
 * @return a map from language code to the term retained for that language
 */
protected Map<String, MonolingualTextValue> constructTermMap(Collection<MonolingualTextValue> mltvs) {
    return mltvs.stream()
            .collect(Collectors.toMap(
                    MonolingualTextValue::getLanguageCode,
                    Function.identity(),
                    // Without a merge function, toMap throws IllegalStateException
                    // as soon as two terms share a language code.
                    (earlier, later) -> later));
}
/**
 * Groups terms by their language code, keeping every term.
 *
 * Within each language the terms appear in the iteration order of the
 * supplied collection; duplicates are not removed.
 *
 * @param mltvs
 *            the terms to group, must not be null
 * @return a map from language code to the list of terms in that language
 */
protected Map<String, List<MonolingualTextValue>> constructTermListMap(Collection<MonolingualTextValue> mltvs) {
    Map<String, List<MonolingualTextValue>> termsByLanguage = new HashMap<>();
    for (MonolingualTextValue term : mltvs) {
        // Create the per-language list on first use, then append to it.
        termsByLanguage
                .computeIfAbsent(term.getLanguageCode(), languageCode -> new LinkedList<>())
                .add(term);
    }
    return termsByLanguage;
}
} }

View File

@ -156,4 +156,14 @@ public class ItemUpdateTest {
.addLabel(aliasFr).build(); .addLabel(aliasFr).build();
assertEquals(expectedUpdate, normalized); assertEquals(expectedUpdate, normalized);
} }
@Test
public void testMergeLabels() {
    // Two different labels in the same language, one per update.
    MonolingualTextValue earlierLabel = Datamodel.makeMonolingualTextValue("first label", "en");
    MonolingualTextValue laterLabel = Datamodel.makeMonolingualTextValue("second label", "en");
    ItemUpdate receiver = new ItemUpdateBuilder(existingSubject).addLabel(earlierLabel).build();
    ItemUpdate argument = new ItemUpdateBuilder(existingSubject).addLabel(laterLabel).build();

    ItemUpdate merged = receiver.merge(argument);

    // Only the label of the update passed to merge() is expected to remain.
    assertEquals(Collections.singleton(laterLabel), merged.getLabels());
}
} }