Merge pull request #1919 from OpenRefine/issue1917
Index terms by language code in ItemUpdate.
This commit is contained in:
commit
9a0ee0f568
@ -24,12 +24,15 @@
|
|||||||
package org.openrefine.wikidata.updates;
|
package org.openrefine.wikidata.updates;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
|
import java.util.LinkedList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
import java.util.function.Function;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.jsoup.helper.Validate;
|
import org.jsoup.helper.Validate;
|
||||||
@ -57,9 +60,9 @@ public class ItemUpdate {
|
|||||||
private final ItemIdValue qid;
|
private final ItemIdValue qid;
|
||||||
private final List<Statement> addedStatements;
|
private final List<Statement> addedStatements;
|
||||||
private final Set<Statement> deletedStatements;
|
private final Set<Statement> deletedStatements;
|
||||||
private final Set<MonolingualTextValue> labels;
|
private final Map<String, MonolingualTextValue> labels;
|
||||||
private final Set<MonolingualTextValue> descriptions;
|
private final Map<String, MonolingualTextValue> descriptions;
|
||||||
private final Set<MonolingualTextValue> aliases;
|
private final Map<String, List<MonolingualTextValue>> aliases;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructor.
|
* Constructor.
|
||||||
@ -69,7 +72,7 @@ public class ItemUpdate {
|
|||||||
* new items.
|
* new items.
|
||||||
* @param addedStatements
|
* @param addedStatements
|
||||||
* the statements to add on the item. They should be distinct. They
|
* the statements to add on the item. They should be distinct. They
|
||||||
* are modelled as a list because their insertion order matters.
|
* are modeled as a list because their insertion order matters.
|
||||||
* @param deletedStatements
|
* @param deletedStatements
|
||||||
* the statements to remove from the item
|
* the statements to remove from the item
|
||||||
* @param labels
|
* @param labels
|
||||||
@ -98,18 +101,43 @@ public class ItemUpdate {
|
|||||||
deletedStatements = Collections.emptySet();
|
deletedStatements = Collections.emptySet();
|
||||||
}
|
}
|
||||||
this.deletedStatements = deletedStatements;
|
this.deletedStatements = deletedStatements;
|
||||||
if (labels == null) {
|
this.labels = constructTermMap(labels != null ? labels : Collections.emptyList());
|
||||||
labels = Collections.emptySet();
|
this.descriptions = constructTermMap(descriptions != null ? descriptions : Collections.emptyList());
|
||||||
}
|
this.aliases = constructTermListMap(aliases != null ? aliases : Collections.emptyList());
|
||||||
this.labels = labels;
|
}
|
||||||
if (descriptions == null) {
|
|
||||||
descriptions = Collections.emptySet();
|
/**
|
||||||
}
|
* Private constructor to avoid re-constructing term maps when
|
||||||
this.descriptions = descriptions;
|
* merging two item updates.
|
||||||
if (aliases == null) {
|
*
|
||||||
aliases = Collections.emptySet();
|
* No validation is done on the arguments, they all have to be non-null.
|
||||||
}
|
*
|
||||||
this.aliases = aliases;
|
* @param qid
|
||||||
|
* the subject of the update
|
||||||
|
* @param addedStatements
|
||||||
|
* the statements to add
|
||||||
|
* @param deletedStatements
|
||||||
|
* the statements to delete
|
||||||
|
* @param labels
|
||||||
|
* the labels to add
|
||||||
|
* @param descriptions
|
||||||
|
* the descriptions to add
|
||||||
|
* @param aliases
|
||||||
|
* the aliases to add
|
||||||
|
*/
|
||||||
|
private ItemUpdate(
|
||||||
|
ItemIdValue qid,
|
||||||
|
List<Statement> addedStatements,
|
||||||
|
Set<Statement> deletedStatements,
|
||||||
|
Map<String, MonolingualTextValue> labels,
|
||||||
|
Map<String, MonolingualTextValue> descriptions,
|
||||||
|
Map<String, List<MonolingualTextValue>> aliases) {
|
||||||
|
this.qid = qid;
|
||||||
|
this.addedStatements = addedStatements;
|
||||||
|
this.deletedStatements = deletedStatements;
|
||||||
|
this.labels = labels;
|
||||||
|
this.descriptions = descriptions;
|
||||||
|
this.aliases = aliases;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -144,7 +172,7 @@ public class ItemUpdate {
|
|||||||
*/
|
*/
|
||||||
@JsonProperty("labels")
|
@JsonProperty("labels")
|
||||||
public Set<MonolingualTextValue> getLabels() {
|
public Set<MonolingualTextValue> getLabels() {
|
||||||
return labels;
|
return labels.values().stream().collect(Collectors.toSet());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -152,7 +180,7 @@ public class ItemUpdate {
|
|||||||
*/
|
*/
|
||||||
@JsonProperty("descriptions")
|
@JsonProperty("descriptions")
|
||||||
public Set<MonolingualTextValue> getDescriptions() {
|
public Set<MonolingualTextValue> getDescriptions() {
|
||||||
return descriptions;
|
return descriptions.values().stream().collect(Collectors.toSet());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -160,7 +188,7 @@ public class ItemUpdate {
|
|||||||
*/
|
*/
|
||||||
@JsonProperty("addedAliases")
|
@JsonProperty("addedAliases")
|
||||||
public Set<MonolingualTextValue> getAliases() {
|
public Set<MonolingualTextValue> getAliases() {
|
||||||
return aliases;
|
return aliases.values().stream().flatMap(List::stream).collect(Collectors.toSet());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -181,8 +209,10 @@ public class ItemUpdate {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Merges all the changes in other into this instance. Both updates should have
|
* Merges all the changes in other with this instance. Both updates should have
|
||||||
* the same subject.
|
* the same subject. Changes coming from `other` have priority over changes
|
||||||
|
* from this instance. This instance is not modified, the merged update is returned
|
||||||
|
* instead.
|
||||||
*
|
*
|
||||||
* @param other
|
* @param other
|
||||||
* the other change that should be merged
|
* the other change that should be merged
|
||||||
@ -197,12 +227,25 @@ public class ItemUpdate {
|
|||||||
}
|
}
|
||||||
Set<Statement> newDeletedStatements = new HashSet<>(deletedStatements);
|
Set<Statement> newDeletedStatements = new HashSet<>(deletedStatements);
|
||||||
newDeletedStatements.addAll(other.getDeletedStatements());
|
newDeletedStatements.addAll(other.getDeletedStatements());
|
||||||
Set<MonolingualTextValue> newLabels = new HashSet<>(labels);
|
Map<String,MonolingualTextValue> newLabels = new HashMap<>(labels);
|
||||||
newLabels.addAll(other.getLabels());
|
for(MonolingualTextValue otherLabel : other.getLabels()) {
|
||||||
Set<MonolingualTextValue> newDescriptions = new HashSet<>(descriptions);
|
newLabels.put(otherLabel.getLanguageCode(), otherLabel);
|
||||||
newDescriptions.addAll(other.getDescriptions());
|
}
|
||||||
Set<MonolingualTextValue> newAliases = new HashSet<>(aliases);
|
Map<String,MonolingualTextValue> newDescriptions = new HashMap<>(descriptions);
|
||||||
newAliases.addAll(other.getAliases());
|
for(MonolingualTextValue otherDescription : other.getDescriptions()) {
|
||||||
|
newDescriptions.put(otherDescription.getLanguageCode(), otherDescription);
|
||||||
|
}
|
||||||
|
Map<String,List<MonolingualTextValue>> newAliases = new HashMap<>(aliases);
|
||||||
|
for(MonolingualTextValue alias : other.getAliases()) {
|
||||||
|
List<MonolingualTextValue> aliases = newAliases.get(alias.getLanguageCode());
|
||||||
|
if(aliases == null) {
|
||||||
|
aliases = new LinkedList<>();
|
||||||
|
newAliases.put(alias.getLanguageCode(), aliases);
|
||||||
|
}
|
||||||
|
if(!aliases.contains(alias)) {
|
||||||
|
aliases.add(alias);
|
||||||
|
}
|
||||||
|
}
|
||||||
return new ItemUpdate(qid, newAddedStatements, newDeletedStatements, newLabels, newDescriptions, newAliases);
|
return new ItemUpdate(qid, newAddedStatements, newDeletedStatements, newLabels, newDescriptions, newAliases);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -265,19 +308,17 @@ public class ItemUpdate {
|
|||||||
*/
|
*/
|
||||||
public ItemUpdate normalizeLabelsAndAliases() {
|
public ItemUpdate normalizeLabelsAndAliases() {
|
||||||
// Ensure that we are only adding aliases with labels
|
// Ensure that we are only adding aliases with labels
|
||||||
Set<String> labelLanguages = labels.stream().map(l -> l.getLanguageCode()).collect(Collectors.toSet());
|
|
||||||
|
|
||||||
Set<MonolingualTextValue> filteredAliases = new HashSet<>();
|
Set<MonolingualTextValue> filteredAliases = new HashSet<>();
|
||||||
Set<MonolingualTextValue> newLabels = new HashSet<>(labels);
|
Map<String, MonolingualTextValue> newLabels = new HashMap<>(labels);
|
||||||
for (MonolingualTextValue alias : aliases) {
|
for (MonolingualTextValue alias : getAliases()) {
|
||||||
if (!labelLanguages.contains(alias.getLanguageCode())) {
|
if (!labels.containsKey(alias.getLanguageCode())) {
|
||||||
labelLanguages.add(alias.getLanguageCode());
|
newLabels.put(alias.getLanguageCode(), alias);
|
||||||
newLabels.add(alias);
|
|
||||||
} else {
|
} else {
|
||||||
filteredAliases.add(alias);
|
filteredAliases.add(alias);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return new ItemUpdate(qid, addedStatements, deletedStatements, newLabels, descriptions, filteredAliases);
|
return new ItemUpdate(qid, addedStatements, deletedStatements,
|
||||||
|
newLabels, descriptions, constructTermListMap(filteredAliases));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -288,8 +329,9 @@ public class ItemUpdate {
|
|||||||
ItemUpdate otherUpdate = (ItemUpdate) other;
|
ItemUpdate otherUpdate = (ItemUpdate) other;
|
||||||
return qid.equals(otherUpdate.getItemId()) && addedStatements.equals(otherUpdate.getAddedStatements())
|
return qid.equals(otherUpdate.getItemId()) && addedStatements.equals(otherUpdate.getAddedStatements())
|
||||||
&& deletedStatements.equals(otherUpdate.getDeletedStatements())
|
&& deletedStatements.equals(otherUpdate.getDeletedStatements())
|
||||||
&& labels.equals(otherUpdate.getLabels()) && descriptions.equals(otherUpdate.getDescriptions())
|
&& getLabels().equals(otherUpdate.getLabels())
|
||||||
&& aliases.equals(otherUpdate.getAliases());
|
&& getDescriptions().equals(otherUpdate.getDescriptions())
|
||||||
|
&& getAliases().equals(otherUpdate.getAliases());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -329,5 +371,22 @@ public class ItemUpdate {
|
|||||||
builder.append("\n>");
|
builder.append("\n>");
|
||||||
return builder.toString();
|
return builder.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected Map<String,MonolingualTextValue> constructTermMap(Collection<MonolingualTextValue> mltvs) {
|
||||||
|
return mltvs.stream()
|
||||||
|
.collect(Collectors.toMap(MonolingualTextValue::getLanguageCode, Function.identity()));
|
||||||
|
}
|
||||||
|
|
||||||
|
protected Map<String, List<MonolingualTextValue>> constructTermListMap(Collection<MonolingualTextValue> mltvs) {
|
||||||
|
Map<String,List<MonolingualTextValue>> result = new HashMap<>();
|
||||||
|
for(MonolingualTextValue mltv : mltvs) {
|
||||||
|
List<MonolingualTextValue> values = result.get(mltv.getLanguageCode());
|
||||||
|
if (values == null) {
|
||||||
|
values = new LinkedList<>();
|
||||||
|
result.put(mltv.getLanguageCode(), values);
|
||||||
|
}
|
||||||
|
values.add(mltv);
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -156,4 +156,14 @@ public class ItemUpdateTest {
|
|||||||
.addLabel(aliasFr).build();
|
.addLabel(aliasFr).build();
|
||||||
assertEquals(expectedUpdate, normalized);
|
assertEquals(expectedUpdate, normalized);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMergeLabels() {
|
||||||
|
MonolingualTextValue label1 = Datamodel.makeMonolingualTextValue("first label", "en");
|
||||||
|
MonolingualTextValue label2 = Datamodel.makeMonolingualTextValue("second label", "en");
|
||||||
|
ItemUpdate update1 = new ItemUpdateBuilder(existingSubject).addLabel(label1).build();
|
||||||
|
ItemUpdate update2 = new ItemUpdateBuilder(existingSubject).addLabel(label2).build();
|
||||||
|
ItemUpdate merged = update1.merge(update2);
|
||||||
|
assertEquals(Collections.singleton(label2), merged.getLabels());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user