Merge pull request #2349 from afkbrb/issue-2103-description-validator
Issue 2103 description validator
This commit is contained in:
commit
61ab6401df
@ -156,5 +156,15 @@
|
||||
"warnings-messages/invalid-entity-type/title": "{property_entity} used on items",
|
||||
"warnings-messages/invalid-entity-type/body": "Uses of {property_entity} on items such as {example_entity} are invalid.",
|
||||
"warnings-messages/early-gregorian-date/title": "Early dates in the Gregorian calendar",
|
||||
"warnings-messages/early-gregorian-date/body": "Dates earlier than October 1582 (such as in year {example_year}) are unlikely to be expressed using the Gregorian calendar. See the <a href=\"https://www.wikidata.org/wiki/Wikidata:Tools/OpenRefine/Editing/Schema_alignment#Dates\" target=\"_blank\">manual</a> to specify the appropriate calendar for your dates."
|
||||
"warnings-messages/early-gregorian-date/body": "Dates earlier than October 1582 (such as in year {example_year}) are unlikely to be expressed using the Gregorian calendar. See the <a href=\"https://www.wikidata.org/wiki/Wikidata:Tools/OpenRefine/Editing/Schema_alignment#Dates\" target=\"_blank\">manual</a> to specify the appropriate calendar for your dates.",
|
||||
"warnings-messages/item-description-too-long/title": "Description is too long",
|
||||
"warnings-messages/item-description-too-long/body": "Description ({lang}) such as <span class=\"wb-issue-preformat\">{description}</span> on {example_entity} is too long. Its length is {length}, which is more than {max_length}. Descriptions are not full sentences, but small bits of information. In most cases, the proper length is between two and twelve words. See the <a href=\"https://www.wikidata.org/wiki/Help:Description#Length\" target=\"_blank\">manual</a> for more information.",
|
||||
"warnings-messages/item-description-identical-with-label/title": "Description is identical with label",
|
||||
"warnings-messages/item-description-identical-with-label/body": "Both the description ({lang}) and the label ({label_lang}) on {example_entity} are <span class=\"wb-issue-preformat\">{description}</span>. Descriptions are expected to be more specific than labels. See the <a href=\"https://www.wikidata.org/wiki/Help:Description\" target=\"_blank\">manual</a> for more information.",
|
||||
"warnings-messages/item-description-end-by-punctuation-sign/title": "Description ends with a punctuation sign",
|
||||
"warnings-messages/item-description-end-by-punctuation-sign/body": "Description ({lang}) such as <span class=\"wb-issue-preformat\">{description}</span> on {example_entity} ends with a punctuation sign \"{punctuation_sign}\". Descriptions are not sentences, so the punctuation sign at the end should be avoided. See the <a href=\"https://www.wikidata.org/wiki/Help:Description#Length\" target=\"_blank\">manual</a> for more information.",
|
||||
"warnings-messages/item-description-begin-with-uppercase/title": "Description begins with uppercase letter",
|
||||
"warnings-messages/item-description-begin-with-uppercase/body": "Description ({lang}) such as <span class=\"wb-issue-preformat\">{description}</span> on {example_entity} begins with uppercase letter \"{uppercase_letter}\". Descriptions begin with a lowercase letter except when uppercase would normally be required or expected. See the <a href=\"https://www.wikidata.org/wiki/Help:Description#Capitalization\" target=\"_blank\">manual</a> for more information.",
|
||||
"warnings-messages/item-description-begin-with-article/title": "Description begins with article (\"a\", \"an\" or \"the\")",
|
||||
"warnings-messages/item-description-begin-with-article/body": "Description ({lang}) such as <span class=\"wb-issue-preformat\">{description}</span> on {example_entity} begins with article \"{article}\". Descriptions should not normally begin with initial articles (\"a\", \"an\", \"the\"). See the <a href=\"https://www.wikidata.org/wiki/Help:Description#No_initial_articles_(a,_an,_the)\" target=\"_blank\">manual</a> for more information."
|
||||
}
|
||||
|
@ -28,22 +28,7 @@ import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.openrefine.wikidata.qa.scrutinizers.CalendarScrutinizer;
|
||||
import org.openrefine.wikidata.qa.scrutinizers.DistinctValuesScrutinizer;
|
||||
import org.openrefine.wikidata.qa.scrutinizers.EditScrutinizer;
|
||||
import org.openrefine.wikidata.qa.scrutinizers.EntityTypeScrutinizer;
|
||||
import org.openrefine.wikidata.qa.scrutinizers.FormatScrutinizer;
|
||||
import org.openrefine.wikidata.qa.scrutinizers.InverseConstraintScrutinizer;
|
||||
import org.openrefine.wikidata.qa.scrutinizers.NewItemScrutinizer;
|
||||
import org.openrefine.wikidata.qa.scrutinizers.NoEditsMadeScrutinizer;
|
||||
import org.openrefine.wikidata.qa.scrutinizers.QualifierCompatibilityScrutinizer;
|
||||
import org.openrefine.wikidata.qa.scrutinizers.QuantityScrutinizer;
|
||||
import org.openrefine.wikidata.qa.scrutinizers.RestrictedPositionScrutinizer;
|
||||
import org.openrefine.wikidata.qa.scrutinizers.RestrictedValuesScrutinizer;
|
||||
import org.openrefine.wikidata.qa.scrutinizers.SelfReferentialScrutinizer;
|
||||
import org.openrefine.wikidata.qa.scrutinizers.SingleValueScrutinizer;
|
||||
import org.openrefine.wikidata.qa.scrutinizers.UnsourcedScrutinizer;
|
||||
import org.openrefine.wikidata.qa.scrutinizers.WhitespaceScrutinizer;
|
||||
import org.openrefine.wikidata.qa.scrutinizers.*;
|
||||
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||
import org.openrefine.wikidata.updates.scheduler.WikibaseAPIUpdateScheduler;
|
||||
import org.openrefine.wikidata.utils.EntityCache;
|
||||
@ -82,6 +67,8 @@ public class EditInspector {
|
||||
register(new RestrictedValuesScrutinizer());
|
||||
register(new EntityTypeScrutinizer());
|
||||
register(new CalendarScrutinizer());
|
||||
register(new CommonDescriptionScrutinizer());
|
||||
register(new EnglishDescriptionScrutinizer());
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -0,0 +1,58 @@
|
||||
package org.openrefine.wikidata.qa.scrutinizers;
|
||||
|
||||
import org.openrefine.wikidata.qa.QAWarning;
|
||||
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue;
|
||||
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* @author Lu Liu
|
||||
*/
|
||||
public class CommonDescriptionScrutinizer extends DescriptionScrutinizer {
|
||||
|
||||
public static final String descTooLongType = "item-description-too-long";
|
||||
public static final String descIdenticalWithLabel = "item-description-identical-with-label";
|
||||
|
||||
@Override
|
||||
public void scrutinize(ItemUpdate update, String descText, String lang) {
|
||||
checkLength(update, descText, lang);
|
||||
checkLabel(update, descText, lang);
|
||||
}
|
||||
|
||||
// Descriptions are not full sentences, but small bits of information.
|
||||
// In most cases, the proper length is between two and twelve words.
|
||||
protected void checkLength(ItemUpdate update, String descText, String lang) {
|
||||
final int maxLength = 250;
|
||||
if (descText.length() > maxLength) {
|
||||
QAWarning issue = new QAWarning(descTooLongType, null, QAWarning.Severity.CRITICAL, 1);
|
||||
issue.setProperty("example_entity", update.getItemId());
|
||||
issue.setProperty("description", descText);
|
||||
issue.setProperty("lang", lang);
|
||||
issue.setProperty("length", descText.length());
|
||||
issue.setProperty("max_length", maxLength);
|
||||
addIssue(issue);
|
||||
}
|
||||
}
|
||||
|
||||
// Description are expected to be more specific than labels.
|
||||
protected void checkLabel(ItemUpdate update, String descText, String lang) {
|
||||
Set<MonolingualTextValue> labels = update.getLabels();
|
||||
labels.addAll(update.getLabelsIfNew()); // merge
|
||||
for (MonolingualTextValue label : labels) {
|
||||
String labelText = label.getText();
|
||||
if (labelText == null) continue;
|
||||
labelText = labelText.trim();
|
||||
if (labelText.equals(descText)) {
|
||||
QAWarning issue = new QAWarning(descIdenticalWithLabel, null, QAWarning.Severity.WARNING, 1);
|
||||
issue.setProperty("example_entity", update.getItemId());
|
||||
issue.setProperty("description", descText);
|
||||
issue.setProperty("lang", lang);
|
||||
issue.setProperty("label_lang", label.getLanguageCode());
|
||||
addIssue(issue);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,30 @@
|
||||
package org.openrefine.wikidata.qa.scrutinizers;
|
||||
|
||||
import org.openrefine.wikidata.qa.QAWarning;
|
||||
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue;
|
||||
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* @author Lu Liu
|
||||
*/
|
||||
public abstract class DescriptionScrutinizer extends EditScrutinizer {
|
||||
|
||||
@Override
|
||||
public void scrutinize(ItemUpdate update) {
|
||||
Set<MonolingualTextValue> descriptions = update.getDescriptions();
|
||||
descriptions.addAll(update.getDescriptionsIfNew()); // merge
|
||||
for (MonolingualTextValue description : descriptions) {
|
||||
String descText = description.getText();
|
||||
if (descText == null) continue;
|
||||
descText = descText.trim();
|
||||
if (descText.length() == 0) continue; // avoid NullPointerException
|
||||
|
||||
scrutinize(update, descText, description.getLanguageCode());
|
||||
}
|
||||
}
|
||||
|
||||
public abstract void scrutinize(ItemUpdate update, String descText, String lang);
|
||||
|
||||
}
|
@ -0,0 +1,72 @@
|
||||
package org.openrefine.wikidata.qa.scrutinizers;
|
||||
|
||||
import org.openrefine.wikidata.qa.QAWarning;
|
||||
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||
|
||||
/**
|
||||
* @author Lu Liu
|
||||
*/
|
||||
public class EnglishDescriptionScrutinizer extends DescriptionScrutinizer {
|
||||
|
||||
public static final String descEndsByPunctuationSign = "item-description-end-by-punctuation-sign";
|
||||
public static final String descBeginWithUppercase = "item-description-begin-with-uppercase";
|
||||
public static final String descBeginWithArticle = "item-description-begin-with-article";
|
||||
|
||||
private static final String LANG = "en";
|
||||
|
||||
@Override
|
||||
public void scrutinize(ItemUpdate update, String descText, String lang) {
|
||||
if (!LANG.equalsIgnoreCase(lang)) return;
|
||||
|
||||
checkPunctuationSign(update, descText);
|
||||
checkUppercase(update, descText);
|
||||
checkArticle(update, descText);
|
||||
}
|
||||
|
||||
// Description are not sentences, so the punctuation sign at the end should be avoided.
|
||||
protected void checkPunctuationSign(ItemUpdate update, String descText) {
|
||||
assert descText.length() > 0;
|
||||
final String punctuationSigns = ".?!;:,'\"";
|
||||
|
||||
char last = descText.charAt(descText.length() - 1);
|
||||
if (punctuationSigns.indexOf(last) != -1) {
|
||||
QAWarning issue = new QAWarning(descEndsByPunctuationSign, null, QAWarning.Severity.WARNING, 1);
|
||||
issue.setProperty("example_entity", update.getItemId());
|
||||
issue.setProperty("description", descText);
|
||||
issue.setProperty("lang", LANG);
|
||||
issue.setProperty("punctuation_sign", last);
|
||||
addIssue(issue);
|
||||
}
|
||||
}
|
||||
|
||||
// Descriptions begin with a lowercase letter except when uppercase would normally be required or expected.
|
||||
protected void checkUppercase(ItemUpdate update, String descText) {
|
||||
assert descText.length() > 0;
|
||||
|
||||
char first = descText.charAt(0);
|
||||
if ('A' <= first && first <= 'Z') {
|
||||
QAWarning issue = new QAWarning(descBeginWithUppercase, null, QAWarning.Severity.INFO, 1);
|
||||
issue.setProperty("example_entity", update.getItemId());
|
||||
issue.setProperty("description", descText);
|
||||
issue.setProperty("lang", LANG);
|
||||
issue.setProperty("uppercase_letter", first);
|
||||
addIssue(issue);
|
||||
}
|
||||
}
|
||||
|
||||
// Descriptions should not normally begin with initial articles ("a", "an", "the").
|
||||
protected void checkArticle(ItemUpdate update, String descText) {
|
||||
assert descText.length() > 0;
|
||||
|
||||
String firstWord = descText.split("\\s")[0].toLowerCase();
|
||||
if ("a".equals(firstWord) || "an".equals(firstWord) || "the".equals(firstWord)) {
|
||||
QAWarning issue = new QAWarning(descBeginWithArticle, null, QAWarning.Severity.WARNING, 1);
|
||||
issue.setProperty("example_entity", update.getItemId());
|
||||
issue.setProperty("description", descText);
|
||||
issue.setProperty("lang", LANG);
|
||||
issue.setProperty("article", firstWord);
|
||||
addIssue(issue);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,74 @@
|
||||
package org.openrefine.wikidata.qa.scrutinizers;
|
||||
|
||||
import org.openrefine.wikidata.testing.TestingData;
|
||||
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
|
||||
import org.testng.annotations.Test;
|
||||
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
|
||||
|
||||
public class CommonDescriptionScrutinizerTest extends ScrutinizerTest {
|
||||
|
||||
@Override
|
||||
public EditScrutinizer getScrutinizer() {
|
||||
return new CommonDescriptionScrutinizer();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGoodDesc() {
|
||||
String description = "good description";
|
||||
ItemUpdate update = new ItemUpdateBuilder(TestingData.newIdA)
|
||||
.addDescription(Datamodel.makeMonolingualTextValue(description, "en"), true)
|
||||
.build();
|
||||
scrutinize(update);
|
||||
assertNoWarningRaised();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTooLong() {
|
||||
String description = "long description long description long description long description "
|
||||
+ "long description long description long description long description "
|
||||
+ "long description long description long description long description "
|
||||
+ "long description long description long description long description";
|
||||
ItemUpdate update = new ItemUpdateBuilder(TestingData.newIdA)
|
||||
.addDescription(Datamodel.makeMonolingualTextValue(description, "en"), true)
|
||||
.build();
|
||||
scrutinize(update);
|
||||
assertWarningsRaised(CommonDescriptionScrutinizer.descTooLongType);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIdenticalWithLabel() {
|
||||
String description = "identical with label";
|
||||
ItemUpdate update = new ItemUpdateBuilder(TestingData.newIdA)
|
||||
.addDescription(Datamodel.makeMonolingualTextValue(description, "en"), true)
|
||||
.addLabel(Datamodel.makeMonolingualTextValue(description, "en"), true)
|
||||
.build();
|
||||
scrutinize(update);
|
||||
assertWarningsRaised(CommonDescriptionScrutinizer.descIdenticalWithLabel);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIdenticalWithLabel1() {
|
||||
String description = "identical with label";
|
||||
ItemUpdate update = new ItemUpdateBuilder(TestingData.newIdA)
|
||||
.addDescription(Datamodel.makeMonolingualTextValue(description, "en"), true)
|
||||
.addLabel(Datamodel.makeMonolingualTextValue("bonjour", "fr"), true)
|
||||
.build();
|
||||
scrutinize(update);
|
||||
assertNoWarningRaised();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAwfulDesc() {
|
||||
String description = "long description long description long description long description "
|
||||
+ "long description long description long description long description "
|
||||
+ "long description long description long description long description "
|
||||
+ "long description long description long description long description";
|
||||
ItemUpdate update = new ItemUpdateBuilder(TestingData.newIdA)
|
||||
.addDescription(Datamodel.makeMonolingualTextValue(description, "en"), true)
|
||||
.addLabel(Datamodel.makeMonolingualTextValue(description, "en"), true)
|
||||
.build();
|
||||
scrutinize(update);
|
||||
assertWarningsRaised(CommonDescriptionScrutinizer.descTooLongType, CommonDescriptionScrutinizer.descIdenticalWithLabel);
|
||||
}
|
||||
}
|
@ -0,0 +1,67 @@
|
||||
package org.openrefine.wikidata.qa.scrutinizers;
|
||||
|
||||
import org.openrefine.wikidata.testing.TestingData;
|
||||
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
|
||||
import org.testng.annotations.Test;
|
||||
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
|
||||
|
||||
public class EnglishDescriptionScrutinizerTest extends ScrutinizerTest {
|
||||
|
||||
@Override
|
||||
public EditScrutinizer getScrutinizer() {
|
||||
return new EnglishDescriptionScrutinizer();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGoodDesc() {
|
||||
String description = "good description";
|
||||
ItemUpdate update = new ItemUpdateBuilder(TestingData.newIdA)
|
||||
.addDescription(Datamodel.makeMonolingualTextValue(description, "en"), true)
|
||||
.build();
|
||||
scrutinize(update);
|
||||
assertNoWarningRaised();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testEndWithPunctuationSign() {
|
||||
String description = "description with punctuationSign.";
|
||||
ItemUpdate update = new ItemUpdateBuilder(TestingData.newIdA)
|
||||
.addDescription(Datamodel.makeMonolingualTextValue(description, "en"), false)
|
||||
.build();
|
||||
scrutinize(update);
|
||||
assertWarningsRaised(EnglishDescriptionScrutinizer.descEndsByPunctuationSign);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBeginWithUppercase() {
|
||||
String description = "Begin with uppercase";
|
||||
ItemUpdate update = new ItemUpdateBuilder(TestingData.newIdA)
|
||||
.addDescription(Datamodel.makeMonolingualTextValue(description, "en"), true)
|
||||
.build();
|
||||
scrutinize(update);
|
||||
assertWarningsRaised(EnglishDescriptionScrutinizer.descBeginWithUppercase);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBeginWithArticle() {
|
||||
String description = "an article test";
|
||||
ItemUpdate update = new ItemUpdateBuilder(TestingData.newIdA)
|
||||
.addDescription(Datamodel.makeMonolingualTextValue(description, "en"), false)
|
||||
.build();
|
||||
scrutinize(update);
|
||||
assertWarningsRaised(EnglishDescriptionScrutinizer.descBeginWithArticle);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAwfulDesc() {
|
||||
String description = "An awful description.";
|
||||
ItemUpdate update = new ItemUpdateBuilder(TestingData.newIdA)
|
||||
.addDescription(Datamodel.makeMonolingualTextValue(description, "en"), true)
|
||||
.addLabel(Datamodel.makeMonolingualTextValue(description, "en"), true)
|
||||
.build();
|
||||
scrutinize(update);
|
||||
assertWarningsRaised(EnglishDescriptionScrutinizer.descEndsByPunctuationSign,
|
||||
EnglishDescriptionScrutinizer.descBeginWithUppercase, EnglishDescriptionScrutinizer.descBeginWithArticle);
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user