move check functions in DescriptionScrutinizer to CommonDescriptionScrutinizer
This commit is contained in:
parent
43b32a07e0
commit
be5f6e6402
@ -160,7 +160,7 @@
|
|||||||
"warnings-messages/item-description-too-long/title": "Description is too long",
|
"warnings-messages/item-description-too-long/title": "Description is too long",
|
||||||
"warnings-messages/item-description-too-long/body": "Description ({lang}) such as <span class=\"wb-issue-preformat\">{description}</span> on {example_entity} is too long. Its length is {length}, which is more than {max_length}. Descriptions are not full sentences, but small bits of information. In most cases, the proper length is between two and twelve words. See the <a href=\"https://www.wikidata.org/wiki/Help:Description#Length\" target=\"_blank\">manual</a> for more information.",
|
"warnings-messages/item-description-too-long/body": "Description ({lang}) such as <span class=\"wb-issue-preformat\">{description}</span> on {example_entity} is too long. Its length is {length}, which is more than {max_length}. Descriptions are not full sentences, but small bits of information. In most cases, the proper length is between two and twelve words. See the <a href=\"https://www.wikidata.org/wiki/Help:Description#Length\" target=\"_blank\">manual</a> for more information.",
|
||||||
"warnings-messages/item-description-identical-with-label/title": "Description is identical with label",
|
"warnings-messages/item-description-identical-with-label/title": "Description is identical with label",
|
||||||
"warnings-messages/item-description-identical-with-label/body": "Both the description ({lang}) and the label ({label_lang}) on {example_entity} are <span class=\"wb-issue-preformat\">{description}</span>. Description are expected to be more specific than labels. See the <a href=\"https://www.wikidata.org/wiki/Help:Description\" target=\"_blank\">manual</a> for more information.",",
|
"warnings-messages/item-description-identical-with-label/body": "Both the description ({lang}) and the label ({label_lang}) on {example_entity} are <span class=\"wb-issue-preformat\">{description}</span>. Descriptions are expected to be more specific than labels. See the <a href=\"https://www.wikidata.org/wiki/Help:Description\" target=\"_blank\">manual</a> for more information.",
|
||||||
"warnings-messages/item-description-end-by-punctuation-sign/title": "Description ends by punctuation sign",
|
"warnings-messages/item-description-end-by-punctuation-sign/title": "Description ends by punctuation sign",
|
||||||
"warnings-messages/item-description-end-by-punctuation-sign/body": "Description ({lang}) such as <span class=\"wb-issue-preformat\">{description}</span> on {example_entity} ends by a punctuation sign \"{punctuation_sign}\". Descriptions are not sentences, so the punctuation sign at the end should be avoided. See the <a href=\"https://www.wikidata.org/wiki/Help:Description#Length\" target=\"_blank\">manual</a> for more information.",
|
"warnings-messages/item-description-end-by-punctuation-sign/body": "Description ({lang}) such as <span class=\"wb-issue-preformat\">{description}</span> on {example_entity} ends by a punctuation sign \"{punctuation_sign}\". Descriptions are not sentences, so the punctuation sign at the end should be avoided. See the <a href=\"https://www.wikidata.org/wiki/Help:Description#Length\" target=\"_blank\">manual</a> for more information.",
|
||||||
"warnings-messages/item-description-begin-with-uppercase/title": "Description begins with uppercase letter",
|
"warnings-messages/item-description-begin-with-uppercase/title": "Description begins with uppercase letter",
|
||||||
|
@ -67,6 +67,7 @@ public class EditInspector {
|
|||||||
register(new RestrictedValuesScrutinizer());
|
register(new RestrictedValuesScrutinizer());
|
||||||
register(new EntityTypeScrutinizer());
|
register(new EntityTypeScrutinizer());
|
||||||
register(new CalendarScrutinizer());
|
register(new CalendarScrutinizer());
|
||||||
|
register(new CommonDescriptionScrutinizer());
|
||||||
register(new EnglishDescriptionScrutinizer());
|
register(new EnglishDescriptionScrutinizer());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -0,0 +1,58 @@
|
|||||||
|
package org.openrefine.wikidata.qa.scrutinizers;
|
||||||
|
|
||||||
|
import org.openrefine.wikidata.qa.QAWarning;
|
||||||
|
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue;
|
||||||
|
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author Lu Liu
|
||||||
|
*/
|
||||||
|
public class CommonDescriptionScrutinizer extends DescriptionScrutinizer {
|
||||||
|
|
||||||
|
public static final String descTooLongType = "item-description-too-long";
|
||||||
|
public static final String descIdenticalWithLabel = "item-description-identical-with-label";
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void scrutinize(ItemUpdate update, String descText, String lang) {
|
||||||
|
checkLength(update, descText, lang);
|
||||||
|
checkLabel(update, descText, lang);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Descriptions are not full sentences, but small bits of information.
|
||||||
|
// In most cases, the proper length is between two and twelve words.
|
||||||
|
protected void checkLength(ItemUpdate update, String descText, String lang) {
|
||||||
|
final int maxLength = 250;
|
||||||
|
if (descText.length() > maxLength) {
|
||||||
|
QAWarning issue = new QAWarning(descTooLongType, null, QAWarning.Severity.CRITICAL, 1);
|
||||||
|
issue.setProperty("example_entity", update.getItemId());
|
||||||
|
issue.setProperty("description", descText);
|
||||||
|
issue.setProperty("lang", lang);
|
||||||
|
issue.setProperty("length", descText.length());
|
||||||
|
issue.setProperty("max_length", maxLength);
|
||||||
|
addIssue(issue);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Description are expected to be more specific than labels.
|
||||||
|
protected void checkLabel(ItemUpdate update, String descText, String lang) {
|
||||||
|
Set<MonolingualTextValue> labels = update.getLabels();
|
||||||
|
labels.addAll(update.getLabelsIfNew()); // merge
|
||||||
|
for (MonolingualTextValue label : labels) {
|
||||||
|
String labelText = label.getText();
|
||||||
|
if (labelText == null) continue;
|
||||||
|
labelText = labelText.trim();
|
||||||
|
if (labelText.equals(descText)) {
|
||||||
|
QAWarning issue = new QAWarning(descIdenticalWithLabel, null, QAWarning.Severity.WARNING, 1);
|
||||||
|
issue.setProperty("example_entity", update.getItemId());
|
||||||
|
issue.setProperty("description", descText);
|
||||||
|
issue.setProperty("lang", lang);
|
||||||
|
issue.setProperty("label_lang", label.getLanguageCode());
|
||||||
|
addIssue(issue);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -7,22 +7,10 @@ import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue;
|
|||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A scrutinizer that checks the description of an item.
|
|
||||||
* <p>
|
|
||||||
* This abstract scrutinizer does the following checks:
|
|
||||||
* 1. is the description too long
|
|
||||||
* 2. is the description identical with the label in the same language
|
|
||||||
* <p>
|
|
||||||
* We can easily implement a language-specific description scrutinizer
|
|
||||||
* by extending this class.
|
|
||||||
*
|
|
||||||
* @author Lu Liu
|
* @author Lu Liu
|
||||||
*/
|
*/
|
||||||
public abstract class DescriptionScrutinizer extends EditScrutinizer {
|
public abstract class DescriptionScrutinizer extends EditScrutinizer {
|
||||||
|
|
||||||
public static final String descTooLongType = "item-description-too-long";
|
|
||||||
public static final String descIdenticalWithLabel = "item-description-identical-with-label";
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void scrutinize(ItemUpdate update) {
|
public void scrutinize(ItemUpdate update) {
|
||||||
Set<MonolingualTextValue> descriptions = update.getDescriptions();
|
Set<MonolingualTextValue> descriptions = update.getDescriptions();
|
||||||
@ -33,50 +21,10 @@ public abstract class DescriptionScrutinizer extends EditScrutinizer {
|
|||||||
descText = descText.trim();
|
descText = descText.trim();
|
||||||
if (descText.length() == 0) continue; // avoid NullPointerException
|
if (descText.length() == 0) continue; // avoid NullPointerException
|
||||||
|
|
||||||
String lang = description.getLanguageCode();
|
scrutinize(update, descText, description.getLanguageCode());
|
||||||
|
|
||||||
checkLength(update, descText, lang);
|
|
||||||
checkLabel(update, descText, lang);
|
|
||||||
|
|
||||||
scrutinize(update, descText, lang);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public abstract void scrutinize(ItemUpdate update, String descText, String lang);
|
public abstract void scrutinize(ItemUpdate update, String descText, String lang);
|
||||||
|
|
||||||
// Descriptions are not full sentences, but small bits of information.
|
|
||||||
// In most cases, the proper length is between two and twelve words.
|
|
||||||
protected void checkLength(ItemUpdate update, String descText, String lang) {
|
|
||||||
final int maxLength = 250;
|
|
||||||
if (descText.length() > maxLength) {
|
|
||||||
QAWarning issue = new QAWarning(descTooLongType, null, QAWarning.Severity.CRITICAL, 1);
|
|
||||||
issue.setProperty("example_entity", update.getItemId());
|
|
||||||
issue.setProperty("description", descText);
|
|
||||||
issue.setProperty("lang", lang);
|
|
||||||
issue.setProperty("length", descText.length());
|
|
||||||
issue.setProperty("max_length", maxLength);
|
|
||||||
addIssue(issue);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Description are expected to be more specific than labels.
|
|
||||||
protected void checkLabel(ItemUpdate update, String descText, String lang) {
|
|
||||||
Set<MonolingualTextValue> labels = update.getLabels();
|
|
||||||
labels.addAll(update.getLabelsIfNew()); // merge
|
|
||||||
for (MonolingualTextValue label : labels) {
|
|
||||||
String labelText = label.getText();
|
|
||||||
if (labelText == null) continue;
|
|
||||||
labelText = labelText.trim();
|
|
||||||
if (labelText.equals(descText)) {
|
|
||||||
QAWarning issue = new QAWarning(descIdenticalWithLabel, null, QAWarning.Severity.WARNING, 1);
|
|
||||||
issue.setProperty("example_entity", update.getItemId());
|
|
||||||
issue.setProperty("description", descText);
|
|
||||||
issue.setProperty("lang", lang);
|
|
||||||
issue.setProperty("label_lang", label.getLanguageCode());
|
|
||||||
addIssue(issue);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,74 @@
|
|||||||
|
package org.openrefine.wikidata.qa.scrutinizers;
|
||||||
|
|
||||||
|
import org.openrefine.wikidata.testing.TestingData;
|
||||||
|
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||||
|
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
|
||||||
|
|
||||||
|
public class CommonDescriptionScrutinizerTest extends ScrutinizerTest {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public EditScrutinizer getScrutinizer() {
|
||||||
|
return new CommonDescriptionScrutinizer();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGoodDesc() {
|
||||||
|
String description = "good description";
|
||||||
|
ItemUpdate update = new ItemUpdateBuilder(TestingData.newIdA)
|
||||||
|
.addDescription(Datamodel.makeMonolingualTextValue(description, "en"), true)
|
||||||
|
.build();
|
||||||
|
scrutinize(update);
|
||||||
|
assertNoWarningRaised();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testTooLong() {
|
||||||
|
String description = "long description long description long description long description "
|
||||||
|
+ "long description long description long description long description "
|
||||||
|
+ "long description long description long description long description "
|
||||||
|
+ "long description long description long description long description";
|
||||||
|
ItemUpdate update = new ItemUpdateBuilder(TestingData.newIdA)
|
||||||
|
.addDescription(Datamodel.makeMonolingualTextValue(description, "en"), true)
|
||||||
|
.build();
|
||||||
|
scrutinize(update);
|
||||||
|
assertWarningsRaised(CommonDescriptionScrutinizer.descTooLongType);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testIdenticalWithLabel() {
|
||||||
|
String description = "identical with label";
|
||||||
|
ItemUpdate update = new ItemUpdateBuilder(TestingData.newIdA)
|
||||||
|
.addDescription(Datamodel.makeMonolingualTextValue(description, "en"), true)
|
||||||
|
.addLabel(Datamodel.makeMonolingualTextValue(description, "en"), true)
|
||||||
|
.build();
|
||||||
|
scrutinize(update);
|
||||||
|
assertWarningsRaised(CommonDescriptionScrutinizer.descIdenticalWithLabel);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testIdenticalWithLabel1() {
|
||||||
|
String description = "identical with label";
|
||||||
|
ItemUpdate update = new ItemUpdateBuilder(TestingData.newIdA)
|
||||||
|
.addDescription(Datamodel.makeMonolingualTextValue(description, "en"), true)
|
||||||
|
.addLabel(Datamodel.makeMonolingualTextValue("bonjour", "fr"), true)
|
||||||
|
.build();
|
||||||
|
scrutinize(update);
|
||||||
|
assertNoWarningRaised();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testAwfulDesc() {
|
||||||
|
String description = "long description long description long description long description "
|
||||||
|
+ "long description long description long description long description "
|
||||||
|
+ "long description long description long description long description "
|
||||||
|
+ "long description long description long description long description";
|
||||||
|
ItemUpdate update = new ItemUpdateBuilder(TestingData.newIdA)
|
||||||
|
.addDescription(Datamodel.makeMonolingualTextValue(description, "en"), true)
|
||||||
|
.addLabel(Datamodel.makeMonolingualTextValue(description, "en"), true)
|
||||||
|
.build();
|
||||||
|
scrutinize(update);
|
||||||
|
assertWarningsRaised(CommonDescriptionScrutinizer.descTooLongType, CommonDescriptionScrutinizer.descIdenticalWithLabel);
|
||||||
|
}
|
||||||
|
}
|
@ -23,41 +23,6 @@ public class EnglishDescriptionScrutinizerTest extends ScrutinizerTest {
|
|||||||
assertNoWarningRaised();
|
assertNoWarningRaised();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testTooLong() {
|
|
||||||
String description = "long description long description long description long description "
|
|
||||||
+ "long description long description long description long description "
|
|
||||||
+ "long description long description long description long description "
|
|
||||||
+ "long description long description long description long description ";
|
|
||||||
ItemUpdate update = new ItemUpdateBuilder(TestingData.newIdA)
|
|
||||||
.addDescription(Datamodel.makeMonolingualTextValue(description, "en"), true)
|
|
||||||
.build();
|
|
||||||
scrutinize(update);
|
|
||||||
assertWarningsRaised(EnglishDescriptionScrutinizer.descTooLongType);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testIdenticalWithLabel1() {
|
|
||||||
String description = "identical with label";
|
|
||||||
ItemUpdate update = new ItemUpdateBuilder(TestingData.newIdA)
|
|
||||||
.addDescription(Datamodel.makeMonolingualTextValue(description, "en"), true)
|
|
||||||
.addLabel(Datamodel.makeMonolingualTextValue("bonjour", "fr"), true)
|
|
||||||
.build();
|
|
||||||
scrutinize(update);
|
|
||||||
assertNoWarningRaised();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testIdenticalWithLabel() {
|
|
||||||
String description = "identical with label";
|
|
||||||
ItemUpdate update = new ItemUpdateBuilder(TestingData.newIdA)
|
|
||||||
.addDescription(Datamodel.makeMonolingualTextValue(description, "en"), true)
|
|
||||||
.addLabel(Datamodel.makeMonolingualTextValue(description, "en"), true)
|
|
||||||
.build();
|
|
||||||
scrutinize(update);
|
|
||||||
assertWarningsRaised(EnglishDescriptionScrutinizer.descIdenticalWithLabel);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testEndWithPunctuationSign() {
|
public void testEndWithPunctuationSign() {
|
||||||
String description = "description with punctuationSign.";
|
String description = "description with punctuationSign.";
|
||||||
@ -90,16 +55,13 @@ public class EnglishDescriptionScrutinizerTest extends ScrutinizerTest {
|
|||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testAwfulDesc() {
|
public void testAwfulDesc() {
|
||||||
String description = "An awful description An awful description An awful description An awful description"
|
String description = "An awful description.";
|
||||||
+ "An awful description An awful description An awful description An awful description"
|
|
||||||
+ "An awful description An awful description An awful description An awful description"
|
|
||||||
+ "An awful description An awful description An awful description An awful description!";
|
|
||||||
ItemUpdate update = new ItemUpdateBuilder(TestingData.newIdA)
|
ItemUpdate update = new ItemUpdateBuilder(TestingData.newIdA)
|
||||||
.addDescription(Datamodel.makeMonolingualTextValue(description, "en"), true)
|
.addDescription(Datamodel.makeMonolingualTextValue(description, "en"), true)
|
||||||
.addLabel(Datamodel.makeMonolingualTextValue(description, "en"), true)
|
.addLabel(Datamodel.makeMonolingualTextValue(description, "en"), true)
|
||||||
.build();
|
.build();
|
||||||
scrutinize(update);
|
scrutinize(update);
|
||||||
assertWarningsRaised(EnglishDescriptionScrutinizer.descTooLongType, EnglishDescriptionScrutinizer.descEndsByPunctuationSign,
|
assertWarningsRaised(EnglishDescriptionScrutinizer.descEndsByPunctuationSign,
|
||||||
EnglishDescriptionScrutinizer.descBeginWithUppercase, EnglishDescriptionScrutinizer.descBeginWithArticle, EnglishDescriptionScrutinizer.descIdenticalWithLabel);
|
EnglishDescriptionScrutinizer.descBeginWithUppercase, EnglishDescriptionScrutinizer.descBeginWithArticle);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user