From 4252de06ebf97d01a39f15aad59e6f90341937a0 Mon Sep 17 00:00:00 2001 From: afkbrb <2428391347@qq.com> Date: Sun, 1 Mar 2020 22:03:15 +0800 Subject: [PATCH 01/11] add DescriptionScrutinizer --- .../scrutinizers/DescriptionScrutinizer.java | 98 +++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100644 extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/DescriptionScrutinizer.java diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/DescriptionScrutinizer.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/DescriptionScrutinizer.java new file mode 100644 index 000000000..9fd79627b --- /dev/null +++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/DescriptionScrutinizer.java @@ -0,0 +1,98 @@ +package org.openrefine.wikidata.qa.scrutinizers; + +import org.openrefine.wikidata.qa.QAWarning; +import org.openrefine.wikidata.updates.ItemUpdate; +import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue; + +import java.util.Set; + +/** + * A scrutinizer that checks the description of an item. + * + * The checks work well for English. + * It's impossible to cover all languages, + * but since most edited information is in English, + * merely focusing on English here should be enough. + * + * To be more specific, it does the following checks: + * 1. is a description is too long + * 2. does a description end by punctuation signs + * 3. does a description begin with a uppercase letter + * 4. does a description begins with article ("a", "an" or "the") + * 5. 
is the description identical with corresponding label + * + * @author Lu Liu + */ +public class DescriptionScrutinizer extends EditScrutinizer { + + public static final String descTooLongType = "item-description-too-long"; + public static final String descEndsByPunctuationSign = "item-description-end-by-punctuation-sign"; + public static final String descBeginWithUppercase = "item-description-begin-with-uppercase"; + public static final String descBeginWithArticle = "item-description-begin-with-article"; + public static final String descIdenticalWithLabel = "item-description-identical-with-label"; + + private static final int descLengthThreshold = 250; + + private static final String punctuationSigns = ".!?,'\""; + + @Override + public void scrutinize(ItemUpdate update) { + Set descriptions = update.getDescriptions(); + descriptions.addAll(update.getDescriptionsIfNew()); // merge + for (MonolingualTextValue description : descriptions) { + doScrutinize(update, description); + } + } + + private void doScrutinize(ItemUpdate update, MonolingualTextValue description) { + String descText = description.getText(); + if (descText == null) return; + descText = descText.trim(); + if (descText.length() == 0) return; + + // length check + if (descText.length() > descLengthThreshold) { + warningWithEntity(update, descTooLongType); + } + + // punctuation sign check + char last = descText.charAt(descText.length() - 1); + if (punctuationSigns.indexOf(last) != -1) { + warningWithEntity(update, descEndsByPunctuationSign); + } + + // begin with uppercase letter check + char first = descText.charAt(0); + if ('A' <= first && first <= 'Z') { + warningWithEntity(update, descBeginWithUppercase); + } + + // article check + String firstWord = descText.split("\\s")[0].toLowerCase(); + if ("a".equals(firstWord) || "an".equals(firstWord) || "the".equals(firstWord)) { + warningWithEntity(update, descBeginWithArticle); + } + + // description-label check + Set labels = update.getLabels(); + 
labels.addAll(update.getLabelsIfNew()); // merge + for (MonolingualTextValue label : labels) { + if (label.getLanguageCode().equals(description.getLanguageCode())) { + String labelText = label.getText(); + if (labelText == null) break; + labelText = labelText.trim(); + if (labelText.equals(descText)) { + warningWithEntity(update, descIdenticalWithLabel); + } + break; + } + } + } + + private void warningWithEntity(ItemUpdate update, String type) { + QAWarning issue = new QAWarning(type, null, QAWarning.Severity.WARNING, 1); + issue.setProperty("example_entity", update.getItemId()); + addIssue(issue); + } + +} From 3006a59a084a736c3c8ec30526ab8b968f8441f0 Mon Sep 17 00:00:00 2001 From: afkbrb <2428391347@qq.com> Date: Sun, 1 Mar 2020 22:03:59 +0800 Subject: [PATCH 02/11] register DescriptionScrutinizer --- .../openrefine/wikidata/qa/EditInspector.java | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java index e41fe81f0..6e1c6597b 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java @@ -28,22 +28,7 @@ import java.util.List; import java.util.Map; import java.util.stream.Collectors; -import org.openrefine.wikidata.qa.scrutinizers.CalendarScrutinizer; -import org.openrefine.wikidata.qa.scrutinizers.DistinctValuesScrutinizer; -import org.openrefine.wikidata.qa.scrutinizers.EditScrutinizer; -import org.openrefine.wikidata.qa.scrutinizers.EntityTypeScrutinizer; -import org.openrefine.wikidata.qa.scrutinizers.FormatScrutinizer; -import org.openrefine.wikidata.qa.scrutinizers.InverseConstraintScrutinizer; -import org.openrefine.wikidata.qa.scrutinizers.NewItemScrutinizer; -import org.openrefine.wikidata.qa.scrutinizers.NoEditsMadeScrutinizer; -import 
org.openrefine.wikidata.qa.scrutinizers.QualifierCompatibilityScrutinizer; -import org.openrefine.wikidata.qa.scrutinizers.QuantityScrutinizer; -import org.openrefine.wikidata.qa.scrutinizers.RestrictedPositionScrutinizer; -import org.openrefine.wikidata.qa.scrutinizers.RestrictedValuesScrutinizer; -import org.openrefine.wikidata.qa.scrutinizers.SelfReferentialScrutinizer; -import org.openrefine.wikidata.qa.scrutinizers.SingleValueScrutinizer; -import org.openrefine.wikidata.qa.scrutinizers.UnsourcedScrutinizer; -import org.openrefine.wikidata.qa.scrutinizers.WhitespaceScrutinizer; +import org.openrefine.wikidata.qa.scrutinizers.*; import org.openrefine.wikidata.updates.ItemUpdate; import org.openrefine.wikidata.updates.scheduler.WikibaseAPIUpdateScheduler; import org.openrefine.wikidata.utils.EntityCache; @@ -82,6 +67,7 @@ public class EditInspector { register(new RestrictedValuesScrutinizer()); register(new EntityTypeScrutinizer()); register(new CalendarScrutinizer()); + register(new DescriptionScrutinizer()); } /** From 077b4c7afa9e245fb7ecabe5f1f04ca33403ac78 Mon Sep 17 00:00:00 2001 From: afkbrb <2428391347@qq.com> Date: Sun, 1 Mar 2020 22:04:37 +0800 Subject: [PATCH 03/11] add DescriptionScrutinizerTest --- .../DescriptionScrutinizerTest.java | 105 ++++++++++++++++++ 1 file changed, 105 insertions(+) create mode 100644 extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/DescriptionScrutinizerTest.java diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/DescriptionScrutinizerTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/DescriptionScrutinizerTest.java new file mode 100644 index 000000000..4cdecd41d --- /dev/null +++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/DescriptionScrutinizerTest.java @@ -0,0 +1,105 @@ +package org.openrefine.wikidata.qa.scrutinizers; + +import org.openrefine.wikidata.testing.TestingData; +import 
org.openrefine.wikidata.updates.ItemUpdate; +import org.openrefine.wikidata.updates.ItemUpdateBuilder; +import org.testng.annotations.Test; +import org.wikidata.wdtk.datamodel.helpers.Datamodel; + +public class DescriptionScrutinizerTest extends ScrutinizerTest { + + @Override + public EditScrutinizer getScrutinizer() { + return new DescriptionScrutinizer(); + } + + @Test + public void testTooLong() { + String description = "long description long description long description long description " + + "long description long description long description long description " + + "long description long description long description long description " + + "long description long description long description long description "; + ItemUpdate update = new ItemUpdateBuilder(TestingData.newIdA) + .addDescription(Datamodel.makeMonolingualTextValue(description, "en"), true) + .build(); + scrutinize(update); + assertWarningsRaised(DescriptionScrutinizer.descTooLongType); + } + + @Test + public void testEndWithPunctuationSign() { + String description = "description with punctuationSign."; + ItemUpdate update = new ItemUpdateBuilder(TestingData.newIdA) + .addDescription(Datamodel.makeMonolingualTextValue(description, "en"), false) + .build(); + scrutinize(update); + assertWarningsRaised(DescriptionScrutinizer.descEndsByPunctuationSign); + } + + @Test + public void testBeginWithUppercase() { + String description = "Begin with uppercase"; + ItemUpdate update = new ItemUpdateBuilder(TestingData.newIdA) + .addDescription(Datamodel.makeMonolingualTextValue(description, "en"), true) + .build(); + scrutinize(update); + assertWarningsRaised(DescriptionScrutinizer.descBeginWithUppercase); + } + + @Test + public void testBeginWithArticle() { + String description = "an article test"; + ItemUpdate update = new ItemUpdateBuilder(TestingData.newIdA) + .addDescription(Datamodel.makeMonolingualTextValue(description, "en"), false) + .build(); + scrutinize(update); + 
assertWarningsRaised(DescriptionScrutinizer.descBeginWithArticle); + } + + @Test + public void testIdenticalWithLabel() { + String description = "identical with label"; + ItemUpdate update = new ItemUpdateBuilder(TestingData.newIdA) + .addDescription(Datamodel.makeMonolingualTextValue(description, "en"), true) + .addLabel(Datamodel.makeMonolingualTextValue(description, "en"), true) + .build(); + scrutinize(update); + assertWarningsRaised(DescriptionScrutinizer.descIdenticalWithLabel); + } + + @Test + public void testIdenticalWithLabel1() { + String description = "identical with label"; + ItemUpdate update = new ItemUpdateBuilder(TestingData.newIdA) + .addDescription(Datamodel.makeMonolingualTextValue(description, "en"), true) + .addLabel(Datamodel.makeMonolingualTextValue("bonjour", "fr"), true) + .build(); + scrutinize(update); + assertNoWarningRaised(); + } + + @Test + public void testGoodDesc() { + String description = "good description"; + ItemUpdate update = new ItemUpdateBuilder(TestingData.newIdA) + .addDescription(Datamodel.makeMonolingualTextValue(description, "en"), true) + .build(); + scrutinize(update); + assertNoWarningRaised(); + } + + @Test + public void testAwfulDesc() { + String description = "An awful description An awful description An awful description An awful description" + + "An awful description An awful description An awful description An awful description" + + "An awful description An awful description An awful description An awful description" + + "An awful description An awful description An awful description An awful description!"; + ItemUpdate update = new ItemUpdateBuilder(TestingData.newIdA) + .addDescription(Datamodel.makeMonolingualTextValue(description, "en"), true) + .addLabel(Datamodel.makeMonolingualTextValue(description, "en"), true) + .build(); + scrutinize(update); + assertWarningsRaised(DescriptionScrutinizer.descTooLongType, DescriptionScrutinizer.descEndsByPunctuationSign, + DescriptionScrutinizer.descBeginWithUppercase, 
DescriptionScrutinizer.descBeginWithArticle, DescriptionScrutinizer.descIdenticalWithLabel); + } +} From d1a177e37d78497c474f270c00da338fa19e661a Mon Sep 17 00:00:00 2001 From: afkbrb <2428391347@qq.com> Date: Sun, 1 Mar 2020 22:05:16 +0800 Subject: [PATCH 04/11] add corresponding translation --- extensions/wikidata/module/langs/translation-en.json | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/extensions/wikidata/module/langs/translation-en.json b/extensions/wikidata/module/langs/translation-en.json index 0a1717be4..cb0c294b0 100644 --- a/extensions/wikidata/module/langs/translation-en.json +++ b/extensions/wikidata/module/langs/translation-en.json @@ -156,5 +156,15 @@ "warnings-messages/invalid-entity-type/title": "{property_entity} used on items", "warnings-messages/invalid-entity-type/body": "Uses of {property_entity} on items such as {example_entity} are invalid.", "warnings-messages/early-gregorian-date/title": "Early dates in the Gregorian calendar", - "warnings-messages/early-gregorian-date/body": "Dates earlier than October 1582 (such as in year {example_year}) are unlikely to be expressed using the Gregorian calendar. See the manual to specify the appropriate calendar for your dates." + "warnings-messages/early-gregorian-date/body": "Dates earlier than October 1582 (such as in year {example_year}) are unlikely to be expressed using the Gregorian calendar. 
See the manual to specify the appropriate calendar for your dates.", + "warnings-messages/item-description-too-long/title": "Description is too long", + "warnings-messages/item-description-too-long/body": "Description length of items such as {example_entity} is too long (more that 250 characters).", + "warnings-messages/item-description-end-by-punctuation-sign/title": "Description ends by punctuation sign", + "warnings-messages/item-description-end-by-punctuation-sign/body": "Description of items such as {example_entity} ends by a punctuation sign.", + "warnings-messages/item-description-begin-with-uppercase/title": "Description begins with a uppercase letter", + "warnings-messages/item-description-begin-with-uppercase/body": "Description of items such as {example_entity} begins with a uppercase letter.", + "warnings-messages/item-description-begin-with-article/title": "Description begins with article (a, an or the)", + "warnings-messages/item-description-begin-with-article/body": "Description of items such as {example_entity} begins with article (a, and or the).", + "warnings-messages/item-description-identical-with-label/title": "Description is identical with label", + "warnings-messages/item-description-identical-with-label/body": "Description of items such as {example_entity} is identical with corresponding label." 
} From 7e5a7acd5858e5d677a967e77e78cf739efabf7c Mon Sep 17 00:00:00 2001 From: afkbrb <2428391347@qq.com> Date: Sun, 1 Mar 2020 22:22:52 +0800 Subject: [PATCH 05/11] fix spelling mistake --- extensions/wikidata/module/langs/translation-en.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extensions/wikidata/module/langs/translation-en.json b/extensions/wikidata/module/langs/translation-en.json index cb0c294b0..92421d13b 100644 --- a/extensions/wikidata/module/langs/translation-en.json +++ b/extensions/wikidata/module/langs/translation-en.json @@ -164,7 +164,7 @@ "warnings-messages/item-description-begin-with-uppercase/title": "Description begins with a uppercase letter", "warnings-messages/item-description-begin-with-uppercase/body": "Description of items such as {example_entity} begins with a uppercase letter.", "warnings-messages/item-description-begin-with-article/title": "Description begins with article (a, an or the)", - "warnings-messages/item-description-begin-with-article/body": "Description of items such as {example_entity} begins with article (a, and or the).", + "warnings-messages/item-description-begin-with-article/body": "Description of items such as {example_entity} begins with article (a, an or the).", "warnings-messages/item-description-identical-with-label/title": "Description is identical with label", "warnings-messages/item-description-identical-with-label/body": "Description of items such as {example_entity} is identical with corresponding label." 
} From 9659157a0b3de6d3693f812609b8579ab7c03316 Mon Sep 17 00:00:00 2001 From: Lu Liu <2w6f8c@gmail.com> Date: Sun, 1 Mar 2020 23:13:38 +0800 Subject: [PATCH 06/11] Update DescriptionScrutinizer.java --- .../wikidata/qa/scrutinizers/DescriptionScrutinizer.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/DescriptionScrutinizer.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/DescriptionScrutinizer.java index 9fd79627b..437dad3ba 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/DescriptionScrutinizer.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/DescriptionScrutinizer.java @@ -15,10 +15,10 @@ import java.util.Set; * merely focusing on English here should be enough. * * To be more specific, it does the following checks: - * 1. is a description is too long + * 1. is a description too long * 2. does a description end by punctuation signs * 3. does a description begin with a uppercase letter - * 4. does a description begins with article ("a", "an" or "the") + * 4. does a description begin with article ("a", "an" or "the") * 5. 
is the description identical with corresponding label * * @author Lu Liu From 82f95f2bbffd3ffb6768e319033501c4a0232eb9 Mon Sep 17 00:00:00 2001 From: afkbrb <2428391347@qq.com> Date: Mon, 2 Mar 2020 14:35:42 +0800 Subject: [PATCH 07/11] update DescriptionScrutinizer & create EnglishDescriptionScrutinizer --- .../wikidata/module/langs/translation-en.json | 16 +-- .../openrefine/wikidata/qa/EditInspector.java | 2 +- .../scrutinizers/DescriptionScrutinizer.java | 106 ++++++++---------- .../EnglishDescriptionScrutinizer.java | 72 ++++++++++++ ...=> EnglishDescriptionScrutinizerTest.java} | 98 ++++++++-------- 5 files changed, 175 insertions(+), 119 deletions(-) create mode 100644 extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/EnglishDescriptionScrutinizer.java rename extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/{DescriptionScrutinizerTest.java => EnglishDescriptionScrutinizerTest.java} (82%) diff --git a/extensions/wikidata/module/langs/translation-en.json b/extensions/wikidata/module/langs/translation-en.json index 92421d13b..919ee7b08 100644 --- a/extensions/wikidata/module/langs/translation-en.json +++ b/extensions/wikidata/module/langs/translation-en.json @@ -158,13 +158,13 @@ "warnings-messages/early-gregorian-date/title": "Early dates in the Gregorian calendar", "warnings-messages/early-gregorian-date/body": "Dates earlier than October 1582 (such as in year {example_year}) are unlikely to be expressed using the Gregorian calendar. 
See the manual to specify the appropriate calendar for your dates.", "warnings-messages/item-description-too-long/title": "Description is too long", - "warnings-messages/item-description-too-long/body": "Description length of items such as {example_entity} is too long (more that 250 characters).", - "warnings-messages/item-description-end-by-punctuation-sign/title": "Description ends by punctuation sign", - "warnings-messages/item-description-end-by-punctuation-sign/body": "Description of items such as {example_entity} ends by a punctuation sign.", - "warnings-messages/item-description-begin-with-uppercase/title": "Description begins with a uppercase letter", - "warnings-messages/item-description-begin-with-uppercase/body": "Description of items such as {example_entity} begins with a uppercase letter.", - "warnings-messages/item-description-begin-with-article/title": "Description begins with article (a, an or the)", - "warnings-messages/item-description-begin-with-article/body": "Description of items such as {example_entity} begins with article (a, an or the).", + "warnings-messages/item-description-too-long/body": "Description ({lang}) such as {description} on {example_entity} is too long. Its length is {length}, which is more than {max_length}. Descriptions are not full sentences, but small bits of information. In most cases, the proper length is between two and twelve words. See the manual for more information.", "warnings-messages/item-description-identical-with-label/title": "Description is identical with label", - "warnings-messages/item-description-identical-with-label/body": "Description of items such as {example_entity} is identical with corresponding label." + "warnings-messages/item-description-identical-with-label/body": "Both the description ({lang}) and the label ({label_lang}) on {example_entity} are {description}. 
Descriptions are expected to be more specific than labels.", + "warnings-messages/item-description-end-by-punctuation-sign/title": "Description ends with a punctuation sign", + "warnings-messages/item-description-end-by-punctuation-sign/body": "Description ({lang}) such as {description} on {example_entity} ends with a punctuation sign \"{punctuation_sign}\". Descriptions are not sentences, so the punctuation sign at the end should be avoided. See the manual for more information.", + "warnings-messages/item-description-begin-with-uppercase/title": "Description begins with uppercase letter", + "warnings-messages/item-description-begin-with-uppercase/body": "Description ({lang}) such as {description} on {example_entity} begins with uppercase letter \"{uppercase_letter}\". Descriptions begin with a lowercase letter except when uppercase would normally be required or expected. See the manual for more information.", + "warnings-messages/item-description-begin-with-article/title": "Description begins with article (\"a\", \"an\" or \"the\")", + "warnings-messages/item-description-begin-with-article/body": "Description ({lang}) such as {description} on {example_entity} begins with article \"{article}\". Descriptions should not normally begin with initial articles (\"a\", \"an\", \"the\"). See the manual for more information." 
} diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java index 6e1c6597b..d86cbbef1 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java @@ -67,7 +67,7 @@ public class EditInspector { register(new RestrictedValuesScrutinizer()); register(new EntityTypeScrutinizer()); register(new CalendarScrutinizer()); - register(new DescriptionScrutinizer()); + register(new EnglishDescriptionScrutinizer()); } /** diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/DescriptionScrutinizer.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/DescriptionScrutinizer.java index 437dad3ba..29dd99162 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/DescriptionScrutinizer.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/DescriptionScrutinizer.java @@ -8,91 +8,75 @@ import java.util.Set; /** * A scrutinizer that checks the description of an item. - * - * The checks work well for English. - * It's impossible to cover all languages, - * but since most edited information is in English, - * merely focusing on English here should be enough. - * - * To be more specific, it does the following checks: - * 1. is a description too long - * 2. does a description end by punctuation signs - * 3. does a description begin with a uppercase letter - * 4. does a description begin with article ("a", "an" or "the") - * 5. is the description identical with corresponding label + *

+ * This abstract scrutinizer does the following checks: + * 1. is the description too long + * 2. is the description identical with the label in the same language + *

+ * We can easily implement a language-specific description scrutinizer + * by extending this class. * * @author Lu Liu */ -public class DescriptionScrutinizer extends EditScrutinizer { +public abstract class DescriptionScrutinizer extends EditScrutinizer { public static final String descTooLongType = "item-description-too-long"; - public static final String descEndsByPunctuationSign = "item-description-end-by-punctuation-sign"; - public static final String descBeginWithUppercase = "item-description-begin-with-uppercase"; - public static final String descBeginWithArticle = "item-description-begin-with-article"; public static final String descIdenticalWithLabel = "item-description-identical-with-label"; - private static final int descLengthThreshold = 250; - - private static final String punctuationSigns = ".!?,'\""; - @Override public void scrutinize(ItemUpdate update) { Set descriptions = update.getDescriptions(); descriptions.addAll(update.getDescriptionsIfNew()); // merge for (MonolingualTextValue description : descriptions) { - doScrutinize(update, description); + String descText = description.getText(); + if (descText == null) continue; + descText = descText.trim(); + if (descText.length() == 0) continue; // avoid NullPointerException + + String lang = description.getLanguageCode(); + + checkLength(update, descText, lang); + checkLabel(update, descText, lang); + + scrutinize(update, descText, lang); } } - private void doScrutinize(ItemUpdate update, MonolingualTextValue description) { - String descText = description.getText(); - if (descText == null) return; - descText = descText.trim(); - if (descText.length() == 0) return; + public abstract void scrutinize(ItemUpdate update, String descText, String lang); - // length check - if (descText.length() > descLengthThreshold) { - warningWithEntity(update, descTooLongType); + // Descriptions are not full sentences, but small bits of information. + // In most cases, the proper length is between two and twelve words. 
+ protected void checkLength(ItemUpdate update, String descText, String lang) { + final int maxLength = 250; + if (descText.length() > maxLength) { + QAWarning issue = new QAWarning(descTooLongType, null, QAWarning.Severity.WARNING, 1); + issue.setProperty("example_entity", update.getItemId()); + issue.setProperty("description", descText); + issue.setProperty("lang", lang); + issue.setProperty("length", descText.length()); + issue.setProperty("max_length", maxLength); + addIssue(issue); } + } - // punctuation sign check - char last = descText.charAt(descText.length() - 1); - if (punctuationSigns.indexOf(last) != -1) { - warningWithEntity(update, descEndsByPunctuationSign); - } - - // begin with uppercase letter check - char first = descText.charAt(0); - if ('A' <= first && first <= 'Z') { - warningWithEntity(update, descBeginWithUppercase); - } - - // article check - String firstWord = descText.split("\\s")[0].toLowerCase(); - if ("a".equals(firstWord) || "an".equals(firstWord) || "the".equals(firstWord)) { - warningWithEntity(update, descBeginWithArticle); - } - - // description-label check + // Description are expected to be more specific than labels. 
+ protected void checkLabel(ItemUpdate update, String descText, String lang) { Set labels = update.getLabels(); labels.addAll(update.getLabelsIfNew()); // merge for (MonolingualTextValue label : labels) { - if (label.getLanguageCode().equals(description.getLanguageCode())) { - String labelText = label.getText(); - if (labelText == null) break; - labelText = labelText.trim(); - if (labelText.equals(descText)) { - warningWithEntity(update, descIdenticalWithLabel); - } + String labelText = label.getText(); + if (labelText == null) continue; + labelText = labelText.trim(); + if (labelText.equals(descText)) { + QAWarning issue = new QAWarning(descIdenticalWithLabel, null, QAWarning.Severity.WARNING, 1); + issue.setProperty("example_entity", update.getItemId()); + issue.setProperty("description", descText); + issue.setProperty("lang", lang); + issue.setProperty("label_lang", label.getLanguageCode()); + addIssue(issue); break; } } } - private void warningWithEntity(ItemUpdate update, String type) { - QAWarning issue = new QAWarning(type, null, QAWarning.Severity.WARNING, 1); - issue.setProperty("example_entity", update.getItemId()); - addIssue(issue); - } - } diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/EnglishDescriptionScrutinizer.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/EnglishDescriptionScrutinizer.java new file mode 100644 index 000000000..ce3274a8d --- /dev/null +++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/EnglishDescriptionScrutinizer.java @@ -0,0 +1,72 @@ +package org.openrefine.wikidata.qa.scrutinizers; + +import org.openrefine.wikidata.qa.QAWarning; +import org.openrefine.wikidata.updates.ItemUpdate; + +/** + * @author Lu Liu + */ +public class EnglishDescriptionScrutinizer extends DescriptionScrutinizer { + + public static final String descEndsByPunctuationSign = "item-description-end-by-punctuation-sign"; + public static final String descBeginWithUppercase = 
"item-description-begin-with-uppercase"; + public static final String descBeginWithArticle = "item-description-begin-with-article"; + + private static final String LANG = "en"; + + @Override + public void scrutinize(ItemUpdate update, String descText, String lang) { + if (!LANG.equalsIgnoreCase(lang)) return; + + checkPunctuationSign(update, descText); + checkUppercase(update, descText); + checkArticle(update, descText); + } + + // Description are not sentences, so the punctuation sign at the end should be avoided. + protected void checkPunctuationSign(ItemUpdate update, String descText) { + assert descText.length() > 0; + final String punctuationSigns = ".?!;:,'\""; + + char last = descText.charAt(descText.length() - 1); + if (punctuationSigns.indexOf(last) != -1) { + QAWarning issue = new QAWarning(descEndsByPunctuationSign, null, QAWarning.Severity.WARNING, 1); + issue.setProperty("example_entity", update.getItemId()); + issue.setProperty("description", descText); + issue.setProperty("lang", LANG); + issue.setProperty("punctuation_sign", last); + addIssue(issue); + } + } + + // Descriptions begin with a lowercase letter except when uppercase would normally be required or expected. + protected void checkUppercase(ItemUpdate update, String descText) { + assert descText.length() > 0; + + char first = descText.charAt(0); + if ('A' <= first && first <= 'Z') { + QAWarning issue = new QAWarning(descBeginWithUppercase, null, QAWarning.Severity.INFO, 1); + issue.setProperty("example_entity", update.getItemId()); + issue.setProperty("description", descText); + issue.setProperty("lang", LANG); + issue.setProperty("uppercase_letter", first); + addIssue(issue); + } + } + + // Descriptions should not normally begin with initial articles ("a", "an", "the"). 
+ protected void checkArticle(ItemUpdate update, String descText) { + assert descText.length() > 0; + + String firstWord = descText.split("\\s")[0].toLowerCase(); + if ("a".equals(firstWord) || "an".equals(firstWord) || "the".equals(firstWord)) { + QAWarning issue = new QAWarning(descBeginWithArticle, null, QAWarning.Severity.WARNING, 1); + issue.setProperty("example_entity", update.getItemId()); + issue.setProperty("description", descText); + issue.setProperty("lang", LANG); + issue.setProperty("article", firstWord); + addIssue(issue); + } + } + +} diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/DescriptionScrutinizerTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/EnglishDescriptionScrutinizerTest.java similarity index 82% rename from extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/DescriptionScrutinizerTest.java rename to extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/EnglishDescriptionScrutinizerTest.java index 4cdecd41d..c87e94d45 100644 --- a/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/DescriptionScrutinizerTest.java +++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/EnglishDescriptionScrutinizerTest.java @@ -6,11 +6,21 @@ import org.openrefine.wikidata.updates.ItemUpdateBuilder; import org.testng.annotations.Test; import org.wikidata.wdtk.datamodel.helpers.Datamodel; -public class DescriptionScrutinizerTest extends ScrutinizerTest { +public class EnglishDescriptionScrutinizerTest extends ScrutinizerTest { @Override public EditScrutinizer getScrutinizer() { - return new DescriptionScrutinizer(); + return new EnglishDescriptionScrutinizer(); + } + + @Test + public void testGoodDesc() { + String description = "good description"; + ItemUpdate update = new ItemUpdateBuilder(TestingData.newIdA) + .addDescription(Datamodel.makeMonolingualTextValue(description, "en"), true) + .build(); + 
scrutinize(update); + assertNoWarningRaised(); } @Test @@ -23,48 +33,7 @@ public class DescriptionScrutinizerTest extends ScrutinizerTest { .addDescription(Datamodel.makeMonolingualTextValue(description, "en"), true) .build(); scrutinize(update); - assertWarningsRaised(DescriptionScrutinizer.descTooLongType); - } - - @Test - public void testEndWithPunctuationSign() { - String description = "description with punctuationSign."; - ItemUpdate update = new ItemUpdateBuilder(TestingData.newIdA) - .addDescription(Datamodel.makeMonolingualTextValue(description, "en"), false) - .build(); - scrutinize(update); - assertWarningsRaised(DescriptionScrutinizer.descEndsByPunctuationSign); - } - - @Test - public void testBeginWithUppercase() { - String description = "Begin with uppercase"; - ItemUpdate update = new ItemUpdateBuilder(TestingData.newIdA) - .addDescription(Datamodel.makeMonolingualTextValue(description, "en"), true) - .build(); - scrutinize(update); - assertWarningsRaised(DescriptionScrutinizer.descBeginWithUppercase); - } - - @Test - public void testBeginWithArticle() { - String description = "an article test"; - ItemUpdate update = new ItemUpdateBuilder(TestingData.newIdA) - .addDescription(Datamodel.makeMonolingualTextValue(description, "en"), false) - .build(); - scrutinize(update); - assertWarningsRaised(DescriptionScrutinizer.descBeginWithArticle); - } - - @Test - public void testIdenticalWithLabel() { - String description = "identical with label"; - ItemUpdate update = new ItemUpdateBuilder(TestingData.newIdA) - .addDescription(Datamodel.makeMonolingualTextValue(description, "en"), true) - .addLabel(Datamodel.makeMonolingualTextValue(description, "en"), true) - .build(); - scrutinize(update); - assertWarningsRaised(DescriptionScrutinizer.descIdenticalWithLabel); + assertWarningsRaised(EnglishDescriptionScrutinizer.descTooLongType); } @Test @@ -79,13 +48,44 @@ public class DescriptionScrutinizerTest extends ScrutinizerTest { } @Test - public void testGoodDesc() 
{ - String description = "good description"; + public void testIdenticalWithLabel() { + String description = "identical with label"; + ItemUpdate update = new ItemUpdateBuilder(TestingData.newIdA) + .addDescription(Datamodel.makeMonolingualTextValue(description, "en"), true) + .addLabel(Datamodel.makeMonolingualTextValue(description, "en"), true) + .build(); + scrutinize(update); + assertWarningsRaised(EnglishDescriptionScrutinizer.descIdenticalWithLabel); + } + + @Test + public void testEndWithPunctuationSign() { + String description = "description with punctuationSign."; + ItemUpdate update = new ItemUpdateBuilder(TestingData.newIdA) + .addDescription(Datamodel.makeMonolingualTextValue(description, "en"), false) + .build(); + scrutinize(update); + assertWarningsRaised(EnglishDescriptionScrutinizer.descEndsByPunctuationSign); + } + + @Test + public void testBeginWithUppercase() { + String description = "Begin with uppercase"; ItemUpdate update = new ItemUpdateBuilder(TestingData.newIdA) .addDescription(Datamodel.makeMonolingualTextValue(description, "en"), true) .build(); scrutinize(update); - assertNoWarningRaised(); + assertWarningsRaised(EnglishDescriptionScrutinizer.descBeginWithUppercase); + } + + @Test + public void testBeginWithArticle() { + String description = "an article test"; + ItemUpdate update = new ItemUpdateBuilder(TestingData.newIdA) + .addDescription(Datamodel.makeMonolingualTextValue(description, "en"), false) + .build(); + scrutinize(update); + assertWarningsRaised(EnglishDescriptionScrutinizer.descBeginWithArticle); } @Test @@ -99,7 +99,7 @@ public class DescriptionScrutinizerTest extends ScrutinizerTest { .addLabel(Datamodel.makeMonolingualTextValue(description, "en"), true) .build(); scrutinize(update); - assertWarningsRaised(DescriptionScrutinizer.descTooLongType, DescriptionScrutinizer.descEndsByPunctuationSign, - DescriptionScrutinizer.descBeginWithUppercase, DescriptionScrutinizer.descBeginWithArticle, 
DescriptionScrutinizer.descIdenticalWithLabel); + assertWarningsRaised(EnglishDescriptionScrutinizer.descTooLongType, EnglishDescriptionScrutinizer.descEndsByPunctuationSign, + EnglishDescriptionScrutinizer.descBeginWithUppercase, EnglishDescriptionScrutinizer.descBeginWithArticle, EnglishDescriptionScrutinizer.descIdenticalWithLabel); } } From b0bf9203ba150793bdb5f1a490c30bd53bd22f33 Mon Sep 17 00:00:00 2001 From: afkbrb <2428391347@qq.com> Date: Mon, 2 Mar 2020 14:52:01 +0800 Subject: [PATCH 08/11] add "." --- extensions/wikidata/module/langs/translation-en.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extensions/wikidata/module/langs/translation-en.json b/extensions/wikidata/module/langs/translation-en.json index 919ee7b08..277a87f20 100644 --- a/extensions/wikidata/module/langs/translation-en.json +++ b/extensions/wikidata/module/langs/translation-en.json @@ -160,7 +160,7 @@ "warnings-messages/item-description-too-long/title": "Description is too long", "warnings-messages/item-description-too-long/body": "Description ({lang}) such as {description} on {example_entity} is too long. Its length is {length}, which is more than {max_length}. Descriptions are not full sentences, but small bits of information. In most cases, the proper length is between two and twelve words. See the manual for more information.", "warnings-messages/item-description-identical-with-label/title": "Description is identical with label", - "warnings-messages/item-description-identical-with-label/body": "Both the description ({lang}) and the label ({label_lang}) on {example_entity} are {description}. Description are expected to be more specific than labels", + "warnings-messages/item-description-identical-with-label/body": "Both the description ({lang}) and the label ({label_lang}) on {example_entity} are {description}. 
Description are expected to be more specific than labels.", "warnings-messages/item-description-end-by-punctuation-sign/title": "Description ends by punctuation sign", "warnings-messages/item-description-end-by-punctuation-sign/body": "Description ({lang}) such as {description} on {example_entity} ends by a punctuation sign \"{punctuation_sign}\". Description are not sentences, so the punctuation sign at the end should be avoided. See the manual for more information.", "warnings-messages/item-description-begin-with-uppercase/title": "Description begins with uppercase letter", From 2fef34795d00a44cb3923e3c405c8f119e53c1ba Mon Sep 17 00:00:00 2001 From: afkbrb <2428391347@qq.com> Date: Mon, 2 Mar 2020 17:40:03 +0800 Subject: [PATCH 09/11] change severity --- .../wikidata/qa/scrutinizers/DescriptionScrutinizer.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/DescriptionScrutinizer.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/DescriptionScrutinizer.java index 29dd99162..69a8ce6a1 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/DescriptionScrutinizer.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/DescriptionScrutinizer.java @@ -49,7 +49,7 @@ public abstract class DescriptionScrutinizer extends EditScrutinizer { protected void checkLength(ItemUpdate update, String descText, String lang) { final int maxLength = 250; if (descText.length() > maxLength) { - QAWarning issue = new QAWarning(descTooLongType, null, QAWarning.Severity.WARNING, 1); + QAWarning issue = new QAWarning(descTooLongType, null, QAWarning.Severity.CRITICAL, 1); issue.setProperty("example_entity", update.getItemId()); issue.setProperty("description", descText); issue.setProperty("lang", lang); From 43b32a07e08e08b7444eff27332c0820b0e7dc6e Mon Sep 17 00:00:00 2001 From: Lu Liu <2w6f8c@gmail.com> Date: Mon, 2 Mar 2020 20:10:02 +0800 Subject: 
[PATCH 10/11] Update extensions/wikidata/module/langs/translation-en.json Co-Authored-By: Thad Guidry --- extensions/wikidata/module/langs/translation-en.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extensions/wikidata/module/langs/translation-en.json b/extensions/wikidata/module/langs/translation-en.json index 277a87f20..7dc468024 100644 --- a/extensions/wikidata/module/langs/translation-en.json +++ b/extensions/wikidata/module/langs/translation-en.json @@ -160,7 +160,7 @@ "warnings-messages/item-description-too-long/title": "Description is too long", "warnings-messages/item-description-too-long/body": "Description ({lang}) such as {description} on {example_entity} is too long. Its length is {length}, which is more than {max_length}. Descriptions are not full sentences, but small bits of information. In most cases, the proper length is between two and twelve words. See the manual for more information.", "warnings-messages/item-description-identical-with-label/title": "Description is identical with label", - "warnings-messages/item-description-identical-with-label/body": "Both the description ({lang}) and the label ({label_lang}) on {example_entity} are {description}. Description are expected to be more specific than labels.", + "warnings-messages/item-description-identical-with-label/body": "Both the description ({lang}) and the label ({label_lang}) on {example_entity} are {description}. Description are expected to be more specific than labels. See the manual for more information.",", "warnings-messages/item-description-end-by-punctuation-sign/title": "Description ends by punctuation sign", "warnings-messages/item-description-end-by-punctuation-sign/body": "Description ({lang}) such as {description} on {example_entity} ends by a punctuation sign \"{punctuation_sign}\". Description are not sentences, so the punctuation sign at the end should be avoided. 
See the manual for more information.", "warnings-messages/item-description-begin-with-uppercase/title": "Description begins with uppercase letter", From be5f6e64025e23ce7e002dbd3b30ca3f2916173a Mon Sep 17 00:00:00 2001 From: afkbrb <2428391347@qq.com> Date: Mon, 2 Mar 2020 21:59:55 +0800 Subject: [PATCH 11/11] move check functions in DescriptionScrutinizer to CommonDescriptionScrutinizer --- .../wikidata/module/langs/translation-en.json | 2 +- .../openrefine/wikidata/qa/EditInspector.java | 1 + .../CommonDescriptionScrutinizer.java | 58 +++++++++++++++ .../scrutinizers/DescriptionScrutinizer.java | 54 +------------- .../CommonDescriptionScrutinizerTest.java | 74 +++++++++++++++++++ .../EnglishDescriptionScrutinizerTest.java | 44 +---------- 6 files changed, 138 insertions(+), 95 deletions(-) create mode 100644 extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/CommonDescriptionScrutinizer.java create mode 100644 extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/CommonDescriptionScrutinizerTest.java diff --git a/extensions/wikidata/module/langs/translation-en.json b/extensions/wikidata/module/langs/translation-en.json index 7dc468024..c63393aec 100644 --- a/extensions/wikidata/module/langs/translation-en.json +++ b/extensions/wikidata/module/langs/translation-en.json @@ -160,7 +160,7 @@ "warnings-messages/item-description-too-long/title": "Description is too long", "warnings-messages/item-description-too-long/body": "Description ({lang}) such as {description} on {example_entity} is too long. Its length is {length}, which is more than {max_length}. Descriptions are not full sentences, but small bits of information. In most cases, the proper length is between two and twelve words. 
See the manual for more information.", "warnings-messages/item-description-identical-with-label/title": "Description is identical with label", - "warnings-messages/item-description-identical-with-label/body": "Both the description ({lang}) and the label ({label_lang}) on {example_entity} are {description}. Description are expected to be more specific than labels. See the manual for more information.",", + "warnings-messages/item-description-identical-with-label/body": "Both the description ({lang}) and the label ({label_lang}) on {example_entity} are {description}. Description are expected to be more specific than labels. See the manual for more information.", "warnings-messages/item-description-end-by-punctuation-sign/title": "Description ends by punctuation sign", "warnings-messages/item-description-end-by-punctuation-sign/body": "Description ({lang}) such as {description} on {example_entity} ends by a punctuation sign \"{punctuation_sign}\". Description are not sentences, so the punctuation sign at the end should be avoided. 
See the manual for more information.", "warnings-messages/item-description-begin-with-uppercase/title": "Description begins with uppercase letter", diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java index d86cbbef1..7165ffaef 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java @@ -67,6 +67,7 @@ public class EditInspector { register(new RestrictedValuesScrutinizer()); register(new EntityTypeScrutinizer()); register(new CalendarScrutinizer()); + register(new CommonDescriptionScrutinizer()); register(new EnglishDescriptionScrutinizer()); } diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/CommonDescriptionScrutinizer.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/CommonDescriptionScrutinizer.java new file mode 100644 index 000000000..be78919a4 --- /dev/null +++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/CommonDescriptionScrutinizer.java @@ -0,0 +1,58 @@ +package org.openrefine.wikidata.qa.scrutinizers; + +import org.openrefine.wikidata.qa.QAWarning; +import org.openrefine.wikidata.updates.ItemUpdate; +import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue; + +import java.util.Set; + +/** + * @author Lu Liu + */ +public class CommonDescriptionScrutinizer extends DescriptionScrutinizer { + + public static final String descTooLongType = "item-description-too-long"; + public static final String descIdenticalWithLabel = "item-description-identical-with-label"; + + @Override + public void scrutinize(ItemUpdate update, String descText, String lang) { + checkLength(update, descText, lang); + checkLabel(update, descText, lang); + } + + // Descriptions are not full sentences, but small bits of information. + // In most cases, the proper length is between two and twelve words. 
+ protected void checkLength(ItemUpdate update, String descText, String lang) { + final int maxLength = 250; + if (descText.length() > maxLength) { + QAWarning issue = new QAWarning(descTooLongType, null, QAWarning.Severity.CRITICAL, 1); + issue.setProperty("example_entity", update.getItemId()); + issue.setProperty("description", descText); + issue.setProperty("lang", lang); + issue.setProperty("length", descText.length()); + issue.setProperty("max_length", maxLength); + addIssue(issue); + } + } + + // Description are expected to be more specific than labels. + protected void checkLabel(ItemUpdate update, String descText, String lang) { + Set labels = update.getLabels(); + labels.addAll(update.getLabelsIfNew()); // merge + for (MonolingualTextValue label : labels) { + String labelText = label.getText(); + if (labelText == null) continue; + labelText = labelText.trim(); + if (labelText.equals(descText)) { + QAWarning issue = new QAWarning(descIdenticalWithLabel, null, QAWarning.Severity.WARNING, 1); + issue.setProperty("example_entity", update.getItemId()); + issue.setProperty("description", descText); + issue.setProperty("lang", lang); + issue.setProperty("label_lang", label.getLanguageCode()); + addIssue(issue); + break; + } + } + } + +} diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/DescriptionScrutinizer.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/DescriptionScrutinizer.java index 69a8ce6a1..3a9075adf 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/DescriptionScrutinizer.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/DescriptionScrutinizer.java @@ -7,22 +7,10 @@ import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue; import java.util.Set; /** - * A scrutinizer that checks the description of an item. - *

- * This abstract scrutinizer does the following checks: - * 1. is the description too long - * 2. is the description identical with the label in the same language - *

- * We can easily implement a language-specific description scrutinizer - * by extending this class. - * * @author Lu Liu */ public abstract class DescriptionScrutinizer extends EditScrutinizer { - public static final String descTooLongType = "item-description-too-long"; - public static final String descIdenticalWithLabel = "item-description-identical-with-label"; - @Override public void scrutinize(ItemUpdate update) { Set descriptions = update.getDescriptions(); @@ -33,50 +21,10 @@ public abstract class DescriptionScrutinizer extends EditScrutinizer { descText = descText.trim(); if (descText.length() == 0) continue; // avoid NullPointerException - String lang = description.getLanguageCode(); - - checkLength(update, descText, lang); - checkLabel(update, descText, lang); - - scrutinize(update, descText, lang); + scrutinize(update, descText, description.getLanguageCode()); } } public abstract void scrutinize(ItemUpdate update, String descText, String lang); - // Descriptions are not full sentences, but small bits of information. - // In most cases, the proper length is between two and twelve words. - protected void checkLength(ItemUpdate update, String descText, String lang) { - final int maxLength = 250; - if (descText.length() > maxLength) { - QAWarning issue = new QAWarning(descTooLongType, null, QAWarning.Severity.CRITICAL, 1); - issue.setProperty("example_entity", update.getItemId()); - issue.setProperty("description", descText); - issue.setProperty("lang", lang); - issue.setProperty("length", descText.length()); - issue.setProperty("max_length", maxLength); - addIssue(issue); - } - } - - // Description are expected to be more specific than labels. 
- protected void checkLabel(ItemUpdate update, String descText, String lang) { - Set labels = update.getLabels(); - labels.addAll(update.getLabelsIfNew()); // merge - for (MonolingualTextValue label : labels) { - String labelText = label.getText(); - if (labelText == null) continue; - labelText = labelText.trim(); - if (labelText.equals(descText)) { - QAWarning issue = new QAWarning(descIdenticalWithLabel, null, QAWarning.Severity.WARNING, 1); - issue.setProperty("example_entity", update.getItemId()); - issue.setProperty("description", descText); - issue.setProperty("lang", lang); - issue.setProperty("label_lang", label.getLanguageCode()); - addIssue(issue); - break; - } - } - } - } diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/CommonDescriptionScrutinizerTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/CommonDescriptionScrutinizerTest.java new file mode 100644 index 000000000..83850e312 --- /dev/null +++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/CommonDescriptionScrutinizerTest.java @@ -0,0 +1,74 @@ +package org.openrefine.wikidata.qa.scrutinizers; + +import org.openrefine.wikidata.testing.TestingData; +import org.openrefine.wikidata.updates.ItemUpdate; +import org.openrefine.wikidata.updates.ItemUpdateBuilder; +import org.testng.annotations.Test; +import org.wikidata.wdtk.datamodel.helpers.Datamodel; + +public class CommonDescriptionScrutinizerTest extends ScrutinizerTest { + + @Override + public EditScrutinizer getScrutinizer() { + return new CommonDescriptionScrutinizer(); + } + + @Test + public void testGoodDesc() { + String description = "good description"; + ItemUpdate update = new ItemUpdateBuilder(TestingData.newIdA) + .addDescription(Datamodel.makeMonolingualTextValue(description, "en"), true) + .build(); + scrutinize(update); + assertNoWarningRaised(); + } + + @Test + public void testTooLong() { + String description = "long description long description 
long description long description " + + "long description long description long description long description " + + "long description long description long description long description " + + "long description long description long description long description"; + ItemUpdate update = new ItemUpdateBuilder(TestingData.newIdA) + .addDescription(Datamodel.makeMonolingualTextValue(description, "en"), true) + .build(); + scrutinize(update); + assertWarningsRaised(CommonDescriptionScrutinizer.descTooLongType); + } + + @Test + public void testIdenticalWithLabel() { + String description = "identical with label"; + ItemUpdate update = new ItemUpdateBuilder(TestingData.newIdA) + .addDescription(Datamodel.makeMonolingualTextValue(description, "en"), true) + .addLabel(Datamodel.makeMonolingualTextValue(description, "en"), true) + .build(); + scrutinize(update); + assertWarningsRaised(CommonDescriptionScrutinizer.descIdenticalWithLabel); + } + + @Test + public void testIdenticalWithLabel1() { + String description = "identical with label"; + ItemUpdate update = new ItemUpdateBuilder(TestingData.newIdA) + .addDescription(Datamodel.makeMonolingualTextValue(description, "en"), true) + .addLabel(Datamodel.makeMonolingualTextValue("bonjour", "fr"), true) + .build(); + scrutinize(update); + assertNoWarningRaised(); + } + + @Test + public void testAwfulDesc() { + String description = "long description long description long description long description " + + "long description long description long description long description " + + "long description long description long description long description " + + "long description long description long description long description"; + ItemUpdate update = new ItemUpdateBuilder(TestingData.newIdA) + .addDescription(Datamodel.makeMonolingualTextValue(description, "en"), true) + .addLabel(Datamodel.makeMonolingualTextValue(description, "en"), true) + .build(); + scrutinize(update); + assertWarningsRaised(CommonDescriptionScrutinizer.descTooLongType, 
CommonDescriptionScrutinizer.descIdenticalWithLabel); + } +} diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/EnglishDescriptionScrutinizerTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/EnglishDescriptionScrutinizerTest.java index c87e94d45..e3e2dc796 100644 --- a/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/EnglishDescriptionScrutinizerTest.java +++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/EnglishDescriptionScrutinizerTest.java @@ -23,41 +23,6 @@ public class EnglishDescriptionScrutinizerTest extends ScrutinizerTest { assertNoWarningRaised(); } - @Test - public void testTooLong() { - String description = "long description long description long description long description " - + "long description long description long description long description " - + "long description long description long description long description " - + "long description long description long description long description "; - ItemUpdate update = new ItemUpdateBuilder(TestingData.newIdA) - .addDescription(Datamodel.makeMonolingualTextValue(description, "en"), true) - .build(); - scrutinize(update); - assertWarningsRaised(EnglishDescriptionScrutinizer.descTooLongType); - } - - @Test - public void testIdenticalWithLabel1() { - String description = "identical with label"; - ItemUpdate update = new ItemUpdateBuilder(TestingData.newIdA) - .addDescription(Datamodel.makeMonolingualTextValue(description, "en"), true) - .addLabel(Datamodel.makeMonolingualTextValue("bonjour", "fr"), true) - .build(); - scrutinize(update); - assertNoWarningRaised(); - } - - @Test - public void testIdenticalWithLabel() { - String description = "identical with label"; - ItemUpdate update = new ItemUpdateBuilder(TestingData.newIdA) - .addDescription(Datamodel.makeMonolingualTextValue(description, "en"), true) - .addLabel(Datamodel.makeMonolingualTextValue(description, "en"), true) - .build(); - 
scrutinize(update); - assertWarningsRaised(EnglishDescriptionScrutinizer.descIdenticalWithLabel); - } - @Test public void testEndWithPunctuationSign() { String description = "description with punctuationSign."; @@ -90,16 +55,13 @@ public class EnglishDescriptionScrutinizerTest extends ScrutinizerTest { @Test public void testAwfulDesc() { - String description = "An awful description An awful description An awful description An awful description" - + "An awful description An awful description An awful description An awful description" - + "An awful description An awful description An awful description An awful description" - + "An awful description An awful description An awful description An awful description!"; + String description = "An awful description."; ItemUpdate update = new ItemUpdateBuilder(TestingData.newIdA) .addDescription(Datamodel.makeMonolingualTextValue(description, "en"), true) .addLabel(Datamodel.makeMonolingualTextValue(description, "en"), true) .build(); scrutinize(update); - assertWarningsRaised(EnglishDescriptionScrutinizer.descTooLongType, EnglishDescriptionScrutinizer.descEndsByPunctuationSign, - EnglishDescriptionScrutinizer.descBeginWithUppercase, EnglishDescriptionScrutinizer.descBeginWithArticle, EnglishDescriptionScrutinizer.descIdenticalWithLabel); + assertWarningsRaised(EnglishDescriptionScrutinizer.descEndsByPunctuationSign, + EnglishDescriptionScrutinizer.descBeginWithUppercase, EnglishDescriptionScrutinizer.descBeginWithArticle); } }