diff --git a/extensions/wikidata/module/langs/translation-en.json b/extensions/wikidata/module/langs/translation-en.json index 40a298048..18a82adb3 100644 --- a/extensions/wikidata/module/langs/translation-en.json +++ b/extensions/wikidata/module/langs/translation-en.json @@ -169,14 +169,6 @@ "title": "No language provided for monolingual text.", "body": "Some label, description, alias or monolingual text value have been skipped because no language was provided. Example value: {example_text}." }, - "leading-whitespace": { - "title": "Leading whitespace in strings.", - "body": "Strings such as {example_string} have leading whitespace." - }, - "trailing-whitespace": { - "title": "Trailing whitespace in strings.", - "body": "Strings such as {example_string} have trailing whitespace." - }, "duplicate-whitespace": { "title": "Duplicate whitespace in strings.", "body": "Strings such as {example_string} contain duplicate whitespace." diff --git a/extensions/wikidata/module/langs/translation-fr.json b/extensions/wikidata/module/langs/translation-fr.json index adab4dbac..c82590f9c 100644 --- a/extensions/wikidata/module/langs/translation-fr.json +++ b/extensions/wikidata/module/langs/translation-fr.json @@ -168,14 +168,6 @@ "title": "Pas de langue fournie pour des textes monolingues.", "body": "Des libellés, descriptions, alias ou textes monolingues ont été ignorés car aucune langue n'a été fournie. Exemple: {example_text}." }, - "leading-whitespace": { - "title": "Espaces au début de chaînes de caractères.", - "body": "Des chaînes telles que {example_string} ont des espaces au début." - }, - "trailing-whitespace": { - "title": "Espaces à la fin de chaînes de caractères.", - "body": "Des chaînes telles que {example_string} ont des espaces à la fin." - }, "duplicate-whitespace": { "title": "Espaces dédoublées dans des chaînes de caractères.", "body": "Des chaînes telles que {example_string} contiennent des espaces dédoublées." diff --git a/extensions/wikidata/module/langs/translation-jp.json b/extensions/wikidata/module/langs/translation-jp.json index c1b4009bf..2094def7e 100644 --- a/extensions/wikidata/module/langs/translation-jp.json +++ b/extensions/wikidata/module/langs/translation-jp.json @@ -180,14 +180,6 @@ "title": "言語指定がありません.", "body": "言語指定がないので、ラベル・記述・別名・単一言語テキストが無視されました。例えば: {example_text}." }, - "leading-whitespace": { - "title": "文頭に空白文字があります.", - "body": "{example_string}の文頭に空白文字があります." - }, - "trailing-whitespace": { - "title": "文末に空白文字があります.", - "body": "{example_string}の文末に空白文字があります." - }, "duplicate-whitespace": { "title": "二重の空白文字があります.", "body": "{example_string}には二重の空白文字があります." diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/WhitespaceScrutinizer.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/WhitespaceScrutinizer.java index a5685384e..cdcbcc7f1 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/WhitespaceScrutinizer.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/WhitespaceScrutinizer.java @@ -43,15 +43,11 @@ public class WhitespaceScrutinizer extends ValueScrutinizer { private Map _issuesMap; - public static final String leadingWhitespaceType = "leading-whitespace"; - public static final String trailingWhitespaceType = "trailing-whitespace"; public static final String duplicateWhitespaceType = "duplicate-whitespace"; public static final String nonPrintableCharsType = "non-printable-characters"; public WhitespaceScrutinizer() { _issuesMap = new HashMap<>(); - _issuesMap.put(leadingWhitespaceType, Pattern.compile("^\\s")); - _issuesMap.put(trailingWhitespaceType, Pattern.compile("\\s$")); _issuesMap.put(duplicateWhitespaceType, Pattern.compile("\\s\\s")); // https://stackoverflow.com/questions/14565934/regular-expression-to-remove-all-non-printable-characters diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbMonolingualExpr.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbMonolingualExpr.java index ef7622f2d..f393c7d15 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbMonolingualExpr.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbMonolingualExpr.java @@ -53,7 +53,7 @@ public class WbMonolingualExpr implements WbExpression { String text = getValueExpr().evaluate(ctxt).getString(); try { String lang = getLanguageExpr().evaluate(ctxt); - return Datamodel.makeMonolingualTextValue(text, lang); + return Datamodel.makeMonolingualTextValue(text.trim(), lang); } catch (SkipSchemaExpressionException e) { QAWarning warning = new QAWarning("monolingual-text-without-language", null, QAWarning.Severity.WARNING, 1); diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbStringConstant.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbStringConstant.java index 7a1ddf9e3..a8d4ce34c 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbStringConstant.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbStringConstant.java @@ -39,7 +39,7 @@ public class WbStringConstant implements WbExpression { Validate.notNull(value); Validate.isTrue(!value.isEmpty()); // for now we don't accept empty strings // because in the variable counterpart of this expression, they are skipped - this.value = value; + this.value = value.trim(); } @Override diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbStringVariable.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbStringVariable.java index 7385d35eb..4e1ae3bd2 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbStringVariable.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbStringVariable.java @@ -62,7 +62,7 @@ public class WbStringVariable extends WbVariableExpr { if (cell.value instanceof Double && ((Double)cell.value) % 1 == 0) { stringValue = Integer.toString(((Double)cell.value).intValue()); } - return Datamodel.makeStringValue(stringValue); + return Datamodel.makeStringValue(stringValue.trim()); } throw new SkipSchemaExpressionException(); } diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/WhitespaceScrutinizerTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/WhitespaceScrutinizerTest.java index c2ffad6a1..25a70049c 100644 --- a/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/WhitespaceScrutinizerTest.java +++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/WhitespaceScrutinizerTest.java @@ -33,18 +33,6 @@ public class WhitespaceScrutinizerTest extends ValueScrutinizerTest { return new WhitespaceScrutinizer(); } - @Test - public void testLeadingWhitespace() { - scrutinize(Datamodel.makeStringValue(" a")); - assertWarningsRaised(WhitespaceScrutinizer.leadingWhitespaceType); - } - - @Test - public void testTrailingWhitespace() { - scrutinize(Datamodel.makeStringValue("a\t")); - assertWarningsRaised(WhitespaceScrutinizer.trailingWhitespaceType); - } - @Test public void testDuplicateWhitespace() { scrutinize(Datamodel.makeStringValue("a\t b")); @@ -65,14 +53,13 @@ public class WhitespaceScrutinizerTest extends ValueScrutinizerTest { @Test public void testMultipleIssues() { - scrutinize(Datamodel.makeStringValue(" a\t b ")); - assertWarningsRaised(WhitespaceScrutinizer.duplicateWhitespaceType, WhitespaceScrutinizer.leadingWhitespaceType, - WhitespaceScrutinizer.trailingWhitespaceType); + scrutinize(Datamodel.makeStringValue("a\t b\u0003")); + assertWarningsRaised(WhitespaceScrutinizer.duplicateWhitespaceType, WhitespaceScrutinizer.nonPrintableCharsType); } @Test public void testMonolingualTextValue() { - scrutinizeLabel(Datamodel.makeMonolingualTextValue(" a", "fr")); - assertWarningsRaised(WhitespaceScrutinizer.leadingWhitespaceType); + scrutinizeLabel(Datamodel.makeMonolingualTextValue("a b", "fr")); + assertWarningsRaised(WhitespaceScrutinizer.duplicateWhitespaceType); } } diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/schema/WbMonolingualExprTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/schema/WbMonolingualExprTest.java index bea0536b8..09dd4bd23 100644 --- a/extensions/wikidata/tests/src/org/openrefine/wikidata/schema/WbMonolingualExprTest.java +++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/schema/WbMonolingualExprTest.java @@ -43,6 +43,12 @@ public class WbMonolingualExprTest extends WbExpressionTest { evaluatesTo(Datamodel.makeStringValue("hello world"), constant); } + @Test + public void testTrim() { + evaluatesTo(Datamodel.makeStringValue("hello world"), new WbStringConstant(" hello world ")); + } + @Test(expectedExceptions = IllegalArgumentException.class) public void testEmpty() { new WbStringConstant(""); diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/schema/WbStringVariableTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/schema/WbStringVariableTest.java index 3cace300c..334ce09f5 100644 --- a/extensions/wikidata/tests/src/org/openrefine/wikidata/schema/WbStringVariableTest.java +++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/schema/WbStringVariableTest.java @@ -47,12 +47,11 @@ public class WbStringVariableTest extends WbVariableTest { } /** - * It is not up to the evaluator to clean up the strings it gets. This is - * flagged later on by scrutinizers. + * The evaluator cleans up leading and trailing whitespace, but not duplicate spaces */ @Test public void testTrailingWhitespace() { - evaluatesTo(Datamodel.makeStringValue("dirty \t"), "dirty \t"); + evaluatesTo(Datamodel.makeStringValue("dirty"), "dirty \t"); } /** @@ -74,7 +73,7 @@ public class WbStringVariableTest extends WbVariableTest { @Test public void testLeadingWhitespace() { - evaluatesTo(Datamodel.makeStringValue(" dirty"), " dirty"); + evaluatesTo(Datamodel.makeStringValue("dirty"), " dirty"); } @Test