diff --git a/extensions/wikidata/module/langs/translation-en.json b/extensions/wikidata/module/langs/translation-en.json
index 40a298048..18a82adb3 100644
--- a/extensions/wikidata/module/langs/translation-en.json
+++ b/extensions/wikidata/module/langs/translation-en.json
@@ -169,14 +169,6 @@
"title": "No language provided for monolingual text.",
"body": "Some label, description, alias or monolingual text value have been skipped because no language was provided. Example value: {example_text}."
},
- "leading-whitespace": {
- "title": "Leading whitespace in strings.",
- "body": "Strings such as {example_string} have leading whitespace."
- },
- "trailing-whitespace": {
- "title": "Trailing whitespace in strings.",
- "body": "Strings such as {example_string} have trailing whitespace."
- },
"duplicate-whitespace": {
"title": "Duplicate whitespace in strings.",
"body": "Strings such as {example_string} contain duplicate whitespace."
diff --git a/extensions/wikidata/module/langs/translation-fr.json b/extensions/wikidata/module/langs/translation-fr.json
index adab4dbac..c82590f9c 100644
--- a/extensions/wikidata/module/langs/translation-fr.json
+++ b/extensions/wikidata/module/langs/translation-fr.json
@@ -168,14 +168,6 @@
"title": "Pas de langue fournie pour des textes monolingues.",
"body": "Des libellés, descriptions, alias ou textes monolingues ont été ignorés car aucune langue n'a été fournie. Exemple: {example_text}."
},
- "leading-whitespace": {
- "title": "Espaces au début de chaînes de caractères.",
- "body": "Des chaînes telles que {example_string} ont des espaces au début."
- },
- "trailing-whitespace": {
- "title": "Espaces à la fin de chaînes de caractères.",
- "body": "Des chaînes telles que {example_string} ont des espaces à la fin."
- },
"duplicate-whitespace": {
"title": "Espaces dédoublées dans des chaînes de caractères.",
"body": "Des chaînes telles que {example_string} contiennent des espaces dédoublées."
diff --git a/extensions/wikidata/module/langs/translation-jp.json b/extensions/wikidata/module/langs/translation-jp.json
index c1b4009bf..2094def7e 100644
--- a/extensions/wikidata/module/langs/translation-jp.json
+++ b/extensions/wikidata/module/langs/translation-jp.json
@@ -180,14 +180,6 @@
"title": "言語指定がありません.",
"body": "言語指定がないので、ラベル・記述・別名・単一言語テキストが無視されました。例えば: {example_text}."
},
- "leading-whitespace": {
- "title": "文頭に空白文字があります.",
- "body": "{example_string}の文頭に空白文字があります."
- },
- "trailing-whitespace": {
- "title": "文末に空白文字があります.",
- "body": "{example_string}の文末に空白文字があります."
- },
"duplicate-whitespace": {
"title": "二重の空白文字があります.",
"body": "{example_string}には二重の空白文字があります."
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/WhitespaceScrutinizer.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/WhitespaceScrutinizer.java
index a5685384e..cdcbcc7f1 100644
--- a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/WhitespaceScrutinizer.java
+++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/WhitespaceScrutinizer.java
@@ -43,15 +43,11 @@ public class WhitespaceScrutinizer extends ValueScrutinizer {
private Map _issuesMap;
- public static final String leadingWhitespaceType = "leading-whitespace";
- public static final String trailingWhitespaceType = "trailing-whitespace";
public static final String duplicateWhitespaceType = "duplicate-whitespace";
public static final String nonPrintableCharsType = "non-printable-characters";
public WhitespaceScrutinizer() {
_issuesMap = new HashMap<>();
- _issuesMap.put(leadingWhitespaceType, Pattern.compile("^\\s"));
- _issuesMap.put(trailingWhitespaceType, Pattern.compile("\\s$"));
_issuesMap.put(duplicateWhitespaceType, Pattern.compile("\\s\\s"));
// https://stackoverflow.com/questions/14565934/regular-expression-to-remove-all-non-printable-characters
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbMonolingualExpr.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbMonolingualExpr.java
index ef7622f2d..f393c7d15 100644
--- a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbMonolingualExpr.java
+++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbMonolingualExpr.java
@@ -53,7 +53,7 @@ public class WbMonolingualExpr implements WbExpression {
String text = getValueExpr().evaluate(ctxt).getString();
try {
String lang = getLanguageExpr().evaluate(ctxt);
- return Datamodel.makeMonolingualTextValue(text, lang);
+ return Datamodel.makeMonolingualTextValue(text.trim(), lang);
} catch (SkipSchemaExpressionException e) {
QAWarning warning = new QAWarning("monolingual-text-without-language", null, QAWarning.Severity.WARNING, 1);
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbStringConstant.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbStringConstant.java
index 7a1ddf9e3..a8d4ce34c 100644
--- a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbStringConstant.java
+++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbStringConstant.java
@@ -39,7 +39,7 @@ public class WbStringConstant implements WbExpression {
Validate.notNull(value);
Validate.isTrue(!value.isEmpty()); // for now we don't accept empty strings
// because in the variable counterpart of this expression, they are skipped
- this.value = value;
+ this.value = value.trim();
}
@Override
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbStringVariable.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbStringVariable.java
index 7385d35eb..4e1ae3bd2 100644
--- a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbStringVariable.java
+++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbStringVariable.java
@@ -62,7 +62,7 @@ public class WbStringVariable extends WbVariableExpr {
if (cell.value instanceof Double && ((Double)cell.value) % 1 == 0) {
stringValue = Integer.toString(((Double)cell.value).intValue());
}
- return Datamodel.makeStringValue(stringValue);
+ return Datamodel.makeStringValue(stringValue.trim());
}
throw new SkipSchemaExpressionException();
}
diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/WhitespaceScrutinizerTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/WhitespaceScrutinizerTest.java
index c2ffad6a1..25a70049c 100644
--- a/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/WhitespaceScrutinizerTest.java
+++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/WhitespaceScrutinizerTest.java
@@ -33,18 +33,6 @@ public class WhitespaceScrutinizerTest extends ValueScrutinizerTest {
return new WhitespaceScrutinizer();
}
- @Test
- public void testLeadingWhitespace() {
- scrutinize(Datamodel.makeStringValue(" a"));
- assertWarningsRaised(WhitespaceScrutinizer.leadingWhitespaceType);
- }
-
- @Test
- public void testTrailingWhitespace() {
- scrutinize(Datamodel.makeStringValue("a\t"));
- assertWarningsRaised(WhitespaceScrutinizer.trailingWhitespaceType);
- }
-
@Test
public void testDuplicateWhitespace() {
scrutinize(Datamodel.makeStringValue("a\t b"));
@@ -65,14 +53,13 @@ public class WhitespaceScrutinizerTest extends ValueScrutinizerTest {
@Test
public void testMultipleIssues() {
- scrutinize(Datamodel.makeStringValue(" a\t b "));
- assertWarningsRaised(WhitespaceScrutinizer.duplicateWhitespaceType, WhitespaceScrutinizer.leadingWhitespaceType,
- WhitespaceScrutinizer.trailingWhitespaceType);
+ scrutinize(Datamodel.makeStringValue("a\t b\u0003"));
+ assertWarningsRaised(WhitespaceScrutinizer.duplicateWhitespaceType, WhitespaceScrutinizer.nonPrintableCharsType);
}
@Test
public void testMonolingualTextValue() {
- scrutinizeLabel(Datamodel.makeMonolingualTextValue(" a", "fr"));
- assertWarningsRaised(WhitespaceScrutinizer.leadingWhitespaceType);
+ scrutinizeLabel(Datamodel.makeMonolingualTextValue("a b", "fr"));
+ assertWarningsRaised(WhitespaceScrutinizer.duplicateWhitespaceType);
}
}
diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/schema/WbMonolingualExprTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/schema/WbMonolingualExprTest.java
index bea0536b8..09dd4bd23 100644
--- a/extensions/wikidata/tests/src/org/openrefine/wikidata/schema/WbMonolingualExprTest.java
+++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/schema/WbMonolingualExprTest.java
@@ -43,6 +43,12 @@ public class WbMonolingualExprTest extends WbExpressionTest {
evaluatesTo(Datamodel.makeStringValue("hello world"), constant);
}
+ @Test
+ public void testTrim() {
+ evaluatesTo(Datamodel.makeStringValue("hello world"), new WbStringConstant(" hello world "));
+ }
+
@Test(expectedExceptions = IllegalArgumentException.class)
public void testEmpty() {
new WbStringConstant("");
diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/schema/WbStringVariableTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/schema/WbStringVariableTest.java
index 3cace300c..334ce09f5 100644
--- a/extensions/wikidata/tests/src/org/openrefine/wikidata/schema/WbStringVariableTest.java
+++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/schema/WbStringVariableTest.java
@@ -47,12 +47,11 @@ public class WbStringVariableTest extends WbVariableTest {
}
/**
- * It is not up to the evaluator to clean up the strings it gets. This is
- * flagged later on by scrutinizers.
+ * The evaluator cleans up leading and trailing whitespace, but not duplicate spaces
*/
@Test
public void testTrailingWhitespace() {
- evaluatesTo(Datamodel.makeStringValue("dirty \t"), "dirty \t");
+ evaluatesTo(Datamodel.makeStringValue("dirty"), "dirty \t");
}
/**
@@ -74,7 +73,7 @@ public class WbStringVariableTest extends WbVariableTest {
@Test
public void testLeadingWhitespace() {
- evaluatesTo(Datamodel.makeStringValue(" dirty"), " dirty");
+ evaluatesTo(Datamodel.makeStringValue("dirty"), " dirty");
}
@Test