Trim strings automatically in Wikibase schema. Closes #1781.

This commit is contained in:
Antonin Delpeuch 2018-11-01 15:29:57 +00:00
parent 3fb282852d
commit 8b4cf84bfd
8 changed files with 13 additions and 50 deletions

View File

@ -169,14 +169,6 @@
"title": "No language provided for monolingual text.",
"body": "Some label, description, alias or monolingual text value have been skipped because no language was provided. Example value: <span class=\"wb-issue-preformat\">{example_text}</span>."
},
"leading-whitespace": {
"title": "Leading whitespace in strings.",
"body": "Strings such as <span class=\"wb-issue-preformat\">{example_string}</span> have leading whitespace."
},
"trailing-whitespace": {
"title": "Trailing whitespace in strings.",
"body": "Strings such as <span class=\"wb-issue-preformat\">{example_string}</span> have trailing whitespace."
},
"duplicate-whitespace": {
"title": "Duplicate whitespace in strings.",
"body": "Strings such as <span class=\"wb-issue-preformat\">{example_string}</span> contain duplicate whitespace."

View File

@ -167,14 +167,6 @@
"title": "Pas de langue fournie pour des textes monolingues.",
"body": "Des libellés, descriptions, alias ou textes monolingues ont été ignorés car aucune langue n'a été fournie. Exemple: <span class=\"wb-issue-preformat\">{example_text}</span>."
},
"leading-whitespace": {
"title": "Espaces au début de chaînes de caractères.",
"body": "Des chaînes telles que <span class=\"wb-issue-preformat\">{example_string}</span> ont des espaces au début."
},
"trailing-whitespace": {
"title": "Espaces à la fin de chaînes de caractères.",
"body": "Des chaînes telles que <span class=\"wb-issue-preformat\">{example_string}</span> ont des espaces à la fin."
},
"duplicate-whitespace": {
"title": "Espaces dédoublées dans des chaînes de caractères.",
"body": "Des chaînes telles que <span class=\"wb-issue-preformat\">{example_string}</span> contiennent des espaces dédoublées."

View File

@ -180,14 +180,6 @@
"title": "言語指定がありません.",
"body": "言語指定がないので、ラベル・記述・別名・単一言語テキストが無視されました。例えば: <span class=\"wb-issue-preformat\">{example_text}</span>."
},
"leading-whitespace": {
"title": "文頭に空白文字があります.",
"body": "<span class=\"wb-issue-preformat\">{example_string}</span>の文頭に空白文字があります."
},
"trailing-whitespace": {
"title": "文末に空白文字があります.",
"body": "<span class=\"wb-issue-preformat\">{example_string}</span>の文末に空白文字があります."
},
"duplicate-whitespace": {
"title": "二重の空白文字があります.",
"body": "<span class=\"wb-issue-preformat\">{example_string}</span>には二重の空白文字があります."

View File

@ -43,15 +43,11 @@ public class WhitespaceScrutinizer extends ValueScrutinizer {
private Map<String, Pattern> _issuesMap;
public static final String leadingWhitespaceType = "leading-whitespace";
public static final String trailingWhitespaceType = "trailing-whitespace";
public static final String duplicateWhitespaceType = "duplicate-whitespace";
public static final String nonPrintableCharsType = "non-printable-characters";
public WhitespaceScrutinizer() {
_issuesMap = new HashMap<>();
_issuesMap.put(leadingWhitespaceType, Pattern.compile("^\\s"));
_issuesMap.put(trailingWhitespaceType, Pattern.compile("\\s$"));
_issuesMap.put(duplicateWhitespaceType, Pattern.compile("\\s\\s"));
// https://stackoverflow.com/questions/14565934/regular-expression-to-remove-all-non-printable-characters

View File

@ -58,7 +58,7 @@ public class WbStringVariable extends WbVariableExpr<StringValue> {
public StringValue fromCell(Cell cell, ExpressionContext ctxt)
throws SkipSchemaExpressionException {
if (!cell.value.toString().isEmpty()) {
return Datamodel.makeStringValue(cell.value.toString());
return Datamodel.makeStringValue(cell.value.toString().trim());
}
throw new SkipSchemaExpressionException();
}

View File

@ -33,18 +33,6 @@ public class WhitespaceScrutinizerTest extends ValueScrutinizerTest {
return new WhitespaceScrutinizer();
}
@Test
public void testLeadingWhitespace() {
scrutinize(Datamodel.makeStringValue(" a"));
assertWarningsRaised(WhitespaceScrutinizer.leadingWhitespaceType);
}
@Test
public void testTrailingWhitespace() {
scrutinize(Datamodel.makeStringValue("a\t"));
assertWarningsRaised(WhitespaceScrutinizer.trailingWhitespaceType);
}
@Test
public void testDuplicateWhitespace() {
scrutinize(Datamodel.makeStringValue("a\t b"));
@ -65,14 +53,13 @@ public class WhitespaceScrutinizerTest extends ValueScrutinizerTest {
@Test
public void testMultipleIssues() {
scrutinize(Datamodel.makeStringValue(" a\t b "));
assertWarningsRaised(WhitespaceScrutinizer.duplicateWhitespaceType, WhitespaceScrutinizer.leadingWhitespaceType,
WhitespaceScrutinizer.trailingWhitespaceType);
scrutinize(Datamodel.makeStringValue("a\t b\u0003"));
assertWarningsRaised(WhitespaceScrutinizer.duplicateWhitespaceType, WhitespaceScrutinizer.nonPrintableCharsType);
}
@Test
public void testMonolingualTextValue() {
scrutinizeLabel(Datamodel.makeMonolingualTextValue(" a", "fr"));
assertWarningsRaised(WhitespaceScrutinizer.leadingWhitespaceType);
scrutinizeLabel(Datamodel.makeMonolingualTextValue("a b", "fr"));
assertWarningsRaised(WhitespaceScrutinizer.duplicateWhitespaceType);
}
}

View File

@ -20,6 +20,11 @@ public class WbStringConstantTest extends WbExpressionTest<StringValue> {
evaluatesTo(Datamodel.makeStringValue("hello world"), constant);
}
@Test
public void testTrim() {
evaluatesTo(Datamodel.makeStringValue("hello world"), new WbStringConstant(" hello world "));
}
@Test(expectedExceptions = IllegalArgumentException.class)
public void testEmpty() {
new WbStringConstant("");

View File

@ -45,17 +45,16 @@ public class WbStringVariableTest extends WbVariableTest<StringValue> {
}
/**
* It is not up to the evaluator to clean up the strings it gets. This is
* flagged later on by scrutinizers.
* The evaluator trims leading and trailing whitespace, but leaves duplicate whitespace inside the string untouched.
*/
@Test
public void testTrailingWhitespace() {
evaluatesTo(Datamodel.makeStringValue("dirty \t"), "dirty \t");
evaluatesTo(Datamodel.makeStringValue("dirty"), "dirty \t");
}
@Test
public void testLeadingWhitespace() {
evaluatesTo(Datamodel.makeStringValue(" dirty"), " dirty");
evaluatesTo(Datamodel.makeStringValue("dirty"), " dirty");
}
@Test