Merge pull request #1794 from OpenRefine/issue1781
Trim strings automatically in Wikibase schema.
This commit is contained in:
commit
e5061cc44c
@ -169,14 +169,6 @@
|
||||
"title": "No language provided for monolingual text.",
|
||||
"body": "Some label, description, alias or monolingual text value have been skipped because no language was provided. Example value: <span class=\"wb-issue-preformat\">{example_text}</span>."
|
||||
},
|
||||
"leading-whitespace": {
|
||||
"title": "Leading whitespace in strings.",
|
||||
"body": "Strings such as <span class=\"wb-issue-preformat\">{example_string}</span> have leading whitespace."
|
||||
},
|
||||
"trailing-whitespace": {
|
||||
"title": "Trailing whitespace in strings.",
|
||||
"body": "Strings such as <span class=\"wb-issue-preformat\">{example_string}</span> have trailing whitespace."
|
||||
},
|
||||
"duplicate-whitespace": {
|
||||
"title": "Duplicate whitespace in strings.",
|
||||
"body": "Strings such as <span class=\"wb-issue-preformat\">{example_string}</span> contain duplicate whitespace."
|
||||
|
@ -168,14 +168,6 @@
|
||||
"title": "Pas de langue fournie pour des textes monolingues.",
|
||||
"body": "Des libellés, descriptions, alias ou textes monolingues ont été ignorés car aucune langue n'a été fournie. Exemple: <span class=\"wb-issue-preformat\">{example_text}</span>."
|
||||
},
|
||||
"leading-whitespace": {
|
||||
"title": "Espaces au début de chaînes de caractères.",
|
||||
"body": "Des chaînes telles que <span class=\"wb-issue-preformat\">{example_string}</span> ont des espaces au début."
|
||||
},
|
||||
"trailing-whitespace": {
|
||||
"title": "Espaces à la fin de chaînes de caractères.",
|
||||
"body": "Des chaînes telles que <span class=\"wb-issue-preformat\">{example_string}</span> ont des espaces à la fin."
|
||||
},
|
||||
"duplicate-whitespace": {
|
||||
"title": "Espaces dédoublées dans des chaînes de caractères.",
|
||||
"body": "Des chaînes telles que <span class=\"wb-issue-preformat\">{example_string}</span> contiennent des espaces dédoublées."
|
||||
|
@ -180,14 +180,6 @@
|
||||
"title": "言語指定がありません.",
|
||||
"body": "言語指定がないので、ラベル・記述・別名・単一言語テキストが無視されました。例えば: <span class=\"wb-issue-preformat\">{example_text}</span>."
|
||||
},
|
||||
"leading-whitespace": {
|
||||
"title": "文頭に空白文字があります.",
|
||||
"body": "<span class=\"wb-issue-preformat\">{example_string}</span>の文頭に空白文字があります."
|
||||
},
|
||||
"trailing-whitespace": {
|
||||
"title": "文末に空白文字があります.",
|
||||
"body": "<span class=\"wb-issue-preformat\">{example_string}</span>の文末に空白文字があります."
|
||||
},
|
||||
"duplicate-whitespace": {
|
||||
"title": "二重の空白文字があります.",
|
||||
"body": "<span class=\"wb-issue-preformat\">{example_string}</span>には二重の空白文字があります."
|
||||
|
@ -43,15 +43,11 @@ public class WhitespaceScrutinizer extends ValueScrutinizer {
|
||||
|
||||
private Map<String, Pattern> _issuesMap;
|
||||
|
||||
public static final String leadingWhitespaceType = "leading-whitespace";
|
||||
public static final String trailingWhitespaceType = "trailing-whitespace";
|
||||
public static final String duplicateWhitespaceType = "duplicate-whitespace";
|
||||
public static final String nonPrintableCharsType = "non-printable-characters";
|
||||
|
||||
public WhitespaceScrutinizer() {
|
||||
_issuesMap = new HashMap<>();
|
||||
_issuesMap.put(leadingWhitespaceType, Pattern.compile("^\\s"));
|
||||
_issuesMap.put(trailingWhitespaceType, Pattern.compile("\\s$"));
|
||||
_issuesMap.put(duplicateWhitespaceType, Pattern.compile("\\s\\s"));
|
||||
|
||||
// https://stackoverflow.com/questions/14565934/regular-expression-to-remove-all-non-printable-characters
|
||||
|
@ -53,7 +53,7 @@ public class WbMonolingualExpr implements WbExpression<MonolingualTextValue> {
|
||||
String text = getValueExpr().evaluate(ctxt).getString();
|
||||
try {
|
||||
String lang = getLanguageExpr().evaluate(ctxt);
|
||||
return Datamodel.makeMonolingualTextValue(text, lang);
|
||||
return Datamodel.makeMonolingualTextValue(text.trim(), lang);
|
||||
|
||||
} catch (SkipSchemaExpressionException e) {
|
||||
QAWarning warning = new QAWarning("monolingual-text-without-language", null, QAWarning.Severity.WARNING, 1);
|
||||
|
@ -39,7 +39,7 @@ public class WbStringConstant implements WbExpression<StringValue> {
|
||||
Validate.notNull(value);
|
||||
Validate.isTrue(!value.isEmpty()); // for now we don't accept empty strings
|
||||
// because in the variable counterpart of this expression, they are skipped
|
||||
this.value = value;
|
||||
this.value = value.trim();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -62,7 +62,7 @@ public class WbStringVariable extends WbVariableExpr<StringValue> {
|
||||
if (cell.value instanceof Double && ((Double)cell.value) % 1 == 0) {
|
||||
stringValue = Integer.toString(((Double)cell.value).intValue());
|
||||
}
|
||||
return Datamodel.makeStringValue(stringValue);
|
||||
return Datamodel.makeStringValue(stringValue.trim());
|
||||
}
|
||||
throw new SkipSchemaExpressionException();
|
||||
}
|
||||
|
@ -33,18 +33,6 @@ public class WhitespaceScrutinizerTest extends ValueScrutinizerTest {
|
||||
return new WhitespaceScrutinizer();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testLeadingWhitespace() {
|
||||
scrutinize(Datamodel.makeStringValue(" a"));
|
||||
assertWarningsRaised(WhitespaceScrutinizer.leadingWhitespaceType);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTrailingWhitespace() {
|
||||
scrutinize(Datamodel.makeStringValue("a\t"));
|
||||
assertWarningsRaised(WhitespaceScrutinizer.trailingWhitespaceType);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDuplicateWhitespace() {
|
||||
scrutinize(Datamodel.makeStringValue("a\t b"));
|
||||
@ -65,14 +53,13 @@ public class WhitespaceScrutinizerTest extends ValueScrutinizerTest {
|
||||
|
||||
@Test
|
||||
public void testMultipleIssues() {
|
||||
scrutinize(Datamodel.makeStringValue(" a\t b "));
|
||||
assertWarningsRaised(WhitespaceScrutinizer.duplicateWhitespaceType, WhitespaceScrutinizer.leadingWhitespaceType,
|
||||
WhitespaceScrutinizer.trailingWhitespaceType);
|
||||
scrutinize(Datamodel.makeStringValue("a\t b\u0003"));
|
||||
assertWarningsRaised(WhitespaceScrutinizer.duplicateWhitespaceType, WhitespaceScrutinizer.nonPrintableCharsType);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMonolingualTextValue() {
|
||||
scrutinizeLabel(Datamodel.makeMonolingualTextValue(" a", "fr"));
|
||||
assertWarningsRaised(WhitespaceScrutinizer.leadingWhitespaceType);
|
||||
scrutinizeLabel(Datamodel.makeMonolingualTextValue("a b", "fr"));
|
||||
assertWarningsRaised(WhitespaceScrutinizer.duplicateWhitespaceType);
|
||||
}
|
||||
}
|
||||
|
@ -43,6 +43,12 @@ public class WbMonolingualExprTest extends WbExpressionTest<MonolingualTextValue
|
||||
setRow("en", "hello");
|
||||
evaluatesTo(Datamodel.makeMonolingualTextValue("hello", "en"), expr);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTrim() {
|
||||
setRow("en", " hello ");
|
||||
evaluatesTo(Datamodel.makeMonolingualTextValue("hello", "en"), expr);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testInvalidLanguageCode() {
|
||||
|
@ -20,6 +20,11 @@ public class WbStringConstantTest extends WbExpressionTest<StringValue> {
|
||||
evaluatesTo(Datamodel.makeStringValue("hello world"), constant);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTrim() {
|
||||
evaluatesTo(Datamodel.makeStringValue("hello world"), new WbStringConstant(" hello world "));
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = IllegalArgumentException.class)
|
||||
public void testEmpty() {
|
||||
new WbStringConstant("");
|
||||
|
@ -47,12 +47,11 @@ public class WbStringVariableTest extends WbVariableTest<StringValue> {
|
||||
}
|
||||
|
||||
/**
|
||||
* It is not up to the evaluator to clean up the strings it gets. This is
|
||||
* flagged later on by scrutinizers.
|
||||
* The evaluator trims leading and trailing whitespace, but does not collapse duplicate whitespace inside the string.
|
||||
*/
|
||||
@Test
|
||||
public void testTrailingWhitespace() {
|
||||
evaluatesTo(Datamodel.makeStringValue("dirty \t"), "dirty \t");
|
||||
evaluatesTo(Datamodel.makeStringValue("dirty"), "dirty \t");
|
||||
}
|
||||
|
||||
/**
|
||||
@ -74,7 +73,7 @@ public class WbStringVariableTest extends WbVariableTest<StringValue> {
|
||||
|
||||
@Test
|
||||
public void testLeadingWhitespace() {
|
||||
evaluatesTo(Datamodel.makeStringValue(" dirty"), " dirty");
|
||||
evaluatesTo(Datamodel.makeStringValue("dirty"), " dirty");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
Loading…
Reference in New Issue
Block a user