Merge pull request #1594 from OpenRefine/wikidata/better-format-feedback

Give better feedback when values with invalid formats are provided.
This commit is contained in:
Antonin Delpeuch 2018-05-22 23:56:24 +02:00 committed by GitHub
commit 1d6226b095
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 53 additions and 9 deletions

View File

@ -200,6 +200,22 @@
"invalid-identifier-space": { "invalid-identifier-space": {
"title": "Invalid identifier space for reconciled cells.", "title": "Invalid identifier space for reconciled cells.",
"body": "Some reconciled cells such as <span class=\"wb-issue-preformat\">{example_cell}</span> were ignored because they are not reconciled to Wikidata." "body": "Some reconciled cells such as <span class=\"wb-issue-preformat\">{example_cell}</span> were ignored because they are not reconciled to Wikidata."
},
"ignored-language": {
"title": "Invalid language identifiers.",
"body": "Some language identifiers are invalid, such as <span class=\"wb-issue-preformat\">{example_value}</span>. See the <a href=\"https://www.wikidata.org/wiki/Wikidata:Tools/OpenRefine/Editing/Schema_alignment#Languages\" target=\"_blank\">allowed values</a>."
},
"ignored-date": {
"title": "Invalid date formats.",
"body": "Some dates are incorrectly formatted, such as <span class=\"wb-issue-preformat\">{example_value}</span>. See the <a href=\"https://www.wikidata.org/wiki/Wikidata:Tools/OpenRefine/Editing/Schema_alignment#Dates\" target=\"_blank\">allowed formats</a>."
},
"ignored-amount": {
"title": "Invalid amount formats.",
"body": "Some amounts are incorrectly formatted, such as <span class=\"wb-issue-preformat\">{example_value}</span>. See the <a href=\"https://www.wikidata.org/wiki/Wikidata:Tools/OpenRefine/Editing/Schema_alignment#Quantities\" target=\"_blank\">allowed formats</a>."
},
"ignored-coordinates": {
"title": "Invalid geographic coordinates.",
"body": "Some coordinates are incorrectly formatted, such as <span class=\"wb-issue-preformat\">{example_value}</span>. See the <a href=\"https://www.wikidata.org/wiki/Wikidata:Tools/OpenRefine/Editing/Schema_alignment#Globe_coordinates\" target=\"_blank\">allowed formats</a>."
} }
} }
} }

View File

@ -55,9 +55,12 @@ public class WbDateConstant implements WbExpression<TimeValue> {
* precision it induces (an integer according to Wikibase's data model). * precision it induces (an integer according to Wikibase's data model).
*/ */
public static Map<SimpleDateFormat, Integer> acceptedFormats = ImmutableMap.<SimpleDateFormat, Integer> builder() public static Map<SimpleDateFormat, Integer> acceptedFormats = ImmutableMap.<SimpleDateFormat, Integer> builder()
.put(new SimpleDateFormat("yyyy"), 9).put(new SimpleDateFormat("yyyy-MM"), 10) .put(new SimpleDateFormat("yyyy"), 9)
.put(new SimpleDateFormat("yyyy-MM-dd"), 11).put(new SimpleDateFormat("yyyy-MM-dd'T'HH"), 12) .put(new SimpleDateFormat("yyyy-MM"), 10)
.put(new SimpleDateFormat("yyyy-MM-dd"), 11)
.put(new SimpleDateFormat("yyyy-MM-dd'T'HH"), 12)
.put(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm"), 13) .put(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm"), 13)
.put(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm'Z'"), 13)
.put(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'"), 14).build(); .put(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'"), 14).build();
private TimeValue parsed; private TimeValue parsed;
@ -114,10 +117,7 @@ public class WbDateConstant implements WbExpression<TimeValue> {
Calendar calendar = Calendar.getInstance(); Calendar calendar = Calendar.getInstance();
calendar = Calendar.getInstance(); calendar = Calendar.getInstance();
calendar.setTime(date); calendar.setTime(date);
return Datamodel.makeTimeValue(calendar.get(Calendar.YEAR), (byte) (calendar.get(Calendar.MONTH) + 1), // java return Datamodel.makeTimeValue(calendar.get(Calendar.YEAR), (byte) (calendar.get(Calendar.MONTH) + 1),
// starts
// at
// 0
(byte) calendar.get(Calendar.DAY_OF_MONTH), (byte) calendar.get(Calendar.HOUR_OF_DAY), (byte) calendar.get(Calendar.DAY_OF_MONTH), (byte) calendar.get(Calendar.HOUR_OF_DAY),
(byte) calendar.get(Calendar.MINUTE), (byte) calendar.get(Calendar.SECOND), (byte) precision, 0, 1, (byte) calendar.get(Calendar.MINUTE), (byte) calendar.get(Calendar.SECOND), (byte) precision, 0, 1,
0, TimeValue.CM_GREGORIAN_PRO); 0, TimeValue.CM_GREGORIAN_PRO);

View File

@ -25,6 +25,7 @@ package org.openrefine.wikidata.schema;
import java.text.ParseException; import java.text.ParseException;
import org.openrefine.wikidata.qa.QAWarning;
import org.openrefine.wikidata.schema.exceptions.SkipSchemaExpressionException; import org.openrefine.wikidata.schema.exceptions.SkipSchemaExpressionException;
import org.wikidata.wdtk.datamodel.interfaces.TimeValue; import org.wikidata.wdtk.datamodel.interfaces.TimeValue;
@ -57,6 +58,11 @@ public class WbDateVariable extends WbVariableExpr<TimeValue> {
// TODO accept parsed dates (without converting them to strings) // TODO accept parsed dates (without converting them to strings)
return WbDateConstant.parse(cell.value.toString()); return WbDateConstant.parse(cell.value.toString());
} catch (ParseException e) { } catch (ParseException e) {
if(!cell.value.toString().isEmpty()) {
QAWarning issue = new QAWarning("ignored-date", null, QAWarning.Severity.WARNING, 1);
issue.setProperty("example_value", cell.value.toString());
ctxt.addWarning(issue);
}
throw new SkipSchemaExpressionException(); throw new SkipSchemaExpressionException();
} }
} }

View File

@ -23,6 +23,7 @@
******************************************************************************/ ******************************************************************************/
package org.openrefine.wikidata.schema; package org.openrefine.wikidata.schema;
import org.openrefine.wikidata.qa.QAWarning;
import org.openrefine.wikidata.schema.exceptions.SkipSchemaExpressionException; import org.openrefine.wikidata.schema.exceptions.SkipSchemaExpressionException;
import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonCreator;
@ -59,6 +60,10 @@ public class WbLanguageVariable extends WbVariableExpr<String> {
String normalized = WbLanguageConstant.normalizeLanguageCode(code); String normalized = WbLanguageConstant.normalizeLanguageCode(code);
if (normalized != null) { if (normalized != null) {
return normalized; return normalized;
} else {
QAWarning issue = new QAWarning("ignored-language", null, QAWarning.Severity.WARNING, 1);
issue.setProperty("example_value", cell.value.toString());
ctxt.addWarning(issue);
} }
} }
throw new SkipSchemaExpressionException(); throw new SkipSchemaExpressionException();

View File

@ -25,6 +25,7 @@ package org.openrefine.wikidata.schema;
import java.text.ParseException; import java.text.ParseException;
import org.openrefine.wikidata.qa.QAWarning;
import org.openrefine.wikidata.schema.exceptions.SkipSchemaExpressionException; import org.openrefine.wikidata.schema.exceptions.SkipSchemaExpressionException;
import org.wikidata.wdtk.datamodel.interfaces.GlobeCoordinatesValue; import org.wikidata.wdtk.datamodel.interfaces.GlobeCoordinatesValue;
@ -50,6 +51,11 @@ public class WbLocationVariable extends WbVariableExpr<GlobeCoordinatesValue> {
try { try {
return WbLocationConstant.parse(expr); return WbLocationConstant.parse(expr);
} catch (ParseException e) { } catch (ParseException e) {
if (!expr.trim().isEmpty()) {
QAWarning issue = new QAWarning("ignored-coordinates", null, QAWarning.Severity.WARNING, 1);
issue.setProperty("example_value", expr);
ctxt.addWarning(issue);
}
throw new SkipSchemaExpressionException(); throw new SkipSchemaExpressionException();
} }
} }

View File

@ -26,6 +26,7 @@ package org.openrefine.wikidata.schema;
import java.math.BigDecimal; import java.math.BigDecimal;
import org.apache.commons.lang.Validate; import org.apache.commons.lang.Validate;
import org.openrefine.wikidata.qa.QAWarning;
import org.openrefine.wikidata.schema.exceptions.SkipSchemaExpressionException; import org.openrefine.wikidata.schema.exceptions.SkipSchemaExpressionException;
import org.wikidata.wdtk.datamodel.helpers.Datamodel; import org.wikidata.wdtk.datamodel.helpers.Datamodel;
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue; import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
@ -66,8 +67,8 @@ public class WbQuantityExpr implements WbExpression<QuantityValue> {
BigDecimal parsedAmount = null; BigDecimal parsedAmount = null;
BigDecimal lowerBound = null; BigDecimal lowerBound = null;
BigDecimal upperBound = null; BigDecimal upperBound = null;
try { String originalAmount = amount.getString().toUpperCase();
String originalAmount = amount.getString().toUpperCase(); try {
parsedAmount = new BigDecimal(originalAmount); parsedAmount = new BigDecimal(originalAmount);
@ -81,6 +82,11 @@ public class WbQuantityExpr implements WbExpression<QuantityValue> {
// workaround for https://github.com/Wikidata/Wikidata-Toolkit/issues/341 // workaround for https://github.com/Wikidata/Wikidata-Toolkit/issues/341
parsedAmount = new BigDecimal(parsedAmount.toPlainString()); parsedAmount = new BigDecimal(parsedAmount.toPlainString());
} catch (NumberFormatException e) { } catch (NumberFormatException e) {
if (!originalAmount.isEmpty()) {
QAWarning issue = new QAWarning("ignored-amount", null, QAWarning.Severity.WARNING, 1);
issue.setProperty("example_value", originalAmount);
ctxt.addWarning(issue);
}
throw new SkipSchemaExpressionException(); throw new SkipSchemaExpressionException();
} }

View File

@ -35,7 +35,9 @@ public class WbDateVariableTest extends WbVariableTest<TimeValue> {
private TimeValue year = Datamodel.makeTimeValue(2018, (byte) 1, (byte) 1, (byte) 0, (byte) 0, (byte) 0, (byte) 9, private TimeValue year = Datamodel.makeTimeValue(2018, (byte) 1, (byte) 1, (byte) 0, (byte) 0, (byte) 0, (byte) 9,
0, 1, 0, TimeValue.CM_GREGORIAN_PRO); 0, 1, 0, TimeValue.CM_GREGORIAN_PRO);
private TimeValue day = Datamodel.makeTimeValue(2018, (byte) 2, (byte) 27, TimeValue.CM_GREGORIAN_PRO); private TimeValue day = Datamodel.makeTimeValue(2018, (byte) 2, (byte) 27, TimeValue.CM_GREGORIAN_PRO);
private TimeValue minute = Datamodel.makeTimeValue(2001, (byte) 2, (byte) 3, (byte)4, (byte)5, (byte)0, (byte)13, (byte)0, (byte)1, (byte)0, TimeValue.CM_GREGORIAN_PRO);
@Override @Override
public WbVariableExpr<TimeValue> initVariableExpr() { public WbVariableExpr<TimeValue> initVariableExpr() {
return new WbDateVariable(); return new WbDateVariable();
@ -66,7 +68,10 @@ public class WbDateVariableTest extends WbVariableTest<TimeValue> {
isSkipped(new Cell(1234.56, null)); isSkipped(new Cell(1234.56, null));
} }
// TODO accept parsed dates with default precision @Test
public void testMinutesISO() {
evaluatesTo(minute, "2001-02-03T04:05Z");
}
@Test @Test
public void testSerialize() { public void testSerialize() {