Extended tests and code for user friendly regular expression syntax errors

This commit is contained in:
Owen Stephens 2017-11-28 16:20:24 +00:00
parent 441d9f7f10
commit d64dee71dd
2 changed files with 101 additions and 101 deletions

View File

@ -36,47 +36,22 @@ package com.google.refine.util;
import java.util.regex.PatternSyntaxException;
public class PatternSyntaxExceptionParser {
private final PatternSyntaxException exception;
/**
 * Creates a parser around the given regular expression syntax error.
 *
 * @param e the exception whose description is later translated by
 *          {@link #getUserMessage()}
 */
public PatternSyntaxExceptionParser(final PatternSyntaxException e) {
    exception = e;
}
/**
 * Maps the description of the wrapped exception onto a more readable message.
 *
 * @return a user friendly message for the descriptions handled below, otherwise
 *         the exception's own description
 */
public String getUserMessage() {
    final String description = exception.getDescription();
    switch (description) {
        case "Unclosed character class":
            return "The regular expression is missing a closing ']' character.";
        case "Unmatched closing ')'":
            return "The regular expression is missing a opening '(' character.";
        case "Unclosed group":
            return "The regular expression is missing a closing ')' character.";
        default:
            // No dedicated translation: hand back the raw description.
            return description;
    }
}
/* Error messages from java.util.regex.Pattern
/* Class to translate PatternSyntaxExceptions into more user friendly error messages
Currently translates the following error messages from java.util.regex.Pattern
"Unclosed character class"
"Unmatched closing ')'"
"Unexpected internal error"
"Dangling meta character '" + ((char)ch) + "'"
"Unclosed counted closure"
"Illegal repetition"
"Illegal repetition range"
"Illegal character range"
The following messages are not currently translated and are output as per PatternSyntaxException
"\\k is not followed by '<' for named capturing group"
"(named capturing group <"+ name+"> does not exist"
"Illegal/unsupported escape sequence"
"Bad class syntax"
"Unclosed character class"
"Illegal character range"
"Unexpected character '"+((char)ch)+"'"
"Unclosed character family"
"Empty character family"
@ -92,9 +67,6 @@ public class PatternSyntaxExceptionParser {
"Unknown group type"
"Unknown inline modifier"
"Internal logic error"
"Unclosed counted closure"
"Illegal repetition range"
"Illegal repetition"
"Illegal control escape sequence"
"Illegal octal escape sequence"
"Hexadecimal codepoint is too big"
@ -102,6 +74,54 @@ public class PatternSyntaxExceptionParser {
"Illegal hexadecimal escape sequence"
"Illegal Unicode escape sequence"
*/
private final PatternSyntaxException exception;
/**
 * Wraps a {@link PatternSyntaxException} so that its description can be turned
 * into a user friendly message.
 *
 * @param e the syntax error to translate
 */
public PatternSyntaxExceptionParser(final PatternSyntaxException e) {
    exception = e;
}
/**
 * Translates the description of the wrapped {@link PatternSyntaxException}
 * into a more user friendly error message.
 *
 * <p>Descriptions without a dedicated translation, and exceptions that carry
 * no description at all, fall back on the message built by
 * {@link PatternSyntaxException#getMessage()}.
 *
 * @return the user friendly message, or the exception's own message when no
 *         translation applies
 */
public String getUserMessage() {
    final String desc = exception.getDescription();
    if (desc == null) {
        // Switching on a null String throws NullPointerException, so treat a
        // missing description like any other untranslated error.
        return exception.getMessage();
    }
    switch (desc) {
        case "Unclosed character class":
            return "The regular expression is missing a closing ']' character, or has an empty pair of square brackets '[]'.";
        case "Unmatched closing ')'":
            return "The regular expression is missing a opening '(' character.";
        case "Unclosed group":
            return "The regular expression is missing a closing ')' character.";
        case "Dangling meta character '*'":
        case "Dangling meta character '+'":
        case "Dangling meta character '?'":
            // All three quantifier characters share one explanation.
            return "The regular expression has a '*','+' or '?' in the wrong place.";
        case "Unexpected internal error":
            // This description is treated as the trailing backslash case.
            return "The regular expression has a backslash '\\' at the end.";
        case "Unclosed counted closure":
            return "The regular expression is missing a closing '}' character, or has an incorrect quantifier statement in curly brackets '{}'.";
        case "Illegal repetition":
            return "The regular expression has an incomplete or incorrect quantifier statement in curly brackets '{}'.";
        case "Illegal repetition range":
            return "The regular expression has a quantifier statement where the minimum is larger than the maximum (e.g. {4,3}).";
        case "Illegal character range":
            return "The regular expression has a range statement which is incomplete or has the characters in the incorrect order (e.g. [9-0])";
        default:
            // If no special handling is in place, fall back on the error message
            // created by java.util.regex.PatternSyntaxException.
            return exception.getMessage();
    }
}
}

View File

@ -52,36 +52,6 @@ public class PatternSyntaxExceptionParserTests extends RefineTest {
logger = LoggerFactory.getLogger(this.getClass());
}
/*
Potential errors and error messages from PatternSyntaxException
groupopen:"Unmatched opening parenthesis."
Unclosed group near index 1 ( ^
groupclose:"Unmatched closing parenthesis."
Unmatched closing ')' )
setopen:"Unmatched opening square bracket."
Unclosed character class near index 0 [ ^
quanttarg:"Invalid target for quantifier."
Dangling meta character '+' near index 0 +{4} ^
Dangling meta character '*' near index 0 * ^
Dangling meta character '?' near index 0 ? ^
esccharopen:"Dangling backslash."
Unexpected internal error near index 1 \ ^
quantrev:"Quantifier minimum is greater than maximum."
Illegal repetition range near index 5 a{3,2} ^
rangerev:"Range values reversed. Start char is greater than end char."
Illegal character range near index 3 [9-0] ^
esccharbad:"Invalid escape sequence."
Illegal control escape sequence: \c
Illegal/unsupported escape sequence: \g \i \j \l \m \o \q \y
\k is not followed by '<' for named capturing group: \k
Unknown character property name {}: \p
Illegal Unicode escape sequence: {backslash}u
Illegal hexadecimal escape sequence: \x
Illegal octal escape sequence: \0
invalidnamegroup:
named capturing group is missing trailing '>' near index 5 (?<as?>a) ^
*/
@Test
public void unmatchedOpeningParenthesisTest(){
String s = "(abc";
@ -116,20 +86,7 @@ invalidnamegroup:
} catch (PatternSyntaxException err) {
PatternSyntaxExceptionParser e = new PatternSyntaxExceptionParser(err);
Assert.assertEquals(e.getUserMessage(),
"The regular expression is missing a closing ']' character.");
}
}
@Test
public void quantifierTargetValidityTest(){
String s = "abc+*";
try {
Pattern pattern = Pattern.compile(s);
Assert.assertTrue(false,"Test pattern successfully compiled when it should fail");
} catch (PatternSyntaxException err) {
PatternSyntaxExceptionParser e = new PatternSyntaxExceptionParser(err);
Assert.assertEquals(e.getUserMessage(),
"The regular expression has a '*','+' or '?' in the wrong place");
"The regular expression is missing a closing ']' character, or has an empty pair of square brackets '[]'.");
}
}
@ -146,6 +103,43 @@ invalidnamegroup:
}
}
/**
 * Compiles a pattern with an unterminated counted quantifier and checks that
 * the parser reports the "missing a closing '}'" user message.
 */
@Test
public void unmatchedOpeningCurlyBracketTest(){
    String s = "abc{3";
    try {
        Pattern.compile(s);
        // Without this the test would pass vacuously if the pattern compiled.
        Assert.fail("Test pattern successfully compiled when it should fail");
    } catch (PatternSyntaxException err) {
        PatternSyntaxExceptionParser e = new PatternSyntaxExceptionParser(err);
        Assert.assertEquals(e.getUserMessage(),
                "The regular expression is missing a closing '}' character, or has an incorrect quantifier statement in curly brackets '{}'.");
    }
}
/**
 * Compiles a pattern ending in a bare opening curly bracket and checks that the
 * parser reports the "incomplete or incorrect quantifier statement" user message.
 */
@Test
public void illegalQuantifierStatement(){
    String s = "abc{";
    try {
        Pattern.compile(s);
        // Without this the test would pass vacuously if the pattern compiled.
        Assert.fail("Test pattern successfully compiled when it should fail");
    } catch (PatternSyntaxException err) {
        PatternSyntaxExceptionParser e = new PatternSyntaxExceptionParser(err);
        Assert.assertEquals(e.getUserMessage(),
                "The regular expression has an incomplete or incorrect quantifier statement in curly brackets '{}'.");
    }
}
/**
 * Compiles a pattern with a quantifier applied to another quantifier and checks
 * that the parser reports the misplaced '*', '+' or '?' user message.
 */
@Test
public void quantifierTargetValidityTest(){
    final String expected = "The regular expression has a '*','+' or '?' in the wrong place.";
    final String regex = "abc+*";
    try {
        Pattern.compile(regex);
        Assert.assertTrue(false,"Test pattern successfully compiled when it should fail");
    } catch (PatternSyntaxException syntaxError) {
        final String actual = new PatternSyntaxExceptionParser(syntaxError).getUserMessage();
        Assert.assertEquals(actual, expected);
    }
}
@Test
public void quantifierMagnitudeTest(){
String s = "a{4,3}";
@ -168,22 +162,8 @@ invalidnamegroup:
} catch (PatternSyntaxException err) {
PatternSyntaxExceptionParser e = new PatternSyntaxExceptionParser(err);
Assert.assertEquals(e.getUserMessage(),
"The regular expression has a range statement with the characters in the incorrect order (e.g. [9-0])");
"The regular expression has a range statement which is incomplete or has the characters in the incorrect order (e.g. [9-0])");
}
}
/* This needs to be different to the others - see all the variations on invalid escape sequences
@Test
public void escapeSequenceValidityTest(){
String s = "";
try {
Pattern pattern = Pattern.compile(s);
Assert.assertTrue(false,"Test pattern successfully compiled when it should fail");
} catch (PatternSyntaxException err) {
PatternSyntaxExceptionParser e = new PatternSyntaxExceptionParser(err);
Assert.assertEquals(e.getUserMessage(),
"Invalid escape sequence.");
}
}
*/
}