Extended tests and code for user friendly regular expression syntax errors

This commit is contained in:
Owen Stephens 2017-11-28 16:20:24 +00:00
parent 441d9f7f10
commit d64dee71dd
2 changed files with 101 additions and 101 deletions

View File

@ -36,6 +36,44 @@ package com.google.refine.util;
import java.util.regex.PatternSyntaxException;
public class PatternSyntaxExceptionParser {
/* Translates PatternSyntaxExceptions into more user-friendly error messages.
Currently translates the following error messages from java.util.regex.Pattern:
"Unclosed character class"
"Unclosed group"
"Unmatched closing ')'"
"Unexpected internal error"
"Dangling meta character '" + ((char)ch) + "'" (only for '*', '+' and '?')
"Unclosed counted closure"
"Illegal repetition"
"Illegal repetition range"
"Illegal character range"
The following messages are not currently translated and are output as produced by PatternSyntaxException:
"\\k is not followed by '<' for named capturing group"
"(named capturing group <"+ name+"> does not exist"
"Illegal/unsupported escape sequence"
"Bad class syntax"
"Unexpected character '"+((char)ch)+"'"
"Unclosed character family"
"Empty character family"
"Unknown Unicode property {name=<" + name + ">, "+ "value=<" + value + ">}"
"Unknown character script name {" + name + "}"
"Unknown character block name {" + name + "}"
"Unknown character property name {" + name + "}"
"named capturing group has 0 length name"
"named capturing group is missing trailing '>'"
"Named capturing group <" + name + "> is already defined"
"Look-behind group does not have " + "an obvious maximum length"
"Unknown look-behind group"
"Unknown group type"
"Unknown inline modifier"
"Internal logic error"
"Illegal control escape sequence"
"Illegal octal escape sequence"
"Hexadecimal codepoint is too big"
"Unclosed hexadecimal escape sequence"
"Illegal hexadecimal escape sequence"
"Illegal Unicode escape sequence"
*/
private final PatternSyntaxException exception;
public PatternSyntaxExceptionParser(PatternSyntaxException e) {
@ -51,7 +89,7 @@ public class PatternSyntaxExceptionParser {
//Need these errors to be more human readable
//Possibly include html for formatting
//Update tests first with user friendly errors
sb.append("The regular expression is missing a closing ']' character.");
sb.append("The regular expression is missing a closing ']' character, or has an empty pair of square brackets '[]'.");
break;
case "Unmatched closing ')'":
sb.append("The regular expression is missing a opening '(' character.");
@ -59,49 +97,31 @@ public class PatternSyntaxExceptionParser {
case "Unclosed group":
sb.append("The regular expression is missing a closing ')' character.");
break;
case "Dangling meta character '*'":
case "Dangling meta character '+'":
case "Dangling meta character '?'":
sb.append("The regular expression has a '*','+' or '?' in the wrong place.");
break;
case "Unexpected internal error":
sb.append("The regular expression has a backslash '\\' at the end.");
break;
case "Unclosed counted closure":
sb.append("The regular expression is missing a closing '}' character, or has an incorrect quantifier statement in curly brackets '{}'.");
break;
case "Illegal repetition":
sb.append("The regular expression has an incomplete or incorrect quantifier statement in curly brackets '{}'.");
break;
case "Illegal repetition range":
sb.append("The regular expression has a quantifier statement where the minimum is larger than the maximum (e.g. {4,3}).");
break;
case "Illegal character range":
sb.append("The regular expression has a range statement which is incomplete or has the characters in the incorrect order (e.g. [9-0])");
break;
default:
// If no special handling in place fall back on method
// as used in java.util.regex.PatternSyntaxException
sb.append(desc);
// If no special handling in place fall back on error msg
// created by java.util.regex.PatternSyntaxException
sb.append(exception.getMessage());
}
return sb.toString();
}
/* Error messages from java.util.regex.Pattern
"Unclosed character class"
"Unmatched closing ')'"
"Unexpected internal error"
"Dangling meta character '" + ((char)ch) + "'"
"\\k is not followed by '<' for named capturing group"
"(named capturing group <"+ name+"> does not exist"
"Illegal/unsupported escape sequence"
"Bad class syntax"
"Unclosed character class"
"Illegal character range"
"Unexpected character '"+((char)ch)+"'"
"Unclosed character family"
"Empty character family"
"Unknown Unicode property {name=<" + name + ">, "+ "value=<" + value + ">}"
"Unknown character script name {" + name + "}"
"Unknown character block name {" + name + "}"
"Unknown character property name {" + name + "}"
"named capturing group has 0 length name"
"named capturing group is missing trailing '>'"
"Named capturing group <" + name + "> is already defined"
"Look-behind group does not have " + "an obvious maximum length"
"Unknown look-behind group"
"Unknown group type"
"Unknown inline modifier"
"Internal logic error"
"Unclosed counted closure"
"Illegal repetition range"
"Illegal repetition"
"Illegal control escape sequence"
"Illegal octal escape sequence"
"Hexadecimal codepoint is too big"
"Unclosed hexadecimal escape sequence"
"Illegal hexadecimal escape sequence"
"Illegal Unicode escape sequence"
*/
}

View File

@ -51,36 +51,6 @@ public class PatternSyntaxExceptionParserTests extends RefineTest {
public void init() {
logger = LoggerFactory.getLogger(this.getClass());
}
/*
Potential errors and error messages from PatternSyntaxException
groupopen:"Unmatched opening parenthesis."
Unclosed group near index 1 ( ^
groupclose:"Unmatched closing parenthesis."
Unmatched closing ')' )
setopen:"Unmatched opening square bracket."
Unclosed character class near index 0 [ ^
quanttarg:"Invalid target for quantifier."
Dangling meta character '+' near index 0 +{4} ^
Dangling meta character '*' near index 0 * ^
Dangling meta character '?' near index 0 ? ^
esccharopen:"Dangling backslash."
Unexpected internal error near index 1 \ ^
quantrev:"Quantifier minimum is greater than maximum."
Illegal repetition range near index 5 a{3,2} ^
rangerev:"Range values reversed. Start char is greater than end char."
Illegal character range near index 3 [9-0] ^
esccharbad:"Invalid escape sequence."
Illegal control escape sequence: \c
Illegal/unsupported escape sequence: \g \i \j \l \m \o \q \y
\k is not followed by '<' for named capturing group: \k
Unknown character property name {}: \p
Illegal Unicode escape sequence: {backslash}u
Illegal hexadecimal escape sequence: \x
Illegal octal escape sequence: \0
invalidnamegroup:
named capturing group is missing trailing '>' near index 5 (?<as?>a) ^
*/
@Test
public void unmatchedOpeningParenthesisTest(){
@ -116,20 +86,7 @@ invalidnamegroup:
} catch (PatternSyntaxException err) {
PatternSyntaxExceptionParser e = new PatternSyntaxExceptionParser(err);
Assert.assertEquals(e.getUserMessage(),
"The regular expression is missing a closing ']' character.");
}
}
@Test
public void quantifierTargetValidityTest(){
String s = "abc+*";
try {
Pattern pattern = Pattern.compile(s);
Assert.assertTrue(false,"Test pattern successfully compiled when it should fail");
} catch (PatternSyntaxException err) {
PatternSyntaxExceptionParser e = new PatternSyntaxExceptionParser(err);
Assert.assertEquals(e.getUserMessage(),
"The regular expression has a '*','+' or '?' in the wrong place");
"The regular expression is missing a closing ']' character, or has an empty pair of square brackets '[]'.");
}
}
@ -146,6 +103,43 @@ invalidnamegroup:
}
}
/**
 * Checks the user-facing message for a counted quantifier that is opened
 * but never closed ("abc{3").
 */
@Test
public void unmatchedOpeningCurlyBracketTest(){
    String s = "abc{3";
    try {
        Pattern.compile(s);
        // Without this the test would pass silently if the pattern ever compiled.
        Assert.assertTrue(false,"Test pattern successfully compiled when it should fail");
    } catch (PatternSyntaxException err) {
        PatternSyntaxExceptionParser e = new PatternSyntaxExceptionParser(err);
        Assert.assertEquals(e.getUserMessage(),
                "The regular expression is missing a closing '}' character, or has an incorrect quantifier statement in curly brackets '{}'.");
    }
}
/**
 * Checks the user-facing message for an opening curly bracket that is not
 * followed by a valid quantifier ("abc{").
 */
@Test
public void illegalQuantifierStatement(){
    String s = "abc{";
    try {
        Pattern.compile(s);
        // Without this the test would pass silently if the pattern ever compiled.
        Assert.assertTrue(false,"Test pattern successfully compiled when it should fail");
    } catch (PatternSyntaxException err) {
        PatternSyntaxExceptionParser e = new PatternSyntaxExceptionParser(err);
        Assert.assertEquals(e.getUserMessage(),
                "The regular expression has an incomplete or incorrect quantifier statement in curly brackets '{}'.");
    }
}
/**
 * Checks the user-facing message for a quantifier applied to another
 * quantifier ("abc+*").
 */
@Test
public void quantifierTargetValidityTest(){
    final String regex = "abc+*";
    PatternSyntaxException caught = null;
    try {
        Pattern.compile(regex);
    } catch (PatternSyntaxException syntaxError) {
        caught = syntaxError;
    }
    // Compilation must have been rejected; otherwise fail with the usual message.
    Assert.assertTrue(caught != null,"Test pattern successfully compiled when it should fail");
    PatternSyntaxExceptionParser parser = new PatternSyntaxExceptionParser(caught);
    Assert.assertEquals(parser.getUserMessage(),
            "The regular expression has a '*','+' or '?' in the wrong place.");
}
@Test
public void quantifierMagnitudeTest(){
String s = "a{4,3}";
@ -168,22 +162,8 @@ invalidnamegroup:
} catch (PatternSyntaxException err) {
PatternSyntaxExceptionParser e = new PatternSyntaxExceptionParser(err);
Assert.assertEquals(e.getUserMessage(),
"The regular expression has a range statement with the characters in the incorrect order (e.g. [9-0])");
"The regular expression has a range statement which is incomplete or has the characters in the incorrect order (e.g. [9-0])");
}
}
/* This needs to be different to the others - see all the variations on invalid escape sequences
@Test
public void escapeSequenceValidityTest(){
String s = "";
try {
Pattern pattern = Pattern.compile(s);
Assert.assertTrue(false,"Test pattern successfully compiled when it should fail");
} catch (PatternSyntaxException err) {
PatternSyntaxExceptionParser e = new PatternSyntaxExceptionParser(err);
Assert.assertEquals(e.getUserMessage(),
"Invalid escape sequence.");
}
}
*/
}