Extended tests and code for user-friendly regular expression syntax errors
This commit is contained in:
parent
441d9f7f10
commit
d64dee71dd
@ -36,6 +36,44 @@ package com.google.refine.util;
|
|||||||
import java.util.regex.PatternSyntaxException;
|
import java.util.regex.PatternSyntaxException;
|
||||||
|
|
||||||
public class PatternSyntaxExceptionParser {
|
public class PatternSyntaxExceptionParser {
|
||||||
|
/* Translates PatternSyntaxException descriptions into more user-friendly error messages.
|
||||||
|
Currently translates the following error messages from java.util.regex.Pattern
|
||||||
|
"Unclosed character class"
|
||||||
|
"Unmatched closing ')'"
|
||||||
|
"Unclosed group"
|
||||||
|
"Unexpected internal error"
|
||||||
|
"Dangling meta character '" + ((char)ch) + "'"
|
||||||
|
"Unclosed counted closure"
|
||||||
|
"Illegal repetition"
|
||||||
|
"Illegal repetition range"
|
||||||
|
"Illegal character range"
|
||||||
|
|
||||||
|
The following messages are not currently translated; for these, the message produced by PatternSyntaxException is output unchanged
|
||||||
|
"\\k is not followed by '<' for named capturing group"
|
||||||
|
"(named capturing group <"+ name+"> does not exist"
|
||||||
|
"Illegal/unsupported escape sequence"
|
||||||
|
"Bad class syntax"
|
||||||
|
"Unexpected character '"+((char)ch)+"'"
|
||||||
|
"Unclosed character family"
|
||||||
|
"Empty character family"
|
||||||
|
"Unknown Unicode property {name=<" + name + ">, "+ "value=<" + value + ">}"
|
||||||
|
"Unknown character script name {" + name + "}"
|
||||||
|
"Unknown character block name {" + name + "}"
|
||||||
|
"Unknown character property name {" + name + "}"
|
||||||
|
"named capturing group has 0 length name"
|
||||||
|
"named capturing group is missing trailing '>'"
|
||||||
|
"Named capturing group <" + name + "> is already defined"
|
||||||
|
"Look-behind group does not have " + "an obvious maximum length"
|
||||||
|
"Unknown look-behind group"
|
||||||
|
"Unknown group type"
|
||||||
|
"Unknown inline modifier"
|
||||||
|
"Internal logic error"
|
||||||
|
"Illegal control escape sequence"
|
||||||
|
"Illegal octal escape sequence"
|
||||||
|
"Hexadecimal codepoint is too big"
|
||||||
|
"Unclosed hexadecimal escape sequence"
|
||||||
|
"Illegal hexadecimal escape sequence"
|
||||||
|
"Illegal Unicode escape sequence"
|
||||||
|
*/
|
||||||
private final PatternSyntaxException exception;
|
private final PatternSyntaxException exception;
|
||||||
|
|
||||||
public PatternSyntaxExceptionParser(PatternSyntaxException e) {
|
public PatternSyntaxExceptionParser(PatternSyntaxException e) {
|
||||||
@ -51,7 +89,7 @@ public class PatternSyntaxExceptionParser {
|
|||||||
//Need these errors to be more human readable
|
//Need these errors to be more human readable
|
||||||
//Possibly include html for formatting
|
//Possibly include html for formatting
|
||||||
//Update tests first with user friendly errors
|
//Update tests first with user friendly errors
|
||||||
sb.append("The regular expression is missing a closing ']' character.");
|
sb.append("The regular expression is missing a closing ']' character, or has an empty pair of square brackets '[]'.");
|
||||||
break;
|
break;
|
||||||
case "Unmatched closing ')'":
|
case "Unmatched closing ')'":
|
||||||
sb.append("The regular expression is missing a opening '(' character.");
|
sb.append("The regular expression is missing a opening '(' character.");
|
||||||
@ -59,49 +97,31 @@ public class PatternSyntaxExceptionParser {
|
|||||||
case "Unclosed group":
|
case "Unclosed group":
|
||||||
sb.append("The regular expression is missing a closing ')' character.");
|
sb.append("The regular expression is missing a closing ')' character.");
|
||||||
break;
|
break;
|
||||||
|
case "Dangling meta character '*'":
|
||||||
|
case "Dangling meta character '+'":
|
||||||
|
case "Dangling meta character '?'":
|
||||||
|
sb.append("The regular expression has a '*','+' or '?' in the wrong place.");
|
||||||
|
break;
|
||||||
|
case "Unexpected internal error":
|
||||||
|
sb.append("The regular expression has a backslash '\\' at the end.");
|
||||||
|
break;
|
||||||
|
case "Unclosed counted closure":
|
||||||
|
sb.append("The regular expression is missing a closing '}' character, or has an incorrect quantifier statement in curly brackets '{}'.");
|
||||||
|
break;
|
||||||
|
case "Illegal repetition":
|
||||||
|
sb.append("The regular expression has an incomplete or incorrect quantifier statement in curly brackets '{}'.");
|
||||||
|
break;
|
||||||
|
case "Illegal repetition range":
|
||||||
|
sb.append("The regular expression has a quantifier statement where the minimum is larger than the maximum (e.g. {4,3}).");
|
||||||
|
break;
|
||||||
|
case "Illegal character range":
|
||||||
|
sb.append("The regular expression has a range statement which is incomplete or has the characters in the incorrect order (e.g. [9-0])");
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
// If no special handling in place fall back on method
|
// If no special handling in place fall back on error msg
|
||||||
// as used in java.util.regex.PatternSyntaxException
|
// created by java.util.regex.PatternSyntaxException
|
||||||
sb.append(desc);
|
sb.append(exception.getMessage());
|
||||||
}
|
}
|
||||||
return sb.toString();
|
return sb.toString();
|
||||||
}
|
}
|
||||||
/* Error messages from java.util.regex.Pattern
|
|
||||||
"Unclosed character class"
|
|
||||||
"Unmatched closing ')'"
|
|
||||||
"Unexpected internal error"
|
|
||||||
"Dangling meta character '" + ((char)ch) + "'"
|
|
||||||
"\\k is not followed by '<' for named capturing group"
|
|
||||||
"(named capturing group <"+ name+"> does not exist"
|
|
||||||
"Illegal/unsupported escape sequence"
|
|
||||||
"Bad class syntax"
|
|
||||||
"Unclosed character class"
|
|
||||||
"Illegal character range"
|
|
||||||
"Unexpected character '"+((char)ch)+"'"
|
|
||||||
"Unclosed character family"
|
|
||||||
"Empty character family"
|
|
||||||
"Unknown Unicode property {name=<" + name + ">, "+ "value=<" + value + ">}"
|
|
||||||
"Unknown character script name {" + name + "}"
|
|
||||||
"Unknown character block name {" + name + "}"
|
|
||||||
"Unknown character property name {" + name + "}"
|
|
||||||
"named capturing group has 0 length name"
|
|
||||||
"named capturing group is missing trailing '>'"
|
|
||||||
"Named capturing group <" + name + "> is already defined"
|
|
||||||
"Look-behind group does not have " + "an obvious maximum length"
|
|
||||||
"Unknown look-behind group"
|
|
||||||
"Unknown group type"
|
|
||||||
"Unknown inline modifier"
|
|
||||||
"Internal logic error"
|
|
||||||
"Unclosed counted closure"
|
|
||||||
"Illegal repetition range"
|
|
||||||
"Illegal repetition"
|
|
||||||
"Illegal control escape sequence"
|
|
||||||
"Illegal octal escape sequence"
|
|
||||||
"Hexadecimal codepoint is too big"
|
|
||||||
"Unclosed hexadecimal escape sequence"
|
|
||||||
"Illegal hexadecimal escape sequence"
|
|
||||||
"Illegal Unicode escape sequence"
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -51,36 +51,6 @@ public class PatternSyntaxExceptionParserTests extends RefineTest {
|
|||||||
public void init() {
|
public void init() {
|
||||||
logger = LoggerFactory.getLogger(this.getClass());
|
logger = LoggerFactory.getLogger(this.getClass());
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
Potential errors and error messages from PatternSyntaxException
|
|
||||||
groupopen:"Unmatched opening parenthesis."
|
|
||||||
Unclosed group near index 1 ( ^
|
|
||||||
groupclose:"Unmatched closing parenthesis."
|
|
||||||
Unmatched closing ')' )
|
|
||||||
setopen:"Unmatched opening square bracket."
|
|
||||||
Unclosed character class near index 0 [ ^
|
|
||||||
quanttarg:"Invalid target for quantifier."
|
|
||||||
Dangling meta character '+' near index 0 +{4} ^
|
|
||||||
Dangling meta character '*' near index 0 * ^
|
|
||||||
Dangling meta character '?' near index 0 ? ^
|
|
||||||
esccharopen:"Dangling backslash."
|
|
||||||
Unexpected internal error near index 1 \ ^
|
|
||||||
quantrev:"Quantifier minimum is greater than maximum."
|
|
||||||
Illegal repetition range near index 5 a{3,2} ^
|
|
||||||
rangerev:"Range values reversed. Start char is greater than end char."
|
|
||||||
Illegal character range near index 3 [9-0] ^
|
|
||||||
esccharbad:"Invalid escape sequence."
|
|
||||||
Illegal control escape sequence: \c
|
|
||||||
Illegal/unsupported escape sequence: \g \i \j \l \m \o \q \y
|
|
||||||
\k is not followed by '<' for named capturing group: \k
|
|
||||||
Unknown character property name {}: \p
|
|
||||||
Illegal Unicode escape sequence: {backslash}u
|
|
||||||
Illegal hexadecimal escape sequence: \x
|
|
||||||
Illegal octal escape sequence: \0
|
|
||||||
invalidnamegroup:
|
|
||||||
named capturing group is missing trailing '>' near index 5 (?<as?>a) ^
|
|
||||||
*/
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void unmatchedOpeningParenthesisTest(){
|
public void unmatchedOpeningParenthesisTest(){
|
||||||
@ -116,20 +86,7 @@ invalidnamegroup:
|
|||||||
} catch (PatternSyntaxException err) {
|
} catch (PatternSyntaxException err) {
|
||||||
PatternSyntaxExceptionParser e = new PatternSyntaxExceptionParser(err);
|
PatternSyntaxExceptionParser e = new PatternSyntaxExceptionParser(err);
|
||||||
Assert.assertEquals(e.getUserMessage(),
|
Assert.assertEquals(e.getUserMessage(),
|
||||||
"The regular expression is missing a closing ']' character.");
|
"The regular expression is missing a closing ']' character, or has an empty pair of square brackets '[]'.");
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void quantifierTargetValidityTest(){
|
|
||||||
String s = "abc+*";
|
|
||||||
try {
|
|
||||||
Pattern pattern = Pattern.compile(s);
|
|
||||||
Assert.assertTrue(false,"Test pattern successfully compiled when it should fail");
|
|
||||||
} catch (PatternSyntaxException err) {
|
|
||||||
PatternSyntaxExceptionParser e = new PatternSyntaxExceptionParser(err);
|
|
||||||
Assert.assertEquals(e.getUserMessage(),
|
|
||||||
"The regular expression has a '*','+' or '?' in the wrong place");
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -146,6 +103,43 @@ invalidnamegroup:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void unmatchedOpeningCurlyBracketTest(){
|
||||||
|
String s = "abc{3";
|
||||||
|
try {
|
||||||
|
Pattern pattern = Pattern.compile(s);
|
||||||
|
} catch (PatternSyntaxException err) {
|
||||||
|
PatternSyntaxExceptionParser e = new PatternSyntaxExceptionParser(err);
|
||||||
|
Assert.assertEquals(e.getUserMessage(),
|
||||||
|
"The regular expression is missing a closing '}' character, or has an incorrect quantifier statement in curly brackets '{}'.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void illegalQuantifierStatement(){
|
||||||
|
String s = "abc{";
|
||||||
|
try {
|
||||||
|
Pattern pattern = Pattern.compile(s);
|
||||||
|
} catch (PatternSyntaxException err) {
|
||||||
|
PatternSyntaxExceptionParser e = new PatternSyntaxExceptionParser(err);
|
||||||
|
Assert.assertEquals(e.getUserMessage(),
|
||||||
|
"The regular expression has an incomplete or incorrect quantifier statement in curly brackets '{}'.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void quantifierTargetValidityTest(){
|
||||||
|
String s = "abc+*";
|
||||||
|
try {
|
||||||
|
Pattern pattern = Pattern.compile(s);
|
||||||
|
Assert.assertTrue(false,"Test pattern successfully compiled when it should fail");
|
||||||
|
} catch (PatternSyntaxException err) {
|
||||||
|
PatternSyntaxExceptionParser e = new PatternSyntaxExceptionParser(err);
|
||||||
|
Assert.assertEquals(e.getUserMessage(),
|
||||||
|
"The regular expression has a '*','+' or '?' in the wrong place.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void quantifierMagnitudeTest(){
|
public void quantifierMagnitudeTest(){
|
||||||
String s = "a{4,3}";
|
String s = "a{4,3}";
|
||||||
@ -168,22 +162,8 @@ invalidnamegroup:
|
|||||||
} catch (PatternSyntaxException err) {
|
} catch (PatternSyntaxException err) {
|
||||||
PatternSyntaxExceptionParser e = new PatternSyntaxExceptionParser(err);
|
PatternSyntaxExceptionParser e = new PatternSyntaxExceptionParser(err);
|
||||||
Assert.assertEquals(e.getUserMessage(),
|
Assert.assertEquals(e.getUserMessage(),
|
||||||
"The regular expression has a range statement with the characters in the incorrect order (e.g. [9-0])");
|
"The regular expression has a range statement which is incomplete or has the characters in the incorrect order (e.g. [9-0])");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* This needs to be different to the others - see all the variations on invalid escape sequences
|
|
||||||
@Test
|
|
||||||
public void escapeSequenceValidityTest(){
|
|
||||||
String s = "";
|
|
||||||
try {
|
|
||||||
Pattern pattern = Pattern.compile(s);
|
|
||||||
Assert.assertTrue(false,"Test pattern successfully compiled when it should fail");
|
|
||||||
} catch (PatternSyntaxException err) {
|
|
||||||
PatternSyntaxExceptionParser e = new PatternSyntaxExceptionParser(err);
|
|
||||||
Assert.assertEquals(e.getUserMessage(),
|
|
||||||
"Invalid escape sequence.");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user