From d64dee71dda96f16502fea33ebfe9ded9e0ac2de Mon Sep 17 00:00:00 2001 From: Owen Stephens Date: Tue, 28 Nov 2017 16:20:24 +0000 Subject: [PATCH] Extended tests and code for user friendly regular expression syntax errors --- .../util/PatternSyntaxExceptionParser.java | 104 +++++++++++------- .../PatternSyntaxExceptionParserTests.java | 98 +++++++---------- 2 files changed, 101 insertions(+), 101 deletions(-) diff --git a/main/src/com/google/refine/util/PatternSyntaxExceptionParser.java b/main/src/com/google/refine/util/PatternSyntaxExceptionParser.java index 47c1d65bf..fddc08d92 100644 --- a/main/src/com/google/refine/util/PatternSyntaxExceptionParser.java +++ b/main/src/com/google/refine/util/PatternSyntaxExceptionParser.java @@ -36,6 +36,44 @@ package com.google.refine.util; import java.util.regex.PatternSyntaxException; public class PatternSyntaxExceptionParser { + /* Class to translate PatternSyntaxExceptions into more user friendly error messages + Currently translates the following error messages from java.util.regex.Pattern + "Unclosed character class" + "Unmatched closing ')'" + "Unexpected internal error" + "Dangling meta character '" + ((char)ch) + "'" + "Unclosed counted closure" + "Illegal repetition" + "Illegal repetition range" + "Illegal character range" + + The following messages are not currently translated and are output as per PatternSyntaxException + "\\k is not followed by '<' for named capturing group" + "(named capturing group <"+ name+"> does not exist" + "Illegal/unsupported escape sequence" + "Bad class syntax" + "Unexpected character '"+((char)ch)+"'" + "Unclosed character family" + "Empty character family" + "Unknown Unicode property {name=<" + name + ">, "+ "value=<" + value + ">}" + "Unknown character script name {" + name + "}" + "Unknown character block name {" + name + "}" + "Unknown character property name {" + name + "}" + "named capturing group has 0 length name" + "named capturing group is missing trailing '>'" + "Named capturing group <" + name + "> is already defined" + "Look-behind group does not have " + "an obvious maximum length" + "Unknown look-behind group" + "Unknown group type" + "Unknown inline modifier" + "Internal logic error" + "Illegal control escape sequence" + "Illegal octal escape sequence" + "Hexadecimal codepoint is too big" + "Unclosed hexadecimal escape sequence" + "Illegal hexadecimal escape sequence" + "Illegal Unicode escape sequence" + */ private final PatternSyntaxException exception; public PatternSyntaxExceptionParser(PatternSyntaxException e) { @@ -51,7 +89,7 @@ public class PatternSyntaxExceptionParser { //Need these errors to be more human readable //Possibly include html for formatting //Update tests first with user friendly errors - sb.append("The regular expression is missing a closing ']' character."); + sb.append("The regular expression is missing a closing ']' character, or has an empty pair of square brackets '[]'."); break; case "Unmatched closing ')'": sb.append("The regular expression is missing a opening '(' character."); @@ -59,49 +97,31 @@ public class PatternSyntaxExceptionParser { case "Unclosed group": sb.append("The regular expression is missing a closing ')' character."); break; + case "Dangling meta character '*'": + case "Dangling meta character '+'": + case "Dangling meta character '?'": + sb.append("The regular expression has a '*','+' or '?' in the wrong place."); + break; + case "Unexpected internal error": + sb.append("The regular expression has a backslash '\\' at the end."); + break; + case "Unclosed counted closure": + sb.append("The regular expression is missing a closing '}' character, or has an incorrect quantifier statement in curly brackets '{}'."); + break; + case "Illegal repetition": + sb.append("The regular expression has an incomplete or incorrect quantifier statement in curly brackets '{}'."); + break; + case "Illegal repetition range": + sb.append("The regular expression has a quantifier statement where the minimum is larger than the maximum (e.g. {4,3})."); + break; + case "Illegal character range": + sb.append("The regular expression has a range statement which is incomplete or has the characters in the incorrect order (e.g. [9-0])"); + break; default: - // If no special handling in place fall back on method - // as used in java.util.regex.PatternSyntaxException - sb.append(desc); + // If no special handling in place fall back on error msg + // created by java.util.regex.PatternSyntaxException + sb.append(exception.getMessage()); } return sb.toString(); } - /* Error messages from java.util.regex.Pattern - "Unclosed character class" - "Unmatched closing ')'" - "Unexpected internal error" - "Dangling meta character '" + ((char)ch) + "'" - "\\k is not followed by '<' for named capturing group" - "(named capturing group <"+ name+"> does not exist" - "Illegal/unsupported escape sequence" - "Bad class syntax" - "Unclosed character class" - "Illegal character range" - "Unexpected character '"+((char)ch)+"'" - "Unclosed character family" - "Empty character family" - "Unknown Unicode property {name=<" + name + ">, "+ "value=<" + value + ">}" - "Unknown character script name {" + name + "}" - "Unknown character block name {" + name + "}" - "Unknown character property name {" + name + "}" - "named capturing group has 0 length name" - "named capturing group is missing trailing '>'" - "Named capturing group <" + name + "> is already defined" - "Look-behind group does not have " + "an obvious maximum length" - "Unknown look-behind group" - "Unknown group type" - "Unknown inline modifier" - "Internal logic error" - "Unclosed counted closure" - "Illegal repetition range" - "Illegal repetition" - "Illegal control escape sequence" - "Illegal octal escape sequence" - "Hexadecimal codepoint is too big" - "Unclosed hexadecimal escape sequence" - "Illegal hexadecimal escape sequence" - "Illegal Unicode escape sequence" - */ - - } diff --git a/main/tests/server/src/com/google/refine/tests/util/PatternSyntaxExceptionParserTests.java b/main/tests/server/src/com/google/refine/tests/util/PatternSyntaxExceptionParserTests.java index 9b06f7a5d..b3cff97ea 100644 --- a/main/tests/server/src/com/google/refine/tests/util/PatternSyntaxExceptionParserTests.java +++ b/main/tests/server/src/com/google/refine/tests/util/PatternSyntaxExceptionParserTests.java @@ -51,36 +51,6 @@ public class PatternSyntaxExceptionParserTests extends RefineTest { public void init() { logger = LoggerFactory.getLogger(this.getClass()); } - -/* -Potential errors and error messages from PatternSyntaxException -groupopen:"Unmatched opening parenthesis." - Unclosed group near index 1 ( ^ -groupclose:"Unmatched closing parenthesis." - Unmatched closing ')' ) -setopen:"Unmatched opening square bracket." - Unclosed character class near index 0 [ ^ -quanttarg:"Invalid target for quantifier." - Dangling meta character '+' near index 0 +{4} ^ - Dangling meta character '*' near index 0 * ^ - Dangling meta character '?' near index 0 ? ^ -esccharopen:"Dangling backslash." - Unexpected internal error near index 1 \ ^ -quantrev:"Quantifier minimum is greater than maximum." - Illegal repetition range near index 5 a{3,2} ^ -rangerev:"Range values reversed. Start char is greater than end char." - Illegal character range near index 3 [9-0] ^ -esccharbad:"Invalid escape sequence." - Illegal control escape sequence: \c - Illegal/unsupported escape sequence: \g \i \j \l \m \o \q \y - \k is not followed by '<' for named capturing group: \k - Unknown character property name {}: \p - Illegal Unicode escape sequence: {backslash}u - Illegal hexadecimal escape sequence: \x - Illegal octal escape sequence: \0 -invalidnamegroup: - named capturing group is missing trailing '>' near index 5 (?a) ^ -*/ @Test public void unmatchedOpeningParenthesisTest(){ @@ -116,20 +86,7 @@ invalidnamegroup: } catch (PatternSyntaxException err) { PatternSyntaxExceptionParser e = new PatternSyntaxExceptionParser(err); Assert.assertEquals(e.getUserMessage(), - "The regular expression is missing a closing ']' character."); - } - } - - @Test - public void quantifierTargetValidityTest(){ - String s = "abc+*"; - try { - Pattern pattern = Pattern.compile(s); - Assert.assertTrue(false,"Test pattern successfully compiled when it should fail"); - } catch (PatternSyntaxException err) { - PatternSyntaxExceptionParser e = new PatternSyntaxExceptionParser(err); - Assert.assertEquals(e.getUserMessage(), - "The regular expression has a '*','+' or '?' in the wrong place"); + "The regular expression is missing a closing ']' character, or has an empty pair of square brackets '[]'."); } } @@ -146,6 +103,43 @@ invalidnamegroup: } } + @Test + public void unmatchedOpeningCurlyBracketTest(){ + String s = "abc{3"; + try { + Pattern pattern = Pattern.compile(s); + } catch (PatternSyntaxException err) { + PatternSyntaxExceptionParser e = new PatternSyntaxExceptionParser(err); + Assert.assertEquals(e.getUserMessage(), + "The regular expression is missing a closing '}' character, or has an incorrect quantifier statement in curly brackets '{}'."); + } + } + + @Test + public void illegalQuantifierStatement(){ + String s = "abc{"; + try { + Pattern pattern = Pattern.compile(s); + } catch (PatternSyntaxException err) { + PatternSyntaxExceptionParser e = new PatternSyntaxExceptionParser(err); + Assert.assertEquals(e.getUserMessage(), + "The regular expression has an incomplete or incorrect quantifier statement in curly brackets '{}'."); + } + } + + @Test + public void quantifierTargetValidityTest(){ + String s = "abc+*"; + try { + Pattern pattern = Pattern.compile(s); + Assert.assertTrue(false,"Test pattern successfully compiled when it should fail"); + } catch (PatternSyntaxException err) { + PatternSyntaxExceptionParser e = new PatternSyntaxExceptionParser(err); + Assert.assertEquals(e.getUserMessage(), + "The regular expression has a '*','+' or '?' in the wrong place."); + } + } + @Test public void quantifierMagnitudeTest(){ String s = "a{4,3}"; @@ -168,22 +162,8 @@ invalidnamegroup: } catch (PatternSyntaxException err) { PatternSyntaxExceptionParser e = new PatternSyntaxExceptionParser(err); Assert.assertEquals(e.getUserMessage(), - "The regular expression has a range statement with the characters in the incorrect order (e.g. [9-0])"); + "The regular expression has a range statement which is incomplete or has the characters in the incorrect order (e.g. [9-0])"); } } -/* This needs to be different to the others - see all the variations on invalid escape sequences - @Test - public void escapeSequenceValidityTest(){ - String s = ""; - try { - Pattern pattern = Pattern.compile(s); - Assert.assertTrue(false,"Test pattern successfully compiled when it should fail"); - } catch (PatternSyntaxException err) { - PatternSyntaxExceptionParser e = new PatternSyntaxExceptionParser(err); - Assert.assertEquals(e.getUserMessage(), - "Invalid escape sequence."); - } - } -*/ }