(   'lexer'  {gtype=LEXER_GRAMMAR;}    // pure lexer
    |   'parser' {gtype=PARSER_GRAMMAR;}   // pure parser
    |   'tree'   {gtype=TREE_GRAMMAR;}     // a tree parser
    |            {gtype=COMBINED_GRAMMAR;} // merged parser/lexer
    )
    g='grammar' id ';' optionsSpec? tokensSpec? attrScope* action*
    rule+
    EOF
    -> ^( {adaptor.create(gtype,$g)}
          id DOC_COMMENT? optionsSpec? tokensSpec? attrScope* action* rule+
        )
    ;

/** Match a tokens {...} section; the TOKENS lexer rule consumes the opening '{'. */
tokensSpec
    :   TOKENS tokenSpec+ '}' -> ^(TOKENS tokenSpec+)
    ;

/** One entry of a tokens section: a bare token name or name = literal. */
tokenSpec
    :   TOKEN_REF
        (   '=' (lit=STRING_LITERAL|lit=CHAR_LITERAL) -> ^('=' TOKEN_REF $lit)
        |                                             -> TOKEN_REF
        )
        ';'
    ;

/** Grammar-level named scope: scope name {...} */
attrScope
    :   'scope' id ACTION -> ^('scope' id ACTION)
    ;

/** Match stuff like @parser::members {int i;} */
action
    :   '@' (actionScopeName '::')? id ACTION -> ^('@' actionScopeName? id ACTION)
    ;

/** Sometimes the scope names will collide with keywords; allow them as
 *  ids for action scopes.
 */
actionScopeName
    :   id
    |   l='lexer'   -> ID[$l]
    |   p='parser'  -> ID[$p]
    ;

/** Match an options {...} section; the OPTIONS lexer rule consumes the opening '{'. */
optionsSpec
    :   OPTIONS (option ';')+ '}' -> ^(OPTIONS option+)
    ;

option
    :   id '=' optionValue -> ^('=' id optionValue)
    ;

optionValue
    :   id
    |   STRING_LITERAL
    |   CHAR_LITERAL
    |   INT
    |   s='*' -> STRING_LITERAL[$s]  // used for k=*
    ;

/** A single parser or lexer rule.  The rule name is recorded in the dynamic
 *  scope so that nested rules (see ebnf) can inspect it.
 */
rule
scope {
    String name;
}
    :   DOC_COMMENT?
        ( modifier=('protected'|'public'|'private'|'fragment') )?
        id {$rule::name = $id.text;}
        '!'?
        ( arg=ARG_ACTION )?
        ( 'returns' rt=ARG_ACTION )?
        throwsSpec? optionsSpec? ruleScopeSpec? ruleAction*
        ':' altList ';'
        exceptionGroup?
        -> ^( RULE id {modifier!=null?adaptor.create(modifier):null} ^(ARG $arg)? ^(RET $rt)?
              optionsSpec? ruleScopeSpec? ruleAction*
              altList
              exceptionGroup?
              EOR["EOR"]
            )
    ;

/** Match stuff like @init {int i;} */
ruleAction
    :   '@' id ACTION -> ^('@' id ACTION)
    ;

throwsSpec
    :   'throws' id ( ',' id )* -> ^('throws' id+)
    ;

/** Rule-level scope: an inline scope action, a list of shared scope names,
 *  or both (inline action first).
 */
ruleScopeSpec
    :   'scope' ACTION -> ^('scope' ACTION)
    |   'scope' id (',' id)* ';' -> ^('scope' id+)
    |   'scope' ACTION
        'scope' id (',' id)* ';'
        -> ^('scope' ACTION id+ )
    ;

/** Parenthesized sub-block.  Each alternative's rewrite is kept next to it
 *  in the tree, in the same (alternative rewrite?)+ shape altList emits,
 *  instead of being matched and then discarded.
 */
block
    :   lp='('
        ( (opts=optionsSpec)? ':' )?
        a1=alternative rewrite ( '|' a2=alternative rewrite )*
        rp=')'
        -> ^( BLOCK[$lp,"BLOCK"] optionsSpec? (alternative rewrite?)+ EOB[$rp,"EOB"] )
    ;

altList
@init {
    // must create root manually as it's used by invoked rules in real antlr tool.
    // leave here to demonstrate use of {...} in rewrite rule
    // it's really BLOCK[firstToken,"BLOCK"]; set line/col to previous ( or : token.
    CommonTree blkRoot = (CommonTree)adaptor.create(BLOCK,input.LT(-1),"BLOCK");
}
    :   a1=alternative rewrite ( '|' a2=alternative rewrite )*
        -> ^( {blkRoot} (alternative rewrite?)+ EOB["EOB"] )
    ;

alternative
@init {
    Token firstToken = input.LT(1);
    Token prevToken = input.LT(-1); // either : or | I think
}
    :   element+ -> ^(ALT[firstToken,"ALT"] element+ EOA["EOA"])
    |   -> ^(ALT[prevToken,"ALT"] EPSILON[prevToken,"EPSILON"] EOA["EOA"])
    ;

exceptionGroup
    :   ( exceptionHandler )+ ( finallyClause )?
    |   finallyClause
    ;

exceptionHandler
    :   'catch' ARG_ACTION ACTION -> ^('catch' ARG_ACTION ACTION)
    ;

finallyClause
    :   'finally' ACTION -> ^('finally' ACTION)
    ;

element
    :   elementNoOptionSpec
    ;

/** One element of an alternative.  Whenever an EBNF suffix follows an atom or
 *  a labelled atom/block, the suffix node becomes the root of a synthetic
 *  BLOCK/ALT wrapper so the ?, * or + operator is preserved in the tree.
 */
elementNoOptionSpec
    :   id (labelOp='='|labelOp='+=') atom
        (   ebnfSuffix
            -> ^( ebnfSuffix ^(BLOCK["BLOCK"] ^(ALT["ALT"] ^($labelOp id atom) EOA["EOA"]) EOB["EOB"]))
        |   -> ^($labelOp id atom)
        )
    |   id (labelOp='='|labelOp='+=') block
        (   ebnfSuffix
            -> ^( ebnfSuffix ^(BLOCK["BLOCK"] ^(ALT["ALT"] ^($labelOp id block) EOA["EOA"]) EOB["EOB"]))
        |   -> ^($labelOp id block)
        )
    |   atom
        (   ebnfSuffix
            // root the wrapper at the suffix, like the labelled alternatives above;
            // without it the matched ?, * or + operator is lost
            -> ^( ebnfSuffix ^(BLOCK["BLOCK"] ^(ALT["ALT"] atom EOA["EOA"]) EOB["EOB"]))
        |   -> atom
        )
    |   ebnf
    |   ACTION
    |   SEMPRED ( '=>' -> GATED_SEMPRED | -> SEMPRED )
    |   treeSpec
    ;

atom:   range ( (op='^'|op='!') -> ^($op range) | -> range )
    |   terminal
    |   notSet ( (op='^'|op='!') -> ^($op notSet) | -> notSet )
    |   RULE_REF ( arg=ARG_ACTION )? ( (op='^'|op='!') )?
        -> {$arg!=null&&op!=null}? ^($op RULE_REF $arg)
        -> {$arg!=null}?           ^(RULE_REF $arg)
        -> {$op!=null}?            ^($op RULE_REF)
        -> RULE_REF
    ;

notSet
    :   '~'
        (   notTerminal -> ^('~' notTerminal)
        |   block       -> ^('~' block)
        )
    ;

treeSpec
    :   '^(' element ( element )+ ')' -> ^(TREE_BEGIN element+)
    ;

/** Matches EBNF blocks (and token sets via block rule) */
ebnf
@init {
    Token firstToken = input.LT(1);
}
@after {
    $ebnf.tree.getToken().setLine(firstToken.getLine());
    $ebnf.tree.getToken().setCharPositionInLine(firstToken.getCharPositionInLine());
}
    :   block
        (   op='?'  -> ^(OPTIONAL[op] block)
        |   op='*'  -> ^(CLOSURE[op] block)
        |   op='+'  -> ^(POSITIVE_CLOSURE[op] block)
        |   '^'     -> ^('^' block)
        |   '!'     -> ^('!' block)
        |   '=>'    // syntactic predicate
                    -> {gtype==COMBINED_GRAMMAR &&
                        Character.isUpperCase($rule::name.charAt(0))}?
                       // if lexer rule in combined, leave as pred for lexer
                       ^(SYNPRED["=>"] block)
                    // in real antlr tool, text for SYN_SEMPRED is predname
                    -> SYN_SEMPRED
        |           -> block
        )
    ;

range!
    :   c1=CHAR_LITERAL RANGE c2=CHAR_LITERAL -> ^(CHAR_RANGE[$c1,".."] $c1 $c2)
    ;

terminal
    :   (   CHAR_LITERAL                -> CHAR_LITERAL
            // Args are only valid for lexer rules
        |   TOKEN_REF
            (   ARG_ACTION              -> ^(TOKEN_REF ARG_ACTION)
            |                           -> TOKEN_REF
            )
        |   STRING_LITERAL              -> STRING_LITERAL
        |   '.'                         -> '.'
        )
        (   '^'                         -> ^('^' $terminal)
        |   '!'                         -> ^('!' $terminal)
        )?
    ;

notTerminal
    :   CHAR_LITERAL
    |   TOKEN_REF
    |   STRING_LITERAL
    ;

/** EBNF operator following an element; yields an imaginary node carrying
 *  the operator token.
 */
ebnfSuffix
@init {
    Token op = input.LT(1);
}
    :   '?' -> OPTIONAL[op]
    |   '*' -> CLOSURE[op]
    |   '+' -> POSITIVE_CLOSURE[op]
    ;

// R E W R I T E  S Y N T A X

rewrite
@init {
    Token firstToken = input.LT(1);
}
    :   (rew+='->' preds+=SEMPRED predicated+=rewrite_alternative)*
        rew2='->' last=rewrite_alternative
        -> ^($rew $preds $predicated)* ^($rew2 $last)
    |
    ;

rewrite_alternative
options {backtrack=true;}
    :   rewrite_template
    |   rewrite_tree_alternative
    |   /* empty rewrite */ -> ^(ALT["ALT"] EPSILON["EPSILON"] EOA["EOA"])
    ;

rewrite_template_block
    :   lp='(' rewrite_template ')' -> ^(BLOCK[$lp,"BLOCK"] rewrite_template EOB[$lp,"EOB"])
    ;

rewrite_tree_block
    :   lp='(' rewrite_tree_alternative ')'
        -> ^(BLOCK[$lp,"BLOCK"] rewrite_tree_alternative EOB[$lp,"EOB"])
    ;

rewrite_tree_alternative
    :   rewrite_tree_element+ -> ^(ALT["ALT"] rewrite_tree_element+ EOA["EOA"])
    ;

/** One element on the right-hand side of a rewrite.  A suffixed atom or tree
 *  is wrapped in a synthetic BLOCK/ALT rooted at the suffix node.
 */
rewrite_tree_element
    :   rewrite_tree_atom
    |   rewrite_tree_atom ebnfSuffix
        -> ^( ebnfSuffix ^(BLOCK["BLOCK"] ^(ALT["ALT"] rewrite_tree_atom EOA["EOA"]) EOB["EOB"]))
    |   rewrite_tree
        (   ebnfSuffix
            // keep the suffix as root, same shape as the atom alternative above
            -> ^( ebnfSuffix ^(BLOCK["BLOCK"] ^(ALT["ALT"] rewrite_tree EOA["EOA"]) EOB["EOB"]))
        |   -> rewrite_tree
        )
    |   rewrite_tree_ebnf
    ;

rewrite_tree_atom
    :   CHAR_LITERAL
    |   TOKEN_REF ARG_ACTION? -> ^(TOKEN_REF ARG_ACTION?) // for imaginary nodes
    |   RULE_REF
    |   STRING_LITERAL
    |   d='$' id -> LABEL[$d,$id.text] // reference to a label in a rewrite rule
    |   ACTION
    ;

rewrite_tree_ebnf
@init {
    Token firstToken = input.LT(1);
}
@after {
    $rewrite_tree_ebnf.tree.getToken().setLine(firstToken.getLine());
    $rewrite_tree_ebnf.tree.getToken().setCharPositionInLine(firstToken.getCharPositionInLine());
}
    :   rewrite_tree_block ebnfSuffix -> ^(ebnfSuffix rewrite_tree_block)
    ;

rewrite_tree
    :   '^(' rewrite_tree_atom rewrite_tree_element* ')'
        -> ^(TREE_BEGIN rewrite_tree_atom rewrite_tree_element* )
    ;

/** Build a tree for a template rewrite:
      ^(TEMPLATE (ID|ACTION) ^(ARGLIST ^(ARG ID ACTION) ...) )
    where ARGLIST is always there even if no args exist.
    ID can be "template" keyword.  If first child is ACTION then it's
    an indirect template ref

    -> foo(a={...}, b={...})
    -> ({string-e})(a={...}, b={...})  // e evaluates to template name
    -> {%{$ID.text}} // create literal template from string (done in ActionTranslator)
    -> {st-expr} // st-expr evaluates to ST
 */
rewrite_template
    :   // -> template(a={...},...) "..."    inline template
        {input.LT(1).getText().equals("template")}?
        id lp='(' rewrite_template_args ')'
        st=( DOUBLE_QUOTE_STRING_LITERAL | DOUBLE_ANGLE_STRING_LITERAL )
        -> ^(TEMPLATE[$lp,"TEMPLATE"] id rewrite_template_args $st)

    |   // -> foo(a={...}, ...)
        rewrite_template_ref

    |   // -> ({expr})(a={...}, ...)
        rewrite_indirect_template_head

    |   // -> {...}
        ACTION
    ;

/** -> foo(a={...}, ...) */
rewrite_template_ref
    :   id lp='(' rewrite_template_args ')'
        -> ^(TEMPLATE[$lp,"TEMPLATE"] id rewrite_template_args)
    ;

/** -> ({expr})(a={...}, ...) */
rewrite_indirect_template_head
    :   lp='(' ACTION ')' '(' rewrite_template_args ')'
        -> ^(TEMPLATE[$lp,"TEMPLATE"] ACTION rewrite_template_args)
    ;

rewrite_template_args
    :   rewrite_template_arg (',' rewrite_template_arg)*
        -> ^(ARGLIST rewrite_template_arg+)
    |   -> ARGLIST
    ;

rewrite_template_arg
    :   id '=' ACTION -> ^(ARG[$id.start] id ACTION)
    ;

/** Either kind of identifier token, re-typed as ID. */
id  :   TOKEN_REF -> ID[$TOKEN_REF]
    |   RULE_REF  -> ID[$RULE_REF]
    ;

// L E X I C A L   R U L E S

SL_COMMENT
    :   '//'
        (   ' $ANTLR ' SRC // src directive
        |   ~('\r'|'\n')*
        )
        '\r'? '\n'
        {$channel=HIDDEN;}
    ;

ML_COMMENT
    :   '/*' {if (input.LA(1)=='*') $type=DOC_COMMENT; else $channel=HIDDEN;} .* '*/'
    ;

CHAR_LITERAL
    :   '\'' LITERAL_CHAR '\''
    ;

STRING_LITERAL
    :   '\'' LITERAL_CHAR LITERAL_CHAR* '\''
    ;

fragment
LITERAL_CHAR
    :   ESC
    |   ~('\''|'\\')
    ;

DOUBLE_QUOTE_STRING_LITERAL
    :   '"' LITERAL_CHAR* '"'
    ;

DOUBLE_ANGLE_STRING_LITERAL
    :   '<<' .* '>>'
    ;

fragment
ESC :   '\\'
        (   'n'
        |   'r'
        |   't'
        |   'b'
        |   'f'
        |   '"'
        |   '\''
        |   '\\'
        |   '>'
        |   'u' XDIGIT XDIGIT XDIGIT XDIGIT
        |   . // unknown, leave as it is
        )
    ;

fragment
XDIGIT
    :   '0' .. '9'
    |   'a' .. 'f'
    |   'A' .. 'F'
    ;

INT :   '0'..'9'+
    ;

ARG_ACTION
    :   NESTED_ARG_ACTION
    ;

fragment
NESTED_ARG_ACTION
    :   '['
        (   options {greedy=false; k=1;}
        :   NESTED_ARG_ACTION
        |   ACTION_STRING_LITERAL
        |   ACTION_CHAR_LITERAL
        |   .
        )*
        ']'
        // strip the surrounding brackets from the token text
        {setText(getText().substring(1, getText().length()-1));}
    ;

ACTION
    :   NESTED_ACTION ( '?' {$type = SEMPRED;} )?
    ;

fragment
NESTED_ACTION
    :   '{'
        (   options {greedy=false; k=3;}
        :   NESTED_ACTION
        |   SL_COMMENT
        |   ML_COMMENT
        |   ACTION_STRING_LITERAL
        |   ACTION_CHAR_LITERAL
        |   .
        )*
        '}'
    ;

fragment
ACTION_CHAR_LITERAL
    :   '\'' (ACTION_ESC|~('\\'|'\'')) '\''
    ;

// zero or more characters, so an empty "" inside an action is a string literal too
fragment
ACTION_STRING_LITERAL
    :   '"' (ACTION_ESC|~('\\'|'"'))* '"'
    ;

fragment
ACTION_ESC
    :   '\\\''
    |   '\\' '"' // ANTLR doesn't like: '\\"'
    |   '\\' ~('\''|'"')
    ;

TOKEN_REF
    :   'A'..'Z' ('a'..'z'|'A'..'Z'|'_'|'0'..'9')*
    ;

RULE_REF
    :   'a'..'z' ('a'..'z'|'A'..'Z'|'_'|'0'..'9')*
    ;

/** Match the start of an options section.  Don't allow normal
 *  action processing on the {...} as it's not an action.
 */
OPTIONS
    :   'options' WS_LOOP '{'
    ;

TOKENS
    :   'tokens' WS_LOOP '{'
    ;

/** Reset the file and line information; useful when the grammar
 *  has been generated so that errors are shown relative to the
 *  original file like the old C preprocessor used to do.
 */
fragment
SRC :   'src' ' ' file=ACTION_STRING_LITERAL ' ' line=INT
    ;

WS  :   (   ' '
        |   '\t'
        |   '\r'? '\n'
        )+
        {$channel=HIDDEN;}
    ;

fragment
WS_LOOP
    :   (   WS
        |   SL_COMMENT
        |   ML_COMMENT
        )*
    ;