header {
/*
 [The "BSD licence"]
 Copyright (c) 2005-2008 Terence Parr
 All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions
 are met:
 1. Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.
 3. The name of the author may not be used to endorse or promote products
    derived from this software without specific prior written permission.

 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.tool;
import java.util.*;
import java.io.*;
import org.antlr.analysis.*;
import org.antlr.misc.*;
import antlr.*;
}

/** Read in an ANTLR grammar and build an AST.  Try not to do
 *  any actions, just build the tree.
 *
 *  The phases are:
 *
 *      antlr.g (this file)
 *      assign.types.g
 *      define.g
 *      buildnfa.g
 *      antlr.print.g (optional)
 *      codegen.g
 *
 *  Terence Parr
 *  University of San Francisco
 *  2005
 */
class ANTLRParser extends Parser;
options {
    buildAST = true;
    exportVocab=ANTLR;
    ASTLabelType="GrammarAST";
    k=3;
}

tokens {
    OPTIONS="options";
    TOKENS="tokens";
    PARSER="parser";

    LEXER;
    RULE;
    BLOCK;
    OPTIONAL;
    CLOSURE;
    POSITIVE_CLOSURE;
    SYNPRED;
    RANGE;
    CHAR_RANGE;
    EPSILON;
    ALT;
    EOR;
    EOB;
    EOA; // end of alt
    ID;
    ARG;
    ARGLIST;
    RET;
    LEXER_GRAMMAR;
    PARSER_GRAMMAR;
    TREE_GRAMMAR;
    COMBINED_GRAMMAR;
    INITACTION;
    FORCED_ACTION; // {{...}} always exec even during syn preds
    LABEL; // $x used in rewrite rules
    TEMPLATE;
    SCOPE="scope";
    IMPORT="import";
    GATED_SEMPRED; // {p}? =>
    SYN_SEMPRED; // (...) =>   it's a manually-specified synpred converted to sempred
    BACKTRACK_SEMPRED; // auto backtracking mode syn pred converted to sempred
    FRAGMENT="fragment";
    DOT;
}

{
    /** The grammar object being filled in; set on entry to rule "grammar". */
    Grammar grammar = null;
    /** One of the *_GRAMMAR token types; set by rule grammarType. */
    protected int gtype = 0;
    /** Name of the rule currently being parsed; null between rules. */
    protected String currentRuleName = null;
    /** BLOCK node of the (sub)rule block currently being parsed. */
    protected GrammarAST currentBlockAST = null;

    /** Wrap b in an ALT ... EOA, let prefixWithSynPred() add an auto
     *  backtracking predicate if applicable, and return a new
     *  BLOCK tree holding that single alt followed by EOB.
     */
    protected GrammarAST setToBlockWithSet(GrammarAST b) {
        GrammarAST alt = #(#[ALT,"ALT"],#b,#[EOA,""]);
        prefixWithSynPred(alt);
        return #(#[BLOCK,"BLOCK"],
                 alt,
                 #[EOB,""]
               );
    }

    /** Create a copy of the alt and make it into a BLOCK; all actions,
     *  labels, tree operators, rewrites are removed.
     */
    protected GrammarAST createBlockFromDupAlt(GrammarAST alt) {
        GrammarAST nalt = GrammarAST.dupTreeNoActions(alt, null);
        GrammarAST blk = #(#[BLOCK,"BLOCK"],
                           nalt,
                           #[EOB,""]
                          );
        return blk;
    }

    /** Rewrite alt to have a synpred as first element;
     *  (xxx)=>xxx
     *  but only if they didn't specify one manually.
     */
    protected void prefixWithSynPred(GrammarAST alt) {
        // if they want backtracking and it's not a lexer rule in combined grammar
        String autoBacktrack = (String)grammar.getBlockOption(currentBlockAST, "backtrack");
        if ( autoBacktrack==null ) {
            // no block-level setting; fall back to the grammar-level option
            autoBacktrack = (String)grammar.getOption("backtrack");
        }
        if ( autoBacktrack!=null&&autoBacktrack.equals("true") &&
             !(gtype==COMBINED_GRAMMAR &&
               Character.isUpperCase(currentRuleName.charAt(0))) &&
             alt.getFirstChild().getType()!=SYN_SEMPRED )
        {
            // duplicate alt and make a synpred block around that dup'd alt
            GrammarAST synpredBlockAST = createBlockFromDupAlt(alt);

            // Create a BACKTRACK_SEMPRED node as if user had typed this in
            // Effectively we replace (xxx)=>xxx with {synpredxxx}? xxx
            GrammarAST synpredAST = createSynSemPredFromBlock(synpredBlockAST,
                                                              BACKTRACK_SEMPRED);

            // insert BACKTRACK_SEMPRED as first element of alt
            synpredAST.getLastSibling().setNextSibling(alt.getFirstChild());
            alt.setFirstChild(synpredAST);
        }
    }

    /** Register synpredBlockAST with the grammar as a syntactic predicate
     *  and return a node of type synpredTokenType whose text is the
     *  predicate name the grammar handed back.  Also records the current
     *  block in grammar.blocksWithSynPreds.
     */
    protected GrammarAST createSynSemPredFromBlock(GrammarAST synpredBlockAST,
                                                   int synpredTokenType)
    {
        // add grammar fragment to a list so we can make fake rules for them
        // later.
        String predName = grammar.defineSyntacticPredicate(synpredBlockAST,currentRuleName);
        // convert (alpha)=> into {synpredN}? where N is some pred count
        // during code gen we convert to function call with templates
        String synpredinvoke = predName;
        GrammarAST p = #[synpredTokenType,synpredinvoke];
        // track how many decisions have synpreds
        grammar.blocksWithSynPreds.add(currentBlockAST);
        return p;
    }

    /** Build a RULE tree named name around block:
     *  (rule ID modifier ARG RET scope block EOR).  The modifier is a
     *  FRAGMENT node when fragment is true, otherwise null.  EOR takes its
     *  position from the block's last child; the rule node takes its
     *  position from the block itself.
     */
    public GrammarAST createSimpleRuleAST(String name,
                                          GrammarAST block,
                                          boolean fragment)
    {
        GrammarAST modifier = null;
        if ( fragment ) {
            modifier = #[FRAGMENT,"fragment"];
        }
        GrammarAST EORAST = #[EOR,""];
        GrammarAST EOBAST = block.getLastChild();
        EORAST.setLine(EOBAST.getLine());
        EORAST.setColumn(EOBAST.getColumn());
        GrammarAST ruleAST =
           #([RULE,"rule"],
                 [ID,name],modifier,[ARG,"ARG"],[RET,"RET"],
                 [SCOPE,"scope"],block,EORAST);
        ruleAST.setLine(block.getLine());
        ruleAST.setColumn(block.getColumn());
        return ruleAST;
    }

    /** Route syntax errors through ErrorManager, attaching the current
     *  lookahead token LT(1) (null if it cannot be fetched).
     */
    public void reportError(RecognitionException ex) {
        Token token = null;
        try {
            token = LT(1);
        }
        catch (TokenStreamException tse) {
            ErrorManager.internalError("can't get token???", tse);
        }
        ErrorManager.syntaxError(
            ErrorManager.MSG_SYNTAX_ERROR,
            grammar,
            token,
            "antlr: "+ex.toString(),
            ex);
    }

    /** Post-parse fixup: for a lexer grammar, ask the grammar to add its
     *  artificial match-tokens rule to root, passing whether the
     *  "filter" option equals "true".
     */
    public void cleanup(GrammarAST root) {
        if ( gtype==LEXER_GRAMMAR ) {
            String filter = (String)grammar.getOption("filter");
            // result was previously stored in an unused local; only the call matters here
            grammar.addArtificialMatchTokensRule(
                root,
                grammar.lexerRuleNamesInCombined,
                grammar.getDelegateNames(),
                filter!=null&&filter.equals("true"));
        }
    }
}

grammar![Grammar g]
{
    this.grammar = g;
    GrammarAST opt=null;
    Token optionsStartToken = null;
    Map opts;
    // set to factory that sets enclosing rule
    astFactory = new ASTFactory() {
        {
            setASTNodeClass(GrammarAST.class);
            setASTNodeClass("org.antlr.tool.GrammarAST");
        }
        public AST create(Token token) {
            AST t = super.create(token);
            ((GrammarAST)t).enclosingRuleName = currentRuleName;
            return t;
        }
        public AST create(int i) {
            AST t = super.create(i);
            ((GrammarAST)t).enclosingRuleName = currentRuleName;
            return t;
        }
    };
}
    :   //hdr:headerSpec
        ( ACTION )?
        ( cmt:DOC_COMMENT )?
        gr:grammarType gid:id {grammar.setName(#gid.getText());} SEMI
            ( {optionsStartToken=LT(1);}
              opts=optionsSpec {grammar.setOptions(opts, optionsStartToken);}
              {opt=(GrammarAST)returnAST;}
            )?
            (ig:delegateGrammars)?
            (ts:tokensSpec!)?
            scopes:attrScopes
            (a:actions)?
            r:rules
        EOF
        {
        #grammar = #(null, #(#gr, #gid, #cmt, opt, #ig, #ts, #scopes, #a, #r));
        cleanup(#grammar);
        }
    ;

grammarType
    :   (   "lexer"!  {gtype=LEXER_GRAMMAR; grammar.type = Grammar.LEXER;}       // pure lexer
        |   "parser"! {gtype=PARSER_GRAMMAR; grammar.type = Grammar.PARSER;}     // pure parser
        |   "tree"!   {gtype=TREE_GRAMMAR; grammar.type = Grammar.TREE_PARSER;}  // a tree parser
        |             {gtype=COMBINED_GRAMMAR; grammar.type = Grammar.COMBINED;} // merged parser/lexer
        )
        gr:"grammar" {#gr.setType(gtype);}
    ;

actions
    :   (action)+
    ;

/** Match stuff like @parser::members {int i;} */
action
    :   AMPERSAND^ (actionScopeName COLON! COLON!)? id ACTION
    ;

/** Sometimes the scope names will collide with keywords; allow them as
 *  ids for action scopes.
 */
actionScopeName
    :   id
    |   l:"lexer"   {#l.setType(ID);}
    |   p:"parser"  {#p.setType(ID);}
    ;

optionsSpec returns [Map opts=new HashMap()]
    :   OPTIONS^ (option[opts] SEMI!)+ RCURLY!
    ;

option[Map opts]
{
    Object value=null;
}
    :   o:id ASSIGN^ value=optionValue
        {
        opts.put(#o.getText(), value);
        }
    ;

optionValue returns [Object value=null]
    :   x:id             {value = #x.getText();}
    |   s:STRING_LITERAL {String vs = #s.getText();
                          value=vs.substring(1,vs.length()-1);}
    |   c:CHAR_LITERAL   {String vs = #c.getText();
                          value=vs.substring(1,vs.length()-1);}
    |   i:INT            {value = Integer.valueOf(#i.getText());}
    |   ss:STAR          {#ss.setType(STRING_LITERAL); value = "*";} // used for k=*
//  |   cs:charSet       {value = #cs;} // return set AST in this case
    ;

delegateGrammars
    :   "import"^ delegateGrammar (COMMA! delegateGrammar)* SEMI!
    ;

delegateGrammar
    :   lab:id ASSIGN^ g:id {grammar.importGrammar(#g, #lab.getText());}
    |   g2:id               {grammar.importGrammar(#g2,null);}
    ;

tokensSpec
    :   TOKENS^
            ( tokenSpec )+
        RCURLY!
    ;

tokenSpec
    :   TOKEN_REF ( ASSIGN^ (STRING_LITERAL|CHAR_LITERAL) )? SEMI!
    ;

attrScopes
    :   (attrScope)*
    ;

attrScope
    :   "scope"^ id ACTION
    ;

rules
    :   (
            options {
                // limitation of approx LL(k) says ambig upon
                // DOC_COMMENT TOKEN_REF, but that's an impossible sequence
                warnWhenFollowAmbig=false;
            }
        :   //{g.type==PARSER}? (aliasLexerRule)=>aliasLexerRule |
            rule
        )+
    ;

rule!
{
GrammarAST modifier=null, blk=null, blkRoot=null, eob=null;
int start = ((TokenWithIndex)LT(1)).getIndex();
int startLine = LT(1).getLine();
GrammarAST opt = null;
Map opts = null;
}
    :
    (   d:DOC_COMMENT
    )?
    (   p1:"protected"  {modifier=#p1;}
    |   p2:"public"     {modifier=#p2;}
    |   p3:"private"    {modifier=#p3;}
    |   p4:"fragment"   {modifier=#p4;}
    )?
    ruleName:id
    {currentRuleName=#ruleName.getText();
     if ( gtype==LEXER_GRAMMAR && #p4==null ) {
         grammar.lexerRuleNamesInCombined.add(currentRuleName);
     }
    }
    ( BANG )?
    ( aa:ARG_ACTION )?
    ( "returns" rt:ARG_ACTION )?
    ( throwsSpec )?
    ( opts=optionsSpec {opt=(GrammarAST)returnAST;} )?
    scopes:ruleScopeSpec
    (a:ruleActions)?
    colon:COLON
    {
    blkRoot = #[BLOCK,"BLOCK"];
    blkRoot.blockOptions = opts;
    blkRoot.setLine(colon.getLine());
    blkRoot.setColumn(colon.getColumn());
    eob = #[EOB,""];
    }
    b:altList[opts] {blk = #b;}
    semi:SEMI
    ( ex:exceptionGroup )?
    {
    int stop = ((TokenWithIndex)LT(1)).getIndex()-1; // point at the semi or exception thingie
    eob.setLine(semi.getLine());
    eob.setColumn(semi.getColumn());
    GrammarAST eor = #[EOR,""];
    eor.setLine(semi.getLine());
    eor.setColumn(semi.getColumn());
    GrammarAST root = #[RULE,"rule"];
    root.ruleStartTokenIndex = start;
    root.ruleStopTokenIndex = stop;
    root.setLine(startLine);
    root.blockOptions = opts;
    #rule = #(root,
              #ruleName,modifier,#(#[ARG,"ARG"],#aa),#(#[RET,"RET"],#rt),
              opt,#scopes,#a,blk,ex,eor);
    currentRuleName=null;
    }
    ;

ruleActions
    :   (ruleAction)+
    ;

/** Match stuff like @init {int i;} */
ruleAction
    :   AMPERSAND^ id ACTION
    ;

throwsSpec
    :   "throws" id ( COMMA id )*
    ;

ruleScopeSpec
{
int line = LT(1).getLine();
int column = LT(1).getColumn();
}
    :!  ( options {warnWhenFollowAmbig=false;} : "scope" a:ACTION )?
        ( "scope" ids:idList SEMI! )*
        {
        GrammarAST scopeRoot = (GrammarAST)#[SCOPE,"scope"];
        scopeRoot.setLine(line);
        scopeRoot.setColumn(column);
        #ruleScopeSpec = #(scopeRoot, #a, #ids);
        }
    ;

/** Build #(BLOCK ( #(ALT ...) EOB )+ ) */
block
{
GrammarAST save = currentBlockAST;
Map opts=null;
}
    :   lp:LPAREN^ {#lp.setType(BLOCK); #lp.setText("BLOCK");}
        (
            // 2nd alt and optional branch ambig due to
            // linear approx LL(2) issue.  COLON ACTION
            // matched correctly in 2nd alt.
            options {
                warnWhenFollowAmbig = false;
            }
        :
            (opts=optionsSpec {#block.setOptions(grammar,opts);})?
            ( ruleActions )?
            COLON!
        |   ACTION COLON!
        )?

        {currentBlockAST = #lp;}

        a1:alternative rewrite
        {if (LA(1)==OR||(LA(2)==QUESTION||LA(2)==PLUS||LA(2)==STAR)) prefixWithSynPred(#a1);}
        ( OR! a2:alternative rewrite
          {if (LA(1)==OR||(LA(2)==QUESTION||LA(2)==PLUS||LA(2)==STAR)) prefixWithSynPred(#a2);}
        )*

        rp:RPAREN!
        {
        currentBlockAST = save;
        GrammarAST eob = #[EOB,""];
        eob.setLine(rp.getLine());
        eob.setColumn(rp.getColumn());
        #block.addChild(eob);
        }
    ;

altList[Map opts]
{
    GrammarAST blkRoot = #[BLOCK,"BLOCK"];
    blkRoot.blockOptions = opts;
    blkRoot.setLine(LT(0).getLine()); // set to : or (
    blkRoot.setColumn(LT(0).getColumn());
    GrammarAST save = currentBlockAST;
    currentBlockAST = #blkRoot;
}
    :   a1:alternative rewrite
        {if (LA(1)==OR||(LA(2)==QUESTION||LA(2)==PLUS||LA(2)==STAR)) prefixWithSynPred(#a1);}
        ( OR! a2:alternative rewrite
          {if (LA(1)==OR||(LA(2)==QUESTION||LA(2)==PLUS||LA(2)==STAR)) prefixWithSynPred(#a2);}
        )*
        {
        #altList = #(blkRoot,#altList,#[EOB,""]);
        currentBlockAST = save;
        }
    ;

alternative
{
    GrammarAST eoa = #[EOA, ""];
    GrammarAST altRoot = #[ALT,"ALT"];
    altRoot.setLine(LT(1).getLine());
    altRoot.setColumn(LT(1).getColumn());
}
    :   ( el:element )+
        {
            if ( #alternative==null ) {
                #alternative = #(altRoot,#[EPSILON,"epsilon"],eoa);
            }
            else {
                // we have a real list of stuff
                #alternative = #(altRoot, #alternative, eoa);
            }
        }
    |   {
        GrammarAST eps = #[EPSILON,"epsilon"];
        eps.setLine(LT(0).getLine()); // get line/col of '|' or ':' (prev token)
        eps.setColumn(LT(0).getColumn());
        #alternative = #(altRoot,eps,eoa);
        }
    ;

exceptionGroup
    :   ( exceptionHandler )+ ( finallyClause )?
    |   finallyClause
    ;

exceptionHandler
    :   "catch"^ ARG_ACTION ACTION
    ;

finallyClause
    :   "finally"^ ACTION
    ;

element
    :   elementNoOptionSpec
    ;

elementNoOptionSpec
{
    IntSet elements=null;
    GrammarAST sub, sub2;
}
    :   (   id (ASSIGN^|PLUS_ASSIGN^) (atom|block)
            ( sub=ebnfSuffix[(GrammarAST)currentAST.root,false]! {#elementNoOptionSpec=sub;} )?
        |   atom
            ( sub2=ebnfSuffix[(GrammarAST)currentAST.root,false]! {#elementNoOptionSpec=sub2;} )?
        |   ebnf
        |   FORCED_ACTION
        |   ACTION
        |   p:SEMPRED ( IMPLIES! {#p.setType(GATED_SEMPRED);} )?
            {
            grammar.blocksWithSemPreds.add(currentBlockAST);
            }
        |   t3:tree
        )
    ;

atom
    :   range (ROOT^|BANG^)?
    |   (
            options {
                // TOKEN_REF WILDCARD could match terminal here then WILDCARD next
                generateAmbigWarnings=false;
            }
        :   // grammar.rule but ensure no spaces. "A . B" is not a qualified ref
            // We do here rather than lexer so we can build a tree
            {LT(1).getColumn()+LT(1).getText().length()==LT(2).getColumn()&&
             LT(2).getColumn()+1==LT(3).getColumn()}?
            id w:WILDCARD^ (terminal|ruleref) {#w.setType(DOT);}
        |   terminal
        |   ruleref
        )
    |   notSet (ROOT^|BANG^)?
    ;

ruleref
    :   rr:RULE_REF^ ( ARG_ACTION )? (ROOT^|BANG^)?
    ;

notSet
{
    int line = LT(1).getLine();
    int col = LT(1).getColumn();
    GrammarAST subrule=null;
}
    :   n:NOT^
        (   notTerminal
        |   block
        )
        {#notSet.setLine(line); #notSet.setColumn(col);}
    ;

tree
    :   TREE_BEGIN^
        element ( element )+
        RPAREN!
    ;

/** matches EBNF blocks (and sets via block rule) */
ebnf!
{
    int line = LT(1).getLine();
    int col = LT(1).getColumn();
}
    :   b:block
        (   QUESTION    {#ebnf=#([OPTIONAL,"?"],#b);}
        |   STAR        {#ebnf=#([CLOSURE,"*"],#b);}
        |   PLUS        {#ebnf=#([POSITIVE_CLOSURE,"+"],#b);}
        |   IMPLIES! // syntactic predicate
            {
            if ( gtype==COMBINED_GRAMMAR &&
                 Character.isUpperCase(currentRuleName.charAt(0)) )
            {
                // ignore for lexer rules in combined
                #ebnf = #(#[SYNPRED,"=>"],#b);
            }
            else {
                // create manually specified (...)=> predicate;
                // convert to sempred
                #ebnf = createSynSemPredFromBlock(#b, SYN_SEMPRED);
            }
            }
        |   ROOT {#ebnf = #(#ROOT, #b);}
        |   BANG {#ebnf = #(#BANG, #b);}
        |        {#ebnf = #b;}
        )
        {#ebnf.setLine(line); #ebnf.setColumn(col);}
    ;

range!
{
GrammarAST subrule=null, root=null;
}
    :   c1:CHAR_LITERAL RANGE c2:CHAR_LITERAL
        {
        GrammarAST r = #[CHAR_RANGE,".."];
        r.setLine(c1.getLine());
        r.setColumn(c1.getColumn());
        #range = #(r, #c1, #c2);
        root = #range;
        }
//      (subrule=ebnfSuffix[root,false] {#range=subrule;})?
    ;

terminal
{
GrammarAST ebnfRoot=null, subrule=null;
}
    :   cl:CHAR_LITERAL^
        ( elementOptions[#cl]! )?
        (ROOT^|BANG^)?

    |   tr:TOKEN_REF^
        ( elementOptions[#tr]! )?
        ( ARG_ACTION )? // Args are only valid for lexer rules
        (ROOT^|BANG^)?

    |   sl:STRING_LITERAL^
        ( elementOptions[#sl]! )?
        (ROOT^|BANG^)?

    |   wi:WILDCARD (ROOT^|BANG^)?
    ;

elementOptions[GrammarAST terminalAST]
    :   OPEN_ELEMENT_OPTION^ defaultNodeOption[terminalAST] CLOSE_ELEMENT_OPTION!
    |   OPEN_ELEMENT_OPTION^ elementOption[terminalAST] (SEMI! elementOption[terminalAST])* CLOSE_ELEMENT_OPTION!
    ;

defaultNodeOption[GrammarAST terminalAST]
{
StringBuffer buf = new StringBuffer();
}
    :   i:id {buf.append(#i.getText());} (WILDCARD i2:id {buf.append("."+#i2.getText());})*
        {terminalAST.setTerminalOption(grammar,Grammar.defaultTokenOption,buf.toString());}
    ;

elementOption[GrammarAST terminalAST]
    :   a:id ASSIGN^ (b:id|s:STRING_LITERAL)
        {
        Object v = (#b!=null)?#b.getText():#s.getText();
        terminalAST.setTerminalOption(grammar,#a.getText(),v);
        }
    ;

ebnfSuffix[GrammarAST elemAST, boolean inRewrite] returns [GrammarAST subrule=null]
{
GrammarAST ebnfRoot=null;
}
    :!  (   QUESTION {ebnfRoot = #[OPTIONAL,"?"];}
        |   STAR     {ebnfRoot = #[CLOSURE,"*"];}
        |   PLUS     {ebnfRoot = #[POSITIVE_CLOSURE,"+"];}
        )
        {
        GrammarAST save = currentBlockAST;
        ebnfRoot.setLine(elemAST.getLine());
        ebnfRoot.setColumn(elemAST.getColumn());
        GrammarAST blkRoot = #[BLOCK,"BLOCK"];
        currentBlockAST = blkRoot;
        GrammarAST eob = #[EOB,""];
        eob.setLine(elemAST.getLine());
        eob.setColumn(elemAST.getColumn());
        GrammarAST alt = #(#[ALT,"ALT"],elemAST,#[EOA,""]);
        if ( !inRewrite ) {
            prefixWithSynPred(alt);
        }
        subrule =
             #(ebnfRoot,
               #(blkRoot,alt,eob)
              );
        currentBlockAST = save;
        }
    ;

notTerminal
    :   cl:CHAR_LITERAL
    |   tr:TOKEN_REF
    |   STRING_LITERAL
    ;

idList
    :   id (COMMA! id)*
    ;

id
    :   TOKEN_REF {#id.setType(ID);}
    |   RULE_REF  {#id.setType(ID);}
    ;

// R E W R I T E  S Y N T A X

rewrite
{
    GrammarAST root = new GrammarAST();
}
    :!
        ( options { warnWhenFollowAmbig=false;}
        : rew:REWRITE pred:SEMPRED alt:rewrite_alternative
          {root.addChild( #(#rew, #pred, #alt) );}
        )*
        rew2:REWRITE alt2:rewrite_alternative
        {
        root.addChild( #(#rew2, #alt2) );
        #rewrite = (GrammarAST)root.getFirstChild();
        }
    |
    ;

rewrite_block
    :   lp:LPAREN^ {#lp.setType(BLOCK); #lp.setText("BLOCK");}
        rewrite_alternative
        RPAREN!
        {
        GrammarAST eob = #[EOB,""];
        eob.setLine(lp.getLine());
        eob.setColumn(lp.getColumn());
        #rewrite_block.addChild(eob);
        }
    ;

rewrite_alternative
{
    GrammarAST eoa = #[EOA, ""];
    GrammarAST altRoot = #[ALT,"ALT"];
    altRoot.setLine(LT(1).getLine());
    altRoot.setColumn(LT(1).getColumn());
}
    :   {grammar.buildTemplate()}? rewrite_template

    |   {grammar.buildAST()}? ( rewrite_element )+
        {
            if ( #rewrite_alternative==null ) {
                #rewrite_alternative = #(altRoot,#[EPSILON,"epsilon"],eoa);
            }
            else {
                #rewrite_alternative = #(altRoot, #rewrite_alternative,eoa);
            }
        }

    |   {#rewrite_alternative = #(altRoot,#[EPSILON,"epsilon"],eoa);}

    |   {grammar.buildAST()}? ETC
    ;

rewrite_element
{
GrammarAST subrule=null;
}
    :   t:rewrite_atom
        ( subrule=ebnfSuffix[#t,true] {#rewrite_element=subrule;} )?
    |   rewrite_ebnf
    |   tr:rewrite_tree
        ( subrule=ebnfSuffix[#tr,true] {#rewrite_element=subrule;} )?
    ;

rewrite_atom
{
GrammarAST subrule=null;
}
    :   tr:TOKEN_REF^ (elementOptions[#tr]!)? (ARG_ACTION)? // for imaginary nodes
    |   rr:RULE_REF
    |   cl:CHAR_LITERAL^ (elementOptions[#cl]!)?
    |   sl:STRING_LITERAL^ (elementOptions[#sl]!)?
    |!  d:DOLLAR i:id // reference to a label in a rewrite rule
        {
        #rewrite_atom = #[LABEL,i_AST.getText()];
        #rewrite_atom.setLine(#d.getLine());
        #rewrite_atom.setColumn(#d.getColumn());
        }
    |   ACTION
    ;

rewrite_ebnf!
{
    int line = LT(1).getLine();
    int col = LT(1).getColumn();
}
    :   b:rewrite_block
        (   QUESTION    {#rewrite_ebnf=#([OPTIONAL,"?"],#b);}
        |   STAR        {#rewrite_ebnf=#([CLOSURE,"*"],#b);}
        |   PLUS        {#rewrite_ebnf=#([POSITIVE_CLOSURE,"+"],#b);}
        )
        {#rewrite_ebnf.setLine(line); #rewrite_ebnf.setColumn(col);}
    ;

rewrite_tree
    :   TREE_BEGIN^
            rewrite_atom ( rewrite_element )*
        RPAREN!
    ;

/** Build a tree for a template rewrite:
      ^(TEMPLATE (ID|ACTION) ^(ARGLIST ^(ARG ID ACTION) ...) )
    where ARGLIST is always there even if no args exist.
    ID can be "template" keyword.  If first child is ACTION then it's
    an indirect template ref

    -> foo(a={...}, b={...})
    -> ({string-e})(a={...}, b={...})  // e evaluates to template name
    -> {%{$ID.text}} // create literal template from string (done in ActionTranslator)
    -> {st-expr} // st-expr evaluates to ST
 */
rewrite_template
{Token st=null;}
    :   // -> template(a={...},...) "..."
        {LT(1).getText().equals("template")}? // inline
        rewrite_template_head {st=LT(1);}
        ( DOUBLE_QUOTE_STRING_LITERAL! | DOUBLE_ANGLE_STRING_LITERAL! )
        {#rewrite_template.addChild(#[st]);}

    |   // -> foo(a={...}, ...)
        rewrite_template_head

    |   // -> ({expr})(a={...}, ...)
        rewrite_indirect_template_head

    |   // -> {...}
        ACTION
    ;

/** -> foo(a={...}, ...) */
rewrite_template_head
    :   id lp:LPAREN^ {#lp.setType(TEMPLATE); #lp.setText("TEMPLATE");}
        rewrite_template_args
        RPAREN!
    ;

/** -> ({expr})(a={...}, ...) */
rewrite_indirect_template_head
    :   lp:LPAREN^ {#lp.setType(TEMPLATE); #lp.setText("TEMPLATE");}
        ACTION
        RPAREN!
        LPAREN! rewrite_template_args RPAREN!
    ;

rewrite_template_args
    :   rewrite_template_arg (COMMA! rewrite_template_arg)*
        {#rewrite_template_args = #(#[ARGLIST,"ARGLIST"], rewrite_template_args);}
    |   {#rewrite_template_args = #[ARGLIST,"ARGLIST"];}
    ;

rewrite_template_arg
    :   id a:ASSIGN^ {#a.setType(ARG); #a.setText("ARG");} ACTION
    ;

class ANTLRLexer extends Lexer;
options {
    k=3;
    exportVocab=ANTLR;
    testLiterals=false;
    interactive=true;
    charVocabulary='\003'..'\377';
}

{
    /** advance the current column number by one; don't do tabs.
     *  we want char position in line to be sent to AntlrWorks.
     */
    public void tab() {
        setColumn( getColumn()+1 );
    }
    /** Set to true by the ROOT ('^') and BANG ('!') lexer rules. */
    public boolean hasASTOperator = false;
}

WS  :   (   ' '
        |   '\t'
        |   ('\r')? '\n' {newline();}
        )
    ;

COMMENT :
    ( SL_COMMENT | t:ML_COMMENT {$setType(t.getType());} )
    ;

protected
SL_COMMENT
    :   "//"
        (   (" $ANTLR")=> " $ANTLR " SRC ('\r')? '\n' // src directive
        |   ( options {greedy=false;} : . )* ('\r')? '\n'
        )
        { newline(); }
    ;

protected
ML_COMMENT :
    "/*"
    (   { LA(2)!='/' }? '*' {$setType(DOC_COMMENT);}
    |
    )
    (
        options {
            greedy=false;  // make it exit upon "*/"
        }
    :   '\r' '\n'   {newline();}
    |   '\n'        {newline();}
    |   ~('\n'|'\r')
    )*
    "*/"
    ;

OPEN_ELEMENT_OPTION
    :   '<'
    ;

CLOSE_ELEMENT_OPTION
    :   '>'
    ;

AMPERSAND : '@';

COMMA : ',';

QUESTION : '?' ;

TREE_BEGIN : "^(" ;

LPAREN: '(' ;

RPAREN: ')' ;

COLON : ':' ;

STAR: '*' ;

PLUS: '+' ;

ASSIGN : '=' ;

PLUS_ASSIGN : "+=" ;

IMPLIES : "=>" ;

REWRITE : "->" ;

SEMI: ';' ;

ROOT : '^' {hasASTOperator=true;} ;

BANG : '!' {hasASTOperator=true;} ;

OR  : '|' ;

WILDCARD : '.' ;

ETC : "..." ;

RANGE : ".." ;

NOT : '~' ;

RCURLY: '}' ;

DOLLAR : '$' ;

STRAY_BRACKET
    :   ']'
        {
        ErrorManager.syntaxError(
            ErrorManager.MSG_SYNTAX_ERROR,
            null,
            _token,
            "antlr: dangling ']'? make sure to escape with \\]",
            null);
        }
    ;

CHAR_LITERAL
    :   '\'' (ESC|'\n'{newline();}|~'\'')* '\''
        {
        StringBuffer s = Grammar.getUnescapedStringFromGrammarStringLiteral($getText);
        if ( s.length()>1 ) {
            // more than one char after unescaping: treat it as a string literal
            $setType(STRING_LITERAL);
        }
        }
    ;

DOUBLE_QUOTE_STRING_LITERAL
    :   '"' ('\\'! '"'|'\\' ~'"'|'\n'{newline();}|~'"')* '"'
    ;

DOUBLE_ANGLE_STRING_LITERAL
    :   "<<" (options {greedy=false;}:'\n'{newline();}|.)* ">>"
    ;

protected
ESC :   '\\'
        (   'n'
        |   'r'
        |   't'
        |   'b'
        |   'f'
        |   '"'
        |   '\''
        |   '\\'
        |   '>'
        |   'u' XDIGIT XDIGIT XDIGIT XDIGIT
        |   . // unknown, leave as it is
        )
    ;

protected
DIGIT
    :   '0'..'9'
    ;

protected
XDIGIT :
        '0' .. '9'
    |   'a' .. 'f'
    |   'A' .. 'F'
    ;

INT :   ('0'..'9')+
    ;

//HETERO_TYPE : '<'! ~'<' (~'>')* '>'! ;

ARG_ACTION
    :   '['! NESTED_ARG_ACTION ']'!
    ;

protected
NESTED_ARG_ACTION :
    (
        '\r' '\n'   {newline();}
    |   '\n'        {newline();}
    |   '\\'! ']'
    |   '\\' ~']'
    |   ACTION_STRING_LITERAL
    |   ACTION_CHAR_LITERAL
    |   ~']'
    )*
    ;

ACTION
{int actionLine=getLine(); int actionColumn = getColumn(); }
    :   NESTED_ACTION
        (   '?'! {_ttype = SEMPRED;} )?
        {
            Token t = makeToken(_ttype);
            String action = $getText;
            int n = 1; // num delimiter chars
            if ( action.startsWith("{{") && action.endsWith("}}") ) {
                t.setType(FORCED_ACTION);
                n = 2;
            }
            action = action.substring(n,action.length()-n);
            t.setText(action);
            t.setLine(actionLine);          // set action line to start
            t.setColumn(actionColumn);
            $setToken(t);
        }
    ;

protected
NESTED_ACTION :
    '{'
    (   options {
            greedy = false; // exit upon '}'
        }
    :
        (
            '\r' '\n'   {newline();}
        |   '\n'        {newline();}
        )
    |   NESTED_ACTION
    |   ACTION_CHAR_LITERAL
    |   COMMENT
    |   ACTION_STRING_LITERAL
    |   ACTION_ESC
    |   .
    )*
    '}'
    ;

protected
ACTION_CHAR_LITERAL
    :   '\'' (ACTION_ESC|'\n'{newline();}|~'\'')* '\''
    ;

protected
ACTION_STRING_LITERAL
    :   '"' (ACTION_ESC|'\n'{newline();}|~'"')* '"'
    ;

protected
ACTION_ESC
    :   "\\'"
    |   "\\\""
    |   '\\' ~('\''|'"')
    ;

TOKEN_REF
options { testLiterals = true; }
    :   'A'..'Z'
        (   // scarf as many letters/numbers as you can
            options {
                warnWhenFollowAmbig=false;
            }
        :
            'a'..'z'|'A'..'Z'|'_'|'0'..'9'
        )*
    ;

// we get a warning here when looking for options '{', but it works right
RULE_REF
{
    int t=0;
}
    :   t=INTERNAL_RULE_REF {_ttype=t;}
        (   {t==OPTIONS}? WS_LOOP ('{' {_ttype = OPTIONS;})?
        |   {t==TOKENS}? WS_LOOP ('{' {_ttype = TOKENS;})?
        |
        )
    ;

protected
WS_LOOP
    :   (   // grab as much WS as you can
            options {
                greedy=true;
            }
        :
            WS
        |   COMMENT
        )*
    ;

protected
INTERNAL_RULE_REF returns [int t]
{
    t = RULE_REF;
}
    :   'a'..'z'
        (   // scarf as many letters/numbers as you can
            options {
                warnWhenFollowAmbig=false;
            }
        :
            'a'..'z'|'A'..'Z'|'_'|'0'..'9'
        )*
        {t = testLiteralsTable(t);}
    ;

protected
WS_OPT
    :   (WS)?
    ;

/** Reset the file and line information; useful when the grammar
 *  has been generated so that errors are shown relative to the
 *  original file like the old C preprocessor used to do.
 */
protected
SRC :   "src" ' ' file:ACTION_STRING_LITERAL ' ' line:INT
        {
        newline();
        setFilename(file.getText().substring(1,file.getText().length()-1));
        setLine(Integer.parseInt(line.getText())-1);  // -1 because SL_COMMENT will increment the line no. KR
        $setType(Token.SKIP); // don't let this go to the parser
        }
    ;