/* [The "BSD license"] Copyright (c) 2006 Martin Traverso All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. The name of the author may not be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ group Ruby implements ANTLRCore; /** The overall file structure of a recognizer; stores methods for rules * and cyclic DFAs plus support code. */ outputFile(LEXER,PARSER,TREE_PARSER, actionScope, actions, docComment, recognizer, name, tokens, tokenNames, rules, cyclicDFAs, bitsets, buildTemplate, buildAST, rewrite, profile, backtracking, synpreds, memoize, numRules, fileName, ANTLRVersion, generatedTimestamp, trace, scopes, superClass, literals) ::= << # () # Generated by ANTLR on >> /** * Inherits parameters from outputFile(...) * * labelType is not used for Ruby (no explicit type declarations) */ lexer(grammar, name, tokens, scopes, rules, numRules, labelType, filterMode) ::= << class require 'stringio' =}; separator="\n"> def initialize(input) input = StringIO.new(input) if input.respond_to?(:to_str) @input = CharStream.new(input) @backtracking = 0 @failed = false end def next_token # TODO: catch exceptions @token = nil @channel = nil @text = nil @start = @input.index @line = @input.line @pos = @input.column @type = nil @type_int = nil return :EOF if == :EOF match_Tokens() if @token == nil @text ||= @input.substring(@start, @input.index - 1) @token = Token.new(@type, @type_int, @line, @pos, @text, @channel) end puts @token.inspect return @token end class Token attr_reader :token_type attr_reader :int_type attr_reader :line attr_reader :pos attr_reader :text attr_reader :channel def initialize(token_type, int_type, line, pos, text, channel = nil) @token_type = token_type @int_type = int_type @line = line @pos = pos @text = text @channel = channel end alias :to_i :int_type end private class CharStream attr_reader :line attr_reader :column attr_reader :index def initialize(input) @buffer = "" @input = input @line = 1 @column = 0 @index = 0; end # returns a Fixnum between 0 and 0xFFFF or :EOF def look_ahead(pos) offset = @index + pos - 1 if @buffer.length \< offset + 1 char = @input.read(offset + 1 - @buffer.length) @buffer \<\< char if not char.nil? end if offset \< @buffer.length @buffer[offset] else :EOF end end def mark @state = { :index => @index, :line => @line, :column => @column } return 0 end def rewind(marker) @index = @state[:index] @line = @state[:line] @column = @state[:column] end def consume look_ahead(1) # force a read from the input if necessary @column = @column + 1 if @buffer[@index] == ?\n @line = @line + 1 @column = 0 end @index = @index + 1 end def substring(start, stop) @buffer.slice(start, stop - start + 1) end end def match(value = nil) @failed = false case when value.nil? @input.consume() when value.respond_to?(:to_str) catch(:done) do value.each_byte do |c| @failed ||= !() @input.consume() if !@failed throw :done if @failed end end else @failed = !() @input.consume() if !@failed end if @failed && @backtracking \<= 0 raise "Expected #{value.respond_to?(:chr) ? value.chr : value}" end end def match_range(from, to) char = if char != :EOF && (char \>= from || char \<= to) @failed = false match() elsif @backtracking > 0 @failed = true else raise "Expected [#{from.chr}..#{to.chr}]" end end end >> parser(grammar, name, scopes, tokens, tokenNames, rules, numRules, bitsets, ASTLabelType, superClass, labelType, members) ::= << require 'Lexer' class attr_reader :lexer TOKENS = [ , ]}; separator=",\n"> ].inject({}) { |hash, pair| name = pair[0] index = pair[1] + 3 # hardcoded for now... no way to get this value from ANTLR if name[0] == ?' hash[:"T#{index}"] = index else hash[:"#{name}"] = index end hash } TOKENS[:EOF] = -1 def initialize(input) if input.respond_to?(:to_str) || input.respond_to?(:read) input = Lexer.new(input) end @lexer = input @input = TokenStream.new(input) @backtracking = 0 @failed = false @indent = 0 end private class TokenStream attr_reader :index def initialize(input) @buffer = [] @input = input @channel = nil @index = 0; end # returns a Token def look_ahead(pos) offset = @index + pos - 1 while @buffer[-1] != :EOF && @buffer.length \< offset + 1 token = @input.next_token if token == :EOF || token.channel == @channel @buffer \<\< token end end offset = -1 if offset >= @buffer.length if offset \< @buffer.length @buffer[offset] end end def mark @state = { :index => @index } return 0 end def rewind(marker) @index = @state[:index] end def consume look_ahead(1) # force a read from the input if necessary @index = @index + 1 end end def match(token = nil) if token.nil? || == token @input.consume @failed = false return elsif @backtracking > 0 @failed = true else raise "Expected #{token}" end end def look_ahead(k) token = @input.look_ahead(k) if token != :EOF token = token.token_type end token end end >> /** How to generate a tree parser; same as parser except the input * stream is a different type. */ treeParser(grammar, name, scopes, tokens, tokenNames, globalAction, rules, numRules, bitsets, labelType, ASTLabelType, superClass, members) ::= << raise "treeParser not implemented" >> /** A simpler version of a rule template that is specific to the imaginary * rules created for syntactic predicates. As they never have return values * nor parameters etc..., just give simplest possible method. Don't do * any of the normal memoization stuff in here either; it's a waste. * As predicates cannot be inlined into the invoking rule, they need to * be in a rule by themselves. */ synpredRule(ruleName, ruleDescriptor, block, description, nakedBlock) ::= << # def _fragment end >> /** How to generate code for a rule. This includes any return type * data aggregates required for multiple return values. */ rule(ruleName,ruleDescriptor,block,emptyRule,description,exceptions,finally,memoize) ::= << # def () = nil }; separator = "\n" > puts " " * @indent + "+" @indent += 1 @indent -= 1 puts " " * @indent + "-" return { => _retval_ }; separator = ",">} # TODO: need "Attribute.index" for this to work: return }; separator = ","> return _retval_ end >> ruleLabelDefs() ::= << = nil}; separator="\n"> <[ruleDescriptor.tokenListLabels, ruleDescriptor.ruleListLabels] :{list_ = nil}; separator="\n" > >> /** How to generate a rule in the lexer; naked blocks are used for * fragment rules. */ lexerRule(ruleName,nakedBlock,ruleDescriptor,block,memoize) ::= << def match_() <\n> @type = : @type_int = end >> lexerRuleLabelDefs() ::= << <[ruleDescriptor.tokenLabels, ruleDescriptor.ruleLabels] :{ = nil}; separator="\n" > = nil}; separator="\n"> <[ruleDescriptor.tokenListLabels, ruleDescriptor.ruleListLabels] :{list_ = nil}; separator="\n" > >> /** How to generate code for the implicitly-defined lexer grammar rule * that chooses between lexer rules. */ tokensRule(ruleName,nakedBlock,args,block,ruleDescriptor) ::= << >> filteringNextToken() ::= << raise "filteringNextToken not implemented" >> filteringActionGate() ::= << raise "filteringActionGate not implemented" >> // S U B R U L E S /** A (...) subrule with multiple alternatives */ block(alts,decls,decision,enclosingBlockLevel,blockLevel,decisionNumber,maxK,maxAlt,description) ::= << >> /** A rule block with multiple alternatives */ ruleBlock(alts,decls,decision,enclosingBlockLevel,blockLevel,decisionNumber,maxK,maxAlt,description) ::= << >> /** * decision, decisionNumber don't seem to be relevant in this template * alts actually has a single element */ ruleBlockSingleAlt(alts,decls,decision,enclosingBlockLevel,blockLevel,decisionNumber,description) ::= << >> /** A special case of a (...) subrule with a single alternative */ blockSingleAlt(alts,decls,decision,enclosingBlockLevel,blockLevel,decisionNumber,description) ::= << >> /** A (..)+ block with 0 or more alternatives */ positiveClosureBlock(alts,decls,decision,enclosingBlockLevel,blockLevel,decisionNumber,maxK,maxAlt,description) ::= << # matchedOnce = false while true alt = case alt else break end matchedOnce = true end if !matchedOnce raise "Expected at least one match: " end >> positiveClosureBlockSingleAlt ::= positiveClosureBlock /** A (..)* block with 0 or more alternatives */ closureBlock(alts,decls,decision,enclosingBlockLevel,blockLevel,decisionNumber,maxK,maxAlt,description) ::= << # while true alt = case alt else break end end >> closureBlockSingleAlt ::= closureBlock /** Optional blocks (x)? are translated to (x|) by before code generation * so we can just use the normal block template */ optionalBlock ::= block optionalBlockSingleAlt ::= block /** An alternative is just a list of elements; at outermost level */ alt(elements,altNum,description,autoAST,outerAlt)::= << # >> // E L E M E N T S /** match a token optionally with a label in front */ tokenRef(token,label,elementIndex)::= << _ = @input.look_ahead(1)<\n> match(:) >> /** ids+=ID */ tokenRefAndListLabel(token,label,elementIndex)::= << >> listLabel(label,elem)::= << list_ ||= [] list_ \<\< _ >> /** match a character */ charRef(char,label)::= << _ = <\n> match() >> /** match a character range */ charRangeRef(a,b,label)::= << _ = <\n> match_range(, ) >> /** For now, sets are interval tests and must be tested inline */ matchSet(s,label,elementIndex,postmatchCode)::= << _ = <\n> if match() else raise "Expected set" end >> matchSetAndListLabel(s,label,elementIndex,postmatchCode)::= << >> /** Match a string literal */ lexerStringRef(string,label)::= << _ = <\n> match()<\n> >> wildcard(label, elementIndex)::= << _ = <\n> match() >> wildcardAndListLabel(label,elementIndex)::= << >> /** Match . wildcard in lexer */ wildcardChar(label, elementIndex)::= << _ = <\n> match() >> wildcardCharListLabel(label, elementIndex)::= << raise "wildcardCharListLabel not implemented" >> /** Match a rule reference by invoking it possibly with arguments * and a return value or values. */ ruleRef(rule,label,elementIndex,args)::= << _ = ()<\n> ()<\n> >> /** ids+=ID */ ruleRefAndListLabel(rule,label,elementIndex,args)::= << >> /** A: b=B; B: .; TODO: Should we use a real token type instead of :invalid? How do we get it? */ lexerRuleRef(rule,label,args,elementIndex)::= << __start_ = @input.index __line_ = @input.line __pos_ = @input.column match_() _ = Token.new(:invalid, 0, __line_, __pos_, @input.substring(__start_, @input.index - 1), nil) match_() >> lexerRuleRefAndListLabel(rule,label,args,elementIndex) ::= << >> /** EOF in the lexer */ lexerMatchEOF(label,elementIndex)::= << _ = :EOF<\n> match(:EOF) >> /** match ^(root children) in tree parser */ tree(root, actionsAfterRoot, children, nullableChildList) ::= << raise "tree not implemented" >> /** Every predicate is used as a validating predicate (even when it is * also hoisted into a prediction expression). */ validateSemanticPredicate(pred,description)::= << # if ! raise "Semantic predicate failed: #{}" end >> // F i x e d D F A (if-then-else) dfaState(k,edges,eotPredictsAlt,description,stateNumber,semPredState)::= << # look_ahead_ = look_ahead_ = -1 if look_ahead_ == :EOF if else alt = <\n> raise "Expected: "<\n> end >> /** Same as a normal DFA state except that we don't examine lookahead * for the bypass alternative. It delays error detection but this * is faster, smaller, and more what people expect. For (X)? people * expect "if ( LA(1)==X ) match(X);" and that's it. * * If a semPredState, don't force lookahead lookup; preds might not * need. */ dfaOptionalBlockState(k,edges,eotPredictsAlt,description,stateNumber,semPredState)::= << # look_ahead_ = look_ahead_ = -1 if look_ahead_ == :EOF if end >> /** A DFA state that is actually the loopback decision of a closure * loop. If end-of-token (EOT) predicts any of the targets then it * should act like a default clause (i.e., no error can be generated). * This is used only in the lexer so that for ('a')* on the end of a * rule anything other than 'a' predicts exiting. * * If a semPredState, don't force lookahead lookup; preds might not * need. */ dfaLoopbackState(k,edges,eotPredictsAlt,description,stateNumber,semPredState)::= << # look_ahead_ = look_ahead_ = -1 if look_ahead_ == :EOF if else alt = end >> /** An accept state indicates a unique alternative has been predicted */ /** It is not clear that decisionNumber is available here */ dfaAcceptState(alt) ::= "alt = " /** A simple edge with an expression. If the expression is satisfied, * enter to the target state. To handle gated productions, we may * have to evaluate some predicates for this edge. */ dfaEdge(labelExpr, targetState, predicates)::= << && >> // F i x e d D F A (switch case) /** A DFA state where a SWITCH may be generated. The code generator * decides if this is possible: CodeGenerator.canGenerateSwitch(). */ dfaStateSwitch(k,edges,eotPredictsAlt,description,stateNumber,semPredState)::= << # case else alt = <\n> raise "Expected: "<\n> end >> /** * eotPredictsAlt is not relevant here */ dfaOptionalBlockStateSwitch(k,edges,eotPredictsAlt,description,stateNumber,semPredState)::= << # case end >> dfaLoopbackStateSwitch(k, edges,eotPredictsAlt,description,stateNumber,semPredState)::= << # case <\n> else alt = end >> dfaEdgeSwitch(labels, targetState)::= << when }; separator=","><\n> when }; separator=","><\n> >> // C y c l i c D F A /** The code to initiate execution of a cyclic DFA; this is used * in the rule to predict an alt just like the fixed DFA case. * The attribute is inherited via the parser, lexer, ... */ dfaDecision(decisionNumber,description)::= << alt = DFA.predict(self, @input) >> /** Generate the tables and support code needed for the DFAState object * argument. Unless there is a semantic predicate (or syn pred, which * become sem preds), all states should be encoded in the state tables. * Consequently, cyclicDFAState/cyclicDFAEdge,eotDFAEdge templates are * not used except for special DFA states that cannot be encoded as * a transition table. */ cyclicDFA(dfa)::= << DFA = DFA.new( [], [], [], [], [], [], [ ]}; separator=",\n", null=""> ]) def special_state_transition(s) case s }; separator="\n"> end raise "Expected: " -1 end public :special_state_transition >> /** A special state in a cyclic DFA; special means has a semantic predicate * or it's a huge set of symbols to check. */ cyclicDFAState(decisionNumber,stateNumber,edges,needErrorClause,semPredState)::= << @input.rewind(0) look_ahead__ = s = -1 return s if s >= 0 >> /** Just like a fixed DFA edge, test the lookahead and indicate what * state to jump to next if successful. Again, this is for special * states. */ cyclicDFAEdge(labelExpr, targetStateNumber, edgeNumber, predicates)::= << return s = if () && ()<\n> >> /** An edge pointing at end-of-token; essentially matches any char; * always jump to the target. */ eotDFAEdge(targetStateNumber,edgeNumber, predicates)::= << s = <\n> >> // D F A E X P R E S S I O N S andPredicates(left,right)::= "( && )" orPredicates(operands)::= << () >> notPredicate(pred)::= "!()" evalPredicate(pred,description)::= "()" evalSynPredicate(pred,description)::= "()" /** * It's not really clear that decisionNumber and stateNumber are available here */ lookaheadTest(atom,k,atomAsInt)::= << look_ahead_ == look_ahead_ == : >> /** Sometimes a lookahead test cannot assume that LA(k) is in a temp variable * somewhere. Must ask for the lookahead directly. */ isolatedLookaheadTest(atom,k,atomAsInt) ::= << == == : >> /** * It's not really clear that decisionNumber and stateNumber are available here */ lookaheadRangeTest(lower,upper,k,rangeNumber,lowerAsInt,upperAsInt)::= << (look_ahead_ \>= && look_ahead_ \<= ) (TOKENS[look_ahead_] \>= && TOKENS[look_ahead_] \<= ) >> isolatedLookaheadRangeTest(lower,upper,k,rangeNumber,lowerAsInt,upperAsInt) ::= << ( \>= && \<= ) (TOKENS[] \>= && TOKENS[] \<= ) >> setTest(ranges) ::= << >> // A T T R I B U T E S parameterAttributeRef(attr)::= "" parameterSetAttributeRef(attr,expr) ::= " = " scopeAttributeRef(scope,attr,index,negIndex)::= << raise "scopeAttributeRef not implemented" >> scopeSetAttributeRef(scope,attr,expr,index,negIndex) ::= << raise "scopeSetAttributeRef not implemented" >> /** $x is either global scope or x is rule with dynamic scope; refers * to stack itself not top of stack. This is useful for predicates * like {$function.size()>0 && $function::name.equals("foo")}? */ isolatedDynamicScopeRef(scope)::= << raise "isolatedDynamicScopeRef not implemented" >> /** reference an attribute of rule; might only have single return value */ ruleLabelRef(referencedRule,scope,attr)::= << _[:] _ >> /** [] # TODO: need "Attribute.index" for this to work >> **/ returnAttributeRef(ruleDescriptor,attr)::= << _retval_ >> returnSetAttributeRef(ruleDescriptor,attr,expr) ::= << _retval_ = >> /** How to translate $tokenLabel */ tokenLabelRef(label)::= "_" /** ids+=ID {$ids} or e+=expr {$e} */ listLabelRef(label)::= "list_" // not sure the next are the right approach; and they are evaluated early; // they cannot see TREE_PARSER or PARSER attributes for example. :( tokenLabelPropertyRef_text(scope,attr)::= "_.text" tokenLabelPropertyRef_type(scope,attr)::= "_.token_type" tokenLabelPropertyRef_line(scope,attr)::= "_.line" tokenLabelPropertyRef_pos(scope,attr) ::= "_.pos" tokenLabelPropertyRef_channel(scope,attr)::= "_.channel" tokenLabelPropertyRef_index(scope,attr)::= "_.index" tokenLabelPropertyRef_tree(scope,attr)::= << raise "tokenLabelPropertyRef_tree not implemented" >> ruleLabelPropertyRef_start(scope,attr)::= << raise "ruleLabelPropertyRef_start not implemented" >> ruleLabelPropertyRef_stop(scope,attr)::= << raise "ruleLabelPropertyRef_stop not implemented" >> ruleLabelPropertyRef_tree(scope,attr)::= << raise "ruleLabelPropertyRef_tree not implemented" >> ruleLabelPropertyRef_text(scope,attr)::= << raise "ruleLabelPropertyRef_text not implemented" >> ruleLabelPropertyRef_st(scope,attr)::= << raise "ruleLabelPropertyRef_st not implemented" >> /** Isolated $RULE ref ok in lexer as it's a Token */ lexerRuleLabel(label)::= << raise "lexerRuleLabel not implemented" >> lexerRuleLabelPropertyRef_type(scope,attr)::= << raise "lexerRuleLabelPropertyRef_type not implemented" >> lexerRuleLabelPropertyRef_line(scope,attr)::= << raise "lexerRuleLabelPropertyRef_line not implemented" >> lexerRuleLabelPropertyRef_pos(scope,attr)::= << raise "lexerRuleLabelPropertyRef_pos not implemented" >> lexerRuleLabelPropertyRef_channel(scope,attr)::= << raise "lexerRuleLabelPropertyRef_channel not implemented" >> lexerRuleLabelPropertyRef_index(scope,attr)::= << raise "lexerRuleLabelPropertyRef_index not implemented" >> lexerRuleLabelPropertyRef_text(scope,attr)::= << raise "lexerRuleLabelPropertyRef_text not implemented" >> lexerRulePropertyRef_text(scope,attr) ::= "@text" lexerRulePropertyRef_type(scope,attr) ::= << raise "lexerRulePropertyRef_type not implemented" >> lexerRulePropertyRef_line(scope,attr) ::= "@line" lexerRulePropertyRef_pos(scope,attr) ::= "@pos" lexerRulePropertyRef_index(scope,attr) ::= << raise "lexerRulePropertyRef_index not implemented" >> lexerRulePropertyRef_channel(scope,attr) ::= "@channel" lexerRulePropertyRef_start(scope,attr) ::= "@start" lexerRulePropertyRef_stop(scope,attr) ::= << raise "lexerRulePropertyRef_stop not implemented" >> ruleSetPropertyRef_tree(scope,attr,expr) ::= << raise "ruleSetPropertyRef_tree not implemented" >> ruleSetPropertyRef_st(scope,attr,expr) ::= << raise "ruleSetPropertyRef_st not implemented" >> // Somebody may ref $template or $tree or $stop within a rule: rulePropertyRef_start(scope,attr)::= << raise "rulePropertyRef_start not implemented" >> rulePropertyRef_stop(scope,attr)::= << raise "rulePropertyRef_stop not implemented" >> rulePropertyRef_tree(scope,attr)::= << raise "rulePropertyRef_tree not implemented" >> rulePropertyRef_text(scope,attr)::= << raise "rulePropertyRef_text not implemented" >> rulePropertyRef_st(scope,attr)::= << raise "rulePropertyRef_st not implemented" >> /** How to execute an action */ /** TODO: add syntactic predicate & bactracking gates **/ execAction(action)::= << >> // M I S C (properties, etc...) codeFileExtension()::=".rb" true()::= "true" false()::= "false" ///////////// --------------------------- private templates -------------------------------- bitset()::= << raise "bitset not implemented" >> element() ::= "" plainBlock(decls, alts, description) ::= << >> switchBlock(description, decisionNumber, maxAlt, alts, decls, decision) ::= << # alt = case alt end >> switchCase() ::= << when >> LA(k) ::= << @input.look_ahead() look_ahead() >> synpred(name) ::= << def start = @input.mark() @backtracking += 1 _fragment() @backtracking -= 1 success = !@failed @input.rewind(start) @failed = false return success end >> parameterScope(scope) ::= << }; separator=", "> >> dfaClass() ::= << class DFA def initialize(eot, eof, min, max, accept, special, transition) @eot = eot @eof = eof @min = min @max = max @accept = accept @special = special @transition = transition end def predict(parser, input) mark = input.mark() s = 0 # we always start at s0 begin loop do special_state = @special[s] if special_state >= 0 s = parser.special_state_transition(special_state) input.consume() next end if @accept[s] >= 1 return @accept[s] end # look for a normal char transition c = input.look_ahead(1).to_i if c != :EOF && c >= @min[s] && c \<= @max[s] next_state = @transition[s][c - @min[s]] # move to next state if next_state \< 0 # was in range but not a normal transition # must check EOT, which is like the else clause. # eot[s]>=0 indicates that an EOT edge goes to another # state. if @eot[s] >= 0 # EOT Transition to accept state? s = @eot[s] input.consume() next end raise "No viable alt" end s = next_state input.consume() next end if @eot[s] >= 0 # EOT Transition? s = @eot[s] input.consume() next end if c == :EOF && @eof[s] >= 0 # EOF Transition to accept state? return @accept[@eof[s]] end # not in range and not EOF/EOT, must be invalid symbol raise "No viable alt" end ensure input.rewind(mark) end end end >>