/*
 [The "BSD licence"]
 Copyright (c) 2005-2008 Terence Parr
 All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions
 are met:
 1. Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.
 3. The name of the author may not be used to endorse or promote products
    derived from this software without specific prior written permission.

 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.tool;

import org.antlr.misc.*;
import org.antlr.misc.Utils;
import antlr.*;
import antlr.collections.AST;
import org.antlr.Tool;
import org.antlr.analysis.*;
import org.antlr.codegen.CodeGenerator;
import org.antlr.stringtemplate.StringTemplate;
import org.antlr.stringtemplate.language.AngleBracketTemplateLexer;

import java.io.*;
import java.util.*;
/** Represents a grammar in memory. */
public class Grammar {
    public static final String SYNPRED_RULE_PREFIX = "synpred";

    public static final String GRAMMAR_FILE_EXTENSION = ".g";

    /** used for generating lexer temp files */
    public static final String LEXER_GRAMMAR_FILE_EXTENSION = ".g";

    public static final int INITIAL_DECISION_LIST_SIZE = 300;
    public static final int INVALID_RULE_INDEX = -1;

    // the various kinds of labels. t=type, id=ID, types+=type ids+=ID
    public static final int RULE_LABEL = 1;
    public static final int TOKEN_LABEL = 2;
    public static final int RULE_LIST_LABEL = 3;
    public static final int TOKEN_LIST_LABEL = 4;
    public static final int CHAR_LABEL = 5; // used in lexer for x='a'

    public static String[] LabelTypeToString =
        {"<invalid>", "rule", "token", "rule-list", "token-list"};

    public static final String ARTIFICIAL_TOKENS_RULENAME = "Tokens";
    public static final String FRAGMENT_RULE_MODIFIER = "fragment";

    public static final String SYNPREDGATE_ACTION_NAME = "synpredgate";

    /** When converting ANTLR char and string literals, here is the
     *  value set of escape chars.
     */
    public static int ANTLRLiteralEscapedCharValue[] = new int[255];

    /** Given a char, we need to be able to show it as an ANTLR literal. */
    public static String ANTLRLiteralCharValueEscape[] = new String[255];

    static {
        ANTLRLiteralEscapedCharValue['n'] = '\n';
        ANTLRLiteralEscapedCharValue['r'] = '\r';
        ANTLRLiteralEscapedCharValue['t'] = '\t';
        ANTLRLiteralEscapedCharValue['b'] = '\b';
        ANTLRLiteralEscapedCharValue['f'] = '\f';
        ANTLRLiteralEscapedCharValue['\\'] = '\\';
        ANTLRLiteralEscapedCharValue['\''] = '\'';
        ANTLRLiteralEscapedCharValue['"'] = '"';
        ANTLRLiteralCharValueEscape['\n'] = "\\n";
        ANTLRLiteralCharValueEscape['\r'] = "\\r";
        ANTLRLiteralCharValueEscape['\t'] = "\\t";
        ANTLRLiteralCharValueEscape['\b'] = "\\b";
        ANTLRLiteralCharValueEscape['\f'] = "\\f";
        ANTLRLiteralCharValueEscape['\\'] = "\\\\";
        ANTLRLiteralCharValueEscape['\''] = "\\'";
    }
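
    /* Illustrative sketch (editorial addition, not in the original file):
     * the two tables above invert each other for the supported escapes.
     * Given the character that followed a backslash in a grammar literal,
     * say 'n':
     *
     *   int value = ANTLRLiteralEscapedCharValue['n'];      // yields '\n'
     *   String display = ANTLRLiteralCharValueEscape['\n']; // yields "\\n"
     *
     * Unsupported escape chars map to 0 / null since the arrays start out
     * zeroed; callers are expected to check for that.
     */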

    public static final int LEXER = 1;
    public static final int PARSER = 2;
    public static final int TREE_PARSER = 3;
    public static final int COMBINED = 4;
    public static final String[] grammarTypeToString = new String[] {
        "<invalid>",
        "lexer",
        "parser",
        "tree",
        "combined"
    };

    public static final String[] grammarTypeToFileNameSuffix = new String[] {
        "<invalid>",
        "Lexer",
        "Parser",
        "", // no suffix for tree grammars
        "Parser" // if combined grammar, gen Parser and Lexer will be done later
    };

    /** Set of valid imports.  E.g., can only import a tree parser into
     *  another tree parser.  Maps delegate to set of delegator grammar types.
     *  validDelegations.get(LEXER) gives list of the kinds of delegators
     *  that can import lexers.
     */
    public static MultiMap<Integer,Integer> validDelegations =
        new MultiMap<Integer,Integer>() {
            {
                map(LEXER, LEXER);
                map(LEXER, PARSER);
                map(LEXER, COMBINED);

                map(PARSER, PARSER);
                map(PARSER, COMBINED);

                map(TREE_PARSER, TREE_PARSER);

                // TODO: allow COMBINED
                // map(COMBINED, COMBINED);
            }
        };
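
    /* Illustrative sketch (editorial addition): validDelegations is keyed by
     * the *delegate's* type, so a check like
     *
     *   validDelegations.get(LEXER).contains(COMBINED)     // true
     *   validDelegations.get(LEXER).contains(TREE_PARSER)  // false
     *
     * says a combined grammar may import a lexer while a tree parser may
     * not; validImport() performs exactly this lookup.
     */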

    /** This is the buffer of *all* tokens found in the grammar file
     *  including whitespace tokens etc...  I use this to extract
     *  lexer rules from combined grammars.
     */
    protected TokenStreamRewriteEngine tokenBuffer;
    public static final String IGNORE_STRING_IN_GRAMMAR_FILE_NAME = "__";
    public static final String AUTO_GENERATED_TOKEN_NAME_PREFIX = "T__";

    public static class Decision {
        public int decision;
        public NFAState startState;
        public GrammarAST blockAST;
        public DFA dfa;
    }

    public class LabelElementPair {
        public antlr.Token label;
        public GrammarAST elementRef;
        public String referencedRuleName;
        /** Has an action referenced the label?  Set by ActionAnalysis.g
         *  Currently only set for rule labels.
         */
        public boolean actionReferencesLabel;
        public int type; // in {RULE_LABEL,TOKEN_LABEL,RULE_LIST_LABEL,TOKEN_LIST_LABEL}
        public LabelElementPair(antlr.Token label, GrammarAST elementRef) {
            this.label = label;
            this.elementRef = elementRef;
            this.referencedRuleName = elementRef.getText();
        }
        public Rule getReferencedRule() {
            return getRule(referencedRuleName);
        }
        public String toString() {
            return elementRef.toString();
        }
    }

    /** What name did the user provide for this grammar? */
    public String name;

    /** What type of grammar is this: lexer, parser, tree walker */
    public int type;

    /** A list of options specified at the grammar level such as language=Java.
     *  The value can be an AST for complicated values such as character sets.
     *  There may be code generator specific options in here.  I do no
     *  interpretation of the key/value pairs...they are simply available for
     *  whoever wants them.
     */
    protected Map options;

    public static final Set legalLexerOptions =
        new HashSet() {
            {
                add("language"); add("tokenVocab");
                add("TokenLabelType");
                add("superClass");
                add("filter");
                add("k");
                add("backtrack");
                add("memoize");
            }
        };

    public static final Set legalParserOptions =
        new HashSet() {
            {
                add("language"); add("tokenVocab");
                add("output"); add("rewrite"); add("ASTLabelType");
                add("TokenLabelType");
                add("superClass");
                add("k");
                add("backtrack");
                add("memoize");
            }
        };

    public static final Set legalTreeParserOptions =
        new HashSet() {
            {
                add("language"); add("tokenVocab");
                add("output"); add("rewrite"); add("ASTLabelType");
                add("TokenLabelType");
                add("superClass");
                add("filter");
                add("k");
                add("backtrack");
                add("memoize");
            }
        };

    public static final Set doNotCopyOptionsToLexer =
        new HashSet() {
            {
                add("output"); add("ASTLabelType"); add("superClass");
                add("k"); add("backtrack"); add("memoize"); add("rewrite");
            }
        };

    public static final Map defaultOptions =
        new HashMap() {
            {
                put("language","Java");
            }
        };

    public static final Set legalBlockOptions =
        new HashSet() {{add("k"); add("greedy"); add("backtrack"); add("memoize");}};

    /** What are the default options for a subrule? */
    public static final Map defaultBlockOptions =
        new HashMap() {{put("greedy","true");}};

    public static final Map defaultLexerBlockOptions =
        new HashMap() {{put("greedy","true");}};

    // Token options are here to avoid contaminating Token object in runtime

    /** Legal options for terminal refs like ID<node=MyVarNode> */
    public static final Set legalTokenOptions =
        new HashSet() {
            {
                add(defaultTokenOption);
            }
        };

    public static final String defaultTokenOption = "node";

    /** Is there a global fixed lookahead set for this grammar?
     *  If 0, nothing specified.  -1 implies we have not looked at
     *  the options table yet to set k.
     */
    protected int global_k = -1;

    /** Map a scope to a map of name:action pairs.
     *  Map<String, Map<String,GrammarAST>>
     *  The code generator will use this to fill holes in the output files.
     *  I track the AST node for the action in case I need the line number
     *  for errors.
     */
    protected Map actions = new HashMap();

    /** The NFA that represents the grammar with edges labelled with tokens
     *  or epsilon.  It is more suitable for analysis than an AST representation.
     */
    public NFA nfa;

    protected NFAFactory factory;

    /** If this grammar is part of a larger composite grammar via delegate
     *  statement, then this points at the composite.  The composite holds
     *  a global list of rules, token types, decision numbers, etc...
     */
    public CompositeGrammar composite;

    /** A pointer back into grammar tree.  Needed so we can add delegates. */
    public CompositeGrammarTree compositeTreeNode;

    /** If this is a delegate of another grammar, this is the label used
     *  as an instance var by that grammar to point at this grammar.  null
     *  if no label was specified in the delegate statement.
     */
    public String label;

    /** TODO: hook this to the charVocabulary option */
    protected IntSet charVocabulary = null;

    /** For ANTLRWorks, we want to be able to map a line:col to a specific
     *  decision DFA so it can display the DFA.
     */
    Map lineColumnToLookaheadDFAMap = new HashMap();

    public Tool tool;

    /** The unique set of all rule references in any rule; set of tree node
     *  objects so two refs to same rule can exist but at different line/position.
     */
    protected Set<GrammarAST> ruleRefs = new HashSet<GrammarAST>();

    protected Set<GrammarAST> scopedRuleRefs = new HashSet();

    /** The unique set of all token ID references in any rule */
    protected Set<antlr.Token> tokenIDRefs = new HashSet<antlr.Token>();

    /** Be able to assign a number to every decision in grammar;
     *  decisions in 1..n
     */
    protected int decisionCount = 0;

    /** A list of all rules that are in any left-recursive cycle.  There
     *  could be multiple cycles, but this is a flat list of all problematic
     *  rules.
     */
    protected Set<Rule> leftRecursiveRules;

    /** An external tool requests that DFA analysis abort prematurely.  Stops
     *  at DFA granularity; each DFA is limited by size and computation time
     *  as a failsafe.
     */
    protected boolean externalAnalysisAbort;

    /** When we read in a grammar, we track the list of syntactic predicates
     *  and build faux rules for them later.  See my blog entry Dec 2, 2005:
     *  http://www.antlr.org/blog/antlr3/lookahead.tml
     *  This maps the name (we make up) for a pred to the AST grammar fragment.
     */
    protected LinkedHashMap nameToSynpredASTMap;

    /** At least one rule has memoize=true */
    public boolean atLeastOneRuleMemoizes;

    /** Was this created from a COMBINED grammar? */
    public boolean implicitLexer;

    /** Map a rule name to its Rule object */
    protected LinkedHashMap<String,Rule> nameToRuleMap = new LinkedHashMap<String,Rule>();

    /** If this grammar is a delegate, some rules might be overridden; don't
     *  want to gen code for them.
     */
    public Set<String> overriddenRules = new HashSet<String>();

    /** The list of all rules referenced in this grammar, not defined here,
     *  and defined in a delegate grammar.  Not all of these will be generated
     *  in the recognizer for this file; only those that are affected by rule
     *  definitions in this grammar.  I am not sure the Java target will need
     *  this but I'm leaving it in case other targets need it.
     *  @see NameSpaceChecker.lookForReferencesToUndefinedSymbols()
     */
    protected Set<Rule> delegatedRuleReferences = new HashSet();

    /** The ANTLRParser tracks lexer rules when reading combined grammars
     *  so we can build the Tokens rule.
     */
    public List<String> lexerRuleNamesInCombined = new ArrayList<String>();

    /** Track the scopes defined outside of rules and the scopes associated
     *  with all rules (even if empty).
     */
    protected Map scopes = new HashMap();

    /** An AST that records entire input grammar with all rules.  A simple
     *  grammar with one rule, "grammar t; a : A | B ;", looks like:
     *  ( grammar t ( rule a ( BLOCK ( ALT A ) ( ALT B ) ) <end-of-rule> ) )
     */
    protected GrammarAST grammarTree = null;

    /** Each subrule/rule is a decision point and we must track them so we
     *  can go back later and build DFA predictors for them.  This includes
     *  all the rules, subrules, optional blocks, ()+, ()* etc...
     */
    protected Vector<Decision> indexToDecision =
        new Vector<Decision>(INITIAL_DECISION_LIST_SIZE);

    /** If non-null, this is the code generator we will use to generate
     *  recognizers in the target language.
     */
    protected CodeGenerator generator;

    public NameSpaceChecker nameSpaceChecker = new NameSpaceChecker(this);

    public LL1Analyzer ll1Analyzer = new LL1Analyzer(this);

    /** For merged lexer/parsers, we must construct a separate lexer spec.
     *  This is the template for the lexer; put the literals first then the
     *  regular rules.  We don't need to specify a token vocab import as
     *  I make the new grammar import from the old all in memory; don't want
     *  to force it to read from the disk.  The lexer grammar will have the
     *  same name as the original grammar but will live in a different file.
     *  Foo.g with a combined grammar will have FooParser.java generated,
     *  plus a Foo__.g that again contains grammar Foo.  It will however
     *  generate FooLexer.java as it's a lexer grammar.  A bit odd, but
     *  autogenerated.  Can tweak later if we want.
     */
    protected StringTemplate lexerGrammarST =
        new StringTemplate(
            "lexer grammar <name>;\n" +
            "<if(options)>" +
            "options {\n" +
            "  <options:{<it.name>=<it.value>;<\\n>}>\n" +
            "}<\\n>\n" +
            "<endif>\n" +
            "<if(imports)>import <imports; separator=\", \">;<endif>\n" +
            "<actionNames,actions:{n,a|@<n> {<a>}\n}>\n" +
            "<literals:{<it.ruleName> : <it.literal> ;\n}>\n" +
            "<rules>",
            AngleBracketTemplateLexer.class
        );
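
    /* Illustrative sketch (editorial addition): for a combined grammar
     * "grammar Foo;" whose parser uses a '+' literal and which defines a
     * lexer rule ID, getLexerGrammar() renders the template above into
     * text roughly like:
     *
     *   lexer grammar Foo;
     *   options {
     *     language=Java;
     *   }
     *   T__10 : '+' ;
     *   ID : ('a'..'z')+ ;
     *
     * The exact token name (here T__10) and options depend on the grammar;
     * see defineLexerRuleForStringLiteral() and getLexerGrammar() below.
     */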

    /** What file name holds this grammar? */
    protected String fileName;

    /** How long in ms did it take to build DFAs for this grammar?
     *  If this grammar is a combined grammar, it only records time for
     *  the parser grammar component.  This only records the time to
     *  do the LL(*) work; NFA->DFA conversion.
     */
    public long DFACreationWallClockTimeInMS;

    public int numberOfSemanticPredicates = 0;
    public int numberOfManualLookaheadOptions = 0;
    public Set<Integer> setOfNondeterministicDecisionNumbers = new HashSet<Integer>();
    public Set<Integer> setOfNondeterministicDecisionNumbersResolvedWithPredicates =
        new HashSet<Integer>();
    public Set setOfDFAWhoseAnalysisTimedOut = new HashSet();

    /** Track decisions with syn preds specified for reporting.
     *  This is a set of BLOCK type AST nodes.
     */
    public Set<GrammarAST> blocksWithSynPreds = new HashSet();

    /** Track decisions that actually use the syn preds in the DFA.
     *  Computed during NFA to DFA conversion.
     */
    public Set<DFA> decisionsWhoseDFAsUsesSynPreds = new HashSet<DFA>();

    /** Track names of preds so we can avoid generating preds that aren't used.
     *  Computed during NFA to DFA conversion.  Just walk accept states
     *  and look for synpreds because that is the only state target whose
     *  incident edges can have synpreds.  Same is true for
     *  decisionsWhoseDFAsUsesSynPreds.
     */
    public Set<String> synPredNamesUsedInDFA = new HashSet();

    /** Track decisions with sem preds specified for reporting.
     *  This is a set of BLOCK type AST nodes.
     */
    public Set<GrammarAST> blocksWithSemPreds = new HashSet();

    /** Track decisions that actually use the sem preds in the DFA. */
    public Set<DFA> decisionsWhoseDFAsUsesSemPreds = new HashSet();

    protected boolean allDecisionDFACreated = false;

    /** We need a way to detect when a lexer grammar is autogenerated from
     *  another grammar or we are just sending in a string representing a
     *  grammar.  We don't want to generate a .tokens file, for example,
     *  in such cases.
     */
    protected boolean builtFromString = false;

    /** Factored out the sanity checking code; delegate to it. */
    GrammarSanity sanity = new GrammarSanity(this);

    /** Create a grammar from file name. */
    public Grammar(Tool tool, String fileName, CompositeGrammar composite) {
        this.composite = composite;
        setTool(tool);
        setFileName(fileName);
        // ensure we have the composite set to something
        if ( composite.delegateGrammarTreeRoot==null ) {
            composite.setDelegationRoot(this);
        }
    }

    /** Useful for when you are sure that you are not part of a composite
     *  already.  Used in Interp/RandomPhrase and testing.
     */
    public Grammar() {
        builtFromString = true;
        composite = new CompositeGrammar(this);
    }

    /** Used for testing; only useful on noncomposite grammars. */
    public Grammar(String grammarString)
        throws antlr.RecognitionException, antlr.TokenStreamException
    {
        this(null, grammarString);
    }

    /** Used for testing and Interp/RandomPhrase.  Only useful on
     *  noncomposite grammars.
     */
    public Grammar(Tool tool, String grammarString)
        throws antlr.RecognitionException
    {
        this();
        setTool(tool);
        setFileName("<string>");
        StringReader r = new StringReader(grammarString);
        parseAndBuildAST(r);
        composite.assignTokenTypes();
        defineGrammarSymbols();
        checkNameSpaceAndActions();
    }
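
    /* Illustrative sketch (editorial addition): the string constructor runs
     * the front half of the pipeline in one shot, so a test can do, e.g.:
     *
     *   Grammar g = new Grammar(
     *       "parser grammar T;\n" +
     *       "a : B C | B D ;");
     *
     * after which the AST is built, token types are assigned, and symbols
     * are defined; analysis (buildNFA()/createLookaheadDFAs()) remains a
     * separate, explicit step.
     */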

    public void setFileName(String fileName) {
        this.fileName = fileName;
    }

    public String getFileName() {
        return fileName;
    }

    public void setName(String name) {
        if ( name==null ) {
            return;
        }
        // don't error check autogenerated files (those with '__' in them)
        String saneFile = fileName.replace('\\', '/');
        int lastSlash = saneFile.lastIndexOf('/');
        String onlyFileName = saneFile.substring(lastSlash+1, fileName.length());
        if ( !builtFromString ) {
            int lastDot = onlyFileName.lastIndexOf('.');
            String onlyFileNameNoSuffix = null;
            if ( lastDot < 0 ) {
                ErrorManager.error(ErrorManager.MSG_FILENAME_EXTENSION_ERROR, fileName);
                onlyFileNameNoSuffix = onlyFileName+GRAMMAR_FILE_EXTENSION;
            }
            else {
                onlyFileNameNoSuffix = onlyFileName.substring(0,lastDot);
            }
            if ( !name.equals(onlyFileNameNoSuffix) ) {
                ErrorManager.error(ErrorManager.MSG_FILE_AND_GRAMMAR_NAME_DIFFER,
                                   name,
                                   fileName);
            }
        }
        this.name = name;
    }

    public void setGrammarContent(String grammarString) throws RecognitionException {
        StringReader r = new StringReader(grammarString);
        parseAndBuildAST(r);
        composite.assignTokenTypes();
        composite.defineGrammarSymbols();
    }

    public void parseAndBuildAST()
        throws IOException
    {
        FileReader fr = null;
        BufferedReader br = null;
        try {
            fr = new FileReader(fileName);
            br = new BufferedReader(fr);
            parseAndBuildAST(br);
            br.close();
            br = null;
        }
        finally {
            if ( br!=null ) {
                br.close();
            }
        }
    }

    public void parseAndBuildAST(Reader r) {
        // BUILD AST FROM GRAMMAR
        ANTLRLexer lexer = new ANTLRLexer(r);
        lexer.setFilename(this.getFileName());
        // use the rewrite engine because we want to buffer up all tokens
        // in case they have a merged lexer/parser, send lexer rules to
        // new grammar.
        lexer.setTokenObjectClass("antlr.TokenWithIndex");
        tokenBuffer = new TokenStreamRewriteEngine(lexer);
        tokenBuffer.discard(ANTLRParser.WS);
        tokenBuffer.discard(ANTLRParser.ML_COMMENT);
        tokenBuffer.discard(ANTLRParser.COMMENT);
        tokenBuffer.discard(ANTLRParser.SL_COMMENT);
        ANTLRParser parser = new ANTLRParser(tokenBuffer);
        parser.setFilename(this.getFileName());
        try {
            parser.grammar(this);
        }
        catch (TokenStreamException tse) {
            ErrorManager.internalError("unexpected stream error from parsing "+fileName, tse);
        }
        catch (RecognitionException re) {
            ErrorManager.internalError("unexpected parser recognition error from "+fileName, re);
        }

        if ( lexer.hasASTOperator && !buildAST() ) {
            Object value = getOption("output");
            if ( value == null ) {
                ErrorManager.grammarWarning(ErrorManager.MSG_REWRITE_OR_OP_WITH_NO_OUTPUT_OPTION,
                                            this, null);
                setOption("output", "AST", null);
            }
            else {
                ErrorManager.grammarError(ErrorManager.MSG_AST_OP_WITH_NON_AST_OUTPUT_OPTION,
                                          this, null, value);
            }
        }

        grammarTree = (GrammarAST)parser.getAST();
        setFileName(lexer.getFilename()); // the lexer #src might change name
        if ( grammarTree==null || grammarTree.findFirstType(ANTLRParser.RULE)==null ) {
            ErrorManager.error(ErrorManager.MSG_NO_RULES, getFileName());
            return;
        }

        // Get syn pred rules and add to existing tree
        List synpredRules =
            getArtificialRulesForSyntacticPredicates(parser,
                                                     nameToSynpredASTMap);
        for (int i = 0; i < synpredRules.size(); i++) {
            GrammarAST rAST = (GrammarAST) synpredRules.get(i);
            grammarTree.addChild(rAST);
        }
    }

    public void defineGrammarSymbols() {
        if ( Tool.internalOption_PrintGrammarTree ) {
            System.out.println(grammarTree.toStringList());
        }

        // DEFINE RULES
        //System.out.println("### define "+name+" rules");
        DefineGrammarItemsWalker defineItemsWalker = new DefineGrammarItemsWalker();
        defineItemsWalker.setASTNodeClass("org.antlr.tool.GrammarAST");
        try {
            defineItemsWalker.grammar(grammarTree, this);
        }
        catch (RecognitionException re) {
            ErrorManager.error(ErrorManager.MSG_BAD_AST_STRUCTURE,
                               re);
        }
    }

    /** ANALYZE ACTIONS, LOOKING FOR LABEL AND ATTR REFS, sanity check */
    public void checkNameSpaceAndActions() {
        examineAllExecutableActions();
        checkAllRulesForUselessLabels();

        nameSpaceChecker.checkConflicts();
    }

    /** Many imports are illegal such as lexer into a tree grammar */
    public boolean validImport(Grammar delegate) {
        List<Integer> validDelegators = validDelegations.get(delegate.type);
        return validDelegators!=null && validDelegators.contains(this.type);
    }

    /** If the grammar is a combined grammar, return the text of the implicit
     *  lexer grammar.
     */
    public String getLexerGrammar() {
        if ( lexerGrammarST.getAttribute("literals")==null &&
             lexerGrammarST.getAttribute("rules")==null )
        {
            // if no rules, return nothing
            return null;
        }
        lexerGrammarST.setAttribute("name", name);
        // if there are any actions set for lexer, pass them in
        if ( actions.get("lexer")!=null ) {
            lexerGrammarST.setAttribute("actionNames",
                                        ((Map)actions.get("lexer")).keySet());
            lexerGrammarST.setAttribute("actions",
                                        ((Map)actions.get("lexer")).values());
        }
        // make sure generated grammar has the same options
        if ( options!=null ) {
            Iterator optionNames = options.keySet().iterator();
            while (optionNames.hasNext()) {
                String optionName = (String) optionNames.next();
                if ( !doNotCopyOptionsToLexer.contains(optionName) ) {
                    Object value = options.get(optionName);
                    lexerGrammarST.setAttribute("options.{name,value}", optionName, value);
                }
            }
        }
        return lexerGrammarST.toString();
    }

    public String getImplicitlyGeneratedLexerFileName() {
        return name+
               IGNORE_STRING_IN_GRAMMAR_FILE_NAME+
               LEXER_GRAMMAR_FILE_EXTENSION;
    }
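
    /* Illustrative sketch (editorial addition): for a combined grammar named
     * Foo this is "Foo" + "__" + ".g", i.e. "Foo__.g"; the "__" marker is
     * what setName() relies on when it skips error checks for autogenerated
     * files.
     */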

    /** Get the name of the generated recognizer; may or may not be same
     *  as grammar name.
     *  Recognizer is TParser and TLexer from T if combined, else
     *  just use T regardless of grammar type.
     */
    public String getRecognizerName() {
        String suffix = "";
        List<Grammar> grammarsFromRootToMe = composite.getDelegators(this);
        //System.out.println("grammarsFromRootToMe="+grammarsFromRootToMe);
        String qualifiedName = name;
        if ( grammarsFromRootToMe!=null ) {
            StringBuffer buf = new StringBuffer();
            for (Grammar g : grammarsFromRootToMe) {
                buf.append(g.name);
                buf.append('_');
            }
            buf.append(name);
            qualifiedName = buf.toString();
        }
        if ( type==Grammar.COMBINED ||
             (type==Grammar.LEXER && implicitLexer) )
        {
            suffix = Grammar.grammarTypeToFileNameSuffix[type];
        }
        return qualifiedName+suffix;
    }
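
    /* Illustrative sketch (editorial addition): a combined grammar T yields
     * "TParser" (suffix taken from grammarTypeToFileNameSuffix) and its
     * implicit lexer yields "TLexer"; a delegate grammar S imported by root
     * grammar T is qualified by its delegator chain, giving "T_S".
     */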

    /** Parse a rule we add artificially that is a list of the other lexer
     *  rules like this: "Tokens : ID | INT | SEMI ;"  nextToken() will invoke
     *  this to set the current token.  Add char literals before
     *  the rule references.
     *
     *  If in filter mode, we want every alt to backtrack and we need to
     *  do k=1 to force the "first token def wins" rule.  Otherwise, the
     *  longest-match rule comes into play with LL(*).
     *
     *  The ANTLRParser antlr.g file now invokes this when parsing a lexer
     *  grammar, which I think is proper even though it peeks at the info
     *  that later phases will (re)compute.  It gets a list of lexer rules
     *  and builds a string representing the rule; then it creates a parser
     *  and adds the resulting tree to the grammar's tree.
     */
    public GrammarAST addArtificialMatchTokensRule(GrammarAST grammarAST,
                                                   List<String> ruleNames,
                                                   List<String> delegateNames,
                                                   boolean filterMode) {
        StringTemplate matchTokenRuleST = null;
        if ( filterMode ) {
            matchTokenRuleST = new StringTemplate(
                ARTIFICIAL_TOKENS_RULENAME+
                " options {k=1; backtrack=true;} : <rules; separator=\"|\">;",
                AngleBracketTemplateLexer.class);
        }
        else {
            matchTokenRuleST = new StringTemplate(
                ARTIFICIAL_TOKENS_RULENAME+" : <rules; separator=\"|\">;",
                AngleBracketTemplateLexer.class);
        }

        // Now add token rule references
        for (int i = 0; i < ruleNames.size(); i++) {
            String rname = (String) ruleNames.get(i);
            matchTokenRuleST.setAttribute("rules", rname);
        }
        for (int i = 0; i < delegateNames.size(); i++) {
            String dname = (String) delegateNames.get(i);
            matchTokenRuleST.setAttribute("rules", dname+".Tokens");
        }
        //System.out.println("tokens rule: "+matchTokenRuleST.toString());

        ANTLRLexer lexer = new ANTLRLexer(new StringReader(matchTokenRuleST.toString()));
        lexer.setTokenObjectClass("antlr.TokenWithIndex");
        TokenStreamRewriteEngine tokbuf =
            new TokenStreamRewriteEngine(lexer);
        tokbuf.discard(ANTLRParser.WS);
        tokbuf.discard(ANTLRParser.ML_COMMENT);
        tokbuf.discard(ANTLRParser.COMMENT);
        tokbuf.discard(ANTLRParser.SL_COMMENT);
        ANTLRParser parser = new ANTLRParser(tokbuf);
        parser.grammar = this;
        parser.gtype = ANTLRParser.LEXER_GRAMMAR;
        parser.setASTNodeClass("org.antlr.tool.GrammarAST");
        try {
            parser.rule();
            if ( Tool.internalOption_PrintGrammarTree ) {
                System.out.println("Tokens rule: "+parser.getAST().toStringTree());
            }
            GrammarAST p = grammarAST;
            while ( p.getType()!=ANTLRParser.LEXER_GRAMMAR ) {
                p = (GrammarAST)p.getNextSibling();
            }
            p.addChild(parser.getAST());
        }
        catch (Exception e) {
            ErrorManager.error(ErrorManager.MSG_ERROR_CREATING_ARTIFICIAL_RULE,
                               e);
        }
        return (GrammarAST)parser.getAST();
    }
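
    /* Illustrative sketch (editorial addition): for lexer rules ID and INT
     * plus a delegate S, the template above produces rule text like
     *
     *   Tokens : ID | INT | S.Tokens ;
     *
     * or, in filter mode,
     *
     *   Tokens options {k=1; backtrack=true;} : ID | INT | S.Tokens ;
     *
     * which is then parsed and grafted onto the grammar tree.
     */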

    /** for any syntactic predicates, we need to define rules for them; they will get
     *  defined automatically like any other rule. :)
     */
    protected List getArtificialRulesForSyntacticPredicates(ANTLRParser parser,
                                                            LinkedHashMap nameToSynpredASTMap)
    {
        List rules = new ArrayList();
        if ( nameToSynpredASTMap==null ) {
            return rules;
        }
        Set predNames = nameToSynpredASTMap.keySet();
        boolean isLexer = grammarTree.getType()==ANTLRParser.LEXER_GRAMMAR;
        for (Iterator it = predNames.iterator(); it.hasNext();) {
            String synpredName = (String)it.next();
            GrammarAST fragmentAST =
                (GrammarAST) nameToSynpredASTMap.get(synpredName);
            GrammarAST ruleAST =
                parser.createSimpleRuleAST(synpredName,
                                           fragmentAST,
                                           isLexer);
            rules.add(ruleAST);
        }
        return rules;
    }

    /** Walk the list of options, altering this Grammar object according
     *  to any I recognize.
    protected void processOptions() {
        Iterator optionNames = options.keySet().iterator();
        while (optionNames.hasNext()) {
            String optionName = (String) optionNames.next();
            Object value = options.get(optionName);
            if ( optionName.equals("tokenVocab") ) {

            }
        }
    }
    */

    /** Define all the rule begin/end NFAStates to solve forward reference
     *  issues.  Critical for composite grammars too.
     *  This is normally called on all root/delegates manually and then
     *  buildNFA() is called afterwards because the NFA construction needs
     *  to see rule start/stop states from potentially every grammar.  These
     *  have to be created a priori.  Testing routines will often
     *  just call buildNFA(), which forces a call to this method if not
     *  done already; that shortcut works ONLY for single noncomposite grammars.
     */
    public void createRuleStartAndStopNFAStates() {
        //System.out.println("### createRuleStartAndStopNFAStates "+getGrammarTypeString()+" grammar "+name+" NFAs");
        if ( nfa!=null ) {
            return;
        }
        nfa = new NFA(this);
        factory = new NFAFactory(nfa);

        Collection rules = getRules();
        for (Iterator itr = rules.iterator(); itr.hasNext();) {
            Rule r = (Rule) itr.next();
            String ruleName = r.name;
            NFAState ruleBeginState = factory.newState();
            ruleBeginState.setDescription("rule "+ruleName+" start");
            ruleBeginState.enclosingRule = r;
            r.startState = ruleBeginState;
            NFAState ruleEndState = factory.newState();
            ruleEndState.setDescription("rule "+ruleName+" end");
            ruleEndState.setAcceptState(true);
            ruleEndState.enclosingRule = r;
            r.stopState = ruleEndState;
        }
    }

    public void buildNFA() {
        if ( nfa==null ) {
            createRuleStartAndStopNFAStates();
        }
        if ( nfa.complete ) {
            // don't let it create more than once; has side-effects
            return;
        }
        //System.out.println("### build "+getGrammarTypeString()+" grammar "+name+" NFAs");
        if ( getRules().size()==0 ) {
            return;
        }

        TreeToNFAConverter nfaBuilder = new TreeToNFAConverter(this, nfa, factory);
        try {
            nfaBuilder.grammar(grammarTree);
        }
        catch (RecognitionException re) {
            ErrorManager.error(ErrorManager.MSG_BAD_AST_STRUCTURE,
                               name,
                               re);
        }
        nfa.complete = true;
    }

    /** For each decision in this grammar, compute a single DFA using the
     *  NFA states associated with the decision.  The DFA construction
     *  determines whether or not the alternatives in the decision are
     *  separable using a regular lookahead language.
     *
     *  Store the lookahead DFAs in the AST created from the user's grammar
     *  so the code generator or whoever can easily access it.
     *
     *  This is a separate method because you might want to create a
     *  Grammar without doing the expensive analysis.
     */
    public void createLookaheadDFAs() {
        createLookaheadDFAs(true);
    }

    public void createLookaheadDFAs(boolean wackTempStructures) {
        if ( nfa==null ) {
            buildNFA();
        }

        // CHECK FOR LEFT RECURSION; Make sure we can actually do analysis
        checkAllRulesForLeftRecursion();

        /*
        // was there a severe problem while sniffing the grammar?
        if ( ErrorManager.doNotAttemptAnalysis() ) {
            return;
        }
        */

        long start = System.currentTimeMillis();

        //System.out.println("### create DFAs");
        int numDecisions = getNumberOfDecisions();
        if ( NFAToDFAConverter.SINGLE_THREADED_NFA_CONVERSION ) {
            for (int decision=1; decision<=numDecisions; decision++) {
                NFAState decisionStartState = getDecisionNFAStartState(decision);
                if ( leftRecursiveRules.contains(decisionStartState.enclosingRule) ) {
                    // don't bother to process decisions within left recursive rules.
                    if ( composite.watchNFAConversion ) {
                        System.out.println("ignoring decision "+decision+
                                           " within left-recursive rule "+decisionStartState.enclosingRule.name);
                    }
                    continue;
                }
                if ( !externalAnalysisAbort && decisionStartState.getNumberOfTransitions()>1 ) {
                    Rule r = decisionStartState.enclosingRule;
                    if ( r.isSynPred && !synPredNamesUsedInDFA.contains(r.name) ) {
                        continue;
                    }
                    DFA dfa = null;
                    // if k=* or k=1, try LL(1)
                    if ( getUserMaxLookahead(decision)==0 ||
                         getUserMaxLookahead(decision)==1 )
                    {
                        dfa = createLL_1_LookaheadDFA(decision);
                    }
                    if ( dfa==null ) {
                        if ( composite.watchNFAConversion ) {
                            System.out.println("decision "+decision+
                                               " not suitable for LL(1)-optimized DFA analysis");
                        }
                        dfa = createLookaheadDFA(decision, wackTempStructures);
                    }
                    if ( dfa.startState==null ) {
                        // something went wrong; wipe out DFA
                        setLookaheadDFA(decision, null);
                    }
                    if ( Tool.internalOption_PrintDFA ) {
                        System.out.println("DFA d="+decision);
                        FASerializer serializer = new FASerializer(nfa.grammar);
                        String result = serializer.serialize(dfa.startState);
                        System.out.println(result);
                    }
                }
            }
        }
        else {
            ErrorManager.info("two-threaded DFA conversion");
            // create a barrier expecting n DFA and this main creation thread
            Barrier barrier = new Barrier(3);
            // assume 2 CPU for now
            int midpoint = numDecisions/2;
            NFAConversionThread t1 =
                new NFAConversionThread(this, barrier, 1, midpoint);
            new Thread(t1).start();
            if ( midpoint == (numDecisions/2) ) {
                midpoint++;
            }
            NFAConversionThread t2 =
                new NFAConversionThread(this, barrier, midpoint, numDecisions);
            new Thread(t2).start();
            // wait for these two threads to finish
            try {
                barrier.waitForRelease();
            }
            catch(InterruptedException e) {
                ErrorManager.internalError("what the hell? DFA interruptus", e);
            }
        }

        long stop = System.currentTimeMillis();
        DFACreationWallClockTimeInMS = stop - start;

        // indicate that we've finished building DFA (even if #decisions==0)
        allDecisionDFACreated = true;
    }

    public DFA createLL_1_LookaheadDFA(int decision) {
        Decision d = getDecision(decision);
        String enclosingRule = d.startState.enclosingRule.name;
        Rule r = d.startState.enclosingRule;
        NFAState decisionStartState = getDecisionNFAStartState(decision);

        if ( composite.watchNFAConversion ) {
            System.out.println("--------------------\nattempting LL(1) DFA (d="
                               +decisionStartState.getDecisionNumber()+") for "+
                               decisionStartState.getDescription());
        }

        if ( r.isSynPred && !synPredNamesUsedInDFA.contains(enclosingRule) ) {
            return null;
        }

        // compute lookahead for each alt
        int numAlts = getNumberOfAltsForDecisionNFA(decisionStartState);
        LookaheadSet[] altLook = new LookaheadSet[numAlts+1];
        for (int alt = 1; alt <= numAlts; alt++) {
            int walkAlt =
                decisionStartState.translateDisplayAltToWalkAlt(alt);
            NFAState altLeftEdge = getNFAStateForAltOfDecision(decisionStartState, walkAlt);
            NFAState altStartState = (NFAState)altLeftEdge.transition[0].target;
            //System.out.println("alt "+alt+" start state = "+altStartState.stateNumber);
            altLook[alt] = ll1Analyzer.LOOK(altStartState);
            //System.out.println("alt "+alt+": "+altLook[alt].toString(this));
        }

        // compare alt i with alt j for disjointness
        boolean decisionIsLL_1 = true;
    outer:
        for (int i = 1; i <= numAlts; i++) {
            for (int j = i+1; j <= numAlts; j++) {
                /*
                System.out.println("compare "+i+", "+j+": "+
                                   altLook[i].toString(this)+" with "+
                                   altLook[j].toString(this));
                */
                LookaheadSet collision = altLook[i].intersection(altLook[j]);
                if ( !collision.isNil() ) {
                    //System.out.println("collision (non-LL(1)): "+collision.toString(this));
                    decisionIsLL_1 = false;
                    break outer;
                }
            }
        }

        boolean foundConfoundingPredicate =
            ll1Analyzer.detectConfoundingPredicates(decisionStartState);
        if ( decisionIsLL_1 && !foundConfoundingPredicate ) {
            // build an LL(1) optimized DFA with edge for each altLook[i]
            if ( NFAToDFAConverter.debug ) {
                System.out.println("decision "+decision+" is simple LL(1)");
            }
            DFA lookaheadDFA = new LL1DFA(decision, decisionStartState, altLook);
            setLookaheadDFA(decision, lookaheadDFA);
            updateLineColumnToLookaheadDFAMap(lookaheadDFA);
            return lookaheadDFA;
        }

        // not LL(1) but perhaps we can solve with simplified predicate search
        // even if k=1 set manually, only resolve here if we have preds; i.e.,
        // don't resolve etc...

        /*
        SemanticContext visiblePredicates =
            ll1Analyzer.getPredicates(decisionStartState);
        boolean foundConfoundingPredicate =
            ll1Analyzer.detectConfoundingPredicates(decisionStartState);
        */

        // exit if not forced k=1 or we found a predicate situation we
        // can't handle: predicates in rules invoked from this decision.
        if ( getUserMaxLookahead(decision)!=1 || // not manually set to k=1
             !getAutoBacktrackMode(decision) ||
             foundConfoundingPredicate )
        {
            //System.out.println("trying LL(*)");
            return null;
        }

        List<IntervalSet> edges = new ArrayList<IntervalSet>();
        for (int i = 1; i < altLook.length; i++) {
            LookaheadSet s = altLook[i];
            edges.add((IntervalSet)s.tokenTypeSet);
        }
        List<IntervalSet> disjoint = makeEdgeSetsDisjoint(edges);
        //System.out.println("disjoint="+disjoint);

        MultiMap<IntervalSet, Integer> edgeMap = new MultiMap<IntervalSet, Integer>();
        for (int i = 0; i < disjoint.size(); i++) {
            IntervalSet ds = (IntervalSet) disjoint.get(i);
            for (int alt = 1; alt < altLook.length; alt++) {
                LookaheadSet look = altLook[alt];
                if ( !ds.and(look.tokenTypeSet).isNil() ) {
                    edgeMap.map(ds, alt);
                }
            }
        }
        //System.out.println("edge map: "+edgeMap);

        // TODO: how do we know we covered stuff?

        // build an LL(1) optimized DFA with edge for each altLook[i]
        DFA lookaheadDFA = new LL1DFA(decision, decisionStartState, edgeMap);
        setLookaheadDFA(decision, lookaheadDFA);

        // create map from line:col to decision DFA (for ANTLRWorks)
        updateLineColumnToLookaheadDFAMap(lookaheadDFA);

        return lookaheadDFA;
    }
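
    /* Illustrative sketch (editorial addition): the pairwise disjointness
     * test above is the classic LL(1) condition.  For a decision
     *
     *   a : B C | B D ;
     *
     * LOOK(alt1)={B} and LOOK(alt2)={B} intersect, so the decision is not
     * LL(1) and falls through toward full LL(*) analysis; whereas for
     *
     *   a : B C | D E ;
     *
     * {B} and {D} are disjoint and an LL1DFA with one edge per alt suffices.
     */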

    private void updateLineColumnToLookaheadDFAMap(DFA lookaheadDFA) {
        GrammarAST decisionAST = nfa.grammar.getDecisionBlockAST(lookaheadDFA.decisionNumber);
        int line = decisionAST.getLine();
        int col = decisionAST.getColumn();
        lineColumnToLookaheadDFAMap.put(new StringBuffer().append(line + ":")
                                        .append(col).toString(), lookaheadDFA);
    }

    protected List<IntervalSet> makeEdgeSetsDisjoint(List<IntervalSet> edges) {
        OrderedHashSet<IntervalSet> disjointSets = new OrderedHashSet<IntervalSet>();
        // walk each incoming edge label/set and add to disjoint set
        int numEdges = edges.size();
        for (int e = 0; e < numEdges; e++) {
            IntervalSet t = (IntervalSet) edges.get(e);
            if ( disjointSets.contains(t) ) { // exact set present
                continue;
            }

            // compare t with set i for disjointness
            IntervalSet remainder = t; // remainder starts out as whole set to add
            int numDisjointElements = disjointSets.size();
            for (int i = 0; i < numDisjointElements; i++) {
                IntervalSet s_i = (IntervalSet)disjointSets.get(i);

                if ( t.and(s_i).isNil() ) { // nothing in common
                    continue;
                }
                //System.out.println(label+" collides with "+rl);

                // For any (s_i, t) with s_i&t!=nil replace with (s_i-t, s_i&t)
                // (ignoring s_i-t if nil; don't put in list)

                // Replace existing s_i with intersection since we
                // know that will always be a non nil character class
                IntervalSet intersection = (IntervalSet)s_i.and(t);
                disjointSets.set(i, intersection);

                // Compute s_i-t to see what is in current set and not in incoming
                IntSet existingMinusNewElements = s_i.subtract(t);
                //System.out.println(s_i+"-"+t+"="+existingMinusNewElements);
                if ( !existingMinusNewElements.isNil() ) {
                    // found a new character class; add to the end (doesn't affect
                    // outer loop duration since n was computed a priori)
                    disjointSets.add(existingMinusNewElements);
                }

                // anything left to add to the reachableLabels?
                remainder = (IntervalSet)t.subtract(s_i);
                if ( remainder.isNil() ) {
                    break; // nothing left to add to set.  done!
                }

                t = remainder;
            }
            if ( !remainder.isNil() ) {
                disjointSets.add(remainder);
            }
        }
        return disjointSets.elements();
    }
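
    /* Illustrative worked example (editorial addition): given edge sets
     * [{A,B}, {B,C}], the first pass adds {A,B} untouched.  The second set
     * {B,C} overlaps it, so {A,B} is replaced by the intersection {B}, the
     * left-over {A} is appended, and the remainder {C} is appended last,
     * yielding the pairwise-disjoint list [{B}, {A}, {C}].  Every original
     * set is now a union of result sets, which is what the LL(1) edge-map
     * construction above relies on.
     */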

    public DFA createLookaheadDFA(int decision, boolean wackTempStructures) {
        Decision d = getDecision(decision);
        String enclosingRule = d.startState.enclosingRule.name;
        Rule r = d.startState.enclosingRule;

        //System.out.println("createLookaheadDFA(): "+enclosingRule+" dec "+decision+"; synprednames prev used "+synPredNamesUsedInDFA);
        NFAState decisionStartState = getDecisionNFAStartState(decision);
        long startDFA=0,stopDFA=0;
        if ( composite.watchNFAConversion ) {
            System.out.println("--------------------\nbuilding lookahead DFA (d="
                               +decisionStartState.getDecisionNumber()+") for "+
                               decisionStartState.getDescription());
            startDFA = System.currentTimeMillis();
        }

        DFA lookaheadDFA = new DFA(decision, decisionStartState);
        // Retry to create a simpler DFA if analysis failed (non-LL(*),
        // recursion overflow, or time out).
        boolean failed =
            lookaheadDFA.analysisTimedOut() ||
            lookaheadDFA.probe.isNonLLStarDecision() ||
            lookaheadDFA.probe.analysisOverflowed();
        if ( failed && lookaheadDFA.okToRetryDFAWithK1() ) {
            // set k=1 option and try again.
            // First, clean up tracking stuff
            decisionsWhoseDFAsUsesSynPreds.remove(lookaheadDFA);
            // TODO: clean up synPredNamesUsedInDFA also (harder)
            d.blockAST.setBlockOption(this, "k", Utils.integer(1));
            if ( composite.watchNFAConversion ) {
                System.out.print("trying decision "+decision+
                                 " again with k=1; reason: "+
                                 lookaheadDFA.getReasonForFailure());
            }
            lookaheadDFA = null; // make sure other memory is "free" before redoing
            lookaheadDFA = new DFA(decision, decisionStartState);
        }
        if ( lookaheadDFA.analysisTimedOut() ) { // did analysis bug out?
            ErrorManager.internalError("could not even do k=1 for decision "+
                                       decision+"; reason: "+
                                       lookaheadDFA.getReasonForFailure());
        }

        setLookaheadDFA(decision, lookaheadDFA);

        if ( wackTempStructures ) {
            for (DFAState s : lookaheadDFA.getUniqueStates().values()) {
                s.reset();
            }
        }

        // create map from line:col to decision DFA (for ANTLRWorks)
        updateLineColumnToLookaheadDFAMap(lookaheadDFA);

        if ( composite.watchNFAConversion ) {
            stopDFA = System.currentTimeMillis();
            System.out.println("cost: "+lookaheadDFA.getNumberOfStates()+
                               " states, "+(int)(stopDFA-startDFA)+" ms");
        }
        //System.out.println("after create DFA; synPredNamesUsedInDFA="+synPredNamesUsedInDFA);
        return lookaheadDFA;
    }

    /** Terminate DFA creation (grammar analysis). */
    public void externallyAbortNFAToDFAConversion() {
        externalAnalysisAbort = true;
    }

    public boolean NFAToDFAConversionExternallyAborted() {
        return externalAnalysisAbort;
    }

    /** Return a new unique integer in the token type space */
    public int getNewTokenType() {
        composite.maxTokenType++;
        return composite.maxTokenType;
    }

    /** Define a token at a particular token type value.  Blast an
     *  old value with a new one.  This is called during normal grammar
     *  processing and during import vocab operations to set tokens with
     *  specific values.
     */
    public void defineToken(String text, int tokenType) {
        //System.out.println("defineToken("+text+", "+tokenType+")");
        if ( composite.tokenIDToTypeMap.get(text)!=null ) {
            // already defined?  Must be predefined one like EOF;
            // do nothing
            return;
        }
        // the index in the typeToTokenList table is actually shifted to
        // hold faux labels as you cannot have negative indices.
        if ( text.charAt(0)=='\'' ) {
            composite.stringLiteralToTypeMap.put(text, Utils.integer(tokenType));
            // track in reverse index too
            if ( tokenType>=composite.typeToStringLiteralList.size() ) {
                composite.typeToStringLiteralList.setSize(tokenType+1);
            }
            composite.typeToStringLiteralList.set(tokenType, text);
        }
        else { // must be a label like ID
            composite.tokenIDToTypeMap.put(text, Utils.integer(tokenType));
        }
        int index = Label.NUM_FAUX_LABELS+tokenType-1;
        //System.out.println("defining "+name+" token "+text+" at type="+tokenType+", index="+index);
        composite.maxTokenType = Math.max(composite.maxTokenType, tokenType);
        if ( index>=composite.typeToTokenList.size() ) {
            composite.typeToTokenList.setSize(index+1);
        }
        String prevToken = (String)composite.typeToTokenList.get(index);
        if ( prevToken==null || prevToken.charAt(0)=='\'' ) {
            // only record if nothing there before or if thing before was a literal
            composite.typeToTokenList.set(index, text);
        }
    }
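
    /* Illustrative sketch (editorial addition): defining the ID-style token
     * PLUS at token type 10 records it in tokenIDToTypeMap, while defining
     * the literal "'+'" at the same type goes into stringLiteralToTypeMap
     * instead.  Both land in typeToTokenList at the shifted slot
     * Label.NUM_FAUX_LABELS + 10 - 1, and an ID wins over a literal already
     * sitting in that slot.
     */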

    /** Define a new rule.  A new rule index is created by incrementing
     *  ruleIndex.
     */
    public void defineRule(antlr.Token ruleToken,
                           String modifier,
                           Map options,
                           GrammarAST tree,
                           GrammarAST argActionAST,
                           int numAlts)
    {
        String ruleName = ruleToken.getText();
        if ( getLocallyDefinedRule(ruleName)!=null ) {
            ErrorManager.grammarError(ErrorManager.MSG_RULE_REDEFINITION,
                                      this, ruleToken, ruleName);
            return;
        }

        if ( (type==Grammar.PARSER||type==Grammar.TREE_PARSER) &&
             Character.isUpperCase(ruleName.charAt(0)) )
        {
            ErrorManager.grammarError(ErrorManager.MSG_LEXER_RULES_NOT_ALLOWED,
                                      this, ruleToken, ruleName);
            return;
        }

        Rule r = new Rule(this, ruleName, composite.ruleIndex, numAlts);
        /*
        System.out.println("defineRule("+ruleName+",modifier="+modifier+
                           "): index="+r.index+", nalts="+numAlts);
        */
        r.modifier = modifier;
        nameToRuleMap.put(ruleName, r);
        setRuleAST(ruleName, tree);
        r.setOptions(options, ruleToken);
        r.argActionAST = argActionAST;
        composite.ruleIndexToRuleList.setSize(composite.ruleIndex+1);
        composite.ruleIndexToRuleList.set(composite.ruleIndex, r);
        composite.ruleIndex++;
        if ( ruleName.startsWith(SYNPRED_RULE_PREFIX) ) {
            r.isSynPred = true;
        }
    }

    /** Define a new predicate and get back its name for use in building
     *  a semantic predicate reference to the syn pred.
     */
    public String defineSyntacticPredicate(GrammarAST blockAST,
                                           String currentRuleName)
    {
        if ( nameToSynpredASTMap==null ) {
            nameToSynpredASTMap = new LinkedHashMap();
        }
        String predName =
            SYNPRED_RULE_PREFIX+(nameToSynpredASTMap.size() + 1)+"_"+name;
        blockAST.setTreeEnclosingRuleNameDeeply(predName);
        nameToSynpredASTMap.put(predName, blockAST);
        return predName;
    }
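
    /* Illustrative sketch (editorial addition): in a grammar named T, the
     * first syntactic predicate gets the made-up name "synpred1_T", the
     * second "synpred2_T", and so on; defineRule() later marks any rule
     * whose name starts with SYNPRED_RULE_PREFIX as isSynPred.
     */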

    public LinkedHashMap getSyntacticPredicates() {
        return nameToSynpredASTMap;
    }

    public GrammarAST getSyntacticPredicate(String name) {
        if ( nameToSynpredASTMap==null ) {
            return null;
        }
        return (GrammarAST)nameToSynpredASTMap.get(name);
    }

    public void synPredUsedInDFA(DFA dfa, SemanticContext semCtx) {
        decisionsWhoseDFAsUsesSynPreds.add(dfa);
        semCtx.trackUseOfSyntacticPredicates(this); // walk ctx looking for preds
    }

    /*
    public Set<Rule> getRuleNamesVisitedDuringLOOK() {
        return rulesSensitiveToOtherRules;
    }
    */

    /** Given @scope::name {action} define it for this grammar.  Later,
     *  the code generator will ask for the actions table.  For composite
     *  grammars, make sure the header action propagates down to all delegates.
     */
    public void defineNamedAction(GrammarAST ampersandAST,
                                  String scope,
                                  GrammarAST nameAST,
                                  GrammarAST actionAST)
    {
        if ( scope==null ) {
            scope = getDefaultActionScope(type);
        }
        //System.out.println("@"+scope+"::"+nameAST.getText()+"{"+actionAST.getText()+"}");
        String actionName = nameAST.getText();
        Map scopeActions = (Map)actions.get(scope);
        if ( scopeActions==null ) {
            scopeActions = new HashMap();
            actions.put(scope, scopeActions);
        }
        GrammarAST a = (GrammarAST)scopeActions.get(actionName);
        if ( a!=null ) {
            ErrorManager.grammarError(
                ErrorManager.MSG_ACTION_REDEFINITION,this,
                nameAST.getToken(),nameAST.getText());
        }
        else {
            scopeActions.put(actionName,actionAST);
        }
        // propagate header (regardless of scope (lexer, parser, ...)?)
        if ( this==composite.getRootGrammar() && actionName.equals("header") ) {
            List<Grammar> allgrammars = composite.getRootGrammar().getDelegates();
            for (Grammar g : allgrammars) {
                g.defineNamedAction(ampersandAST, scope, nameAST, actionAST);
            }
        }
    }

    public Map getActions() {
        return actions;
    }

    /** Given a grammar type, what should be the default action scope?
     *  If I say @members in a COMBINED grammar, for example, the
     *  default scope should be "parser".
     */
    public String getDefaultActionScope(int grammarType) {
        switch (grammarType) {
            case Grammar.LEXER :
                return "lexer";
            case Grammar.PARSER :
            case Grammar.COMBINED :
                return "parser";
            case Grammar.TREE_PARSER :
                return "treeparser";
        }
        return null;
    }

    public void defineLexerRuleFoundInParser(antlr.Token ruleToken,
                                             GrammarAST ruleAST)
    {
        //System.out.println("rule tree is:\n"+ruleAST.toStringTree());
        /*
        String ruleText = tokenBuffer.toOriginalString(ruleAST.ruleStartTokenIndex,
                                                       ruleAST.ruleStopTokenIndex);
        */
        // first, create the text of the rule
        StringBuffer buf = new StringBuffer();
        buf.append("// $ANTLR src \"");
        buf.append(getFileName());
        buf.append("\" ");
        buf.append(ruleAST.getLine());
        buf.append("\n");
        for (int i=ruleAST.ruleStartTokenIndex;
             i<=ruleAST.ruleStopTokenIndex && i<tokenBuffer.size();
             i++)
        {
            TokenWithIndex t = (TokenWithIndex)tokenBuffer.getToken(i);
            // undo the text deletions done by the lexer (ugh)
            if ( t.getType()==ANTLRParser.BLOCK ) {
                buf.append("(");
            }
            else if ( t.getType()==ANTLRParser.ACTION ) {
                buf.append("{");
                buf.append(t.getText());
                buf.append("}");
            }
            else if ( t.getType()==ANTLRParser.SEMPRED ||
                      t.getType()==ANTLRParser.SYN_SEMPRED ||
                      t.getType()==ANTLRParser.GATED_SEMPRED ||
                      t.getType()==ANTLRParser.BACKTRACK_SEMPRED )
            {
                buf.append("{");
                buf.append(t.getText());
                buf.append("}?");
            }
            else if ( t.getType()==ANTLRParser.ARG_ACTION ) {
                buf.append("[");
                buf.append(t.getText());
                buf.append("]");
            }
            else {
                buf.append(t.getText());
            }
        }
        String ruleText = buf.toString();
        //System.out.println("[["+ruleText+"]]");
        // now put the rule into the lexer grammar template
        if ( getGrammarIsRoot() ) { // don't build lexers for delegates
            lexerGrammarST.setAttribute("rules", ruleText);
        }
        // track this lexer rule's name
        composite.lexerRules.add(ruleToken.getText());
    }
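
    /* Illustrative sketch (editorial addition): a lexer rule lifted out of
     * a combined grammar Foo.g lands in the implicit lexer prefixed with a
     * source marker, e.g.:
     *
     *   // $ANTLR src "Foo.g" 12
     *   WS : (' '|'\t')+ {skip();} ;
     *
     * so tooling reading the generated Foo__.g can point back at the
     * original file and line.
     */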

    /** If someone does PLUS='+' in the parser, must make sure we get
     *  "PLUS : '+' ;" in lexer not "T73 : '+';"
     */
    public void defineLexerRuleForAliasedStringLiteral(String tokenID,
                                                       String literal,
                                                       int tokenType)
    {
        if ( getGrammarIsRoot() ) { // don't build lexers for delegates
            //System.out.println("defineLexerRuleForAliasedStringLiteral: "+literal+" "+tokenType);
            lexerGrammarST.setAttribute("literals.{ruleName,type,literal}",
                                        tokenID,
                                        Utils.integer(tokenType),
                                        literal);
        }
        // track this lexer rule's name
        composite.lexerRules.add(tokenID);
    }

    public void defineLexerRuleForStringLiteral(String literal, int tokenType) {
        //System.out.println("defineLexerRuleForStringLiteral: "+literal+" "+tokenType);
        // compute new token name like T237 and define it as having tokenType
        String tokenID = computeTokenNameFromLiteral(tokenType,literal);
        defineToken(tokenID, tokenType);
        // tell implicit lexer to define a rule to match the literal
        if ( getGrammarIsRoot() ) { // don't build lexers for delegates
            lexerGrammarST.setAttribute("literals.{ruleName,type,literal}",
                                        tokenID,
                                        Utils.integer(tokenType),
                                        literal);
        }
    }
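
    /* Illustrative sketch (editorial addition): an unaliased '+' in the
     * parser gets an automatic token name built from
     * AUTO_GENERATED_TOKEN_NAME_PREFIX and its token type -- e.g. type 73
     * would yield a lexer rule like
     *
     *   T__73 : '+' ;
     *
     * (assuming computeTokenNameFromLiteral, which is not shown in this
     * section, concatenates the prefix and the type).
     */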

    public Rule getLocallyDefinedRule(String ruleName) {
        Rule r = nameToRuleMap.get(ruleName);
        return r;
    }

    public Rule getRule(String ruleName) {
        Rule r = composite.getRule(ruleName);
        /*
        if ( r!=null && r.grammar != this ) {
            System.out.println(name+".getRule("+ruleName+")="+r);
        }
        */
        return r;
    }

    public Rule getRule(String scopeName, String ruleName) {
        if ( scopeName!=null ) { // scope override
            Grammar scope = composite.getGrammar(scopeName);
            if ( scope==null ) {
                return null;
            }
            return scope.getLocallyDefinedRule(ruleName);
        }
        return getRule(ruleName);
    }

    public int getRuleIndex(String scopeName, String ruleName) {
        Rule r = getRule(scopeName, ruleName);
        if ( r!=null ) {
            return r.index;
        }
        return INVALID_RULE_INDEX;
    }

    public int getRuleIndex(String ruleName) {
        return getRuleIndex(null, ruleName);
    }

    public String getRuleName(int ruleIndex) {
        Rule r = composite.ruleIndexToRuleList.get(ruleIndex);
        if ( r!=null ) {
            return r.name;
        }
        return null;
    }

    /** Should codegen.g gen rule for ruleName?
     *  If synpred, only gen if used in a DFA.
     *  If regular rule, only gen if not overridden in delegator.
     *  Always gen Tokens rule though.
     */
    public boolean generateMethodForRule(String ruleName) {
        if ( ruleName.equals(ARTIFICIAL_TOKENS_RULENAME) ) {
            // always generate Tokens rule to satisfy lexer interface
            // but it may have no alternatives.
            return true;
        }
        if ( overriddenRules.contains(ruleName) ) {
            // don't generate any overridden rules
            return false;
        }
        // generate if non-synpred or synpred used in a DFA
        Rule r = getLocallyDefinedRule(ruleName);
        return !r.isSynPred ||
               (r.isSynPred&&synPredNamesUsedInDFA.contains(ruleName));
    }

    public AttributeScope defineGlobalScope(String name, Token scopeAction) {
        AttributeScope scope = new AttributeScope(this, name, scopeAction);
        scopes.put(name,scope);
        return scope;
    }

    public AttributeScope createReturnScope(String ruleName, Token retAction) {
        AttributeScope scope = new AttributeScope(this, ruleName, retAction);
        scope.isReturnScope = true;
        return scope;
    }

    public AttributeScope createRuleScope(String ruleName, Token scopeAction) {
        AttributeScope scope = new AttributeScope(this, ruleName, scopeAction);
        scope.isDynamicRuleScope = true;
        return scope;
    }

    public AttributeScope createParameterScope(String ruleName, Token argAction) {
        AttributeScope scope = new AttributeScope(this, ruleName, argAction);
        scope.isParameterScope = true;
        return scope;
    }

    /** Get a global scope */
    public AttributeScope getGlobalScope(String name) {
        return (AttributeScope)scopes.get(name);
    }

    public Map getGlobalScopes() {
        return scopes;
    }
/** Define a label defined in a rule r; check the validity then ask the
|
|
* Rule object to actually define it.
|
|
*/
|
|
protected void defineLabel(Rule r, antlr.Token label, GrammarAST element, int type) {
|
|
boolean err = nameSpaceChecker.checkForLabelTypeMismatch(r, label, type);
|
|
if ( err ) {
|
|
return;
|
|
}
|
|
r.defineLabel(label, element, type);
|
|
}
|
|
|
|
public void defineTokenRefLabel(String ruleName,
|
|
antlr.Token label,
|
|
GrammarAST tokenRef)
|
|
{
|
|
Rule r = getLocallyDefinedRule(ruleName);
|
|
if ( r!=null ) {
|
|
if ( type==LEXER &&
|
|
(tokenRef.getType()==ANTLRParser.CHAR_LITERAL||
|
|
tokenRef.getType()==ANTLRParser.BLOCK||
|
|
tokenRef.getType()==ANTLRParser.NOT||
|
|
tokenRef.getType()==ANTLRParser.CHAR_RANGE||
|
|
tokenRef.getType()==ANTLRParser.WILDCARD))
|
|
{
|
|
defineLabel(r, label, tokenRef, CHAR_LABEL);
|
|
}
|
|
else {
|
|
defineLabel(r, label, tokenRef, TOKEN_LABEL);
|
|
}
|
|
}
|
|
}
|
|
|
|
public void defineRuleRefLabel(String ruleName,
|
|
antlr.Token label,
|
|
GrammarAST ruleRef)
|
|
{
|
|
Rule r = getLocallyDefinedRule(ruleName);
|
|
if ( r!=null ) {
|
|
defineLabel(r, label, ruleRef, RULE_LABEL);
|
|
}
|
|
}
|
|
|
|
public void defineTokenListLabel(String ruleName,
|
|
antlr.Token label,
|
|
GrammarAST element)
|
|
{
|
|
Rule r = getLocallyDefinedRule(ruleName);
|
|
if ( r!=null ) {
|
|
defineLabel(r, label, element, TOKEN_LIST_LABEL);
|
|
}
|
|
}
|
|
|
|
public void defineRuleListLabel(String ruleName,
|
|
antlr.Token label,
|
|
GrammarAST element)
|
|
{
|
|
Rule r = getLocallyDefinedRule(ruleName);
|
|
if ( r!=null ) {
|
|
if ( !r.getHasMultipleReturnValues() ) {
|
|
ErrorManager.grammarError(
|
|
ErrorManager.MSG_LIST_LABEL_INVALID_UNLESS_RETVAL_STRUCT,this,
|
|
label,label.getText());
|
|
}
|
|
defineLabel(r, label, element, RULE_LIST_LABEL);
|
|
}
|
|
}

	/** Given a set of all rewrite elements on right of ->, filter for
	 *  label types such as Grammar.TOKEN_LABEL, Grammar.TOKEN_LIST_LABEL, ...
	 *  Return the set of matching label names found on those elements.
	 */
	public Set<String> getLabels(Set<GrammarAST> rewriteElements, int labelType) {
		Set<String> labels = new HashSet<String>();
		for (Iterator it = rewriteElements.iterator(); it.hasNext();) {
			GrammarAST el = (GrammarAST) it.next();
			if ( el.getType()==ANTLRParser.LABEL ) {
				String labelName = el.getText();
				Rule enclosingRule = getLocallyDefinedRule(el.enclosingRuleName);
				LabelElementPair pair = enclosingRule.getLabel(labelName);
				// if valid label and type is what we're looking for
				// and not ref to old value val $rule, add to list
				if ( pair!=null && pair.type==labelType &&
					 !labelName.equals(el.enclosingRuleName) )
				{
					labels.add(labelName);
				}
			}
		}
		return labels;
	}

	/** Before generating code, we examine all actions that can have
	 *  $x.y and $y stuff in them because some code generation depends on
	 *  Rule.referencedPredefinedRuleAttributes. I need to remove unused
	 *  rule labels, for example.
	 */
	protected void examineAllExecutableActions() {
		Collection rules = getRules();
		for (Iterator it = rules.iterator(); it.hasNext();) {
			Rule r = (Rule) it.next();
			// walk all actions within the rule elements, args, and exceptions
			List<GrammarAST> actions = r.getInlineActions();
			for (int i = 0; i < actions.size(); i++) {
				GrammarAST actionAST = (GrammarAST) actions.get(i);
				ActionAnalysisLexer sniffer =
					new ActionAnalysisLexer(this, r.name, actionAST);
				sniffer.analyze();
			}
			// walk any named actions like @init, @after
			Collection<GrammarAST> namedActions = r.getActions().values();
			for (Iterator it2 = namedActions.iterator(); it2.hasNext();) {
				GrammarAST actionAST = (GrammarAST) it2.next();
				ActionAnalysisLexer sniffer =
					new ActionAnalysisLexer(this, r.name, actionAST);
				sniffer.analyze();
			}
		}
	}

	/** Remove all labels on rule refs whose target rules have no return value.
	 *  Do this for all rules in grammar.
	 */
	public void checkAllRulesForUselessLabels() {
		if ( type==LEXER ) {
			return;
		}
		Set rules = nameToRuleMap.keySet();
		for (Iterator it = rules.iterator(); it.hasNext();) {
			String ruleName = (String) it.next();
			Rule r = getRule(ruleName);
			removeUselessLabels(r.getRuleLabels());
			removeUselessLabels(r.getRuleListLabels());
		}
	}

	/** A label on a rule is useless if the rule has no return value, no
	 *  tree or template output, and it is not referenced in an action.
	 */
	protected void removeUselessLabels(Map ruleToElementLabelPairMap) {
		if ( ruleToElementLabelPairMap==null ) {
			return;
		}
		Collection labels = ruleToElementLabelPairMap.values();
		List kill = new ArrayList();
		for (Iterator labelit = labels.iterator(); labelit.hasNext();) {
			LabelElementPair pair = (LabelElementPair) labelit.next();
			Rule refdRule = getRule(pair.elementRef.getText());
			if ( refdRule!=null && !refdRule.getHasReturnValue() && !pair.actionReferencesLabel ) {
				//System.out.println(pair.label.getText()+" is useless");
				kill.add(pair.label.getText());
			}
		}
		for (int i = 0; i < kill.size(); i++) {
			String labelToKill = (String) kill.get(i);
			// System.out.println("kill "+labelToKill);
			ruleToElementLabelPairMap.remove(labelToKill);
		}
	}

	/** Track a rule reference within an outermost alt of a rule. Used
	 *  at the moment to decide if $ruleref refers to a unique rule ref in
	 *  the alt. Rewrite rules force tracking of all rule AST results.
	 *
	 *  This data is also used to verify that all rules have been defined.
	 */
	public void altReferencesRule(String enclosingRuleName,
								  GrammarAST refScopeAST,
								  GrammarAST refAST,
								  int outerAltNum)
	{
		/* Do nothing for now; not sure need; track S.x as x
		String scope = null;
		Grammar scopeG = null;
		if ( refScopeAST!=null ) {
			if ( !scopedRuleRefs.contains(refScopeAST) ) {
				scopedRuleRefs.add(refScopeAST);
			}
			scope = refScopeAST.getText();
		}
		*/
		Rule r = getRule(enclosingRuleName);
		if ( r==null ) {
			return; // no error here; see NameSpaceChecker
		}
		r.trackRuleReferenceInAlt(refAST, outerAltNum);
		antlr.Token refToken = refAST.getToken();
		if ( !ruleRefs.contains(refAST) ) {
			ruleRefs.add(refAST);
		}
	}

	/** Track a token reference within an outermost alt of a rule. Used
	 *  to decide if $tokenref refers to a unique token ref in
	 *  the alt. Does not track literals!
	 *
	 *  Rewrite rules force tracking of all tokens.
	 */
	public void altReferencesTokenID(String ruleName, GrammarAST refAST, int outerAltNum) {
		Rule r = getLocallyDefinedRule(ruleName);
		if ( r==null ) {
			return;
		}
		r.trackTokenReferenceInAlt(refAST, outerAltNum);
		if ( !tokenIDRefs.contains(refAST.getToken()) ) {
			tokenIDRefs.add(refAST.getToken());
		}
	}

	/** To yield smaller, more readable code, track which rules have their
	 *  predefined attributes accessed. If the rule has no user-defined
	 *  return values, then don't generate the return value scope classes
	 *  etc... Make the rule have void return value. Don't track for lexer
	 *  rules.
	 */
	public void referenceRuleLabelPredefinedAttribute(String ruleName) {
		Rule r = getRule(ruleName);
		if ( r!=null && type!=LEXER ) {
			// indicate that an action ref'd an attr unless it's in a lexer
			// so that $ID.text refs don't force lexer rules to define
			// return values...Token objects are created by the caller instead.
			r.referencedPredefinedRuleAttributes = true;
		}
	}

	public List checkAllRulesForLeftRecursion() {
		return sanity.checkAllRulesForLeftRecursion();
	}

	/** Return a list of left-recursive rules; no analysis can be done
	 *  successfully on these. Useful to skip these rules then and also
	 *  for ANTLRWorks to highlight them.
	 */
	public Set<Rule> getLeftRecursiveRules() {
		if ( nfa==null ) {
			buildNFA();
		}
		if ( leftRecursiveRules!=null ) {
			return leftRecursiveRules;
		}
		sanity.checkAllRulesForLeftRecursion();
		return leftRecursiveRules;
	}

	public void checkRuleReference(GrammarAST scopeAST,
								   GrammarAST refAST,
								   GrammarAST argsAST,
								   String currentRuleName)
	{
		sanity.checkRuleReference(scopeAST, refAST, argsAST, currentRuleName);
	}

	/** Rules like "a : ;" and "a : {...} ;" should not generate
	 *  try/catch blocks for RecognitionException. To detect this
	 *  it's probably ok to just look for any reference to an atom
	 *  that can match some input. Without that, the rule is unlikely
	 *  to have anything else.
	 */
	public boolean isEmptyRule(GrammarAST block) {
		GrammarAST aTokenRefNode =
			block.findFirstType(ANTLRParser.TOKEN_REF);
		GrammarAST aStringLiteralRefNode =
			block.findFirstType(ANTLRParser.STRING_LITERAL);
		GrammarAST aCharLiteralRefNode =
			block.findFirstType(ANTLRParser.CHAR_LITERAL);
		GrammarAST aWildcardRefNode =
			block.findFirstType(ANTLRParser.WILDCARD);
		GrammarAST aRuleRefNode =
			block.findFirstType(ANTLRParser.RULE_REF);
		if ( aTokenRefNode==null &&
			 aStringLiteralRefNode==null &&
			 aCharLiteralRefNode==null &&
			 aWildcardRefNode==null &&
			 aRuleRefNode==null )
		{
			return true;
		}
		return false;
	}

	public boolean isAtomTokenType(int ttype) {
		return ttype == ANTLRParser.WILDCARD ||
			   ttype == ANTLRParser.CHAR_LITERAL ||
			   ttype == ANTLRParser.CHAR_RANGE ||
			   ttype == ANTLRParser.STRING_LITERAL ||
			   ttype == ANTLRParser.NOT ||
			   (type != LEXER && ttype == ANTLRParser.TOKEN_REF);
	}

	public int getTokenType(String tokenName) {
		Integer I = null;
		if ( tokenName.charAt(0)=='\'') {
			I = (Integer)composite.stringLiteralToTypeMap.get(tokenName);
		}
		else { // must be a label like ID
			I = (Integer)composite.tokenIDToTypeMap.get(tokenName);
		}
		int i = (I!=null)?I.intValue():Label.INVALID;
		//System.out.println("grammar type "+type+" "+tokenName+"->"+i);
		return i;
	}
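
	/* Usage sketch (illustrative; assumes both names were defined in the
	 * grammar, one as a token ID and one as a quoted literal):
	 *
	 *   int idType    = g.getTokenType("ID");       // declared token name
	 *   int beginType = g.getTokenType("'begin'");  // string literal key
	 *   int bad       = g.getTokenType("NO_SUCH");  // yields Label.INVALID
	 */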

	/** Get the list of tokens that are IDs like BLOCK and LPAREN */
	public Set getTokenIDs() {
		return composite.tokenIDToTypeMap.keySet();
	}

	/** Return an ordered integer list of token types that have no
	 *  corresponding token ID like INT or KEYWORD_BEGIN; for stuff
	 *  like 'begin'.
	 */
	public Collection getTokenTypesWithoutID() {
		List types = new ArrayList();
		for (int t=Label.MIN_TOKEN_TYPE; t<=getMaxTokenType(); t++) {
			String name = getTokenDisplayName(t);
			if ( name.charAt(0)=='\'' ) {
				types.add(Utils.integer(t));
			}
		}
		return types;
	}

	/** Get a list of all token IDs and literals that have an associated
	 *  token type.
	 */
	public Set<String> getTokenDisplayNames() {
		Set<String> names = new HashSet<String>();
		for (int t=Label.MIN_TOKEN_TYPE; t<=getMaxTokenType(); t++) {
			names.add(getTokenDisplayName(t));
		}
		return names;
	}

	/** Given a literal like (the 3 char sequence with single quotes) 'a',
	 *  return the int value of 'a'. Convert escape sequences here also.
	 *  ANTLR's antlr.g parser does not convert escape sequences.
	 *
	 *  11/26/2005: I changed literals to always be '...' even for strings.
	 *  This routine still works though.
	 */
	public static int getCharValueFromGrammarCharLiteral(String literal) {
		switch ( literal.length() ) {
			case 3 :
				// 'x'
				return literal.charAt(1); // no escape char
			case 4 :
				// '\x'  (antlr lexer will catch invalid char)
				if ( Character.isDigit(literal.charAt(2)) ) {
					ErrorManager.error(ErrorManager.MSG_SYNTAX_ERROR,
									   "invalid char literal: "+literal);
					return -1;
				}
				int escChar = literal.charAt(2);
				int charVal = ANTLRLiteralEscapedCharValue[escChar];
				if ( charVal==0 ) {
					// Unnecessary escapes like '\{' should just yield {
					return escChar;
				}
				return charVal;
			case 8 :
				// '\u1234'
				String unicodeChars = literal.substring(3,literal.length()-1);
				return Integer.parseInt(unicodeChars, 16);
			default :
				ErrorManager.error(ErrorManager.MSG_SYNTAX_ERROR,
								   "invalid char literal: "+literal);
				return -1;
		}
	}
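
	/* Conversion sketch (values follow directly from the cases above):
	 *
	 *   getCharValueFromGrammarCharLiteral("'a'")       -> 97   (length 3)
	 *   getCharValueFromGrammarCharLiteral("'\\n'")     -> 10   (length 4)
	 *   getCharValueFromGrammarCharLiteral("'\\u0041'") -> 65   (length 8)
	 */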

	/** ANTLR does not convert escape sequences during the parse phase because
	 *  it could not know how to print String/char literals back out when
	 *  printing grammars etc... Someone in China might use the real unicode
	 *  char in a literal as it will display on their screen; when printing
	 *  back out, I could not know whether to display or use a unicode escape.
	 *
	 *  This routine converts a string literal with possible escape sequences
	 *  into a pure string of 16-bit char values. Escapes and unicode \u0000
	 *  specs are converted to pure chars. Return in a buffer; people may
	 *  want to walk/manipulate further.
	 *
	 *  The NFA construction routine must know the actual char values.
	 */
	public static StringBuffer getUnescapedStringFromGrammarStringLiteral(String literal) {
		//System.out.println("escape: ["+literal+"]");
		StringBuffer buf = new StringBuffer();
		int last = literal.length()-1; // skip quotes on outside
		for (int i=1; i<last; i++) {
			char c = literal.charAt(i);
			if ( c=='\\' ) {
				i++;
				c = literal.charAt(i);
				if ( Character.toUpperCase(c)=='U' ) {
					// \u0000
					i++;
					String unicodeChars = literal.substring(i,i+4);
					// parse the unicode 16 bit hex value
					int val = Integer.parseInt(unicodeChars, 16);
					i+=4-1; // loop will inc by 1; only jump 3 then
					buf.append((char)val);
				}
				else if ( Character.isDigit(c) ) {
					ErrorManager.error(ErrorManager.MSG_SYNTAX_ERROR,
									   "invalid char literal: "+literal);
					buf.append("\\"+(char)c);
				}
				else {
					buf.append((char)ANTLRLiteralEscapedCharValue[c]); // normal \x escape
				}
			}
			else {
				buf.append(c); // simple char x
			}
		}
		//System.out.println("string: ["+buf.toString()+"]");
		return buf;
	}
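
	/* Unescaping sketch (input is the quoted text exactly as it appears in
	 * the grammar file; output is the raw char sequence):
	 *
	 *   getUnescapedStringFromGrammarStringLiteral("'ab\\nc'")
	 *     -> a buffer holding: a b <newline> c
	 */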

	/** Pull your token definitions from an existing grammar in memory.
	 *  You must use Grammar() ctor then this method then setGrammarContent()
	 *  to make this work. This was useful primarily for testing and
	 *  interpreting grammars until I added import grammar functionality.
	 *  When you import a grammar you implicitly import its vocabulary as well
	 *  and keep the same token type values.
	 *
	 *  Returns the max token type found.
	 */
	public int importTokenVocabulary(Grammar importFromGr) {
		Set importedTokenIDs = importFromGr.getTokenIDs();
		for (Iterator it = importedTokenIDs.iterator(); it.hasNext();) {
			String tokenID = (String) it.next();
			int tokenType = importFromGr.getTokenType(tokenID);
			composite.maxTokenType = Math.max(composite.maxTokenType,tokenType);
			if ( tokenType>=Label.MIN_TOKEN_TYPE ) {
				//System.out.println("import token from grammar "+tokenID+"="+tokenType);
				defineToken(tokenID, tokenType);
			}
		}
		return composite.maxTokenType; // return max found
	}
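
	/* Usage sketch (hypothetical grammars; token type values carry over
	 * unchanged from the source grammar):
	 *
	 *   Grammar lexerG = ...;   // suppose it already holds ID=4, INT=5
	 *   int max = parserG.importTokenVocabulary(lexerG);
	 *   // parserG now resolves getTokenType("ID")==4; max is 5
	 */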

	/** Import the rules/tokens of a delegate grammar. All delegate grammars are
	 *  read during the ctor of first Grammar created.
	 *
	 *  Do not create NFA here because NFA construction needs to hook up with
	 *  overridden rules in delegation root grammar.
	 */
	public void importGrammar(GrammarAST grammarNameAST, String label) {
		String grammarName = grammarNameAST.getText();
		//System.out.println("import "+gfile.getName());
		String gname = grammarName + GRAMMAR_FILE_EXTENSION;
		BufferedReader br = null;
		try {
			String fullName = tool.getLibraryFile(gname);
			FileReader fr = new FileReader(fullName);
			br = new BufferedReader(fr);
			Grammar delegateGrammar = new Grammar(tool, gname, composite);
			delegateGrammar.label = label;

			addDelegateGrammar(delegateGrammar);

			delegateGrammar.parseAndBuildAST(br);
			if ( !validImport(delegateGrammar) ) {
				ErrorManager.grammarError(ErrorManager.MSG_INVALID_IMPORT,
										  this,
										  grammarNameAST.token,
										  this,
										  delegateGrammar);
				return;
			}
			if ( this.type==COMBINED &&
				 (delegateGrammar.name.equals(this.name+grammarTypeToFileNameSuffix[LEXER])||
				  delegateGrammar.name.equals(this.name+grammarTypeToFileNameSuffix[PARSER])) )
			{
				ErrorManager.grammarError(ErrorManager.MSG_IMPORT_NAME_CLASH,
										  this,
										  grammarNameAST.token,
										  this,
										  delegateGrammar);
				return;
			}
			if ( delegateGrammar.grammarTree!=null ) {
				// we have a valid grammar
				// deal with combined grammars
				if ( delegateGrammar.type == LEXER && this.type == COMBINED ) {
					// ooops, we wasted some effort; tell lexer to read it in
					// later
					lexerGrammarST.setAttribute("imports", grammarName);
					// but, this parser grammar will need the vocab
					// so add to composite anyway so we suck in the tokens later
				}
			}
			//System.out.println("Got grammar:\n"+delegateGrammar);
		}
		catch (IOException ioe) {
			ErrorManager.error(ErrorManager.MSG_CANNOT_OPEN_FILE,
							   gname,
							   ioe);
		}
		finally {
			if ( br!=null ) {
				try {
					br.close();
				}
				catch (IOException ioe) {
					ErrorManager.error(ErrorManager.MSG_CANNOT_CLOSE_FILE,
									   gname,
									   ioe);
				}
			}
		}
	}

	/** add new delegate to composite tree */
	protected void addDelegateGrammar(Grammar delegateGrammar) {
		CompositeGrammarTree t = composite.delegateGrammarTreeRoot.findNode(this);
		t.addChild(new CompositeGrammarTree(delegateGrammar));
		// make sure new grammar shares this composite
		delegateGrammar.composite = this.composite;
	}

	/** Load a vocab file <vocabName>.tokens and return max token type found. */
	public int importTokenVocabulary(GrammarAST tokenVocabOptionAST,
									 String vocabName)
	{
		if ( !getGrammarIsRoot() ) {
			ErrorManager.grammarWarning(ErrorManager.MSG_TOKEN_VOCAB_IN_DELEGATE,
										this,
										tokenVocabOptionAST.token,
										name);
			return composite.maxTokenType;
		}

		File fullFile = tool.getImportedVocabFile(vocabName);
		try {
			FileReader fr = new FileReader(fullFile);
			BufferedReader br = new BufferedReader(fr);
			StreamTokenizer tokenizer = new StreamTokenizer(br);
			tokenizer.parseNumbers();
			tokenizer.wordChars('_', '_');
			tokenizer.eolIsSignificant(true);
			tokenizer.slashSlashComments(true);
			tokenizer.slashStarComments(true);
			tokenizer.ordinaryChar('=');
			tokenizer.quoteChar('\'');
			tokenizer.whitespaceChars(' ',' ');
			tokenizer.whitespaceChars('\t','\t');
			int lineNum = 1;
			int token = tokenizer.nextToken();
			while (token != StreamTokenizer.TT_EOF) {
				String tokenID;
				if ( token == StreamTokenizer.TT_WORD ) {
					tokenID = tokenizer.sval;
				}
				else if ( token == '\'' ) {
					tokenID = "'"+tokenizer.sval+"'";
				}
				else {
					ErrorManager.error(ErrorManager.MSG_TOKENS_FILE_SYNTAX_ERROR,
									   vocabName+CodeGenerator.VOCAB_FILE_EXTENSION,
									   Utils.integer(lineNum));
					while ( tokenizer.nextToken() != StreamTokenizer.TT_EOL ) {;}
					token = tokenizer.nextToken();
					continue;
				}
				token = tokenizer.nextToken();
				if ( token != '=' ) {
					ErrorManager.error(ErrorManager.MSG_TOKENS_FILE_SYNTAX_ERROR,
									   vocabName+CodeGenerator.VOCAB_FILE_EXTENSION,
									   Utils.integer(lineNum));
					while ( tokenizer.nextToken() != StreamTokenizer.TT_EOL ) {;}
					token = tokenizer.nextToken();
					continue;
				}
				token = tokenizer.nextToken(); // skip '='
				if ( token != StreamTokenizer.TT_NUMBER ) {
					ErrorManager.error(ErrorManager.MSG_TOKENS_FILE_SYNTAX_ERROR,
									   vocabName+CodeGenerator.VOCAB_FILE_EXTENSION,
									   Utils.integer(lineNum));
					while ( tokenizer.nextToken() != StreamTokenizer.TT_EOL ) {;}
					token = tokenizer.nextToken();
					continue;
				}
				int tokenType = (int)tokenizer.nval;
				token = tokenizer.nextToken();
				//System.out.println("import "+tokenID+"="+tokenType);
				composite.maxTokenType = Math.max(composite.maxTokenType,tokenType);
				defineToken(tokenID, tokenType);
				lineNum++;
				if ( token != StreamTokenizer.TT_EOL ) {
					ErrorManager.error(ErrorManager.MSG_TOKENS_FILE_SYNTAX_ERROR,
									   vocabName+CodeGenerator.VOCAB_FILE_EXTENSION,
									   Utils.integer(lineNum));
					while ( tokenizer.nextToken() != StreamTokenizer.TT_EOL ) {;}
					token = tokenizer.nextToken();
					continue;
				}
				token = tokenizer.nextToken(); // skip newline
			}
			br.close();
		}
		catch (FileNotFoundException fnfe) {
			ErrorManager.error(ErrorManager.MSG_CANNOT_FIND_TOKENS_FILE,
							   fullFile);
		}
		catch (IOException ioe) {
			ErrorManager.error(ErrorManager.MSG_ERROR_READING_TOKENS_FILE,
							   fullFile,
							   ioe);
		}
		catch (Exception e) {
			ErrorManager.error(ErrorManager.MSG_ERROR_READING_TOKENS_FILE,
							   fullFile,
							   e);
		}
		return composite.maxTokenType;
	}
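
	/* Expected .tokens file shape (one assignment per line; quoted literals
	 * keep their single quotes), e.g. a hypothetical T.tokens:
	 *
	 *   ID=4
	 *   INT=5
	 *   'begin'=6
	 */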

	/** Given a token type, get a meaningful name for it such as the ID
	 *  or string literal. If this is a lexer and the ttype is in the
	 *  char vocabulary, compute an ANTLR-valid (possibly escaped) char literal.
	 */
	public String getTokenDisplayName(int ttype) {
		String tokenName = null;
		int index=0;
		// inside any target's char range and is lexer grammar?
		if ( this.type==LEXER &&
			 ttype >= Label.MIN_CHAR_VALUE && ttype <= Label.MAX_CHAR_VALUE )
		{
			return getANTLRCharLiteralForChar(ttype);
		}
		// faux label?
		else if ( ttype<0 ) {
			tokenName = (String)composite.typeToTokenList.get(Label.NUM_FAUX_LABELS+ttype);
		}
		else {
			// compute index in typeToTokenList for ttype
			index = ttype-1; // normalize to 0..n-1
			index += Label.NUM_FAUX_LABELS; // jump over faux tokens

			if ( index<composite.typeToTokenList.size() ) {
				tokenName = (String)composite.typeToTokenList.get(index);
				if ( tokenName!=null &&
					 tokenName.startsWith(AUTO_GENERATED_TOKEN_NAME_PREFIX) )
				{
					tokenName = composite.typeToStringLiteralList.get(ttype);
				}
			}
			else {
				tokenName = String.valueOf(ttype);
			}
		}
		//System.out.println("getTokenDisplayName ttype="+ttype+", index="+index+", name="+tokenName);
		return tokenName;
	}

	/** Get the list of ANTLR String literals */
	public Set<String> getStringLiterals() {
		return composite.stringLiteralToTypeMap.keySet();
	}

	public String getGrammarTypeString() {
		return grammarTypeToString[type];
	}

	public int getGrammarMaxLookahead() {
		if ( global_k>=0 ) {
			return global_k;
		}
		Object k = getOption("k");
		if ( k==null ) {
			global_k = 0;
		}
		else if (k instanceof Integer) {
			Integer kI = (Integer)k;
			global_k = kI.intValue();
		}
		else {
			// must be String "*"
			if ( k.equals("*") ) { // this the default anyway
				global_k = 0;
			}
		}
		return global_k;
	}
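
	/* Lookahead option sketch (the grammar-level k option maps to global_k):
	 *
	 *   options { k=2; }    ->  getGrammarMaxLookahead()==2
	 *   options { k="*"; }  ->  0  (unbounded; same as leaving k out)
	 */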

	/** Save the option key/value pair and process it; return the key
	 *  or null if invalid option.
	 */
	public String setOption(String key, Object value, antlr.Token optionsStartToken) {
		if ( !legalOption(key) ) {
			ErrorManager.grammarError(ErrorManager.MSG_ILLEGAL_OPTION,
									  this,
									  optionsStartToken,
									  key);
			return null;
		}
		if ( !optionIsValid(key, value) ) {
			return null;
		}
		if ( options==null ) {
			options = new HashMap();
		}
		options.put(key, value);
		return key;
	}

	public boolean legalOption(String key) {
		switch ( type ) {
			case LEXER :
				return legalLexerOptions.contains(key);
			case PARSER :
				return legalParserOptions.contains(key);
			case TREE_PARSER :
				return legalTreeParserOptions.contains(key);
			default :
				return legalParserOptions.contains(key);
		}
	}

	public void setOptions(Map options, antlr.Token optionsStartToken) {
		if ( options==null ) {
			this.options = null;
			return;
		}
		Set keys = options.keySet();
		for (Iterator it = keys.iterator(); it.hasNext();) {
			String optionName = (String) it.next();
			Object optionValue = options.get(optionName);
			String stored = setOption(optionName, optionValue, optionsStartToken);
			if ( stored==null ) {
				it.remove();
			}
		}
	}

	public Object getOption(String key) {
		return composite.getOption(key);
	}

	public Object getLocallyDefinedOption(String key) {
		Object value = null;
		if ( options!=null ) {
			value = options.get(key);
		}
		if ( value==null ) {
			value = defaultOptions.get(key);
		}
		return value;
	}

	public Object getBlockOption(GrammarAST blockAST, String key) {
		String v = (String)blockAST.getBlockOption(key);
		if ( v!=null ) {
			return v;
		}
		if ( type==Grammar.LEXER ) {
			return defaultLexerBlockOptions.get(key);
		}
		return defaultBlockOptions.get(key);
	}

	public int getUserMaxLookahead(int decision) {
		int user_k = 0;
		GrammarAST blockAST = nfa.grammar.getDecisionBlockAST(decision);
		Object k = blockAST.getBlockOption("k");
		if ( k==null ) {
			user_k = nfa.grammar.getGrammarMaxLookahead();
			return user_k;
		}
		if (k instanceof Integer) {
			Integer kI = (Integer)k;
			user_k = kI.intValue();
		}
		else {
			// must be String "*"
			if ( k.equals("*") ) {
				user_k = 0;
			}
		}
		return user_k;
	}
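
	/* Per-decision override sketch (hypothetical rule): a subrule option
	 * beats the grammar-level option for that decision only:
	 *
	 *   r : ( options {k=1;} : A B | A C )? ;
	 *   // getUserMaxLookahead(d) for that block returns 1 even when the
	 *   // grammar itself says options {k=3;}
	 */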

	public boolean getAutoBacktrackMode(int decision) {
		NFAState decisionNFAStartState = getDecisionNFAStartState(decision);
		String autoBacktrack =
			(String)getBlockOption(decisionNFAStartState.associatedASTNode, "backtrack");

		if ( autoBacktrack==null ) {
			autoBacktrack = (String)nfa.grammar.getOption("backtrack");
		}
		return autoBacktrack!=null && autoBacktrack.equals("true");
	}

	public boolean optionIsValid(String key, Object value) {
		return true;
	}

	public boolean buildAST() {
		String outputType = (String)getOption("output");
		if ( outputType!=null ) {
			return outputType.equals("AST");
		}
		return false;
	}

	public boolean rewriteMode() {
		String rewriteOption = (String)getOption("rewrite");
		if ( rewriteOption!=null ) {
			return rewriteOption.equals("true");
		}
		return false;
	}

	public boolean isBuiltFromString() {
		return builtFromString;
	}

	public boolean buildTemplate() {
		String outputType = (String)getOption("output");
		if ( outputType!=null ) {
			return outputType.equals("template");
		}
		return false;
	}

	public Collection<Rule> getRules() {
		return nameToRuleMap.values();
	}

	/** Get the set of Rules that need to have manual delegations
	 *  like "void rule() { importedGrammar.rule(); }"
	 *
	 *  If this grammar is master, get list of all rule definitions from all
	 *  delegate grammars. Only master has complete interface from combined
	 *  grammars...we will generate delegates as helper objects.
	 *
	 *  Composite grammars that are not the root/master do not have complete
	 *  interfaces. It is not my intention that people use subcomposites.
	 *  Only the outermost grammar should be used from outside code. The
	 *  other grammar components are specifically generated to work only
	 *  with the master/root.
	 *
	 *  delegatedRules = imported - overridden
	 */
	public Set<Rule> getDelegatedRules() {
		return composite.getDelegatedRules(this);
	}

	/** Get set of all rules imported from all delegate grammars even if
	 *  indirectly delegated.
	 */
	public Set<Rule> getAllImportedRules() {
		return composite.getAllImportedRules(this);
	}

	/** Get list of all delegates from all grammars directly or indirectly
	 *  imported into this grammar.
	 */
	public List<Grammar> getDelegates() {
		return composite.getDelegates(this);
	}

	public List<String> getDelegateNames() {
		// compute delegates:{Grammar g | return g.name;}
		List<String> names = new ArrayList<String>();
		List<Grammar> delegates = composite.getDelegates(this);
		if ( delegates!=null ) {
			for (Grammar g : delegates) {
				names.add(g.name);
			}
		}
		return names;
	}

	public List<Grammar> getDirectDelegates() {
		return composite.getDirectDelegates(this);
	}

	/** Get delegates below direct delegates */
	public List<Grammar> getIndirectDelegates() {
		return composite.getIndirectDelegates(this);
	}

	/** Get list of all delegators. This amounts to the grammars on the path
	 *  to the root of the delegation tree.
	 */
	public List<Grammar> getDelegators() {
		return composite.getDelegators(this);
	}

	/** Who's my direct parent grammar? */
	public Grammar getDelegator() {
		return composite.getDelegator(this);
	}

	public Set<Rule> getDelegatedRuleReferences() {
		return delegatedRuleReferences;
	}

	public boolean getGrammarIsRoot() {
		return composite.delegateGrammarTreeRoot.grammar == this;
	}

	public void setRuleAST(String ruleName, GrammarAST t) {
		Rule r = getLocallyDefinedRule(ruleName);
		if ( r!=null ) {
			r.tree = t;
			r.EORNode = t.getLastChild();
		}
	}

	public NFAState getRuleStartState(String ruleName) {
		return getRuleStartState(null, ruleName);
	}

	public NFAState getRuleStartState(String scopeName, String ruleName) {
		Rule r = getRule(scopeName, ruleName);
		if ( r!=null ) {
			//System.out.println("getRuleStartState("+scopeName+", "+ruleName+")="+r.startState);
			return r.startState;
		}
		//System.out.println("getRuleStartState("+scopeName+", "+ruleName+")=null");
		return null;
	}

	public String getRuleModifier(String ruleName) {
		Rule r = getRule(ruleName);
		if ( r!=null ) {
			return r.modifier;
		}
		return null;
	}

	public NFAState getRuleStopState(String ruleName) {
		Rule r = getRule(ruleName);
		if ( r!=null ) {
			return r.stopState;
		}
		return null;
	}

	public int assignDecisionNumber(NFAState state) {
		decisionCount++;
		state.setDecisionNumber(decisionCount);
		return decisionCount;
	}

	protected Decision getDecision(int decision) {
		int index = decision-1;
		if ( index >= indexToDecision.size() ) {
			return null;
		}
		Decision d = (Decision)indexToDecision.get(index);
		return d;
	}

	protected Decision createDecision(int decision) {
		int index = decision-1;
		if ( index < indexToDecision.size() ) {
			return getDecision(decision); // don't recreate
		}
		Decision d = new Decision();
		d.decision = decision;
		indexToDecision.setSize(getNumberOfDecisions());
		indexToDecision.set(index, d);
		return d;
	}

	public List getDecisionNFAStartStateList() {
		List states = new ArrayList(100);
		for (int d = 0; d < indexToDecision.size(); d++) {
			Decision dec = (Decision) indexToDecision.get(d);
			states.add(dec.startState);
		}
		return states;
	}

	public NFAState getDecisionNFAStartState(int decision) {
		Decision d = getDecision(decision);
		if ( d==null ) {
			return null;
		}
		return d.startState;
	}

	public DFA getLookaheadDFA(int decision) {
		Decision d = getDecision(decision);
		if ( d==null ) {
			return null;
		}
		return d.dfa;
	}

	public GrammarAST getDecisionBlockAST(int decision) {
		Decision d = getDecision(decision);
		if ( d==null ) {
			return null;
		}
		return d.blockAST;
	}

	/** Returns a list of column numbers for all decisions
	 *  on a particular line so ANTLRWorks can choose the decision
	 *  depending on the location of the cursor (otherwise,
	 *  ANTLRWorks has to give the *exact* location which
	 *  is not easy from the user point of view).
	 *
	 *  This is not particularly fast as it walks the entire
	 *  line:col->DFA map looking for a prefix of "line:".
	 */
	public List getLookaheadDFAColumnsForLineInFile(int line) {
		String prefix = line+":";
		List columns = new ArrayList();
		for (Iterator iter = lineColumnToLookaheadDFAMap.keySet().iterator();
			 iter.hasNext(); ) {
			String key = (String)iter.next();
			if ( key.startsWith(prefix) ) {
				columns.add(Integer.valueOf(key.substring(prefix.length())));
			}
		}
		return columns;
	}

	/** Useful for ANTLRWorks to map position in file to the DFA for display */
	public DFA getLookaheadDFAFromPositionInFile(int line, int col) {
		return (DFA)lineColumnToLookaheadDFAMap.get(
			new StringBuffer().append(line + ":").append(col).toString());
	}

	public Map getLineColumnToLookaheadDFAMap() {
		return lineColumnToLookaheadDFAMap;
	}

	/*
	public void setDecisionOptions(int decision, Map options) {
		Decision d = createDecision(decision);
		d.options = options;
	}

	public void setDecisionOption(int decision, String name, Object value) {
		Decision d = getDecision(decision);
		if ( d!=null ) {
			if ( d.options==null ) {
				d.options = new HashMap();
			}
			d.options.put(name,value);
		}
	}

	public Map getDecisionOptions(int decision) {
		Decision d = getDecision(decision);
		if ( d==null ) {
			return null;
		}
		return d.options;
	}
	*/

	public int getNumberOfDecisions() {
		return decisionCount;
	}

	public int getNumberOfCyclicDecisions() {
		int n = 0;
		for (int i=1; i<=getNumberOfDecisions(); i++) {
			Decision d = getDecision(i);
			if ( d.dfa!=null && d.dfa.isCyclic() ) {
				n++;
			}
		}
		return n;
	}

	/** Set the lookahead DFA for a particular decision. This means
	 *  that the appropriate AST node must be updated to have the new lookahead
	 *  DFA. This method could be used to properly set the DFAs without
	 *  using the createLookaheadDFAs() method. You could do this:
	 *
	 *    Grammar g = new Grammar("...");
	 *    g.setLookahead(1, dfa1);
	 *    g.setLookahead(2, dfa2);
	 *    ...
	 */
	public void setLookaheadDFA(int decision, DFA lookaheadDFA) {
		Decision d = createDecision(decision);
		d.dfa = lookaheadDFA;
		GrammarAST ast = d.startState.associatedASTNode;
		ast.setLookaheadDFA(lookaheadDFA);
	}

	public void setDecisionNFA(int decision, NFAState state) {
		Decision d = createDecision(decision);
		d.startState = state;
	}

	public void setDecisionBlockAST(int decision, GrammarAST blockAST) {
		//System.out.println("setDecisionBlockAST("+decision+", "+blockAST.token);
		Decision d = createDecision(decision);
		d.blockAST = blockAST;
	}

	public boolean allDecisionDFAHaveBeenCreated() {
		return allDecisionDFACreated;
	}

	/** How many token types have been allocated so far? */
	public int getMaxTokenType() {
		return composite.maxTokenType;
	}

	/** What is the max char value possible for this grammar's target? Use
	 *  unicode max if no target defined.
	 */
	public int getMaxCharValue() {
		if ( generator!=null ) {
			return generator.target.getMaxCharValue(generator);
		}
		else {
			return Label.MAX_CHAR_VALUE;
		}
	}

	/** Return a set of all possible token or char types for this grammar */
	public IntSet getTokenTypes() {
		if ( type==LEXER ) {
			return getAllCharValues();
		}
		return IntervalSet.of(Label.MIN_TOKEN_TYPE, getMaxTokenType());
	}

	/** If there is a char vocabulary, use it; else return min to max char
	 *  as defined by the target. If no target, use max unicode char value.
	 */
	public IntSet getAllCharValues() {
		if ( charVocabulary!=null ) {
			return charVocabulary;
		}
		IntSet allChar = IntervalSet.of(Label.MIN_CHAR_VALUE, getMaxCharValue());
		return allChar;
	}

	/** Return a string representing the escaped char for code c. E.g., If c
	 *  has value 0x100, you will get "\u0100". ASCII gets the usual
	 *  char (non-hex) representation. Control characters are spit out
	 *  as unicode. While this is specially set up for returning Java strings,
	 *  it can be used by any language target that has the same syntax. :)
	 *
	 *  11/26/2005: I changed this to use double quotes, consistent with antlr.g
	 *  12/09/2005: I changed so everything is single quotes
	 */
	public static String getANTLRCharLiteralForChar(int c) {
		if ( c<Label.MIN_CHAR_VALUE ) {
			ErrorManager.internalError("invalid char value "+c);
			return "'<INVALID>'";
		}
		if ( c<ANTLRLiteralCharValueEscape.length && ANTLRLiteralCharValueEscape[c]!=null ) {
			return '\''+ANTLRLiteralCharValueEscape[c]+'\'';
		}
		if ( Character.UnicodeBlock.of((char)c)==Character.UnicodeBlock.BASIC_LATIN &&
			 !Character.isISOControl((char)c) ) {
			if ( c=='\\' ) {
				return "'\\\\'";
			}
			if ( c=='\'') {
				return "'\\''";
			}
			return '\''+Character.toString((char)c)+'\'';
		}
		// turn on the bit above max "\uFFFF" value so that we pad with zeros
		// then only take last 4 digits
		String hex = Integer.toHexString(c|0x10000).toUpperCase().substring(1,5);
		String unicodeStr = "'\\u"+hex+"'";
		return unicodeStr;
	}
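
	/* Escaping sketch (inverse direction of getCharValueFromGrammarCharLiteral):
	 *
	 *   getANTLRCharLiteralForChar('A')    -> "'A'"
	 *   getANTLRCharLiteralForChar('\n')   -> "'\\n'"
	 *   getANTLRCharLiteralForChar(0x100)  -> "'\\u0100'"
	 */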

	/** For lexer grammars, return everything in unicode not in set.
	 *  For parser and tree grammars, return everything in token space
	 *  from MIN_TOKEN_TYPE to last valid token type or char value.
	 */
	public IntSet complement(IntSet set) {
		//System.out.println("complement "+set.toString(this));
		//System.out.println("vocabulary "+getTokenTypes().toString(this));
		IntSet c = set.complement(getTokenTypes());
		//System.out.println("result="+c.toString(this));
		return c;
	}

	public IntSet complement(int atom) {
		return complement(IntervalSet.of(atom));
	}
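
	/* Complement sketch: in a lexer grammar the vocabulary is the char set,
	 * so ~'a' is every char except 'a'; in a parser it is token space minus
	 * the given type:
	 *
	 *   IntSet notA = g.complement('a');  // lexer: all chars except 'a'
	 */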

	/** Given set tree like ( SET A B ), check that A and B
	 *  are both valid sets themselves, else we must treat it like a BLOCK
	 */
	public boolean isValidSet(TreeToNFAConverter nfabuilder, GrammarAST t) {
		boolean valid = true;
		try {
			//System.out.println("parse BLOCK as set tree: "+t.toStringTree());
			nfabuilder.testBlockAsSet(t);
		}
		catch (RecognitionException re) {
			// The rule did not parse as a set, return null; ignore exception
			valid = false;
		}
		//System.out.println("valid? "+valid);
		return valid;
	}

	/** Get the set equivalent (if any) of the indicated rule from this
	 *  grammar. Mostly used in the lexer to do ~T for some fragment rule
	 *  T. If the rule AST has a SET use that. If the rule is a single char
	 *  convert it to a set and return. If rule is not a simple set (w/o actions)
	 *  then return null.
	 *  Rules have AST form:
	 *
	 *      ^( RULE ID modifier ARG RET SCOPE block EOR )
	 */
	public IntSet getSetFromRule(TreeToNFAConverter nfabuilder, String ruleName)
		throws RecognitionException
	{
		Rule r = getRule(ruleName);
		if ( r==null ) {
			return null;
		}
		IntSet elements = null;
		//System.out.println("parsed tree: "+r.tree.toStringTree());
		elements = nfabuilder.setRule(r.tree);
		//System.out.println("elements="+elements);
		return elements;
	}

	/** Decisions are linked together with transition(1). Count how
	 *  many there are. This is here rather than in NFAState because
	 *  a grammar decides how NFAs are put together to form a decision.
	 */
	public int getNumberOfAltsForDecisionNFA(NFAState decisionState) {
		if ( decisionState==null ) {
			return 0;
		}
		int n = 1;
		NFAState p = decisionState;
		while ( p.transition[1]!=null ) {
			n++;
			p = (NFAState)p.transition[1].target;
		}
		return n;
	}
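
	/* Counting sketch: for a decision built from (A|B|C) the alt start
	 * states chain via transition[1], so the walk above follows two links
	 * and returns n==3.
	 */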

	/** Get the ith alternative (1..n) from a decision; return null when
	 *  an invalid alt is requested. I must count in to find the right
	 *  alternative number. For (A|B), you get NFA structure (roughly):
	 *
	 *  o->o-A->o
	 *  |
	 *  o->o-B->o
	 *
	 *  This routine returns the leftmost state for each alt. So alt=1 returns
	 *  the upper-leftmost state in this structure.
	 */
	public NFAState getNFAStateForAltOfDecision(NFAState decisionState, int alt) {
		if ( decisionState==null || alt<=0 ) {
			return null;
		}
		int n = 1;
		NFAState p = decisionState;
		while ( p!=null ) {
			if ( n==alt ) {
				return p;
			}
			n++;
			Transition next = p.transition[1];
			p = null;
			if ( next!=null ) {
				p = (NFAState)next.target;
			}
		}
		return null;
	}

	/*
	public void computeRuleFOLLOWSets() {
		if ( getNumberOfDecisions()==0 ) {
			createNFAs();
		}
		for (Iterator it = getRules().iterator(); it.hasNext();) {
			Rule r = (Rule)it.next();
			if ( r.isSynPred ) {
				continue;
			}
			LookaheadSet s = ll1Analyzer.FOLLOW(r);
			System.out.println("FOLLOW("+r.name+")="+s);
		}
	}
	*/

	public LookaheadSet FIRST(NFAState s) {
		return ll1Analyzer.FIRST(s);
	}

	public LookaheadSet LOOK(NFAState s) {
		return ll1Analyzer.LOOK(s);
	}

	public void setCodeGenerator(CodeGenerator generator) {
		this.generator = generator;
	}

	public CodeGenerator getCodeGenerator() {
		return generator;
	}

	public GrammarAST getGrammarTree() {
		return grammarTree;
	}

	public Tool getTool() {
		return tool;
	}

	public void setTool(Tool tool) {
		this.tool = tool;
	}

	/** Given a token type and the text of the literal, come up with a
	 *  decent token type label. For now it's just T<type>. Actually,
	 *  if there is an aliased name from tokens like PLUS='+', use it.
	 */
	public String computeTokenNameFromLiteral(int tokenType, String literal) {
		return AUTO_GENERATED_TOKEN_NAME_PREFIX+tokenType;
	}

	public String toString() {
		return grammarTreeToString(grammarTree);
	}

	public String grammarTreeToString(GrammarAST t) {
		return grammarTreeToString(t, true);
	}

	public String grammarTreeToString(GrammarAST t, boolean showActions) {
		String s = null;
		try {
			s = t.getLine()+":"+t.getColumn()+": ";
			s += new ANTLRTreePrinter().toString((AST)t, this, showActions);
		}
		catch (Exception e) {
			s = "<invalid or missing tree structure>";
		}
		return s;
	}

	public void printGrammar(PrintStream output) {
		ANTLRTreePrinter printer = new ANTLRTreePrinter();
		printer.setASTNodeClass("org.antlr.tool.GrammarAST");
		try {
			String g = printer.toString(grammarTree, this, false);
			output.println(g);
		}
		catch (RecognitionException re) {
			ErrorManager.error(ErrorManager.MSG_SYNTAX_ERROR,re);
		}
	}

}