mirror of
https://github.com/marcin-szczepanski/jFuzzyLogic.git
synced 2025-01-07 05:10:28 +01:00
1062 lines
37 KiB
Java
1062 lines
37 KiB
Java
/*
|
|
[The "BSD licence"]
|
|
Copyright (c) 2005-2006 Terence Parr
|
|
All rights reserved.
|
|
|
|
Redistribution and use in source and binary forms, with or without
|
|
modification, are permitted provided that the following conditions
|
|
are met:
|
|
1. Redistributions of source code must retain the above copyright
|
|
notice, this list of conditions and the following disclaimer.
|
|
2. Redistributions in binary form must reproduce the above copyright
|
|
notice, this list of conditions and the following disclaimer in the
|
|
documentation and/or other materials provided with the distribution.
|
|
3. The name of the author may not be used to endorse or promote products
|
|
derived from this software without specific prior written permission.
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
|
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
|
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
|
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
|
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
|
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
package org.antlr.analysis;
|
|
|
|
import org.antlr.codegen.CodeGenerator;
|
|
import org.antlr.misc.IntSet;
|
|
import org.antlr.misc.IntervalSet;
|
|
import org.antlr.misc.Utils;
|
|
import org.antlr.runtime.IntStream;
|
|
import org.antlr.stringtemplate.StringTemplate;
|
|
import org.antlr.tool.*;
|
|
|
|
import java.util.*;
|
|
|
|
/** A DFA (converted from a grammar's NFA).
|
|
* DFAs are used as prediction machine for alternative blocks in all kinds
|
|
* of recognizers (lexers, parsers, tree walkers).
|
|
*/
|
|
public class DFA {
|
|
public static final int REACHABLE_UNKNOWN = -2;
|
|
public static final int REACHABLE_BUSY = -1; // in process of computing
|
|
public static final int REACHABLE_NO = 0;
|
|
public static final int REACHABLE_YES = 1;
|
|
|
|
/** Prevent explosion of DFA states during conversion. The max number
|
|
* of states per alt in a single decision's DFA.
|
|
public static final int MAX_STATES_PER_ALT_IN_DFA = 450;
|
|
*/
|
|
|
|
/** Set to 0 to not terminate early (time in ms) */
|
|
public static int MAX_TIME_PER_DFA_CREATION = 1*1000;
|
|
|
|
/** How many edges can each DFA state have before a "special" state
|
|
* is created that uses IF expressions instead of a table?
|
|
*/
|
|
public static int MAX_STATE_TRANSITIONS_FOR_TABLE = 65534;
|
|
|
|
/** What's the start state for this DFA? */
|
|
public DFAState startState;
|
|
|
|
/** This DFA is being built for which decision? */
|
|
public int decisionNumber = 0;
|
|
|
|
/** From what NFAState did we create the DFA? */
|
|
public NFAState decisionNFAStartState;
|
|
|
|
/** The printable grammar fragment associated with this DFA */
|
|
public String description;
|
|
|
|
/** A set of all uniquely-numbered DFA states. Maps hash of DFAState
|
|
* to the actual DFAState object. We use this to detect
|
|
* existing DFA states. Map<DFAState,DFAState>. Use Map so
|
|
* we can get old state back (Set only allows you to see if it's there).
|
|
* Not used during fixed k lookahead as it's a waste to fill it with
|
|
* a dup of states array.
|
|
*/
|
|
protected Map<DFAState, DFAState> uniqueStates = new HashMap<DFAState, DFAState>();
|
|
|
|
/** Maps the state number to the actual DFAState. Use a Vector as it
|
|
* grows automatically when I set the ith element. This contains all
|
|
* states, but the states are not unique. s3 might be same as s1 so
|
|
* s3 -> s1 in this table. This is how cycles occur. If fixed k,
|
|
* then these states will all be unique as states[i] always points
|
|
* at state i when no cycles exist.
|
|
*
|
|
* This is managed in parallel with uniqueStates and simply provides
|
|
* a way to go from state number to DFAState rather than via a
|
|
* hash lookup.
|
|
*/
|
|
protected Vector<DFAState> states = new Vector<DFAState>();
|
|
|
|
/** Unique state numbers per DFA */
|
|
protected int stateCounter = 0;
|
|
|
|
/** count only new states not states that were rejected as already present */
|
|
protected int numberOfStates = 0;
|
|
|
|
/** User specified max fixed lookahead. If 0, nothing specified. -1
|
|
* implies we have not looked at the options table yet to set k.
|
|
*/
|
|
protected int user_k = -1;
|
|
|
|
/** While building the DFA, track max lookahead depth if not cyclic */
|
|
protected int max_k = -1;
|
|
|
|
/** Is this DFA reduced? I.e., can all states lead to an accept state? */
|
|
protected boolean reduced = true;
|
|
|
|
/** Are there any loops in this DFA?
|
|
* Computed by doesStateReachAcceptState()
|
|
*/
|
|
protected boolean cyclic = false;
|
|
|
|
/** Track whether this DFA has at least one sem/syn pred encountered
|
|
* during a closure operation. This is useful for deciding whether
|
|
* to retry a non-LL(*) with k=1. If no pred, it will not work w/o
|
|
* a pred so don't bother. It would just give another error message.
|
|
*/
|
|
public boolean predicateVisible = false;
|
|
|
|
public boolean hasPredicateBlockedByAction = false;
|
|
|
|
/** Each alt in an NFA derived from a grammar must have a DFA state that
|
|
* predicts it lest the parser not know what to do. Nondeterminisms can
|
|
* lead to this situation (assuming no semantic predicates can resolve
|
|
* the problem) and when for some reason, I cannot compute the lookahead
|
|
* (which might arise from an error in the algorithm or from
|
|
* left-recursion etc...). This list starts out with all alts contained
|
|
* and then in method doesStateReachAcceptState() I remove the alts I
|
|
* know to be uniquely predicted.
|
|
*/
|
|
protected List<Integer> unreachableAlts;
|
|
|
|
protected int nAlts = 0;
|
|
|
|
/** We only want one accept state per predicted alt; track here */
|
|
protected DFAState[] altToAcceptState;
|
|
|
|
/** Track whether an alt discovers recursion for each alt during
|
|
* NFA to DFA conversion; >1 alt with recursion implies nonregular.
|
|
*/
|
|
public IntSet recursiveAltSet = new IntervalSet();
|
|
|
|
/** Which NFA are we converting (well, which piece of the NFA)? */
|
|
public NFA nfa;
|
|
|
|
protected NFAToDFAConverter nfaConverter;
|
|
|
|
/** This probe tells you a lot about a decision and is useful even
|
|
* when there is no error such as when a syntactic nondeterminism
|
|
* is solved via semantic predicates. Perhaps a GUI would want
|
|
* the ability to show that.
|
|
*/
|
|
public DecisionProbe probe = new DecisionProbe(this);
|
|
|
|
/** Track absolute time of the conversion so we can have a failsafe:
|
|
* if it takes too long, then terminate. Assume bugs are in the
|
|
* analysis engine.
|
|
*/
|
|
protected long conversionStartTime;
|
|
|
|
/** Map an edge transition table to a unique set number; ordered so
|
|
* we can push into the output template as an ordered list of sets
|
|
* and then ref them from within the transition[][] table. Like this
|
|
* for C# target:
|
|
* public static readonly DFA30_transition0 =
|
|
* new short[] { 46, 46, -1, 46, 46, -1, -1, -1, -1, -1, -1, -1,...};
|
|
* public static readonly DFA30_transition1 =
|
|
* new short[] { 21 };
|
|
* public static readonly short[][] DFA30_transition = {
|
|
* DFA30_transition0,
|
|
* DFA30_transition0,
|
|
* DFA30_transition1,
|
|
* ...
|
|
* };
|
|
*/
|
|
public Map edgeTransitionClassMap = new LinkedHashMap();
|
|
|
|
/** The unique edge transition class number; every time we see a new
|
|
* set of edges emanating from a state, we number it so we can reuse
|
|
* if it's ever seen again for another state. For Java grammar,
|
|
* some of the big edge transition tables are seen about 57 times.
|
|
*/
|
|
protected int edgeTransitionClass =0;
|
|
|
|
/* This DFA can be converted to a transition[state][char] table and
|
|
* the following tables are filled by createStateTables upon request.
|
|
* These are injected into the templates for code generation.
|
|
* See March 25, 2006 entry for description:
|
|
* http://www.antlr.org/blog/antlr3/codegen.tml
|
|
* Often using Vector as can't set ith position in a List and have
|
|
* it extend list size; bizarre.
|
|
*/
|
|
|
|
/** List of special DFAState objects */
|
|
public List specialStates;
|
|
/** List of ST for special states. */
|
|
public List specialStateSTs;
|
|
public Vector accept;
|
|
public Vector eot;
|
|
public Vector eof;
|
|
public Vector min;
|
|
public Vector max;
|
|
public Vector special;
|
|
public Vector transition;
|
|
/** just the Vector<Integer> indicating which unique edge table is at
|
|
* position i.
|
|
*/
|
|
public Vector transitionEdgeTables; // not used by java yet
|
|
protected int uniqueCompressedSpecialStateNum = 0;
|
|
|
|
/** Which generator to use if we're building state tables */
|
|
protected CodeGenerator generator = null;
|
|
|
|
protected DFA() {;}
|
|
|
|
public DFA(int decisionNumber, NFAState decisionStartState) {
|
|
this.decisionNumber = decisionNumber;
|
|
this.decisionNFAStartState = decisionStartState;
|
|
nfa = decisionStartState.nfa;
|
|
nAlts = nfa.grammar.getNumberOfAltsForDecisionNFA(decisionStartState);
|
|
//setOptions( nfa.grammar.getDecisionOptions(getDecisionNumber()) );
|
|
initAltRelatedInfo();
|
|
|
|
//long start = System.currentTimeMillis();
|
|
nfaConverter = new NFAToDFAConverter(this);
|
|
try {
|
|
nfaConverter.convert();
|
|
|
|
// figure out if there are problems with decision
|
|
verify();
|
|
|
|
if ( !probe.isDeterministic() || probe.analysisOverflowed() ) {
|
|
probe.issueWarnings();
|
|
}
|
|
|
|
// must be after verify as it computes cyclic, needed by this routine
|
|
// should be after warnings because early termination or something
|
|
// will not allow the reset to operate properly in some cases.
|
|
resetStateNumbersToBeContiguous();
|
|
|
|
//long stop = System.currentTimeMillis();
|
|
//System.out.println("verify cost: "+(int)(stop-start)+" ms");
|
|
}
|
|
catch (AnalysisTimeoutException at) {
|
|
probe.reportAnalysisTimeout();
|
|
if ( !okToRetryDFAWithK1() ) {
|
|
probe.issueWarnings();
|
|
}
|
|
}
|
|
catch (NonLLStarDecisionException nonLL) {
|
|
probe.reportNonLLStarDecision(this);
|
|
// >1 alt recurses, k=* and no auto backtrack nor manual sem/syn
|
|
if ( !okToRetryDFAWithK1() ) {
|
|
probe.issueWarnings();
|
|
}
|
|
}
|
|
}
|
|
|
|
/** Walk all states and reset their numbers to be a contiguous sequence
|
|
* of integers starting from 0. Only cyclic DFA can have unused positions
|
|
* in states list. State i might be identical to a previous state j and
|
|
* will result in states[i] == states[j]. We don't want to waste a state
|
|
* number on this. Useful mostly for code generation in tables.
|
|
*
|
|
* At the start of this routine, states[i].stateNumber <= i by definition.
|
|
* If states[50].stateNumber is 50 then a cycle during conversion may
|
|
* try to add state 103, but we find that an identical DFA state, named
|
|
* 50, already exists, hence, states[103]==states[50] and both have
|
|
* stateNumber 50 as they point at same object. Afterwards, the set
|
|
* of state numbers from all states should represent a contiguous range
|
|
* from 0..n-1 where n is the number of unique states.
|
|
*/
|
|
public void resetStateNumbersToBeContiguous() {
|
|
if ( getUserMaxLookahead()>0 ) {
|
|
// all numbers are unique already; no states are thrown out.
|
|
return;
|
|
}
|
|
|
|
// walk list of DFAState objects by state number,
|
|
// setting state numbers to 0..n-1
|
|
int snum=0;
|
|
for (int i = 0; i <= getMaxStateNumber(); i++) {
|
|
DFAState s = getState(i);
|
|
// some states are unused after creation most commonly due to cycles
|
|
// or conflict resolution.
|
|
if ( s==null ) {
|
|
continue;
|
|
}
|
|
// state i is mapped to DFAState with state number set to i originally
|
|
// so if it's less than i, then we renumbered it already; that
|
|
// happens when states have been merged or cycles occurred I think.
|
|
// states[50] will point to DFAState with s50 in it but
|
|
// states[103] might also point at this same DFAState. Since
|
|
// 50 < 103 then it's already been renumbered as it points downwards.
|
|
boolean alreadyRenumbered = s.stateNumber<i;
|
|
if ( !alreadyRenumbered ) {
|
|
// state i is a valid state, reset it's state number
|
|
s.stateNumber = snum; // rewrite state numbers to be 0..n-1
|
|
snum++;
|
|
}
|
|
}
|
|
if ( snum!=getNumberOfStates() ) {
|
|
ErrorManager.internalError("DFA "+decisionNumber+": "+
|
|
decisionNFAStartState.getDescription()+" num unique states "+getNumberOfStates()+
|
|
"!= num renumbered states "+snum);
|
|
}
|
|
}
|
|
|
|
// JAVA-SPECIFIC Accessors!!!!! It is so impossible to get arrays
|
|
// or even consistently formatted strings acceptable to java that
|
|
// I am forced to build the individual char elements here
|
|
|
|
/** @return the run-length encoding of the {@code accept} table */
public List getJavaCompressedAccept() {
    List encodedAccept = getRunLengthEncoding(accept);
    return encodedAccept;
}
|
|
/** @return the run-length encoding of the {@code eot} table */
public List getJavaCompressedEOT() {
    List encodedEot = getRunLengthEncoding(eot);
    return encodedEot;
}
|
|
/** @return the run-length encoding of the {@code eof} table */
public List getJavaCompressedEOF() {
    List encodedEof = getRunLengthEncoding(eof);
    return encodedEof;
}
|
|
/** @return the run-length encoding of the {@code min} table */
public List getJavaCompressedMin() {
    List encodedMin = getRunLengthEncoding(min);
    return encodedMin;
}
|
|
/** @return the run-length encoding of the {@code max} table */
public List getJavaCompressedMax() {
    List encodedMax = getRunLengthEncoding(max);
    return encodedMax;
}
|
|
/** @return the run-length encoding of the {@code special} table */
public List getJavaCompressedSpecial() {
    List encodedSpecial = getRunLengthEncoding(special);
    return encodedSpecial;
}
|
|
public List getJavaCompressedTransition() {
|
|
if ( transition==null || transition.size()==0 ) {
|
|
return null;
|
|
}
|
|
List encoded = new ArrayList(transition.size());
|
|
// walk Vector<Vector<FormattedInteger>> which is the transition[][] table
|
|
for (int i = 0; i < transition.size(); i++) {
|
|
Vector transitionsForState = (Vector) transition.elementAt(i);
|
|
encoded.add(getRunLengthEncoding(transitionsForState));
|
|
}
|
|
return encoded;
|
|
}
|
|
|
|
/** Compress the incoming data list so that runs of same number are
|
|
* encoded as number,value pair sequences. 3 -1 -1 -1 28 is encoded
|
|
* as 1 3 3 -1 1 28. I am pretty sure this is the lossless compression
|
|
* that GIF files use. Transition tables are heavily compressed by
|
|
* this technique. I got the idea from JFlex http://jflex.de/
|
|
*
|
|
* Return List<String> where each string is either \xyz for 8bit char
|
|
* and \uFFFF for 16bit. Hideous and specific to Java, but it is the
|
|
* only target bad enough to need it.
|
|
*/
|
|
public List getRunLengthEncoding(List data) {
|
|
if ( data==null || data.size()==0 ) {
|
|
// for states with no transitions we want an empty string ""
|
|
// to hold its place in the transitions array.
|
|
List empty = new ArrayList();
|
|
empty.add("");
|
|
return empty;
|
|
}
|
|
int size = Math.max(2,data.size()/2);
|
|
List encoded = new ArrayList(size); // guess at size
|
|
// scan values looking for runs
|
|
int i = 0;
|
|
Integer emptyValue = Utils.integer(-1);
|
|
while ( i < data.size() ) {
|
|
Integer I = (Integer)data.get(i);
|
|
if ( I==null ) {
|
|
I = emptyValue;
|
|
}
|
|
// count how many v there are?
|
|
int n = 0;
|
|
for (int j = i; j < data.size(); j++) {
|
|
Integer v = (Integer)data.get(j);
|
|
if ( v==null ) {
|
|
v = emptyValue;
|
|
}
|
|
if ( I.equals(v) ) {
|
|
n++;
|
|
}
|
|
else {
|
|
break;
|
|
}
|
|
}
|
|
encoded.add(generator.target.encodeIntAsCharEscape((char)n));
|
|
encoded.add(generator.target.encodeIntAsCharEscape((char)I.intValue()));
|
|
i+=n;
|
|
}
|
|
return encoded;
|
|
}
|
|
|
|
public void createStateTables(CodeGenerator generator) {
|
|
//System.out.println("createTables:\n"+this);
|
|
this.generator = generator;
|
|
description = getNFADecisionStartState().getDescription();
|
|
description =
|
|
generator.target.getTargetStringLiteralFromString(description);
|
|
|
|
// create all the tables
|
|
special = new Vector(this.getNumberOfStates()); // Vector<short>
|
|
special.setSize(this.getNumberOfStates());
|
|
specialStates = new ArrayList(); // List<DFAState>
|
|
specialStateSTs = new ArrayList(); // List<ST>
|
|
accept = new Vector(this.getNumberOfStates()); // Vector<int>
|
|
accept.setSize(this.getNumberOfStates());
|
|
eot = new Vector(this.getNumberOfStates()); // Vector<int>
|
|
eot.setSize(this.getNumberOfStates());
|
|
eof = new Vector(this.getNumberOfStates()); // Vector<int>
|
|
eof.setSize(this.getNumberOfStates());
|
|
min = new Vector(this.getNumberOfStates()); // Vector<int>
|
|
min.setSize(this.getNumberOfStates());
|
|
max = new Vector(this.getNumberOfStates()); // Vector<int>
|
|
max.setSize(this.getNumberOfStates());
|
|
transition = new Vector(this.getNumberOfStates()); // Vector<Vector<int>>
|
|
transition.setSize(this.getNumberOfStates());
|
|
transitionEdgeTables = new Vector(this.getNumberOfStates()); // Vector<Vector<int>>
|
|
transitionEdgeTables.setSize(this.getNumberOfStates());
|
|
|
|
// for each state in the DFA, fill relevant tables.
|
|
Iterator it = null;
|
|
if ( getUserMaxLookahead()>0 ) {
|
|
it = states.iterator();
|
|
}
|
|
else {
|
|
it = getUniqueStates().values().iterator();
|
|
}
|
|
while ( it.hasNext() ) {
|
|
DFAState s = (DFAState)it.next();
|
|
if ( s==null ) {
|
|
// ignore null states; some acylic DFA see this condition
|
|
// when inlining DFA (due to lacking of exit branch pruning?)
|
|
continue;
|
|
}
|
|
if ( s.isAcceptState() ) {
|
|
// can't compute min,max,special,transition on accepts
|
|
accept.set(s.stateNumber,
|
|
Utils.integer(s.getUniquelyPredictedAlt()));
|
|
}
|
|
else {
|
|
createMinMaxTables(s);
|
|
createTransitionTableEntryForState(s);
|
|
createSpecialTable(s);
|
|
createEOTAndEOFTables(s);
|
|
}
|
|
}
|
|
|
|
// now that we have computed list of specialStates, gen code for 'em
|
|
for (int i = 0; i < specialStates.size(); i++) {
|
|
DFAState ss = (DFAState) specialStates.get(i);
|
|
StringTemplate stateST =
|
|
generator.generateSpecialState(ss);
|
|
specialStateSTs.add(stateST);
|
|
}
|
|
|
|
// check that the tables are not messed up by encode/decode
|
|
/*
|
|
testEncodeDecode(min);
|
|
testEncodeDecode(max);
|
|
testEncodeDecode(accept);
|
|
testEncodeDecode(special);
|
|
System.out.println("min="+min);
|
|
System.out.println("max="+max);
|
|
System.out.println("eot="+eot);
|
|
System.out.println("eof="+eof);
|
|
System.out.println("accept="+accept);
|
|
System.out.println("special="+special);
|
|
System.out.println("transition="+transition);
|
|
*/
|
|
}
|
|
|
|
/*
|
|
private void testEncodeDecode(List data) {
|
|
System.out.println("data="+data);
|
|
List encoded = getRunLengthEncoding(data);
|
|
StringBuffer buf = new StringBuffer();
|
|
for (int i = 0; i < encoded.size(); i++) {
|
|
String I = (String)encoded.get(i);
|
|
int v = 0;
|
|
if ( I.startsWith("\\u") ) {
|
|
v = Integer.parseInt(I.substring(2,I.length()), 16);
|
|
}
|
|
else {
|
|
v = Integer.parseInt(I.substring(1,I.length()), 8);
|
|
}
|
|
buf.append((char)v);
|
|
}
|
|
String encodedS = buf.toString();
|
|
short[] decoded = org.antlr.runtime.DFA.unpackEncodedString(encodedS);
|
|
//System.out.println("decoded:");
|
|
for (int i = 0; i < decoded.length; i++) {
|
|
short x = decoded[i];
|
|
if ( x!=((Integer)data.get(i)).intValue() ) {
|
|
System.err.println("problem with encoding");
|
|
}
|
|
//System.out.print(", "+x);
|
|
}
|
|
//System.out.println();
|
|
}
|
|
*/
|
|
|
|
protected void createMinMaxTables(DFAState s) {
|
|
int smin = Label.MAX_CHAR_VALUE + 1;
|
|
int smax = Label.MIN_ATOM_VALUE - 1;
|
|
for (int j = 0; j < s.getNumberOfTransitions(); j++) {
|
|
Transition edge = (Transition) s.transition(j);
|
|
Label label = edge.label;
|
|
if ( label.isAtom() ) {
|
|
if ( label.getAtom()>=Label.MIN_CHAR_VALUE ) {
|
|
if ( label.getAtom()<smin ) {
|
|
smin = label.getAtom();
|
|
}
|
|
if ( label.getAtom()>smax ) {
|
|
smax = label.getAtom();
|
|
}
|
|
}
|
|
}
|
|
else if ( label.isSet() ) {
|
|
IntervalSet labels = (IntervalSet)label.getSet();
|
|
int lmin = labels.getMinElement();
|
|
// if valid char (don't do EOF) and less than current min
|
|
if ( lmin<smin && lmin>=Label.MIN_CHAR_VALUE ) {
|
|
smin = labels.getMinElement();
|
|
}
|
|
if ( labels.getMaxElement()>smax ) {
|
|
smax = labels.getMaxElement();
|
|
}
|
|
}
|
|
}
|
|
|
|
if ( smax<0 ) {
|
|
// must be predicates or pure EOT transition; just zero out min, max
|
|
smin = Label.MIN_CHAR_VALUE;
|
|
smax = Label.MIN_CHAR_VALUE;
|
|
}
|
|
|
|
min.set(s.stateNumber, Utils.integer((char)smin));
|
|
max.set(s.stateNumber, Utils.integer((char)smax));
|
|
|
|
if ( smax<0 || smin>Label.MAX_CHAR_VALUE || smin<0 ) {
|
|
ErrorManager.internalError("messed up: min="+min+", max="+max);
|
|
}
|
|
}
|
|
|
|
protected void createTransitionTableEntryForState(DFAState s) {
|
|
/*
|
|
System.out.println("createTransitionTableEntryForState s"+s.stateNumber+
|
|
" dec "+s.dfa.decisionNumber+" cyclic="+s.dfa.isCyclic());
|
|
*/
|
|
int smax = ((Integer)max.get(s.stateNumber)).intValue();
|
|
int smin = ((Integer)min.get(s.stateNumber)).intValue();
|
|
|
|
Vector stateTransitions = new Vector(smax-smin+1);
|
|
stateTransitions.setSize(smax-smin+1);
|
|
transition.set(s.stateNumber, stateTransitions);
|
|
for (int j = 0; j < s.getNumberOfTransitions(); j++) {
|
|
Transition edge = (Transition) s.transition(j);
|
|
Label label = edge.label;
|
|
if ( label.isAtom() && label.getAtom()>=Label.MIN_CHAR_VALUE ) {
|
|
int labelIndex = label.getAtom()-smin; // offset from 0
|
|
stateTransitions.set(labelIndex,
|
|
Utils.integer(edge.target.stateNumber));
|
|
}
|
|
else if ( label.isSet() ) {
|
|
IntervalSet labels = (IntervalSet)label.getSet();
|
|
int[] atoms = labels.toArray();
|
|
for (int a = 0; a < atoms.length; a++) {
|
|
// set the transition if the label is valid (don't do EOF)
|
|
if ( atoms[a]>=Label.MIN_CHAR_VALUE ) {
|
|
int labelIndex = atoms[a]-smin; // offset from 0
|
|
stateTransitions.set(labelIndex,
|
|
Utils.integer(edge.target.stateNumber));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
// track unique state transition tables so we can reuse
|
|
Integer edgeClass = (Integer)edgeTransitionClassMap.get(stateTransitions);
|
|
if ( edgeClass!=null ) {
|
|
//System.out.println("we've seen this array before; size="+stateTransitions.size());
|
|
transitionEdgeTables.set(s.stateNumber, edgeClass);
|
|
}
|
|
else {
|
|
edgeClass = Utils.integer(edgeTransitionClass);
|
|
transitionEdgeTables.set(s.stateNumber, edgeClass);
|
|
edgeTransitionClassMap.put(stateTransitions, edgeClass);
|
|
edgeTransitionClass++;
|
|
}
|
|
}
|
|
|
|
/** Set up the EOT and EOF tables; we cannot put -1 min/max values so
|
|
* we need another way to test that in the DFA transition function.
|
|
*/
|
|
protected void createEOTAndEOFTables(DFAState s) {
|
|
for (int j = 0; j < s.getNumberOfTransitions(); j++) {
|
|
Transition edge = (Transition) s.transition(j);
|
|
Label label = edge.label;
|
|
if ( label.isAtom() ) {
|
|
if ( label.getAtom()==Label.EOT ) {
|
|
// eot[s] points to accept state
|
|
eot.set(s.stateNumber, Utils.integer(edge.target.stateNumber));
|
|
}
|
|
else if ( label.getAtom()==Label.EOF ) {
|
|
// eof[s] points to accept state
|
|
eof.set(s.stateNumber, Utils.integer(edge.target.stateNumber));
|
|
}
|
|
}
|
|
else if ( label.isSet() ) {
|
|
IntervalSet labels = (IntervalSet)label.getSet();
|
|
int[] atoms = labels.toArray();
|
|
for (int a = 0; a < atoms.length; a++) {
|
|
if ( atoms[a]==Label.EOT ) {
|
|
// eot[s] points to accept state
|
|
eot.set(s.stateNumber, Utils.integer(edge.target.stateNumber));
|
|
}
|
|
else if ( atoms[a]==Label.EOF ) {
|
|
eof.set(s.stateNumber, Utils.integer(edge.target.stateNumber));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
protected void createSpecialTable(DFAState s) {
|
|
// number all special states from 0...n-1 instead of their usual numbers
|
|
boolean hasSemPred = false;
|
|
|
|
// TODO this code is very similar to canGenerateSwitch. Refactor to share
|
|
for (int j = 0; j < s.getNumberOfTransitions(); j++) {
|
|
Transition edge = (Transition) s.transition(j);
|
|
Label label = edge.label;
|
|
// can't do a switch if the edges have preds or are going to
|
|
// require gated predicates
|
|
if ( label.isSemanticPredicate() ||
|
|
((DFAState)edge.target).getGatedPredicatesInNFAConfigurations()!=null)
|
|
{
|
|
hasSemPred = true;
|
|
break;
|
|
}
|
|
}
|
|
// if has pred or too big for table, make it special
|
|
int smax = ((Integer)max.get(s.stateNumber)).intValue();
|
|
int smin = ((Integer)min.get(s.stateNumber)).intValue();
|
|
if ( hasSemPred || smax-smin>MAX_STATE_TRANSITIONS_FOR_TABLE ) {
|
|
special.set(s.stateNumber,
|
|
Utils.integer(uniqueCompressedSpecialStateNum));
|
|
uniqueCompressedSpecialStateNum++;
|
|
specialStates.add(s);
|
|
}
|
|
else {
|
|
special.set(s.stateNumber, Utils.integer(-1)); // not special
|
|
}
|
|
}
|
|
|
|
public int predict(IntStream input) {
|
|
Interpreter interp = new Interpreter(nfa.grammar, input);
|
|
return interp.predict(this);
|
|
}
|
|
|
|
/** Add a new DFA state to this DFA if not already present.
|
|
* To force an acyclic, fixed maximum depth DFA, just always
|
|
* return the incoming state. By not reusing old states,
|
|
* no cycles can be created. If we're doing fixed k lookahead
|
|
* don't updated uniqueStates, just return incoming state, which
|
|
* indicates it's a new state.
|
|
*/
|
|
protected DFAState addState(DFAState d) {
|
|
if ( getUserMaxLookahead()>0 ) {
|
|
return d;
|
|
}
|
|
// does a DFA state exist already with everything the same
|
|
// except its state number?
|
|
DFAState existing = (DFAState)uniqueStates.get(d);
|
|
if ( existing != null ) {
|
|
/*
|
|
System.out.println("state "+d.stateNumber+" exists as state "+
|
|
existing.stateNumber);
|
|
*/
|
|
// already there...get the existing DFA state
|
|
return existing;
|
|
}
|
|
|
|
// if not there, then add new state.
|
|
uniqueStates.put(d,d);
|
|
numberOfStates++;
|
|
return d;
|
|
}
|
|
|
|
public void removeState(DFAState d) {
|
|
DFAState it = (DFAState)uniqueStates.remove(d);
|
|
if ( it!=null ) {
|
|
numberOfStates--;
|
|
}
|
|
}
|
|
|
|
public Map<DFAState, DFAState> getUniqueStates() {
|
|
return uniqueStates;
|
|
}
|
|
|
|
/** What is the max state number ever created? This may be beyond
|
|
* getNumberOfStates().
|
|
*/
|
|
public int getMaxStateNumber() {
|
|
return states.size()-1;
|
|
}
|
|
|
|
public DFAState getState(int stateNumber) {
|
|
return (DFAState)states.get(stateNumber);
|
|
}
|
|
|
|
public void setState(int stateNumber, DFAState d) {
|
|
states.set(stateNumber, d);
|
|
}
|
|
|
|
/** Is the DFA reduced? I.e., does every state have a path to an accept
|
|
* state? If not, don't delete as we need to generate an error indicating
|
|
* which paths are "dead ends". Also tracks list of alts with no accept
|
|
* state in the DFA. Must call verify() first before this makes sense.
|
|
*/
|
|
public boolean isReduced() {
|
|
return reduced;
|
|
}
|
|
|
|
/** Is this DFA cyclic? That is, are there any loops? If not, then
|
|
* the DFA is essentially an LL(k) predictor for some fixed, max k value.
|
|
* We can build a series of nested IF statements to match this. In the
|
|
* presence of cycles, we need to build a general DFA and interpret it
|
|
* to distinguish between alternatives.
|
|
*/
|
|
public boolean isCyclic() {
|
|
return cyclic && getUserMaxLookahead()==0;
|
|
}
|
|
|
|
public boolean canInlineDecision() {
|
|
return !isCyclic() &&
|
|
!probe.isNonLLStarDecision() &&
|
|
getNumberOfStates() < CodeGenerator.MAX_ACYCLIC_DFA_STATES_INLINE;
|
|
}
|
|
|
|
/** Is this DFA derived from the NFA for the Tokens rule? */
|
|
public boolean isTokensRuleDecision() {
|
|
if ( nfa.grammar.type!=Grammar.LEXER ) {
|
|
return false;
|
|
}
|
|
NFAState nfaStart = getNFADecisionStartState();
|
|
Rule r = nfa.grammar.getLocallyDefinedRule(Grammar.ARTIFICIAL_TOKENS_RULENAME);
|
|
NFAState TokensRuleStart = r.startState;
|
|
NFAState TokensDecisionStart =
|
|
(NFAState)TokensRuleStart.transition[0].target;
|
|
return nfaStart == TokensDecisionStart;
|
|
}
|
|
|
|
/** The user may specify a max, acyclic lookahead for any decision. No
|
|
* DFA cycles are created when this value, k, is greater than 0.
|
|
* If this decision has no k lookahead specified, then try the grammar.
|
|
*/
|
|
public int getUserMaxLookahead() {
|
|
if ( user_k>=0 ) { // cache for speed
|
|
return user_k;
|
|
}
|
|
user_k = nfa.grammar.getUserMaxLookahead(decisionNumber);
|
|
return user_k;
|
|
}
|
|
|
|
public boolean getAutoBacktrackMode() {
|
|
return nfa.grammar.getAutoBacktrackMode(decisionNumber);
|
|
}
|
|
|
|
public void setUserMaxLookahead(int k) {
|
|
this.user_k = k;
|
|
}
|
|
|
|
/** Return k if decision is LL(k) for some k else return max int */
|
|
public int getMaxLookaheadDepth() {
|
|
if ( isCyclic() ) {
|
|
return Integer.MAX_VALUE;
|
|
}
|
|
return max_k;
|
|
}
|
|
|
|
/** Return a list of Integer alt numbers for which no lookahead could
|
|
* be computed or for which no single DFA accept state predicts those
|
|
* alts. Must call verify() first before this makes sense.
|
|
*/
|
|
public List<Integer> getUnreachableAlts() {
|
|
return unreachableAlts;
|
|
}
|
|
|
|
/** Once this DFA has been built, need to verify that:
|
|
*
|
|
* 1. it's reduced
|
|
* 2. all alts have an accept state
|
|
*
|
|
* Elsewhere, in the NFA converter, we need to verify that:
|
|
*
|
|
* 3. alts i and j have disjoint lookahead if no sem preds
|
|
* 4. if sem preds, nondeterministic alts must be sufficiently covered
|
|
*
|
|
* This is avoided if analysis bails out for any reason.
|
|
*/
|
|
public void verify() {
|
|
doesStateReachAcceptState(startState);
|
|
}
|
|
|
|
/** figure out if this state eventually reaches an accept state and
|
|
* modify the instance variable 'reduced' to indicate if we find
|
|
* at least one state that cannot reach an accept state. This implies
|
|
* that the overall DFA is not reduced. This algorithm should be
|
|
* linear in the number of DFA states.
|
|
*
|
|
* The algorithm also tracks which alternatives have no accept state,
|
|
* indicating a nondeterminism.
|
|
*
|
|
* Also computes whether the DFA is cyclic.
|
|
*
|
|
* TODO: I call getUniquelyPredicatedAlt too much; cache predicted alt
|
|
*/
|
|
protected boolean doesStateReachAcceptState(DFAState d) {
|
|
if ( d.isAcceptState() ) {
|
|
// accept states have no edges emanating from them so we can return
|
|
d.setAcceptStateReachable(REACHABLE_YES);
|
|
// this alt is uniquely predicted, remove from nondeterministic list
|
|
int predicts = d.getUniquelyPredictedAlt();
|
|
unreachableAlts.remove(Utils.integer(predicts));
|
|
return true;
|
|
}
|
|
|
|
// avoid infinite loops
|
|
d.setAcceptStateReachable(REACHABLE_BUSY);
|
|
|
|
boolean anEdgeReachesAcceptState = false;
|
|
// Visit every transition, track if at least one edge reaches stop state
|
|
// Cannot terminate when we know this state reaches stop state since
|
|
// all transitions must be traversed to set status of each DFA state.
|
|
for (int i=0; i<d.getNumberOfTransitions(); i++) {
|
|
Transition t = d.transition(i);
|
|
DFAState edgeTarget = (DFAState)t.target;
|
|
int targetStatus = edgeTarget.getAcceptStateReachable();
|
|
if ( targetStatus==REACHABLE_BUSY ) { // avoid cycles; they say nothing
|
|
cyclic = true;
|
|
continue;
|
|
}
|
|
if ( targetStatus==REACHABLE_YES ) { // avoid unnecessary work
|
|
anEdgeReachesAcceptState = true;
|
|
continue;
|
|
}
|
|
if ( targetStatus==REACHABLE_NO ) { // avoid unnecessary work
|
|
continue;
|
|
}
|
|
// target must be REACHABLE_UNKNOWN (i.e., unvisited)
|
|
if ( doesStateReachAcceptState(edgeTarget) ) {
|
|
anEdgeReachesAcceptState = true;
|
|
// have to keep looking so don't break loop
|
|
// must cover all states even if we find a path for this state
|
|
}
|
|
}
|
|
if ( anEdgeReachesAcceptState ) {
|
|
d.setAcceptStateReachable(REACHABLE_YES);
|
|
}
|
|
else {
|
|
d.setAcceptStateReachable(REACHABLE_NO);
|
|
reduced = false;
|
|
}
|
|
return anEdgeReachesAcceptState;
|
|
}
|
|
|
|
/** Walk all accept states and find the manually-specified synpreds.
|
|
* Gated preds are not always hoisted
|
|
* I used to do this in the code generator, but that is too late.
|
|
* This converter tries to avoid computing DFA for decisions in
|
|
* syntactic predicates that are not ever used such as those
|
|
* created by autobacktrack mode.
|
|
*/
|
|
public void findAllGatedSynPredsUsedInDFAAcceptStates() {
|
|
int nAlts = getNumberOfAlts();
|
|
for (int i=1; i<=nAlts; i++) {
|
|
DFAState a = getAcceptState(i);
|
|
//System.out.println("alt "+i+": "+a);
|
|
if ( a!=null ) {
|
|
Set synpreds = a.getGatedSyntacticPredicatesInNFAConfigurations();
|
|
if ( synpreds!=null ) {
|
|
// add all the predicates we find (should be just one, right?)
|
|
for (Iterator it = synpreds.iterator(); it.hasNext();) {
|
|
SemanticContext semctx = (SemanticContext) it.next();
|
|
// System.out.println("synpreds: "+semctx);
|
|
nfa.grammar.synPredUsedInDFA(this, semctx);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
public NFAState getNFADecisionStartState() {
|
|
return decisionNFAStartState;
|
|
}
|
|
|
|
public DFAState getAcceptState(int alt) {
|
|
return altToAcceptState[alt];
|
|
}
|
|
|
|
public void setAcceptState(int alt, DFAState acceptState) {
|
|
altToAcceptState[alt] = acceptState;
|
|
}
|
|
|
|
public String getDescription() {
|
|
return description;
|
|
}
|
|
|
|
public int getDecisionNumber() {
|
|
return decisionNFAStartState.getDecisionNumber();
|
|
}
|
|
|
|
/** If this DFA failed to finish during construction, we might be
|
|
* able to retry with k=1 but we need to know whether it will
|
|
* potentially succeed. Can only succeed if there is a predicate
|
|
* to resolve the issue. Don't try if k=1 already as it would
|
|
* cycle forever. Timeout can retry with k=1 even if no predicate
|
|
* if k!=1.
|
|
*/
|
|
public boolean okToRetryDFAWithK1() {
|
|
boolean nonLLStarOrOverflowAndPredicateVisible =
|
|
(probe.isNonLLStarDecision()||probe.analysisOverflowed()) &&
|
|
predicateVisible; // auto backtrack or manual sem/syn
|
|
return getUserMaxLookahead()!=1 &&
|
|
(analysisTimedOut() || nonLLStarOrOverflowAndPredicateVisible);
|
|
}
|
|
|
|
public String getReasonForFailure() {
|
|
StringBuffer buf = new StringBuffer();
|
|
if ( probe.isNonLLStarDecision() ) {
|
|
buf.append("non-LL(*)");
|
|
if ( predicateVisible ) {
|
|
buf.append(" && predicate visible");
|
|
}
|
|
}
|
|
if ( probe.analysisOverflowed() ) {
|
|
buf.append("recursion overflow");
|
|
if ( predicateVisible ) {
|
|
buf.append(" && predicate visible");
|
|
}
|
|
}
|
|
if ( analysisTimedOut() ) {
|
|
if ( buf.length()>0 ) {
|
|
buf.append(" && ");
|
|
}
|
|
buf.append("timed out (>");
|
|
buf.append(DFA.MAX_TIME_PER_DFA_CREATION);
|
|
buf.append("ms)");
|
|
}
|
|
buf.append("\n");
|
|
return buf.toString();
|
|
}
|
|
|
|
/** What GrammarAST node (derived from the grammar) is this DFA
|
|
* associated with? It will point to the start of a block or
|
|
* the loop back of a (...)+ block etc...
|
|
*/
|
|
public GrammarAST getDecisionASTNode() {
|
|
return decisionNFAStartState.associatedASTNode;
|
|
}
|
|
|
|
public boolean isGreedy() {
|
|
GrammarAST blockAST = nfa.grammar.getDecisionBlockAST(decisionNumber);
|
|
Object v = nfa.grammar.getBlockOption(blockAST,"greedy");
|
|
if ( v!=null && v.equals("false") ) {
|
|
return false;
|
|
}
|
|
return true;
|
|
|
|
}
|
|
|
|
public DFAState newState() {
|
|
DFAState n = new DFAState(this);
|
|
n.stateNumber = stateCounter;
|
|
stateCounter++;
|
|
states.setSize(n.stateNumber+1);
|
|
states.set(n.stateNumber, n); // track state num to state
|
|
return n;
|
|
}
|
|
|
|
public int getNumberOfStates() {
|
|
if ( getUserMaxLookahead()>0 ) {
|
|
// if using fixed lookahead then uniqueSets not set
|
|
return states.size();
|
|
}
|
|
return numberOfStates;
|
|
}
|
|
|
|
public int getNumberOfAlts() {
|
|
return nAlts;
|
|
}
|
|
|
|
public boolean analysisTimedOut() {
|
|
return probe.analysisTimedOut();
|
|
}
|
|
|
|
protected void initAltRelatedInfo() {
|
|
unreachableAlts = new LinkedList();
|
|
for (int i = 1; i <= nAlts; i++) {
|
|
unreachableAlts.add(Utils.integer(i));
|
|
}
|
|
altToAcceptState = new DFAState[nAlts+1];
|
|
}
|
|
|
|
public String toString() {
|
|
FASerializer serializer = new FASerializer(nfa.grammar);
|
|
if ( startState==null ) {
|
|
return "";
|
|
}
|
|
return serializer.serialize(startState, false);
|
|
}
|
|
|
|
/** EOT (end of token) is a label that indicates when the DFA conversion
|
|
* algorithm would "fall off the end of a lexer rule". It normally
|
|
* means the default clause. So for ('a'..'z')+ you would see a DFA
|
|
* with a state that has a..z and EOT emanating from it. a..z would
|
|
* jump to a state predicting alt 1 and EOT would jump to a state
|
|
* predicting alt 2 (the exit loop branch). EOT implies anything other
|
|
* than a..z. If for some reason, the set is "all char" such as with
|
|
* the wildcard '.', then EOT cannot match anything. For example,
|
|
*
|
|
* BLOCK : '{' (.)* '}'
|
|
*
|
|
* consumes all char until EOF when greedy=true. When all edges are
|
|
* combined for the DFA state after matching '}', you will find that
|
|
* it is all char. The EOT transition has nothing to match and is
|
|
* unreachable. The findNewDFAStatesAndAddDFATransitions() method
|
|
* must know to ignore the EOT, so we simply remove it from the
|
|
* reachable labels. Later analysis will find that the exit branch
|
|
* is not predicted by anything. For greedy=false, we leave only
|
|
* the EOT label indicating that the DFA should stop immediately
|
|
* and predict the exit branch. The reachable labels are often a
|
|
* set of disjoint values like: [<EOT>, 42, {0..41, 43..65534}]
|
|
* due to DFA conversion so must construct a pure set to see if
|
|
* it is same as Label.ALLCHAR.
|
|
*
|
|
* Only do this for Lexers.
|
|
*
|
|
* If EOT coexists with ALLCHAR:
|
|
* 1. If not greedy, modify the labels parameter to be EOT
|
|
* 2. If greedy, remove EOT from the labels set
|
|
protected boolean reachableLabelsEOTCoexistsWithAllChar(OrderedHashSet labels)
|
|
{
|
|
Label eot = new Label(Label.EOT);
|
|
if ( !labels.containsKey(eot) ) {
|
|
return false;
|
|
}
|
|
System.out.println("### contains EOT");
|
|
boolean containsAllChar = false;
|
|
IntervalSet completeVocab = new IntervalSet();
|
|
int n = labels.size();
|
|
for (int i=0; i<n; i++) {
|
|
Label rl = (Label)labels.get(i);
|
|
if ( !rl.equals(eot) ) {
|
|
completeVocab.addAll(rl.getSet());
|
|
}
|
|
}
|
|
System.out.println("completeVocab="+completeVocab);
|
|
if ( completeVocab.equals(Label.ALLCHAR) ) {
|
|
System.out.println("all char");
|
|
containsAllChar = true;
|
|
}
|
|
return containsAllChar;
|
|
}
|
|
*/
|
|
}
|
|
|