|
| 1 | +package cn.alumik.parsetree.lexer; |
| 2 | + |
| 3 | +import cn.alumik.parsetree.exception.AnalysisException; |
| 4 | +import cn.alumik.parsetree.exception.ParsingException; |
| 5 | +import cn.alumik.parsetree.lexer.fsm.FSMState; |
| 6 | +import cn.alumik.parsetree.lexer.fsm.NFA; |
| 7 | +import cn.alumik.parsetree.parser.Grammar; |
| 8 | +import cn.alumik.parsetree.parser.Transition; |
| 9 | +import cn.alumik.parsetree.symbol.*; |
| 10 | + |
| 11 | +import java.util.*; |
| 12 | + |
| 13 | +abstract class AbstractRegex { |
| 14 | + |
| 15 | + private final String mAcceptingRule; |
| 16 | + |
| 17 | + private final String mRegex; |
| 18 | + |
| 19 | + protected final Lexer mLexer; |
| 20 | + |
| 21 | + private NFA mNfa; |
| 22 | + |
| 23 | + private Grammar mGrammar; |
| 24 | + |
| 25 | + public AbstractRegex(String acceptingRule, String regex, Lexer lexer) throws ParsingException, AnalysisException { |
| 26 | + initGrammar(); |
| 27 | + mAcceptingRule = acceptingRule; |
| 28 | + mRegex = regex; |
| 29 | + mLexer = lexer; |
| 30 | + initNFA(); |
| 31 | + } |
| 32 | + |
| 33 | + public String getRegex() { |
| 34 | + return mRegex; |
| 35 | + } |
| 36 | + |
| 37 | + public NFA getNFA() { |
| 38 | + return mNfa; |
| 39 | + } |
| 40 | + |
| 41 | + public Grammar getGrammar() { |
| 42 | + return mGrammar; |
| 43 | + } |
| 44 | + |
| 45 | + public void setGrammar(Grammar grammar) { |
| 46 | + mGrammar = grammar; |
| 47 | + } |
| 48 | + |
| 49 | + private void initNFA() throws ParsingException, AnalysisException { |
| 50 | + final Map<Integer, Map<AbstractSymbol, Transition>> parseTable = mGrammar.getParseTable().getTable(); |
| 51 | + final List<Symbol> symbols = getSymbols(); |
| 52 | + final AbstractSymbol startSymbol = mGrammar.getStartSymbol(); |
| 53 | + |
| 54 | + final Stack<Integer> stateStack = new Stack<>(); |
| 55 | + final Stack<Symbol> symbolStack = new Stack<>(); |
| 56 | + final Stack<NFA> nfaStack = new Stack<>(); |
| 57 | + |
| 58 | + stateStack.push(0); |
| 59 | + int index = 0; |
| 60 | + |
| 61 | + while (index != symbols.size() - 1 || !symbols.get(index).getAbstractSymbol().equals(startSymbol)) { |
| 62 | + final int currentState = stateStack.peek(); |
| 63 | + final Symbol currentSymbol = index < symbols.size() ? symbols.get(index) |
| 64 | + : new TerminalSymbol(mGrammar.getSymbolPool().getTerminalSymbol(AbstractTerminalSymbol.END)); |
| 65 | + final Transition transition = parseTable.get(currentState).get(currentSymbol.getAbstractSymbol()); |
| 66 | + |
| 67 | + if (transition == null) { |
| 68 | + throw new ParsingException( |
| 69 | + String.format(ParsingException.PARSING_ERROR, index + 1, currentSymbol), null); |
| 70 | + } else { |
| 71 | + switch (transition.getAction()) { |
| 72 | + case Transition.SHIFT: |
| 73 | + nfaStack.push(new NFA(mLexer)); |
| 74 | + symbolStack.push(currentSymbol); |
| 75 | + case Transition.GOTO: |
| 76 | + stateStack.push(transition.getNextState()); |
| 77 | + index++; |
| 78 | + break; |
| 79 | + case Transition.REDUCE: |
| 80 | + final RegexProduction regexProduction = (RegexProduction) transition.getReduceProduction(); |
| 81 | + final List<NFA> nodes = new ArrayList<>(); |
| 82 | + final List<Symbol> children = new ArrayList<>(); |
| 83 | + for (final AbstractSymbol ignored : regexProduction.to()) { |
| 84 | + stateStack.pop(); |
| 85 | + nodes.add(nfaStack.pop()); |
| 86 | + children.add(symbolStack.pop()); |
| 87 | + } |
| 88 | + Collections.reverse(nodes); |
| 89 | + Collections.reverse(children); |
| 90 | + nfaStack.push(regexProduction.getNFA(nodes, children)); |
| 91 | + final Symbol newSymbol = |
| 92 | + new NonterminalSymbol((AbstractNonterminalSymbol) regexProduction.from()); |
| 93 | + symbolStack.push(newSymbol); |
| 94 | + index--; |
| 95 | + symbols.set(index, newSymbol); |
| 96 | + } |
| 97 | + } |
| 98 | + } |
| 99 | + if (nfaStack.size() != 1) { |
| 100 | + throw new ParsingException(String.format(ParsingException.PARSING_NOT_COMPLETE, nfaStack), null); |
| 101 | + } else { |
| 102 | + mNfa = nfaStack.pop(); |
| 103 | + for (final FSMState state : mNfa.getFinalStates()) { |
| 104 | + state.addAcceptingRule(mAcceptingRule); |
| 105 | + } |
| 106 | + } |
| 107 | + } |
| 108 | + |
| 109 | + protected abstract void initGrammar() throws AnalysisException; |
| 110 | + |
| 111 | + protected abstract List<Symbol> getSymbols() throws ParsingException, AnalysisException; |
| 112 | +} |
0 commit comments