Skip to content
This repository was archived by the owner on Sep 11, 2022. It is now read-only.

Commit a5156ff

Browse files
committed
Implement all the functions
1 parent 81114bd commit a5156ff

32 files changed

Lines changed: 2214 additions & 4 deletions

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,4 @@ buildNumber.properties
1010
# https://github.com/takari/maven-wrapper#usage-without-binary-jar
1111
.mvn/wrapper/maven-wrapper.jar
1212
.idea/
13+
out/
Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,25 @@
11
package cn.alumik.parsetree;
22

3-
/**
4-
* Hello world!
5-
*/
3+
import cn.alumik.parsetree.lexer.Lexer;
4+
import cn.alumik.parsetree.parser.ParseTree;
5+
import cn.alumik.parsetree.parser.Parser;
6+
import cn.alumik.parsetree.util.Config;
7+
8+
import java.util.Scanner;
9+
610
public class ParseTreeApp {
711

812
public static void main(String[] args) {
9-
System.out.println("Hello World!");
13+
final Config config = new Config("config.yml");
14+
try {
15+
final Lexer lexer = new Lexer(config);
16+
final Parser parser = new Parser(config);
17+
18+
final Scanner scanner = new Scanner(System.in);
19+
final ParseTree parseTree = parser.parse(lexer.lex(scanner.next()));
20+
parseTree.draw("out/parse-tree.png");
21+
} catch (Exception e) {
22+
e.printStackTrace();
23+
}
1024
}
1125
}
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
package cn.alumik.parsetree.exception;
2+
3+
/**
 * Checked exception carrying one of the analysis error messages defined below.
 *
 * <p>The public constants are message templates; those containing {@code %s} are meant to be
 * filled in with {@link String#format(String, Object...)} before being passed to a constructor.
 */
public class AnalysisException extends Exception {

    private static final long serialVersionUID = 1L;

    /** Template with two {@code %s} placeholders: a symbol name clashes with a reserved keyword. */
    public static final String INVALID_NAME =
            "`%s` is a keyword in ParseTree for %s. Please choose another symbol name.";

    /** Template with one {@code %s} placeholder: the named terminal symbol is unknown. */
    public static final String TERMINAL_SYMBOL_NOT_EXIST =
            "Terminal symbol `%s` doesn't exist.";

    /** Template with one {@code %s} placeholder: the named nonterminal symbol is unknown. */
    public static final String NONTERMINAL_SYMBOL_NOT_EXIST =
            "Nonterminal symbol `%s` doesn't exist.";

    /** Template with one {@code %s} placeholder: the named symbol is unknown. */
    public static final String SYMBOL_NOT_EXIST =
            "Symbol `%s` doesn't exist.";

    /**
     * Message for an invalid start symbol.
     *
     * <p>NOTE(review): grammars normally require a <em>nonterminal</em> start symbol; the wording
     * is kept unchanged here — confirm against the code that raises it.
     */
    public static final String START_SYMBOL_NOT_TERMINAL =
            "The start symbol must be a terminal symbol.";

    /** Message for a production that is not split into two parts by {@code ->}. */
    public static final String ILL_FORMED_PRODUCTION =
            "A production should be divided into 2 parts by `->`, like `A -> c B d`.";

    /** Message for a production whose left part is not exactly one nonterminal symbol. */
    public static final String ILL_FORMED_PRODUCTION_LEFT =
            "There must be 1 and only 1 nonterminal symbol in the left part of a production.";

    /** Message for a production whose right part is empty. */
    public static final String ILL_FORMED_PRODUCTION_RIGHT =
            "The right part of a production must not be empty. Set it to `null` if you want to input an empty production.";

    /**
     * Creates an exception without a cause; equivalent to {@code new AnalysisException(message, null)}.
     *
     * @param message the detail message
     */
    public AnalysisException(String message) {
        this(message, null);
    }

    /**
     * Creates an exception with a detail message and an underlying cause.
     *
     * @param message the detail message
     * @param cause   the underlying cause, or {@code null} if there is none
     */
    public AnalysisException(String message, Throwable cause) {
        super(message, cause);
    }
}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
package cn.alumik.parsetree.exception;
2+
3+
/**
 * Checked exception carrying one of the lexing/parsing error messages defined below.
 *
 * <p>The public constants are message templates; those containing {@code %s} are meant to be
 * filled in with {@link String#format(String, Object...)} before being passed to a constructor.
 */
public class ParsingException extends Exception {

    private static final long serialVersionUID = 1L;

    /** Template with two {@code %s} placeholders: the position of the offending symbol and the symbol. */
    public static final String PARSING_ERROR =
            "An error occurred when parsing the %s-th symbol `%s`.";

    /** Template with one {@code %s} placeholder: the symbols left over when parsing stopped. */
    public static final String PARSING_NOT_COMPLETE =
            "The parsing process is not complete. Remaining symbols are: %s.";

    /** Message for input the lexer could not tokenize. */
    public static final String LEXICAL_ANALYSIS_FAILED =
            "Lexical analysis failed.";

    /** Message for invalid regular expressions. */
    public static final String INVALID_REGEX =
            "Some regular expressions are invalid.";

    /**
     * Creates an exception without a cause; equivalent to {@code new ParsingException(message, null)}.
     *
     * @param message the detail message
     */
    public ParsingException(String message) {
        this(message, null);
    }

    /**
     * Creates an exception with a detail message and an underlying cause.
     *
     * @param message the detail message
     * @param cause   the underlying cause, or {@code null} if there is none
     */
    public ParsingException(String message, Throwable cause) {
        super(message, cause);
    }
}
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
package cn.alumik.parsetree.lexer;
2+
3+
import cn.alumik.parsetree.exception.AnalysisException;
4+
import cn.alumik.parsetree.exception.ParsingException;
5+
import cn.alumik.parsetree.lexer.fsm.FSMState;
6+
import cn.alumik.parsetree.lexer.fsm.NFA;
7+
import cn.alumik.parsetree.parser.Grammar;
8+
import cn.alumik.parsetree.parser.Transition;
9+
import cn.alumik.parsetree.symbol.*;
10+
11+
import java.util.*;
12+
13+
abstract class AbstractRegex {
14+
15+
private final String mAcceptingRule;
16+
17+
private final String mRegex;
18+
19+
protected final Lexer mLexer;
20+
21+
private NFA mNfa;
22+
23+
private Grammar mGrammar;
24+
25+
public AbstractRegex(String acceptingRule, String regex, Lexer lexer) throws ParsingException, AnalysisException {
26+
initGrammar();
27+
mAcceptingRule = acceptingRule;
28+
mRegex = regex;
29+
mLexer = lexer;
30+
initNFA();
31+
}
32+
33+
public String getRegex() {
34+
return mRegex;
35+
}
36+
37+
public NFA getNFA() {
38+
return mNfa;
39+
}
40+
41+
public Grammar getGrammar() {
42+
return mGrammar;
43+
}
44+
45+
public void setGrammar(Grammar grammar) {
46+
mGrammar = grammar;
47+
}
48+
49+
private void initNFA() throws ParsingException, AnalysisException {
50+
final Map<Integer, Map<AbstractSymbol, Transition>> parseTable = mGrammar.getParseTable().getTable();
51+
final List<Symbol> symbols = getSymbols();
52+
final AbstractSymbol startSymbol = mGrammar.getStartSymbol();
53+
54+
final Stack<Integer> stateStack = new Stack<>();
55+
final Stack<Symbol> symbolStack = new Stack<>();
56+
final Stack<NFA> nfaStack = new Stack<>();
57+
58+
stateStack.push(0);
59+
int index = 0;
60+
61+
while (index != symbols.size() - 1 || !symbols.get(index).getAbstractSymbol().equals(startSymbol)) {
62+
final int currentState = stateStack.peek();
63+
final Symbol currentSymbol = index < symbols.size() ? symbols.get(index)
64+
: new TerminalSymbol(mGrammar.getSymbolPool().getTerminalSymbol(AbstractTerminalSymbol.END));
65+
final Transition transition = parseTable.get(currentState).get(currentSymbol.getAbstractSymbol());
66+
67+
if (transition == null) {
68+
throw new ParsingException(
69+
String.format(ParsingException.PARSING_ERROR, index + 1, currentSymbol), null);
70+
} else {
71+
switch (transition.getAction()) {
72+
case Transition.SHIFT:
73+
nfaStack.push(new NFA(mLexer));
74+
symbolStack.push(currentSymbol);
75+
case Transition.GOTO:
76+
stateStack.push(transition.getNextState());
77+
index++;
78+
break;
79+
case Transition.REDUCE:
80+
final RegexProduction regexProduction = (RegexProduction) transition.getReduceProduction();
81+
final List<NFA> nodes = new ArrayList<>();
82+
final List<Symbol> children = new ArrayList<>();
83+
for (final AbstractSymbol ignored : regexProduction.to()) {
84+
stateStack.pop();
85+
nodes.add(nfaStack.pop());
86+
children.add(symbolStack.pop());
87+
}
88+
Collections.reverse(nodes);
89+
Collections.reverse(children);
90+
nfaStack.push(regexProduction.getNFA(nodes, children));
91+
final Symbol newSymbol =
92+
new NonterminalSymbol((AbstractNonterminalSymbol) regexProduction.from());
93+
symbolStack.push(newSymbol);
94+
index--;
95+
symbols.set(index, newSymbol);
96+
}
97+
}
98+
}
99+
if (nfaStack.size() != 1) {
100+
throw new ParsingException(String.format(ParsingException.PARSING_NOT_COMPLETE, nfaStack), null);
101+
} else {
102+
mNfa = nfaStack.pop();
103+
for (final FSMState state : mNfa.getFinalStates()) {
104+
state.addAcceptingRule(mAcceptingRule);
105+
}
106+
}
107+
}
108+
109+
protected abstract void initGrammar() throws AnalysisException;
110+
111+
protected abstract List<Symbol> getSymbols() throws ParsingException, AnalysisException;
112+
}
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
package cn.alumik.parsetree.lexer;
2+
3+
import cn.alumik.parsetree.exception.AnalysisException;
4+
import cn.alumik.parsetree.exception.ParsingException;
5+
import cn.alumik.parsetree.lexer.fsm.DFA;
6+
import cn.alumik.parsetree.lexer.fsm.FSMState;
7+
import cn.alumik.parsetree.lexer.fsm.NFA;
8+
import cn.alumik.parsetree.symbol.TerminalSymbol;
9+
import cn.alumik.parsetree.util.Config;
10+
11+
import java.util.*;
12+
13+
public class Lexer {
14+
15+
private final Config mConfig;
16+
17+
private DFA mDfa;
18+
19+
private Map<String, String> mAcceptingRules = new LinkedHashMap<>();
20+
21+
private Set<String> mIgnoredSymbols = new HashSet<>();
22+
23+
public Lexer(Config config) throws AnalysisException, ParsingException {
24+
mConfig = config;
25+
initAcceptingRules();
26+
initDFA();
27+
}
28+
29+
public void setDfa(DFA dfa) {
30+
mDfa = dfa;
31+
}
32+
33+
public Map<String, String> getAcceptingRules() {
34+
return mAcceptingRules;
35+
}
36+
37+
public void setAcceptingRule(Map<String, String> acceptingRules) {
38+
mAcceptingRules = acceptingRules;
39+
}
40+
41+
public Set<String> getIgnoredSymbols() {
42+
return mIgnoredSymbols;
43+
}
44+
45+
public List<TerminalSymbol> lex(String input) throws ParsingException {
46+
final List<TerminalSymbol> lexResult = new ArrayList<>();
47+
final StringBuilder stringBuilder = new StringBuilder(input);
48+
while (stringBuilder.length() != 0) {
49+
final Map.Entry<String, Integer> result = mDfa.match(stringBuilder.toString());
50+
if (result.getKey().equals("")) {
51+
throw new ParsingException(ParsingException.LEXICAL_ANALYSIS_FAILED, null);
52+
}
53+
if (!mIgnoredSymbols.contains(result.getKey())) {
54+
final String value = stringBuilder.substring(0, result.getValue());
55+
System.out.println(result.getKey() + ": " + value);
56+
final TerminalSymbol terminalSymbol = new TerminalSymbol(value);
57+
lexResult.add(terminalSymbol);
58+
}
59+
stringBuilder.delete(0, result.getValue());
60+
}
61+
return lexResult;
62+
}
63+
64+
private void initAcceptingRules() {
65+
mAcceptingRules = mConfig.getAcceptingRules();
66+
mIgnoredSymbols = mConfig.getIgnoredSymbols();
67+
}
68+
69+
private void initDFA() throws ParsingException, AnalysisException {
70+
final List<NFA> nfas = new ArrayList<>();
71+
for (final Map.Entry<String, String> rule : mAcceptingRules.entrySet()) {
72+
final Regex regex = new Regex(rule.getKey(), rule.getValue(), this);
73+
nfas.add(regex.getNFA());
74+
}
75+
final FSMState startState = new FSMState(this);
76+
for (final NFA nfa : nfas) {
77+
startState.addTransition('\0', nfa.getStartState());
78+
}
79+
mDfa = new DFA(new NFA(startState, this));
80+
}
81+
}

0 commit comments

Comments
 (0)