Skip to content

Commit bbd4825

Browse files
author
Maarten
committed
Create command-line interface
1 parent b084606 commit bbd4825

File tree

8 files changed

+79
-27
lines changed

8 files changed

+79
-27
lines changed

README.md

Lines changed: 43 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -46,21 +46,59 @@ Furthermore, I needed an efficient parser that does not limit token types
4646
to strings.
4747

4848
## Usage
49-
Download [the latest JAR](https://github.com/digitalheir/java-probabilistic-earley-parser/releases/latest) or grab from Maven:
49+
You can use this project as a library in your Java application or as a standalone command-line app.
50+
51+
### Command line
52+
53+
Download [the latest JAR](https://github.com/digitalheir/java-probabilistic-earley-parser/releases/latest)
54+
55+
Create a UTF-8-encoded `.cfg` file that contains your grammar, such as the following:
56+
57+
58+
```
59+
# grammar.cfg
60+
61+
S -> NP VP (1.0) # Use '->'
62+
NP → i (0.5) # or '→'
63+
VP → eat # probability defaults to 1.0
64+
```
65+
66+
By default, the parser will assume that you distinguish non-terminals from terminals by capitalizing the non-terminals. You can also add a custom category handler if you call the API from Java code.
67+
68+
Run the executable JAR from the terminal:
69+
```
70+
java -jar probabilistic-earley-parser-jar-with-dependencies.jar -i grammar.cfg -goal S I EAT
71+
```
72+
73+
This will print the probability and the Viterbi parse of the **S**entence "I EAT":
74+
75+
```
76+
0.5
77+
└── <start>
78+
└── S
79+
├── NP
80+
│ └── i (I)
81+
└── VP
82+
└── eat (EAT)
83+
```
84+
85+
### Java library
86+
87+
Grab from Maven:
5088

5189
```xml
5290
<dependencies>
5391
<dependency>
5492
<groupId>org.leibnizcenter</groupId>
5593
<artifactId>probabilistic-earley-parser</artifactId>
56-
<version>0.9.10</version>
94+
<version>0.9.11</version>
5795
</dependency>
5896
</dependencies>
5997
```
6098

6199
or Gradle:
62100
```groovy
63-
compile 'org.leibnizcenter:probabilistic-earley-parser:0.9.10'
101+
compile 'org.leibnizcenter:probabilistic-earley-parser:0.9.11'
64102
```
65103

66104
Most applications will want to interface with the static functions in `Parser`:
@@ -129,17 +167,8 @@ public class Example {
129167
}
130168
}
131169
```
132-
133-
You can parse a text file describing your CFG.
134-
By default, the parser will assume that you distinguish non-terminals from terminals by capitalizing them. You can also add a custom category handler.
135-
136-
```
137-
# grammar.cfg
138-
139-
S -> NP VP (1.0) # Use '->'
140-
NP → i (0.5) # or '→'
141-
VP → eat # probability defaults to 1.0
142-
```
170+
171+
You can parse `.cfg` files as follows:
143172

144173
```java
145174
Grammar<String> g = Grammar.parse(Paths.get("path", "to", "grammar.cfg"), Charset.forName("UTF-8"));

pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
<groupId>org.leibnizcenter</groupId>
88
<artifactId>probabilistic-earley-parser</artifactId>
9-
<version>0.9.10</version>
9+
<version>0.9.11</version>
1010
<packaging>jar</packaging>
1111

1212
<licenses>

src/main/java/org/leibnizcenter/cfg/category/terminal/stringterminal/CaseInsenstiveStringTerminal.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,11 @@ public boolean equals(Object o) {
4141
return true;
4242
}
4343

44+
@Override
45+
public String toString() {
46+
return string;
47+
}
48+
4449
@Override
4550
public int hashCode() {
4651
int result = string.hashCode();

src/main/java/org/leibnizcenter/cfg/earleyparser/CommandLine.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,10 @@ public class CommandLine {
3434
*/
3535
public static void main(String[] args) {
3636
HandleArguments handleArguments = new HandleArguments(args).invoke();
37-
//todo Token.of
3837
ParseTreeWithScore parse = Parser.getViterbiParseWithScore(
3938
handleArguments.getGoal(),
4039
handleArguments.getGrammar(),
41-
Stream.of(handleArguments.getTokens()).map(Token::new).collect(Collectors.toList())
40+
Stream.of(handleArguments.getTokens()).map(Token::of).collect(Collectors.toList())
4241
);
4342

4443
System.out.println(parse.score.semiring.toProbability(parse.score.getScore()));
@@ -69,6 +68,9 @@ NonTerminal getGoal() {
6968
}
7069

7170
HandleArguments invoke() {
71+
if (args.length < 2) {
72+
throw new IllegalArgumentException("No arguments specified.\n\n" + USAGE);
73+
}
7274
int lastOption = 0;
7375
final Map<String, String> options = new HashMap<>();
7476
for (int i = 0; i < args.length; i++) {
@@ -104,7 +106,6 @@ HandleArguments invoke() {
104106
throw new IllegalArgumentException("No goal category specified. \n" + USAGE);
105107
}
106108

107-
//NonTerminal.of//TODO
108109
goal = Category.nonTerminal(options.get(OPTION_GOAL));
109110
if (!grammar.getNonTerminals().contains(goal)) {
110111
throw new IllegalArgumentException("Grammar does not contains non-terminal \"" + goal + "\". \n" + USAGE);

src/main/java/org/leibnizcenter/cfg/earleyparser/ParseTree.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,9 @@ public String toString() {
9696
}
9797

9898
private void toString(StringBuilder sb, String prefix, boolean isTail) {
99-
sb.append(prefix + (isTail ? "└── " : "├── ") + category.toString() + "\n");
99+
sb.append(prefix + (isTail ? "└── " : "├── ") + (
100+
category.toString() + ((this instanceof Token) ? (" (" + ((Token) this).token + ")") : "")
101+
) + "\n");
100102
if (children != null) {
101103
for (int i = 0; i < children.size() - 1; i++) {
102104
children.get(i).toString(sb, prefix + (isTail ? " " : "│ "), false);

src/main/java/org/leibnizcenter/cfg/grammar/Grammar.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ public static Grammar<String> parse(String s, Function<String, Category> parseCa
152152
b.addRules(Arrays.stream(NEWLINE.split(s.trim()))
153153
.map(line -> TRAILING_COMMENT.matcher(line).replaceAll("").trim())
154154
.filter(line -> !line.isEmpty())
155-
.map(line -> Rule.parse(line, parseCategory, semiring)).collect(Collectors.toSet())
155+
.map(line -> Rule.fromString(line, parseCategory, semiring)).collect(Collectors.toSet())
156156
);
157157
return b.build();
158158
}
@@ -171,7 +171,7 @@ public static Grammar<String> parse(Path path, Charset charset, Function<String,
171171
final Collection<Rule> rules = Files.lines(path, charset)
172172
.map(line -> TRAILING_COMMENT.matcher(line).replaceAll("").trim())
173173
.filter(line -> !line.isEmpty())
174-
.map(line -> Rule.parse(line, parseCategory, semiring)).collect(Collectors.toSet());
174+
.map(line -> Rule.fromString(line, parseCategory, semiring)).collect(Collectors.toSet());
175175
b.addRules(rules);
176176
return b.build();
177177
}
@@ -195,7 +195,7 @@ public static Grammar<String> parse(InputStream inputStream, Charset charset, Fu
195195
while (line != null) {
196196
line = TRAILING_COMMENT.matcher(line).replaceAll("").trim();
197197
if (!line.isEmpty())
198-
rules.add(Rule.parse(line, parseCategory, semiring));
198+
rules.add(Rule.fromString(line, parseCategory, semiring));
199199
line = reader.readLine();
200200
}
201201
b.addRules(rules);

src/main/java/org/leibnizcenter/cfg/rule/Rule.java

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
* @see Grammar
3131
*/
3232
public class Rule {
33-
private static Pattern RULE = Pattern.compile("\\s*([^\\s]+)\\s*(?:->|→)((?:\\s*[^\\s]+\\s*)+)\\s*(\\([0-9](?:[0-9]+)?\\))?\\s*");
33+
private static Pattern RULE = Pattern.compile("\\s*([^\\s]+)\\s*(?:->|→)((?:\\s*[^\\s(]+\\s*)+)\\s*(?:\\(([0-9](?:[.,][0-9]+)?)\\))?\\s*");
3434
private static Pattern WHITESPACE = Pattern.compile("\\s+");
3535
public final NonTerminal left;
3636
public final Category[] right;
@@ -149,9 +149,16 @@ public static Rule create(DblSemiring semiring, double probability, NonTerminal
149149
// return (isPreterminal() && right.length == 1);
150150
// }
151151

152-
public static Rule parse(String line, Function<String, Category> parseCategory, DblSemiring semiring) {
152+
/**
153+
* @param line Of the form "S -> NP VP"
154+
* @param parseCategory how to parse category string into category
155+
* @param semiring
156+
* @return Parsed rule
157+
*/
158+
public static Rule fromString(String line, Function<String, Category> parseCategory, DblSemiring semiring) {
153159
Matcher m = RULE.matcher(line);
154-
if(!m.matches()) throw new IllegalArgumentException("String was not a valid rule: "+line);
160+
if (!m.matches())
161+
throw new IllegalArgumentException("String was not a valid rule: " + line);
155162
else{
156163
final NonTerminal LHS = new NonTerminal(m.group(1));
157164

src/test/java/org/leibnizcenter/cfg/rule/RuleTest.java

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,14 +82,22 @@ public void isUnitProduction() throws Exception {
8282
public void parse() throws Exception {
8383
assertEquals(
8484
Rule.create(LogSemiring.get(), new NonTerminal("S"), new NonTerminal("NP"), new NonTerminal("VP")),
85-
Rule.parse("S -> NP VP",
85+
Rule.fromString("S -> NP VP",
8686
s -> Character.isUpperCase(s.charAt(0)) ? new NonTerminal(s) : new CaseInsenstiveStringTerminal(s),
8787
LogSemiring.get()
8888
)
8989
);
9090
assertEquals(
9191
Rule.create(LogSemiring.get(), new NonTerminal("S"), new CaseInsenstiveStringTerminal("Np"), new CaseInsenstiveStringTerminal("Vp")),
92-
Rule.parse("S -> nP vP",
92+
Rule.fromString("S -> nP vP",
93+
s -> Character.isUpperCase(s.charAt(0)) ? new NonTerminal(s) : new CaseInsenstiveStringTerminal(s),
94+
LogSemiring.get()
95+
)
96+
);
97+
98+
assertEquals(
99+
Rule.create(LogSemiring.get(), 0.5, new NonTerminal("S"), new CaseInsenstiveStringTerminal("Np"), new CaseInsenstiveStringTerminal("Vp")),
100+
Rule.fromString("S -> nP vP(0.5)",
93101
s -> Character.isUpperCase(s.charAt(0)) ? new NonTerminal(s) : new CaseInsenstiveStringTerminal(s),
94102
LogSemiring.get()
95103
)

0 commit comments

Comments
 (0)