Skip to content

Commit 59d42b9

Browse files
author
Maarten
committed
Allow user to pass scan probability multiplier, resolves #3
1 parent b47b3db commit 59d42b9

File tree

4 files changed

+61
-37
lines changed

4 files changed

+61
-37
lines changed

src/earley/parser.ts

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -88,10 +88,10 @@ export function getViterbiParseFromChart<S, T>(state: State<S, T>, chart: Chart<
8888
}
8989

9090

91-
9291
export function parseSentenceIntoChart<S, T>(Start: NonTerminal,
9392
grammar: Grammar<T, S>,
94-
tokens: T[]): [Chart<T, S>, number, State<S, T>] {
93+
tokens: T[],
94+
scanProbability?: (x: T, t: Terminal<T>[]) => S): [Chart<T, S>, number, State<S, T>] {
9595
// ScanProbability scanProbability//TODO
9696

9797
const stateSets: Chart<T, S> = new Chart(grammar);
@@ -125,7 +125,7 @@ export function parseSentenceIntoChart<S, T>(Start: NonTerminal,
125125
tokensWithWords.forEach(
126126
(token: WordWithTypes<T>) => {
127127
predict(i, grammar, stateSets);
128-
scan(i, token, grammar.probabilityMapping.semiring, stateSets);
128+
scan(i, token, grammar.probabilityMapping.semiring, stateSets, scanProbability);
129129
complete(i + 1, stateSets, grammar);
130130

131131
const completedStates: State<S, T>[] = [];
@@ -153,8 +153,13 @@ export interface ParseTreeWithScore<T> {
153153

154154
export function getViterbiParse<S, T>(Start: NonTerminal,
155155
grammar: Grammar<T, S>,
156-
tokens: T[]): ParseTreeWithScore<T> {
157-
const [chart, ignored, init] = parseSentenceIntoChart(Start, grammar, tokens);
156+
tokens: T[],
157+
scanProbability?: (x: T, t: Terminal<T>[]) => S): ParseTreeWithScore<T> {
158+
const [chart, ignored, init] = parseSentenceIntoChart(Start, grammar, tokens, scanProbability);
159+
160+
if (!chart.has(init.rule, tokens.length,
161+
0,
162+
init.rule.right.length)) throw new Error("Could not parse sentence.");
158163

159164
const finalState = chart.getOrCreate(
160165
tokens.length,
@@ -163,6 +168,7 @@ export function getViterbiParse<S, T>(Start: NonTerminal,
163168
init.rule
164169
);
165170

171+
166172
const parseTree: ParseTree<T> = getViterbiParseFromChart(finalState, chart);
167173
const toProbability = grammar.probabilityMapping.toProbability;
168174
const finalScore = chart.getViterbiScore(finalState).innerScore;

src/earley/scan.ts

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import {isNonTerminal, WordWithTypes} from "../grammar/category";
1+
import {isNonTerminal, WordWithTypes, Terminal} from "../grammar/category";
22
import {Semiring} from "semiring";
33
import {Chart} from "./chart/chart";
44
import {getActiveCategory, State, advanceDot} from "./chart/state";
@@ -10,19 +10,18 @@ import {getActiveCategory, State, advanceDot} from "./chart/state";
1010
* @param tokenPosition The start index of the scan.
1111
* @param word
1212
* @param types
13-
* //@param scanProbability Function that provides the probability of scanning the given token at this position. Might be null for a probability of 1.0.
13+
* @param scanProbability Optional function that, given the scanned word and its terminal types, returns the probability of scanning that token as a semiring element. May be omitted, which is intended to mean a probability of 1.0.
1414
* @param sr
1515
* @param stateSets
1616
*/
1717
export function scan<S, T>(tokenPosition: number,
1818
{word, types}: WordWithTypes<T>,
19-
// scanProbability:(x:T)=>number,//TODO
2019
sr: Semiring<S>,
21-
stateSets: Chart<T, S>) {
20+
stateSets: Chart<T, S>,
21+
scanProbability?: (x: T, t: Terminal<T>[]) => S) {
2222
const changes: any[] = [];
23-
// TODO
24-
// const scanProb:number = !scanProbability ? NaN : scanProbability(tokenPosition);
25-
const scanProb: S = sr.multiplicativeIdentity;
23+
24+
const scanProb: S = !!scanProbability ? scanProbability(word, types) : undefined;
2625

2726
/*
2827
* Get all states that are active on a terminal

test/earley/parser.spec.ts

Lines changed: 43 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,31 @@
1-
import {NonTerminal, Terminal, Category} from "../../src/grammar/category";
1+
import {NonTerminal, Terminal} from "../../src/grammar/category";
22
import {getViterbiParse, ParseTreeWithScore, Grammar} from "../../src/index";
33

4-
import * as Mocha from 'mocha'
5-
import {expect} from 'chai';
6-
import {scan} from "../../src/earley/scan";
7-
import {LogSemiring} from "semiring";
8-
import {Chart} from "../../src/earley/chart/chart";
4+
import {expect} from "chai";
95
import {g, A} from "../sample-grammar";
106
import {parseSentenceIntoChart} from "../../src/earley/parser";
117

12-
//TODO
13-
describe('parser', () => {
8+
// TODO
9+
describe("parser", () => {
1410

1511

16-
it('should complete correctly', () => {
12+
it("should complete correctly", () => {
1713
// complete(
1814
// 0,
1915
// "e",
2016
// LogSemiring,
2117
// ss
2218
// )
2319
});
24-
it('should predict correctly', () => {
20+
it("should predict correctly", () => {
2521
// complete(
2622
// 0,
2723
// "e",
2824
// LogSemiring,
2925
// ss
3026
// )
3127
});
32-
it('should parse the man chase the man with a stick', () => {
28+
it("should parse the man chase the man with a stick", () => {
3329
const S: NonTerminal = "S";
3430
const NP: NonTerminal = "NP";
3531
const VP: NonTerminal = "VP";
@@ -47,8 +43,8 @@ describe('parser', () => {
4743
const stick: Terminal<string> = (token) => !!token.match(/stick/);
4844
const with_: Terminal<string> = (token) => !!token.match(/with/);
4945

50-
const grammar: Grammar<string,number> = Grammar.builder("test")
51-
//.setSemiring(new LogSemiring()) // If not set, defaults to Log semiring which is probably what you want
46+
const grammar: Grammar<string, number> = Grammar.builder("test")
47+
// .setSemiring(new LogSemiring()) // If not set, defaults to Log semiring which is probably what you want
5248
.addNewRule(
5349
1.0, // Probability between 0.0 and 1.0, defaults to 1.0. The builder takes care of converting it to the semiring element
5450
S, // Left hand side of the rule
@@ -88,18 +84,45 @@ describe('parser', () => {
8884
grammar,
8985
tokens
9086
);
91-
//console.log(JSON.stringify(viterbi.parseTree)); // {"category":"<start>","children":[{"category":"S","children":[{"category":"NP","children":[{"category":"Det","children":[{"token":"The","children":[ ]}]},{"category":"N","children":[{"token":"man","children":[]}]}]},{"category":"VP","children":[{"category":"TV","children":[{"token":"chased","children":[]}]},{"category":"NP","children":[{"category":"Det","children":[{"token":"the","children":[]}]},{"category":"N","children":[{"token":"man","c hildren":[]}]},{"category":"Mod","children":[{"token":"with","children":[]},{"category":"NP","children":[{"category":"Det","children":[{"token":"a", "children":[]}]},{"category":"N","children":[{"token":"stick","children":[]}]}]}]}]}]}]}]}
92-
//console.log(viterbi.probability); // 0.6
93-
//Parser.recognize(S, grammar, Tokens.tokenize("the", "stick", "chased", "the", "man"))
87+
// console.log(JSON.stringify(viterbi.parseTree)); // {"category":"<start>","children":[{"category":"S","children":[{"category":"NP","children":[{"category":"Det","children":[{"token":"The","children":[]}]},{"category":"N","children":[{"token":"man","children":[]}]}]},{"category":"VP","children":[{"category":"TV","children":[{"token":"chased","children":[]}]},{"category":"NP","children":[{"category":"Det","children":[{"token":"the","children":[]}]},{"category":"N","children":[{"token":"man","children":[]}]},{"category":"Mod","children":[{"token":"with","children":[]},{"category":"NP","children":[{"category":"Det","children":[{"token":"a","children":[]}]},{"category":"N","children":[{"token":"stick","children":[]}]}]}]}]}]}]}]}
88+
// console.log(viterbi.probability); // 0.6
89+
// Parser.recognize(S, grammar, Tokens.tokenize("the", "stick", "chased", "the", "man"))
9490
});
9591

9692

97-
it('should parse aaaaa', () => {
98-
const tokens = ["a", "a", "a", "e"];
99-
const [chart, i, init] = parseSentenceIntoChart(
93+
const tokens = ["a", "a", "a", "e"];
94+
it("should deal with scan probability correctly", () => {
95+
const p1 = getViterbiParse(
10096
A,
10197
g,
102-
tokens
98+
tokens,
99+
(ignore, ignored) => {
100+
return g.probabilityMapping.fromProbability(1.0);
101+
}
102+
).probability;
103+
104+
const p2 = getViterbiParse(
105+
A,
106+
g,
107+
tokens,
108+
(word, ignored) => {
109+
return word === "a" ? g.probabilityMapping.fromProbability(0.5) : undefined;
110+
}
111+
).probability;
112+
113+
const eq = p2 * 2 * 2 * 2;
114+
const epsilon = 0.0000000000000001;
115+
expect(p1).to.be.above(eq - epsilon).and.below(eq + epsilon);
116+
});
117+
118+
it("should parse aaae", () => {
119+
const [chart, ignored, init] = parseSentenceIntoChart(
120+
A,
121+
g,
122+
tokens,
123+
(word, terminalTypes) => {
124+
return g.probabilityMapping.fromProbability(1.0);
125+
}
103126
);
104127

105128
expect(chart.getCompletedStates(tokens.length).has(
@@ -108,9 +131,5 @@ it('should parse aaaaa', () => {
108131
)
109132
)).to.equal(true);
110133

111-
/*console.log(g.probabilityMapping.toProbability(
112-
chart.viterbiScores.get(chart.getOrCreate(
113-
tokens.length, 0, init.rule.right.length, init.rule
114-
)).innerScore));*/
115134
});
116135
});

test/sample-grammar.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ export const g:Grammar<string, number> = builder
2020
.addNewRule(0.5, C, [D])
2121
.addNewRule(0.5, D, [E])
2222
.addNewRule(0.5, D, [a])
23-
.addNewRule(0.5, E, [E,E])
23+
.addNewRule(0.5, E, [E, E])
2424
.addNewRule(0.5, E, [e])
2525
//.addRule(0.1, E, [C])
2626
.build();

0 commit comments

Comments
 (0)