Skip to content

Commit 83aa8da

Browse files
committed
Document the frontend's NLTK -> tableau conversion (#79)
1 parent af7d88e commit 83aa8da

1 file changed

Lines changed: 69 additions & 0 deletions

File tree

frontend/src/app/services/parse.service.ts

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,10 @@ export type ParseResponse = any;
1111
export class ParseService {
1212
private http = inject(HttpClient);
1313

14+
// Sink for triggering parse-and-prove requests to the backend.
1415
public submit$ = new Subject<ParseInput>();
1516

17+
// Raw responses from the backend.
1618
public parse$ = this.submit$.pipe(
1719
switchMap((form) =>
1820
this.http.post<ParseResponse>("/api/problem/parse", form).pipe(
@@ -25,10 +27,19 @@ export class ParseService {
2527
share()
2628
);
2729

30+
// Proofs from the response in a format that is easier to handle than the
31+
// stringly NLTK.Tree serialization.
2832
public proofs$ = this.parse$.pipe(map(extractProofs));
2933
}
3034

35+
/**
36+
* Given a `{data: {ccg_trees, proofs}}` response from the backend, tease out
37+
* the `proofs` and apply the `nltk2tableau` transformation to each proof
38+
* within.
39+
*/
3140
function extractProofs(response: ParseResponse) {
41+
// This is basically just `_.mapObject(response.data.proofs, nltk2tableau)`,
42+
// but we don't depend on Underscore yet.
3243
const {entailment, contradiction} = response.data.proofs;
3344
return {
3445
entailment: nltk2tableau(entailment),
@@ -40,21 +51,65 @@ const emptyTableau = (): any => ({nodes: []});
4051
const pairNodes = (other: any[]) =>
4152
(node: any, index: number) => [node, other[index]];
4253

54+
/**
55+
* Given a single proof in serialized NLTK.Tree format, return the same proof in
56+
* a more convenient format where each node has explicitly labeled `id`, `rule`,
57+
* `mod`, `head`, `args` and `sign`.
58+
*
59+
* This function can handle proof trees of arbitrary depth without causing a
60+
* stack overflow.
61+
*/
4362
export function nltk2tableau(nltk: any) {
4463
const tree = emptyTableau();
64+
// We will be using trampolining instead of recursion. A general but
65+
// admittedly not very helpful description can be found in the first bullet
66+
// of
67+
// https://en.wikipedia.org/wiki/Trampoline_(computing)#High-level_programming.
68+
// The gist is this: instead of recursing, functions return pieces of work
69+
// ("thunks") that should be executed next. The trampoline, which in this
70+
// case is the current function, takes care of executing the thunks.
71+
72+
// The list of thunks to execute. One will be added for each tree node, one
73+
// will be taken off on each iteration. Since we are always calling
74+
// `nltkNode2tableauNode`, we leave this implicit in the thunk
75+
// representation. It consists only of the arguments that we are passing to
76+
// the function.
4577
const todo = [[nltk, tree]];
78+
// The core of the trampoline. Process one thunk at a time. We know we are
79+
// done when the list is empty.
4680
while (todo.length) {
81+
// The algorithm is written such that the order in which we process
82+
// child nodes does not matter, so we can simply pop thunks off the back
83+
// of the todo list. tsc is apparently unable to infer that `todo` is
84+
// guaranteed to be nonempty on the next line, hence the silencing
85+
// comment.
4786
// @ts-ignore
4887
let task: [any, any] = todo.pop();
88+
// Processing a thunk might produce zero or more new thunks, depending
89+
// on the number of children of the processed node.
4990
let newTasks = nltkNode2tableauNode(...task);
5091
todo.push(...newTasks);
5192
}
5293
return tree;
5394
}
5495

96+
/**
97+
* Internal function that maps a single serialized NLTK.Tree node to an
98+
* explicitly structured node inside `tree`. This function relies on an external
99+
* trampoline (in `nltk2tableau`) and returns thunks instead of recursing into
100+
* child nodes.
101+
*/
55102
function nltkNode2tableauNode(nltk: any, tree: any) {
103+
// `nltk` is in a strictly nested format: each node contains its children.
104+
// Our own format does not follow this rule: chains of nodes with single
105+
// children are stored as adjacent elements in an array. `tree` is the
106+
// object that contains this array (`tree.nodes`). When there is a
107+
// bifurcation, the subtrees are stored inside `tree` rather than in the
108+
// bifurcating node.
56109
const node: any = {};
57110
tree.nodes.push(node);
111+
// Tease out the different parts of the stringified node payload. The
112+
// following lines will be rewritten using regular expressions.
58113
const lines = nltk.node.split('\n');
59114
if (lines[0] === 'Closed') {
60115
node.rule = lines[1];
@@ -82,13 +137,27 @@ function nltkNode2tableauNode(nltk: any, tree: any) {
82137
} else if (lines[0] !== 'Model') {
83138
node.head = nltk.node;
84139
}
140+
// With the node itself having been decoded, now comes the part where a
141+
// "normal" function would recurse and where we return follow-up thunks
142+
// instead.
85143
if (!nltk.children || !nltk.children.length) {
86144
node.end = true;
145+
// Leaf node. No recursion, no thunks.
87146
return [];
88147
}
89148
if (nltk.children.length === 1) {
149+
// Node with one child, the most common case. The child will be appended
150+
// to `tree.nodes` after the current node.
90151
return [[nltk.children[0], tree]];
91152
}
153+
// Multiple children. Most likely two, but the code would transparently
154+
// support larger numbers of children. We create empty subtrees so we can
155+
// pass those in the thunks as containers for the child nodes. The fact that
156+
// the subtrees have already been created, is what makes it possible to
157+
// process the children in arbitrary order.
92158
tree.subtrees = nltk.children.map(emptyTableau);
159+
// We essentially do a `_.zip(nltk.children, tree.subtrees)` in order to
160+
// create the thunks. I could have fumbled with `Iterator.zip` instead, but
161+
// that feels like more hassle and the polyfill would be huge.
93162
return nltk.children.map(pairNodes(tree.subtrees));
94163
}

0 commit comments

Comments
 (0)