@@ -11,8 +11,10 @@ export type ParseResponse = any;
1111export class ParseService {
1212 private http = inject ( HttpClient ) ;
1313
14+ // Sink for triggering parse-and-prove requests to the backend.
1415 public submit$ = new Subject < ParseInput > ( ) ;
1516
17+ // Raw responses from the backend.
1618 public parse$ = this . submit$ . pipe (
1719 switchMap ( ( form ) =>
1820 this . http . post < ParseResponse > ( "/api/problem/parse" , form ) . pipe (
@@ -25,10 +27,19 @@ export class ParseService {
2527 share ( )
2628 ) ;
2729
30+ // Proofs from the response in a format that is easier to handle than the
31+ // stringly NLTK.Tree serialization.
2832 public proofs$ = this . parse$ . pipe ( map ( extractProofs ) ) ;
2933}
3034
35+ /**
36+ * Given a `{data: {ccg_trees, proofs}}` response from the backend, tease out
37+ * the `proofs` and apply the `nltk2tableau` transformation to each proof
38+ * within.
39+ */
3140function extractProofs ( response : ParseResponse ) {
41+ // This is basically just `_.mapObject(response.data.proofs, nltk2tableau)`,
42+ // but we don't depend on Underscore yet.
3243 const { entailment, contradiction} = response . data . proofs ;
3344 return {
3445 entailment : nltk2tableau ( entailment ) ,
@@ -40,21 +51,65 @@ const emptyTableau = (): any => ({nodes: []});
4051const pairNodes = ( other : any [ ] ) =>
4152 ( node : any , index : number ) => [ node , other [ index ] ] ;
4253
54+ /**
55+ * Given a single proof in serialized NLTK.Tree format, return the same proof in
56+ * a more convenient format where each node has explicitly labeled `id`, `rule`,
57+ * `mod`, `head`, `args` and `sign`.
58+ *
59+ * This function can handle proof trees of arbitrary depth without causing a
60+ * stack overflow.
61+ */
4362export function nltk2tableau ( nltk : any ) {
4463 const tree = emptyTableau ( ) ;
64+ // We will be using trampolining instead of recursion. A general but
65+ // admittedly not very helpful description can be found in the first bullet
66+ // of
67+ // https://en.wikipedia.org/wiki/Trampoline_(computing)#High-level_programming.
68+ // The gist is this: instead of recursing, functions return pieces of work
69+ // ("thunks") that should be executed next. The trampoline, which in this
70+ // case is the current function, takes care of executing the thunks.
71+
72+ // The list of thunks to execute. One will be added for each tree node, one
73+ // will be taken off on each iteration. Since we are always calling
74+ // `nltkNode2tableauNode`, we leave this implicit in the thunk
75+ // representation. It consists only of the arguments that we are passing to
76+ // the function.
4577 const todo = [ [ nltk , tree ] ] ;
78+ // The core of the trampoline. Process one thunk at a time. We know we are
79+ // done when the list is empty.
4680 while ( todo . length ) {
81+ // The algorithm is written such that the order in which we process
82+ // child nodes does not matter, so we can simply pop thunks off the back
83+ // of the todo list. tsc is apparently unable to infer that `todo` is
84+ // guaranteed to be nonempty on the next line, hence the silencing
85+ // comment.
4786 // @ts -ignore
4887 let task : [ any , any ] = todo . pop ( ) ;
88+ // Processing a thunk might produce zero or more new thunks, depending
89+ // on the number of children of the processed node.
4990 let newTasks = nltkNode2tableauNode ( ...task ) ;
5091 todo . push ( ...newTasks ) ;
5192 }
5293 return tree ;
5394}
5495
96+ /**
97+ * Internal function that maps a single serialized NLTK.Tree node to an
98+ * explicitly structured node inside `tree`. This function relies on an external
99+ * trampoline (in `nltk2tableau`) and returns thunks instead of recursing into
100+ * child nodes.
101+ */
55102function nltkNode2tableauNode ( nltk : any , tree : any ) {
103+ // `nltk` is in a strictly nested format: each node contains its children.
104+ // Our own format does not follow this rule: chains of nodes with single
105+ // children are stored as adjacent elements in an array. `tree` is the
106+ // object that contains this array (`tree.nodes`). When there is a
107+ // bifurcation, the subtrees are stored inside `tree` rather than in the
108+ // bifurcating node.
56109 const node : any = { } ;
57110 tree . nodes . push ( node ) ;
111+ // Tease out the different parts of the stringified node payload. The
112+ // following lines will be rewritten using regular expressions.
58113 const lines = nltk . node . split ( '\n' ) ;
59114 if ( lines [ 0 ] === 'Closed' ) {
60115 node . rule = lines [ 1 ] ;
@@ -82,13 +137,27 @@ function nltkNode2tableauNode(nltk: any, tree: any) {
82137 } else if ( lines [ 0 ] !== 'Model' ) {
83138 node . head = nltk . node ;
84139 }
140+ // With the node itself having been decoded, now comes the part where a
141+ // "normal" function would recurse and where we return follow-up thunks
142+ // instead.
85143 if ( ! nltk . children || ! nltk . children . length ) {
86144 node . end = true ;
145+ // Leaf node. No recursion, no thunks.
87146 return [ ] ;
88147 }
89148 if ( nltk . children . length === 1 ) {
149+ // Node with one child, the most common case. The child will be appended
150+ // to `tree.nodes` after the current node.
90151 return [ [ nltk . children [ 0 ] , tree ] ] ;
91152 }
153+ // Multiple children. Most likely two, but the code would transparently
154+ // support larger numbers of children. We create empty subtrees so we can
155+ // pass those in the thunks as containers for the child nodes. The fact that
156+ // the subtrees have already been created, is what makes it possible to
157+ // process the children in arbitrary order.
92158 tree . subtrees = nltk . children . map ( emptyTableau ) ;
159+ // We essentially do a `_.zip(nltk.children, tree.subtrees)` in order to
160+ // create the thunks. I could have fumbled with `Iterator.zip` instead, but
161+ // that feels like more hassle and the polyfill would be huge.
93162 return nltk . children . map ( pairNodes ( tree . subtrees ) ) ;
94163}
0 commit comments