Skip to content

Commit 36b1110

Browse files
XanderVertegaal and Copilot committed
Expand extractRule and add unit tests
Co-authored-by: Copilot <copilot@github.com>
1 parent 80ee176 commit 36b1110

2 files changed

Lines changed: 58 additions & 15 deletions

File tree

frontend/src/app/annotate/annotation-parse-results/parse-tree-table/parse-tree-table.component.spec.ts

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import { ComponentFixture, TestBed } from '@angular/core/testing';
22

3-
import { ParseTreeTableComponent } from './parse-tree-table.component';
3+
import { ParseTreeTableComponent, extractRule } from './parse-tree-table.component';
44
import { TreeWithType } from '../annotation-parse-results.component';
55

66
const mockTree: TreeWithType = {
@@ -35,3 +35,46 @@ describe('ParseTreeTableComponent', () => {
3535
expect(component).toBeTruthy();
3636
});
3737
});
38+
39+
describe('extractRule', () => {
40+
describe('standard format', () => {
41+
it('should extract rule and content from standard format', () => {
42+
const result = extractRule('fa[s:ng-np]');
43+
expect(result).toEqual({ rule: 'fa', content: 's:ng-np' });
44+
});
45+
46+
it('should extract rule with complex content', () => {
47+
const result = extractRule('fa[(s:dcl\\np)/np]');
48+
expect(result).toEqual({ rule: 'fa', content: '(s:dcl\\np)/np' });
49+
});
50+
});
51+
52+
describe('trivial @ rule', () => {
53+
it('should return empty rule for @ symbol', () => {
54+
const result = extractRule('@[np:nb]');
55+
expect(result).toEqual({ rule: '', content: 'np:nb' });
56+
});
57+
});
58+
59+
describe('handle extra brackets', () => {
60+
it('should strip extra opening brackets from content', () => {
61+
const result = extractRule('fa[s:[ng-np]');
62+
expect(result).toEqual({ rule: 'fa', content: 's:ng-np' });
63+
});
64+
65+
it('should strip extra closing brackets from content', () => {
66+
const result = extractRule('fa[s:ng]-np]');
67+
expect(result).toEqual({ rule: 'fa', content: 's:ng-np' });
68+
});
69+
70+
it('should strip multiple extra brackets from content', () => {
71+
const result = extractRule('fa[s:[ng]-[np]]');
72+
expect(result).toEqual({ rule: 'fa', content: 's:ng-np' });
73+
});
74+
75+
it('should strip all internal brackets', () => {
76+
const result = extractRule('ba[[[s:dcl]]]');
77+
expect(result).toEqual({ rule: 'ba', content: 's:dcl' });
78+
});
79+
});
80+
});

frontend/src/app/annotate/annotation-parse-results/parse-tree-table/parse-tree-table.component.ts

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -72,33 +72,33 @@ function buildUnaryNode(node: UnaryNode): TreeNodeDisplay {
7272
/**
7373
* Parses a node string to extract the rule and the content.
7474
*
75-
* A node string is usually of the form "A(B)", where a is the rule applied
75+
* A node string is usually of the form "A[B]", where A is the rule applied
7676
* and B is the resulting category. The rule is everything before
77-
* the first parenthesis. Everything within it is the content. For example,
78-
* in "fa(s:ng-np)", "fa" is the rule and "s:ng-np" is the content.
77+
* the first bracket. Everything within it is the content. For example,
78+
* in "fa[s:ng-np]", "fa" is the rule and "s:ng-np" is the content.
7979
*
80-
* Due to a bug in the CCG parser, sometimes the node string can have
81-
* multiple layers of parentheses, e.g. fa(((s:ng-np)-(s:ng-np))).
82-
* function only strips off the first.
80+
* Any further brackets inside the content are stripped out.
81+
*
82+
* The rule symbolised by '@' is trivial and all too common, so it is returned as an empty string.
8383
*
8484
*/
85-
function extractRule(nodeString: string): { rule: string, content: string; } {
86-
const firstParen = nodeString.indexOf('(');
87-
const lastParen = nodeString.lastIndexOf(')');
85+
export function extractRule(nodeString: string): { rule: string, content: string; } {
86+
const firstBracket = nodeString.indexOf('[');
87+
const lastBracket = nodeString.lastIndexOf(']');
8888

8989
// Return a fallback value if the string is not what we expect.
90-
if (firstParen === -1 || lastParen === -1 || lastParen < firstParen) {
90+
if (firstBracket === -1 || lastBracket === -1 || lastBracket < firstBracket) {
9191
return {
9292
rule: "",
9393
content: nodeString
9494
};
9595
}
9696

97-
const rule = nodeString.slice(0, firstParen);
98-
// Strip off any remaining parentheses due to the CCG parser bug.
99-
const content = nodeString.slice(firstParen + 1, lastParen).replaceAll('(', '').replaceAll(')', '');
97+
const rule = nodeString.slice(0, firstBracket);
98+
// Strip off any remaining brackets.
99+
const content = nodeString.slice(firstBracket + 1, lastBracket).replaceAll('[', '').replaceAll(']', '');
100100

101-
return { rule, content };
101+
return { rule: rule === '@' ? "" : rule, content };
102102
}
103103

104104
@Component({

0 commit comments

Comments
 (0)