Skip to content

Commit 1675317

Browse files
akoclaude
andcommitted
Implement Phase 3: BSON XPath source type extraction via raw scanning
The page parser only reads metadata, not widget trees, so the original Phase 3 approach (adding SDK types for CustomWidgetXPathSource etc.) wouldn't work for reading. Instead, scan raw page BSON recursively for XPathConstraint fields at any nesting depth.

Changes:
- sdk/mpr/reader_types.go: add ListRawUnitsByType() for raw BSON access
- mdl/catalog/builder_xpath.go: replace widget-tree-based extraction with scanBSONForXPath(), which recursively walks raw BSON documents (pages and snippets) to find all XPathConstraint fields, including those in CustomWidgetXPathSource, ListViewXPathSource, and SelectorXPathSource BSON types
- Handles both map[string]any and bson.D formats at all nesting levels
- Extracts entity context from EntityRef.QualifiedName in source nodes

This completes the catalog extraction for all 3 XPath BSON source types (57 occurrences across test projects) that were previously unreachable.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 74f0425 commit 1675317

4 files changed

Lines changed: 192 additions & 129 deletions

File tree

docs/11-proposals/xpath-gaps-proposal.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
**Date:** 2026-03-19
44
**Branch:** xpath
5-
**Status:** Phase 1-2, 4 Implemented (Phase 3 pending)
5+
**Status:** All phases implemented
66

77
## 1. Background
88

mdl/catalog/builder_xpath.go

Lines changed: 123 additions & 128 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,9 @@ import (
88
"fmt"
99
"strings"
1010

11+
"go.mongodb.org/mongo-driver/bson"
12+
1113
"github.com/mendixlabs/mxcli/sdk/microflows"
12-
"github.com/mendixlabs/mxcli/sdk/pages"
1314
)
1415

1516
// XPath usage types
@@ -101,22 +102,34 @@ func (b *Builder) buildXPathExpressions() error {
101102
}
102103
}
103104

104-
// 3. Extract from page/widget data sources
105-
pageList, err := b.cachedPages()
106-
if err == nil {
107-
for _, pg := range pageList {
108-
moduleID := b.hierarchy.findModuleID(pg.ContainerID)
105+
// 3. Extract from page/snippet data sources by scanning raw BSON
106+
// The page parser only reads metadata, not the widget tree, so we scan the
107+
// raw BSON documents for XPathConstraint fields at any nesting depth.
108+
for _, typePrefix := range []string{"Forms$Page", "Pages$Page", "Forms$Snippet", "Pages$Snippet"} {
109+
rawPages, err := b.reader.ListRawUnitsByType(typePrefix)
110+
if err != nil {
111+
continue
112+
}
113+
for _, ru := range rawPages {
114+
moduleID := b.hierarchy.findModuleID(ru.ContainerID)
109115
moduleName := b.hierarchy.getModuleName(moduleID)
110-
sourceQN := moduleName + "." + pg.Name
111116

112-
if pg.LayoutCall != nil {
113-
for _, arg := range pg.LayoutCall.Arguments {
114-
if arg.Widget != nil {
115-
count += b.extractWidgetXPath(stmt, arg.Widget, "PAGE", string(pg.ID), sourceQN, moduleName,
116-
projectID, projectName, snapshotID, snapshotDate, snapshotSource, sourceID, sourceBranch, sourceRevision)
117-
}
118-
}
117+
// Parse just the Name field from the BSON
118+
var raw map[string]any
119+
if err := bson.Unmarshal(ru.Contents, &raw); err != nil {
120+
continue
119121
}
122+
name, _ := raw["Name"].(string)
123+
sourceQN := moduleName + "." + name
124+
125+
docType := "PAGE"
126+
if strings.Contains(typePrefix, "Snippet") {
127+
docType = "SNIPPET"
128+
}
129+
130+
// Scan the full BSON tree for XPathConstraint fields
131+
count += scanBSONForXPath(stmt, raw, docType, string(ru.ID), sourceQN, moduleName,
132+
projectID, projectName, snapshotID, snapshotDate, snapshotSource, sourceID, sourceBranch, sourceRevision)
120133
}
121134
}
122135

@@ -175,152 +188,134 @@ func (b *Builder) extractMicroflowXPath(stmt *sql.Stmt,
175188
return count
176189
}
177190

178-
// extractWidgetXPath recursively extracts XPath constraints from page widgets.
179-
func (b *Builder) extractWidgetXPath(stmt *sql.Stmt,
180-
w pages.Widget, docType, docID, docQN, moduleName,
191+
// scanBSONForXPath recursively scans a BSON map for XPathConstraint fields
192+
// and inserts records into the xpath_expressions table.
193+
// This works on raw BSON data, avoiding the need for a full widget tree parser.
194+
func scanBSONForXPath(stmt *sql.Stmt, raw map[string]any,
195+
docType, docID, docQN, moduleName,
181196
projectID, projectName, snapshotID, snapshotDate, snapshotSource, sourceID, sourceBranch, sourceRevision string) int {
182197

183-
if w == nil {
184-
return 0
185-
}
186-
187198
count := 0
188199

189-
// Helper to recurse into children
190-
recurse := func(widgets []pages.Widget) int {
191-
n := 0
192-
for _, child := range widgets {
193-
n += b.extractWidgetXPath(stmt, child, docType, docID, docQN, moduleName,
194-
projectID, projectName, snapshotID, snapshotDate, snapshotSource, sourceID, sourceBranch, sourceRevision)
195-
}
196-
return n
200+
// Check if this node has an XPathConstraint
201+
xpath := extractString(raw["XPathConstraint"])
202+
if xpath == "" {
203+
xpath = extractString(raw["XpathConstraint"])
197204
}
198-
199-
// Extract XPath from DatabaseSource
200-
extractFromDS := func(ds pages.DataSource, widgetName string) {
201-
if ds == nil {
202-
return
203-
}
204-
dbSrc, ok := ds.(*pages.DatabaseSource)
205-
if !ok || dbSrc.XPathConstraint == "" {
206-
return
207-
}
208-
209-
entityQN := dbSrc.EntityName
210-
if entityQN == "" && dbSrc.EntityID != "" {
211-
entityQN = b.resolveEntityID(dbSrc.EntityID)
205+
if xpath != "" {
206+
bsonType, _ := raw["$Type"].(string)
207+
bsonID := extractBsonIDString(raw["$ID"])
208+
entityQN := resolveEntityRefFromBSON(raw)
209+
210+
componentType := "WIDGET"
211+
if strings.Contains(bsonType, "AccessRule") {
212+
componentType = "ACCESS_RULE"
212213
}
213214

214-
id := xpathID(docID, string(dbSrc.ID), dbSrc.XPathConstraint)
215-
isParam := boolToInt(containsVariable(dbSrc.XPathConstraint))
216-
refs := extractReferencedEntities(dbSrc.XPathConstraint)
215+
id := xpathID(docID, bsonID, xpath)
216+
isParam := boolToInt(containsVariable(xpath))
217+
refs := extractReferencedEntities(xpath)
217218

218219
stmt.Exec(id, docType, docID, docQN,
219-
"WIDGET", string(dbSrc.ID), widgetName,
220-
dbSrc.XPathConstraint, entityQN, refs,
220+
componentType, bsonID, bsonType,
221+
xpath, entityQN, refs,
221222
isParam, XPathUsageDatasource, moduleName,
222223
projectID, projectName, snapshotID, snapshotDate,
223224
snapshotSource, sourceID, sourceBranch, sourceRevision)
224225
count++
225226
}
226227

227-
switch widget := w.(type) {
228-
case *pages.DataView:
229-
extractFromDS(widget.DataSource, widget.Name)
230-
count += recurse(widget.Widgets)
231-
count += recurse(widget.FooterWidgets)
232-
233-
case *pages.ListView:
234-
extractFromDS(widget.DataSource, widget.Name)
235-
count += recurse(widget.Widgets)
236-
237-
case *pages.DataGrid:
238-
extractFromDS(widget.DataSource, widget.Name)
239-
count += recurse(widget.ControlBarWidgets)
240-
241-
case *pages.TemplateGrid:
242-
extractFromDS(widget.DataSource, widget.Name)
243-
count += recurse(widget.Widgets)
244-
count += recurse(widget.ControlBarWidgets)
245-
246-
case *pages.Gallery:
247-
extractFromDS(widget.DataSource, widget.Name)
248-
if widget.ContentWidget != nil {
249-
count += b.extractWidgetXPath(stmt, widget.ContentWidget, docType, docID, docQN, moduleName,
228+
// Recurse into all nested maps and arrays
229+
for _, v := range raw {
230+
switch val := v.(type) {
231+
case map[string]any:
232+
count += scanBSONForXPath(stmt, val, docType, docID, docQN, moduleName,
250233
projectID, projectName, snapshotID, snapshotDate, snapshotSource, sourceID, sourceBranch, sourceRevision)
251-
}
252-
count += recurse(widget.FilterWidgets)
253-
254-
case *pages.Container:
255-
count += recurse(widget.Widgets)
256-
257-
case *pages.LayoutGrid:
258-
for _, row := range widget.Rows {
259-
for _, col := range row.Columns {
260-
count += recurse(col.Widgets)
234+
case bson.D:
235+
m := make(map[string]any)
236+
for _, elem := range val {
237+
m[elem.Key] = elem.Value
261238
}
262-
}
263-
264-
case *pages.CustomWidget:
265-
if widget.WidgetObject != nil {
266-
count += b.extractWidgetObjectXPath(stmt, widget.WidgetObject, docType, docID, docQN, moduleName,
239+
count += scanBSONForXPath(stmt, m, docType, docID, docQN, moduleName,
267240
projectID, projectName, snapshotID, snapshotDate, snapshotSource, sourceID, sourceBranch, sourceRevision)
241+
default:
242+
scanBSONArray(v, func(child map[string]any) {
243+
count += scanBSONForXPath(stmt, child, docType, docID, docQN, moduleName,
244+
projectID, projectName, snapshotID, snapshotDate, snapshotSource, sourceID, sourceBranch, sourceRevision)
245+
})
268246
}
269247
}
270248

271249
return count
272250
}
273251

274-
// extractWidgetObjectXPath extracts XPath from pluggable widget property objects.
275-
func (b *Builder) extractWidgetObjectXPath(stmt *sql.Stmt,
276-
obj *pages.WidgetObject, docType, docID, docQN, moduleName,
277-
projectID, projectName, snapshotID, snapshotDate, snapshotSource, sourceID, sourceBranch, sourceRevision string) int {
278-
279-
if obj == nil {
280-
return 0
281-
}
282-
283-
count := 0
284-
for _, prop := range obj.Properties {
285-
if prop.Value == nil {
286-
continue
252+
// scanBSONArray iterates array values, calling fn for each map element.
253+
func scanBSONArray(v any, fn func(map[string]any)) {
254+
switch arr := v.(type) {
255+
case bson.A:
256+
for _, item := range arr {
257+
switch m := item.(type) {
258+
case map[string]any:
259+
fn(m)
260+
case bson.D:
261+
mapped := make(map[string]any)
262+
for _, elem := range m {
263+
mapped[elem.Key] = elem.Value
264+
}
265+
fn(mapped)
266+
}
287267
}
288-
val := prop.Value
289-
290-
// Check datasource-typed properties
291-
if val.DataSource != nil {
292-
dbSrc, ok := val.DataSource.(*pages.DatabaseSource)
293-
if ok && dbSrc.XPathConstraint != "" {
294-
entityQN := dbSrc.EntityName
295-
if entityQN == "" && dbSrc.EntityID != "" {
296-
entityQN = b.resolveEntityID(dbSrc.EntityID)
268+
case []any:
269+
for _, item := range arr {
270+
switch m := item.(type) {
271+
case map[string]any:
272+
fn(m)
273+
case bson.D:
274+
mapped := make(map[string]any)
275+
for _, elem := range m {
276+
mapped[elem.Key] = elem.Value
297277
}
298-
299-
id := xpathID(docID, string(dbSrc.ID), dbSrc.XPathConstraint)
300-
isParam := boolToInt(containsVariable(dbSrc.XPathConstraint))
301-
refs := extractReferencedEntities(dbSrc.XPathConstraint)
302-
303-
stmt.Exec(id, docType, docID, docQN,
304-
"WIDGET", string(dbSrc.ID), "",
305-
dbSrc.XPathConstraint, entityQN, refs,
306-
isParam, XPathUsageDatasource, moduleName,
307-
projectID, projectName, snapshotID, snapshotDate,
308-
snapshotSource, sourceID, sourceBranch, sourceRevision)
309-
count++
278+
fn(mapped)
310279
}
311280
}
281+
}
282+
}
312283

313-
// Recurse into nested widget objects
314-
for _, child := range val.Objects {
315-
count += b.extractWidgetObjectXPath(stmt, child, docType, docID, docQN, moduleName,
316-
projectID, projectName, snapshotID, snapshotDate, snapshotSource, sourceID, sourceBranch, sourceRevision)
284+
// resolveEntityRefFromBSON extracts a qualified entity name from a BSON node
285+
// that has an EntityRef field (common in data source nodes).
286+
func resolveEntityRefFromBSON(raw map[string]any) string {
287+
// Try EntityRef (used by most data sources)
288+
if entityRef, ok := raw["EntityRef"].(map[string]any); ok {
289+
if name, ok := entityRef["QualifiedName"].(string); ok {
290+
return name
317291
}
318-
for _, child := range val.Widgets {
319-
count += b.extractWidgetXPath(stmt, child, docType, docID, docQN, moduleName,
320-
projectID, projectName, snapshotID, snapshotDate, snapshotSource, sourceID, sourceBranch, sourceRevision)
292+
}
293+
// Try bson.D format
294+
if entityRef, ok := raw["EntityRef"].(bson.D); ok {
295+
for _, elem := range entityRef {
296+
if elem.Key == "QualifiedName" {
297+
if name, ok := elem.Value.(string); ok {
298+
return name
299+
}
300+
}
321301
}
322302
}
323-
return count
303+
return ""
304+
}
305+
306+
// extractBsonIDString renders a decoded BSON "$ID" value as a string.
//
// The conversion depends on the dynamic type of v:
//   - nil yields the empty string,
//   - a string is returned unchanged,
//   - a []byte is rendered as lowercase hexadecimal,
//   - anything else is rendered with fmt's default %v formatting.
//
// NOTE(review): binary IDs that the decoder wraps in a type other than
// []byte take the %v fallback, which may not be a useful identifier —
// confirm which type the reader actually produces for "$ID".
func extractBsonIDString(v any) string {
	switch typed := v.(type) {
	case nil:
		return ""
	case string:
		return typed
	case []byte:
		return fmt.Sprintf("%x", typed)
	}
	return fmt.Sprintf("%v", v)
}
325320

326321
// xpathID generates a deterministic ID from the document, component, and expression.

mdl/catalog/builder_xpath_test.go

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,41 @@ func TestContainsVariable(t *testing.T) {
8080
}
8181
}
8282

83+
func TestResolveEntityRefFromBSON(t *testing.T) {
84+
tests := []struct {
85+
name string
86+
raw map[string]any
87+
want string
88+
}{
89+
{
90+
"with EntityRef map",
91+
map[string]any{
92+
"EntityRef": map[string]any{"QualifiedName": "Module.Entity"},
93+
},
94+
"Module.Entity",
95+
},
96+
{
97+
"no EntityRef",
98+
map[string]any{"Name": "test"},
99+
"",
100+
},
101+
{
102+
"empty EntityRef",
103+
map[string]any{"EntityRef": map[string]any{}},
104+
"",
105+
},
106+
}
107+
108+
for _, tt := range tests {
109+
t.Run(tt.name, func(t *testing.T) {
110+
got := resolveEntityRefFromBSON(tt.raw)
111+
if got != tt.want {
112+
t.Errorf("resolveEntityRefFromBSON() = %q, want %q", got, tt.want)
113+
}
114+
})
115+
}
116+
}
117+
83118
func TestXpathID(t *testing.T) {
84119
// Deterministic: same input -> same output
85120
id1 := xpathID("doc1", "comp1", "[Active = true]")

sdk/mpr/reader_types.go

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,39 @@ type UnitInfo struct {
268268
Type string
269269
}
270270

271+
// RawUnit holds raw unit data with BSON contents.
// It is the element type returned by ListRawUnitsByType.
type RawUnit struct {
	// ID is the identifier of the unit.
	ID model.ID
	// ContainerID is the identifier of the unit's container.
	ContainerID model.ID
	// Type is the unit's type string, copied from the unit listing.
	Type string
	// Contents is the unit's resolved content bytes (the raw BSON document).
	Contents []byte
}
278+
279+
// ListRawUnitsByType returns all raw units matching the given type prefix,
280+
// including their BSON contents. This is useful for scanning BSON directly
281+
// without full parsing.
282+
func (r *Reader) ListRawUnitsByType(typePrefix string) ([]*RawUnit, error) {
283+
units, err := r.listUnitsByType(typePrefix)
284+
if err != nil {
285+
return nil, err
286+
}
287+
288+
var result []*RawUnit
289+
for _, u := range units {
290+
contents, err := r.resolveContents(u.ID, u.Contents)
291+
if err != nil {
292+
continue
293+
}
294+
result = append(result, &RawUnit{
295+
ID: model.ID(u.ID),
296+
ContainerID: model.ID(u.ContainerID),
297+
Type: u.Type,
298+
Contents: contents,
299+
})
300+
}
301+
return result, nil
302+
}
303+
271304
// ListUnits returns all units with their IDs and types.
272305
func (r *Reader) ListUnits() ([]*UnitInfo, error) {
273306
units, err := r.listUnitsByType("")

0 commit comments

Comments
 (0)