@@ -14,6 +14,13 @@ interface TrimmedNode {
1414 tag : string ;
1515 attrs : Record < string , string > ;
1616 score : number ;
17+ platform : 'android' | 'ios' ;
18+ /** 1-based position among all elements with the same primary selector (only set for duplicates) */
19+ domPos ?: number ;
20+ /** The xpath attribute name used to build the positional xpath ('content-desc', 'text', 'name', 'label') */
21+ xpathAttrName ?: string ;
22+ /** The selector value used for the positional xpath */
23+ xpathKey ?: string ;
1724}
1825
1926/** Result of trimDOM — compact XML plus pre-computed element counts. */
@@ -60,6 +67,25 @@ export function trimDOM(
6067 walkIOS ( parsed , nodes ) ;
6168 }
6269
70+ // Detect duplicate elements (same primary selector in DOM order) and annotate with
71+ // positional xpath so the LLM can precisely target a specific occurrence.
72+ const keyCount = new Map < string , number > ( ) ;
73+ for ( const node of nodes ) {
74+ const sel = getPrimarySelector ( node ) ;
75+ if ( sel ) keyCount . set ( sel . key , ( keyCount . get ( sel . key ) ?? 0 ) + 1 ) ;
76+ }
77+ const keyPos = new Map < string , number > ( ) ;
78+ for ( const node of nodes ) {
79+ const sel = getPrimarySelector ( node ) ;
80+ if ( sel && ( keyCount . get ( sel . key ) ?? 0 ) > 1 ) {
81+ const pos = ( keyPos . get ( sel . key ) ?? 0 ) + 1 ;
82+ keyPos . set ( sel . key , pos ) ;
83+ node . domPos = pos ;
84+ node . xpathAttrName = sel . attrName ;
85+ node . xpathKey = sel . key ;
86+ }
87+ }
88+
6389 // Sort by relevance score and take top N
6490 nodes . sort ( ( a , b ) => b . score - a . score ) ;
6591 const top = nodes . slice ( 0 , maxElements ) ;
@@ -75,10 +101,15 @@ export function trimDOM(
75101
76102 // Build compact XML with element numbering
77103 const lines = top . map ( ( node , i ) => {
78- const attrs = Object . entries ( node . attrs )
104+ const attrs = { ...node . attrs } ;
105+ // Add positional xpath for duplicate elements so the LLM can select precisely
106+ if ( node . domPos !== undefined && node . xpathKey && node . xpathAttrName ) {
107+ attrs . xpath = `(//*[@${ node . xpathAttrName } =${ xpathString ( node . xpathKey ) } ])[${ node . domPos } ]` ;
108+ }
109+ const attrStr = Object . entries ( attrs )
79110 . map ( ( [ k , v ] ) => `${ k } ="${ escapeXml ( v ) } "` )
80111 . join ( ' ' ) ;
81- return `<${ node . tag } idx="${ i + 1 } " ${ attrs } />` ;
112+ return `<${ node . tag } idx="${ i + 1 } " ${ attrStr } />` ;
82113 } ) ;
83114
84115 return {
@@ -182,7 +213,7 @@ function walkAndroid(node: any, result: TrimmedNode[], parentContext: string = '
182213 attrs . in = parentContext ;
183214 }
184215
185- result . push ( { tag, attrs, score } ) ;
216+ result . push ( { tag, attrs, score, platform : 'android' } ) ;
186217 }
187218
188219 walkChildrenAndroid ( node , result , childContext ) ;
@@ -278,7 +309,7 @@ function walkIOS(node: any, result: TrimmedNode[], parentContext: string = ''):
278309 attrs . in = parentContext ;
279310 }
280311
281- result . push ( { tag, attrs, score } ) ;
312+ result . push ( { tag, attrs, score, platform : 'ios' } ) ;
282313 }
283314
284315 walkChildrenIOS ( node , result , childContext ) ;
@@ -309,3 +340,32 @@ function escapeXml(str: string): string {
309340 . replace ( / < / g, '<' )
310341 . replace ( / > / g, '>' ) ;
311342}
343+
/**
 * Return the primary selector for a trimmed node: the selector value (`key`)
 * and the raw Appium XML attribute name (`attrName`) that the value maps to.
 * Used to detect and annotate duplicate elements.
 *
 * Priority per platform (first non-empty trimmed attribute wins):
 *   - android: `desc` (raw `content-desc`), then `text` (raw `text`)
 *   - any other platform (ios): `name` (raw `name`), then `text` (raw `label`)
 *
 * @param node - Trimmed node whose `attrs` and `platform` are inspected.
 * @returns The selector pair, or `null` when none of the candidate attributes
 *          is present with a non-empty value.
 */
function getPrimarySelector(
  node: TrimmedNode
): { key: string; attrName: string } | null {
  // [trimmed attribute name, raw XML attribute name], highest priority first.
  const candidates: ReadonlyArray<readonly [string, string]> =
    node.platform === 'android'
      ? [
          ['desc', 'content-desc'],
          ['text', 'text'],
        ]
      : [
          ['name', 'name'],
          ['text', 'label'],
        ];

  for (const [trimmedName, attrName] of candidates) {
    const key = node.attrs[trimmedName];
    // Truthiness check on purpose: a missing or empty value is not a usable selector.
    if (key) return { key, attrName };
  }
  return null;
}
360+
/**
 * Produce a quoted XPath string literal for `value`.
 *
 * XPath 1.0 has no escape sequence for quotes inside a literal, so the
 * quoting style is chosen from the value's content:
 *   - no single quote        -> wrapped in single quotes
 *   - single but no double   -> wrapped in double quotes
 *   - both quote types       -> a `concat(...)` call whose arguments are the
 *     single-quoted segments between apostrophes, joined by `"'"` literals
 *
 * @param value - Raw attribute value to embed in an XPath expression.
 * @returns An XPath expression that evaluates to exactly `value`.
 */
function xpathString(value: string): string {
  if (!value.includes("'")) return `'${value}'`;
  if (!value.includes('"')) return `"${value}"`;

  // Both quote types present: every segment between apostrophes is free of
  // single quotes, so it can be single-quoted; the apostrophes themselves are
  // re-inserted as double-quoted literals between the segments.
  const segments = value.split("'").map((segment) => `'${segment}'`);
  return `concat(${segments.join(`, "'", `)})`;
}
0 commit comments