11import { JavaMethod , JavaClass , AccessLevel } from './types' ;
22
/**
 * A span of the source text that method extraction must ignore
 * (for example a nested class or an initializer block).
 */
interface SkipRegion {
  /** Offset of the first character of the span (inclusive). */
  start: number;
  /** Offset just past the last character of the span (exclusive). */
  end: number;
}
10+
311/**
412 * Parser for Java source code to extract methods and class structure
513 */
@@ -20,8 +28,14 @@ export class JavaParser {
2028 return null ;
2129 }
2230
23- const methods = this . extractMethods ( className ) ;
24- const { preMethodsContent, postMethodsContent } = this . extractNonMethodContent ( methods ) ;
31+ // Find the main class body boundaries
32+ const classBodyBounds = this . findMainClassBody ( className ) ;
33+ if ( ! classBodyBounds ) {
34+ return null ;
35+ }
36+
37+ const methods = this . extractMethods ( className , classBodyBounds ) ;
38+ const { preMethodsContent, postMethodsContent } = this . extractNonMethodContent ( methods , classBodyBounds ) ;
2539
2640 return {
2741 name : className ,
@@ -40,42 +54,166 @@ export class JavaParser {
4054 return match ? match [ 1 ] : null ;
4155 }
4256
/**
 * Locate the declaration and body of the main class inside `this.source`.
 *
 * Only simple class headers are recognised: optional `public`, `abstract`
 * and `final` modifiers (in that order), a single `extends Name`, and an
 * `implements` list made of plain identifiers. Generic parameters and
 * qualified super-type names are not matched by this pattern.
 *
 * @param className - Name of the class to find; it is matched literally.
 * @returns Offsets into `this.source`: `start` is where the matched
 *   declaration begins, `bodyStart` is the opening brace of the class body
 *   and `end` is the value returned by `findMatchingBrace` for that brace.
 *   Returns `null` when the declaration is not found or `findMatchingBrace`
 *   reports `-1`.
 */
private findMainClassBody(className: string): { start: number; end: number; bodyStart: number } | null {
  // Escape regex metacharacters so identifiers such as `Outer$Inner` are
  // matched as plain text instead of being interpreted as pattern syntax.
  const escapedName = className.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const classPattern = new RegExp(
    `(?:public\\s+)?(?:abstract\\s+)?(?:final\\s+)?class\\s+${escapedName}\\s*(?:extends\\s+\\w+)?(?:\\s+implements\\s+[\\w\\s,]+)?\\s*\\{`
  );

  // exec() always carries a numeric `index` on success, so no non-null
  // assertion is required.
  const match = classPattern.exec(this.source);
  if (match === null) {
    return null;
  }

  const classStart = match.index;
  // The pattern ends with the opening brace, so it is the last matched character.
  const bodyStart = classStart + match[0].length - 1;
  const bodyEnd = this.findMatchingBrace(bodyStart);
  if (bodyEnd === -1) {
    return null;
  }

  return { start: classStart, end: bodyEnd, bodyStart };
}
78+
/**
 * Collect the parts of the main class body that method extraction must
 * ignore: nested type declarations, static initializer blocks and instance
 * initializer blocks.
 *
 * Every region uses offsets into `this.source`; `start` is inclusive and
 * `end` is one past the value `findMatchingBrace` returns for the region's
 * opening brace. The result is unsorted and regions may overlap (a static
 * block inside a nested class yields two entries).
 *
 * @param classBodyBounds - Bounds of the main class; only `bodyStart`
 *   (opening brace) and `end` (end of the body) are read.
 * @returns The skip regions found between those two offsets.
 */
private findSkipRegions(classBodyBounds: { start: number; end: number; bodyStart: number }): SkipRegion[] {
  const skipRegions: SkipRegion[] = [];
  const offset = classBodyBounds.bodyStart + 1;
  const bodyContent = this.source.substring(offset, classBodyBounds.end);
  let match: RegExpExecArray | null;

  // Nested type declarations. The leading \b stops identifiers that merely
  // end in "class" from matching; nested interfaces and enums are skipped as
  // well so their members are not reported as methods of the outer class.
  const nestedTypePattern =
    /\b(?:(?:public|protected|private|static|final|abstract)\s+)*(?:class|interface|enum)\s+\w+\s*(?:extends\s+\w+)?(?:\s+implements\s+[\w\s,]+)?\s*\{/g;
  while ((match = nestedTypePattern.exec(bodyContent)) !== null) {
    const start = offset + match.index;
    const bracePos = start + match[0].length - 1;
    const end = this.findMatchingBrace(bracePos);
    if (end !== -1) {
      skipRegions.push({ start, end: end + 1 });
      // Resume after this declaration so types nested inside it are not
      // reported a second time.
      nestedTypePattern.lastIndex = end - offset + 1;
    }
  }

  // Static initializer blocks: "static {"
  const staticInitPattern = /\bstatic\s*\{/g;
  while ((match = staticInitPattern.exec(bodyContent)) !== null) {
    // Only accept "static" when nothing but whitespace (or the start of a
    // comment) precedes it on its line.
    const lineStart = bodyContent.lastIndexOf('\n', match.index - 1) + 1;
    const lineBeforeStatic = bodyContent.substring(lineStart, match.index).trim();
    if (lineBeforeStatic.length > 0 && !lineBeforeStatic.startsWith('//') && !lineBeforeStatic.startsWith('*')) {
      continue;
    }

    const start = offset + match.index;
    const bracePos = start + match[0].length - 1;
    const end = this.findMatchingBrace(bracePos);
    if (end !== -1) {
      skipRegions.push({ start, end: end + 1 });
    }
  }

  // Instance initializer blocks: a line holding only "{".
  // Such a line is also how a method, constructor or control statement opens
  // its body when the brace is placed on its own line. Treating those as
  // initializers would put the body's opening brace inside a skip region and
  // the method would be dropped. A lone brace therefore only counts when the
  // previous code line finished a member (ends in ";" or "}") or when no code
  // precedes it in the class body.
  const isCommentLine = (text: string): boolean =>
    text.startsWith('//') || text.startsWith('/*') || text.startsWith('*');
  const endsMember = (text: string): boolean =>
    [
      text,
      text.replace(/\/\/.*$/, '').trimEnd(), // ignore a trailing line comment
      text.replace(/\/\*.*?\*\/$/, '').trimEnd(), // ignore a trailing block comment
    ].some(candidate => candidate.endsWith(';') || candidate.endsWith('}'));

  let pos = 0;
  let previousCodeLine: string | null = null;
  for (const line of bodyContent.split('\n')) {
    const trimmedLine = line.trim();

    if (trimmedLine === '{' && (previousCodeLine === null || endsMember(previousCodeLine))) {
      const start = offset + pos + line.indexOf('{');
      const end = this.findMatchingBrace(start);
      // Ignore braces that already sit inside a nested type or static block.
      if (end !== -1 && !this.isInSkipRegion(start, skipRegions)) {
        skipRegions.push({ start, end: end + 1 });
      }
    }

    if (trimmedLine !== '' && !isCommentLine(trimmedLine)) {
      previousCodeLine = trimmedLine;
    }
    pos += line.length + 1; // +1 for the newline removed by split()
  }

  return skipRegions;
}
148+
/**
 * Report whether an offset lies inside at least one skip region.
 *
 * A region covers its `start` offset and everything up to, but not
 * including, its `end` offset.
 *
 * @param pos - Offset to test.
 * @param skipRegions - Regions to test against.
 * @returns `true` when some region contains `pos`, otherwise `false`.
 */
private isInSkipRegion(pos: number, skipRegions: SkipRegion[]): boolean {
  for (const { start, end } of skipRegions) {
    if (start <= pos && pos < end) {
      return true;
    }
  }
  return false;
}
155+
43156 /**
44157 * Extract all methods from the source
45158 */
46- private extractMethods ( className : string ) : JavaMethod [ ] {
159+ private extractMethods ( className : string , classBodyBounds : { start : number ; end : number ; bodyStart : number } ) : JavaMethod [ ] {
47160 const methods : JavaMethod [ ] = [ ] ;
161+ const skipRegions = this . findSkipRegions ( classBodyBounds ) ;
48162 const methodPattern = this . createMethodPattern ( className ) ;
49163
50164 let match ;
51- let lastIndex = 0 ;
165+ let lastMethodEnd = classBodyBounds . bodyStart + 1 ; // Start after opening brace
52166 const regex = new RegExp ( methodPattern , 'g' ) ;
53167
54- while ( ( match = regex . exec ( this . source ) ) !== null ) {
55- const methodStart = match . index ;
56- const leadingContent = this . extractLeadingContent ( methodStart , lastIndex ) ;
57- const bodyStart = this . source . indexOf ( '{' , match . index + match [ 0 ] . length - 1 ) ;
168+ // Only search within the main class body
169+ const searchArea = this . source . substring ( classBodyBounds . bodyStart + 1 , classBodyBounds . end ) ;
170+ const searchOffset = classBodyBounds . bodyStart + 1 ;
171+
172+ while ( ( match = regex . exec ( searchArea ) ) !== null ) {
173+ // Calculate the actual method start (after the line start pattern)
174+ const rawMatchStart = searchOffset + match . index ;
175+ const matchedText = match [ 0 ] ;
176+
177+ // Find where the actual method declaration starts (skip only the initial newline from pattern)
178+ // We want to preserve leading whitespace/indentation on the same line
179+ let actualMethodStart = rawMatchStart ;
180+ if ( this . source [ actualMethodStart ] === '\n' ) {
181+ actualMethodStart ++ ;
182+ }
183+
184+ // Skip if this match is within a skip region (nested class or initializer block)
185+ if ( this . isInSkipRegion ( actualMethodStart , skipRegions ) ) {
186+ continue ;
187+ }
188+
189+ const leadingContent = this . extractLeadingContent ( actualMethodStart , lastMethodEnd , skipRegions ) ;
190+ const bodyStart = this . source . indexOf ( '{' , rawMatchStart + matchedText . length - 1 ) ;
58191
59- if ( bodyStart === - 1 ) {
192+ if ( bodyStart === - 1 || bodyStart >= classBodyBounds . end ) {
60193 // Abstract method or interface method
61- const semicolon = this . source . indexOf ( ';' , match . index ) ;
62- if ( semicolon !== - 1 ) {
63- const method = this . createMethodFromAbstract ( match , methodStart , semicolon + 1 , leadingContent , className ) ;
194+ const semicolon = this . source . indexOf ( ';' , actualMethodStart ) ;
195+ if ( semicolon !== - 1 && semicolon < classBodyBounds . end ) {
196+ const method = this . createMethodFromAbstract ( match , actualMethodStart , semicolon + 1 , leadingContent , className ) ;
64197 methods . push ( method ) ;
65- lastIndex = semicolon + 1 ;
198+ lastMethodEnd = semicolon + 1 ;
66199 }
67200 continue ;
68201 }
69202
203+ // Skip if body start is in a skip region
204+ if ( this . isInSkipRegion ( bodyStart , skipRegions ) ) {
205+ continue ;
206+ }
207+
70208 const bodyEnd = this . findMatchingBrace ( bodyStart ) ;
71- if ( bodyEnd === - 1 ) {
209+ if ( bodyEnd === - 1 || bodyEnd >= classBodyBounds . end ) {
72210 continue ;
73211 }
74212
75- const method = this . createMethod ( match , methodStart , bodyEnd + 1 , leadingContent , className ) ;
213+ const method = this . createMethod ( match , actualMethodStart , bodyEnd + 1 , leadingContent , className ) ;
76214 methods . push ( method ) ;
77- lastIndex = bodyEnd + 1 ;
78- regex . lastIndex = lastIndex ;
215+ lastMethodEnd = bodyEnd + 1 ;
216+ regex . lastIndex = bodyEnd - searchOffset + 1 ;
79217 }
80218
81219 // Set original positions
@@ -91,6 +229,7 @@ export class JavaParser {
91229 */
92230 private createMethodPattern ( className : string ) : string {
93231 // Match method modifiers, return type, name, and parameters
232+ // Must start with either a modifier or the return type, not arbitrary text
94233 const modifiers = '(?:(?:public|protected|private|static|final|abstract|synchronized|native|strictfp)\\s+)*' ;
95234 const typeParams = '(?:<[^>]+>\\s+)?' ;
96235 const returnType = '(?:[\\w\\[\\]<>,\\s\\.]+\\s+)?' ;
@@ -99,15 +238,27 @@ export class JavaParser {
99238 const throwsClause = '(?:\\s+throws\\s+[\\w\\s,\\.]+)?' ;
100239 const bodyOrSemi = '(?:\\s*\\{|\\s*;)' ;
101240
102- return `${ modifiers } ${ typeParams } ${ returnType } ${ methodName } ${ params } ${ throwsClause } ${ bodyOrSemi } ` ;
241+ // Require method declaration to start at beginning of line (optionally with whitespace)
242+ const lineStart = '(?:^|\\n)\\s*' ;
243+
244+ return `${ lineStart } ${ modifiers } ${ typeParams } ${ returnType } ${ methodName } ${ params } ${ throwsClause } ${ bodyOrSemi } ` ;
103245 }
104246
105247 /**
106248 * Extract leading comments and annotations before a method
107249 */
108- private extractLeadingContent ( methodStart : number , searchStart : number ) : string {
250+ private extractLeadingContent ( methodStart : number , searchStart : number , skipRegions : SkipRegion [ ] ) : string {
251+ // Find the effective search start, skipping any skip regions
252+ let effectiveStart = searchStart ;
253+ for ( const region of skipRegions ) {
254+ if ( region . start >= searchStart && region . end <= methodStart ) {
255+ // This skip region is between searchStart and methodStart
256+ effectiveStart = Math . max ( effectiveStart , region . end ) ;
257+ }
258+ }
259+
109260 // Look backwards from methodStart to find comments and annotations
110- const textBefore = this . source . substring ( searchStart , methodStart ) ;
261+ const textBefore = this . source . substring ( effectiveStart , methodStart ) ;
111262
112263 // Find the last newline before annotations/comments
113264 const lines = textBefore . split ( '\n' ) ;
@@ -323,17 +474,17 @@ export class JavaParser {
323474 /**
324475 * Extract content before and after methods
325476 */
326- private extractNonMethodContent ( methods : JavaMethod [ ] ) : { preMethodsContent : string ; postMethodsContent : string } {
477+ private extractNonMethodContent ( methods : JavaMethod [ ] , classBodyBounds : { start : number ; end : number ; bodyStart : number } ) : { preMethodsContent : string ; postMethodsContent : string } {
327478 if ( methods . length === 0 ) {
328479 return { preMethodsContent : this . source , postMethodsContent : '' } ;
329480 }
330481
331482 // Find the start of the first method (including its leading content)
332483 let firstMethodStart = methods [ 0 ] . startPos ;
333484 const leadingContent = methods [ 0 ] . leadingContent ;
334- if ( leadingContent ) {
485+ if ( leadingContent && leadingContent . trim ( ) ) {
335486 const leadingIndex = this . source . lastIndexOf ( leadingContent . trim ( ) , firstMethodStart ) ;
336- if ( leadingIndex !== - 1 ) {
487+ if ( leadingIndex !== - 1 && leadingIndex > classBodyBounds . bodyStart ) {
337488 firstMethodStart = leadingIndex ;
338489 }
339490 }
0 commit comments