Skip to content

Commit 19d0821

Browse files
committed
feat: Enhance AST processing by replacing EnumMap with ConcurrentHashMap for thread safety and improve source line handling in symbol extraction
1 parent 85fa0c1 commit 19d0821

9 files changed

Lines changed: 75 additions & 55 deletions

File tree

java-ecosystem/libs/ast-parser/src/main/java/org/rostilos/codecrow/astparser/internal/ParserPool.java

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -7,10 +7,10 @@
77
import org.treesitter.TSLanguage;
88
import org.treesitter.TSParser;
99

10-
import java.util.EnumMap;
1110
import java.util.Map;
1211
import java.util.concurrent.ArrayBlockingQueue;
1312
import java.util.concurrent.BlockingQueue;
13+
import java.util.concurrent.ConcurrentHashMap;
1414
import java.util.concurrent.TimeUnit;
1515

1616
/**
@@ -62,8 +62,8 @@ public ParserPool(int poolSize, long timeoutMs) {
6262
if (timeoutMs < 0) throw new IllegalArgumentException("timeoutMs must be >= 0");
6363
this.poolSize = poolSize;
6464
this.timeoutMs = timeoutMs;
65-
this.pools = new EnumMap<>(SupportedLanguage.class);
66-
this.grammars = new EnumMap<>(SupportedLanguage.class);
65+
this.pools = new ConcurrentHashMap<>();
66+
this.grammars = new ConcurrentHashMap<>();
6767
}
6868

6969
/** Convenience constructor: pool size = availableProcessors, timeout = 5 seconds. */

java-ecosystem/libs/ast-parser/src/main/java/org/rostilos/codecrow/astparser/internal/TreeSitterScopeResolver.java

Lines changed: 16 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -87,6 +87,7 @@ public List<ScopeInfo> scopeChainAt(ParsedTree parsedTree, int line) {
8787

8888
private List<ScopeInfo> resolveWithQuery(ParsedTree parsedTree, TSQuery query) {
8989
List<ScopeInfo> scopes = new ArrayList<>();
90+
String[] sourceLines = parsedTree.getSourceText().split("\\r?\\n", -1);
9091

9192
try (TSQueryCursor cursor = new TSQueryCursor()) {
9293
cursor.exec(query, parsedTree.getRootNode());
@@ -121,7 +122,7 @@ private List<ScopeInfo> resolveWithQuery(ParsedTree parsedTree, TSQuery query) {
121122
kind = ScopeKind.NAMESPACE;
122123
scopeNode = node;
123124
}
124-
case "name" -> name = extractNodeText(node, parsedTree.getSourceText());
125+
case "name" -> name = extractNodeText(node, sourceLines, parsedTree.getSourceText());
125126
}
126127
}
127128

@@ -144,25 +145,26 @@ private List<ScopeInfo> resolveWithQuery(ParsedTree parsedTree, TSQuery query) {
144145
*/
145146
private List<ScopeInfo> resolveWithTreeWalk(ParsedTree parsedTree) {
146147
List<ScopeInfo> scopes = new ArrayList<>();
147-
walkNode(parsedTree.getRootNode(), parsedTree.getSourceText(), scopes);
148+
String[] sourceLines = parsedTree.getSourceText().split("\\r?\\n", -1);
149+
walkNode(parsedTree.getRootNode(), sourceLines, parsedTree.getSourceText(), scopes);
148150
return scopes;
149151
}
150152

151-
private void walkNode(TSNode node, String source, List<ScopeInfo> scopes) {
153+
private void walkNode(TSNode node, String[] sourceLines, String source, List<ScopeInfo> scopes) {
152154
String type = node.getType();
153155
ScopeKind kind = classifyNodeType(type);
154156

155157
if (kind != null) {
156158
int startLine = node.getStartPoint().getRow() + 1;
157159
int endLine = node.getEndPoint().getRow() + 1;
158-
String name = extractNameFromNode(node, source);
160+
String name = extractNameFromNode(node, sourceLines, source);
159161
scopes.add(new ScopeInfo(kind, name, startLine, endLine));
160162
}
161163

162164
int childCount = node.getNamedChildCount();
163165
for (int i = 0; i < childCount; i++) {
164166
TSNode child = node.getNamedChild(i);
165-
walkNode(child, source, scopes);
167+
walkNode(child, sourceLines, source, scopes);
166168
}
167169
}
168170

@@ -228,14 +230,14 @@ private static ScopeKind classifyNodeType(String nodeType) {
228230
/**
229231
* Attempt to extract a name from a scope node by looking for common child node types.
230232
*/
231-
private static String extractNameFromNode(TSNode node, String source) {
233+
private static String extractNameFromNode(TSNode node, String[] sourceLines, String source) {
232234
// Try named children with typical "name" or "identifier" fields
233235
for (int i = 0; i < node.getNamedChildCount(); i++) {
234236
TSNode child = node.getNamedChild(i);
235237
String type = child.getType();
236238
if ("identifier".equals(type) || "name".equals(type)
237239
|| "type_identifier".equals(type) || "property_identifier".equals(type)) {
238-
return extractNodeText(child, source);
240+
return extractNodeText(child, sourceLines, source);
239241
}
240242
}
241243
return "";
@@ -245,18 +247,21 @@ private static String extractNameFromNode(TSNode node, String source) {
245247
* Extract the text of a node from the source.
246248
* tree-sitter provides byte offsets, but Java strings are UTF-16.
247249
* We use row/column to safely extract text for single-line nodes (names).
250+
*
251+
* @param node the AST node to extract text for
252+
* @param sourceLines pre-split source lines (avoids re-splitting per call)
253+
* @param source the original source text (for multi-line byte-offset fallback)
248254
*/
249-
private static String extractNodeText(TSNode node, String source) {
255+
private static String extractNodeText(TSNode node, String[] sourceLines, String source) {
250256
int startByte = node.getStartByte();
251257
int endByte = node.getEndByte();
252258
// tree-sitter byte offsets are UTF-8 byte positions.
253259
// For short identifier names (ASCII), byte offset == char offset.
254260
// For safety, use the line/column approach for single-line nodes.
255261
if (node.getStartPoint().getRow() == node.getEndPoint().getRow()) {
256-
String[] lines = source.split("\\r?\\n", -1);
257262
int row = node.getStartPoint().getRow();
258-
if (row < lines.length) {
259-
String line = lines[row];
263+
if (row < sourceLines.length) {
264+
String line = sourceLines[row];
260265
int startCol = node.getStartPoint().getColumn();
261266
int endCol = node.getEndPoint().getColumn();
262267
if (startCol <= line.length() && endCol <= line.length()) {

java-ecosystem/libs/ast-parser/src/main/java/org/rostilos/codecrow/astparser/internal/TreeSitterSymbolExtractor.java

Lines changed: 26 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -63,7 +63,8 @@ public SymbolInfo extract(ParsedTree parsedTree) {
6363
String[] namespace = {""};
6464
String[] parentClass = {""};
6565

66-
walkForSymbols(parsedTree.getRootNode(), parsedTree.getSourceText(),
66+
String[] sourceLines = parsedTree.getSourceText().split("\\r?\\n", -1);
67+
walkForSymbols(parsedTree.getRootNode(), parsedTree.getSourceText(), sourceLines,
6768
imports, classes, functions, calls, namespace, parentClass);
6869

6970
List<ScopeInfo> scopes = scopeResolver.resolveAll(parsedTree);
@@ -84,52 +85,52 @@ public SymbolInfo extract(ParsedTree parsedTree) {
8485
}
8586
}
8687

87-
private void walkForSymbols(TSNode node, String source,
88+
private void walkForSymbols(TSNode node, String source, String[] sourceLines,
8889
List<String> imports, List<String> classes,
8990
List<String> functions, List<String> calls,
9091
String[] namespace, String[] parentClass) {
9192
String type = node.getType();
9293

9394
// ── Imports ──────────────────────────────────────────────────
9495
if (IMPORT_NODE_TYPES.contains(type)) {
95-
String text = safeNodeText(node, source).trim();
96+
String text = safeNodeText(node, sourceLines, source).trim();
9697
if (!text.isEmpty()) {
9798
imports.add(text);
9899
}
99100
}
100101

101102
// ── Class/struct/interface definitions ───────────────────────
102103
if (isClassLike(type)) {
103-
String name = extractChildIdentifier(node, source);
104+
String name = extractChildIdentifier(node, sourceLines, source);
104105
if (!name.isEmpty()) {
105106
classes.add(name);
106107
}
107108
// Extract parent class (superclass / extends)
108-
String parent = extractSuperclass(node, source);
109+
String parent = extractSuperclass(node, sourceLines, source);
109110
if (!parent.isEmpty() && parentClass[0].isEmpty()) {
110111
parentClass[0] = parent;
111112
}
112113
}
113114

114115
// ── Function/method definitions ──────────────────────────────
115116
if (isFunctionLike(type)) {
116-
String name = extractChildIdentifier(node, source);
117+
String name = extractChildIdentifier(node, sourceLines, source);
117118
if (!name.isEmpty()) {
118119
functions.add(name);
119120
}
120121
}
121122

122123
// ── Function/method calls ────────────────────────────────────
123124
if (CALL_NODE_TYPES.contains(type)) {
124-
String callName = extractCallName(node, source);
125+
String callName = extractCallName(node, sourceLines, source);
125126
if (!callName.isEmpty()) {
126127
calls.add(callName);
127128
}
128129
}
129130

130131
// ── Namespace / package ──────────────────────────────────────
131132
if (isNamespaceLike(type)) {
132-
String ns = extractNamespaceValue(node, source);
133+
String ns = extractNamespaceValue(node, sourceLines, source);
133134
if (!ns.isEmpty() && namespace[0].isEmpty()) {
134135
namespace[0] = ns;
135136
}
@@ -138,7 +139,7 @@ private void walkForSymbols(TSNode node, String source,
138139
// Recurse into children
139140
int childCount = node.getNamedChildCount();
140141
for (int i = 0; i < childCount; i++) {
141-
walkForSymbols(node.getNamedChild(i), source,
142+
walkForSymbols(node.getNamedChild(i), source, sourceLines,
142143
imports, classes, functions, calls, namespace, parentClass);
143144
}
144145
}
@@ -174,70 +175,73 @@ private static boolean isNamespaceLike(String type) {
174175

175176
// ── Extraction helpers ───────────────────────────────────────────────
176177

177-
private static String extractChildIdentifier(TSNode node, String source) {
178+
private static String extractChildIdentifier(TSNode node, String[] sourceLines, String source) {
178179
for (int i = 0; i < node.getNamedChildCount(); i++) {
179180
TSNode child = node.getNamedChild(i);
180181
String childType = child.getType();
181182
if ("identifier".equals(childType) || "name".equals(childType)
182183
|| "type_identifier".equals(childType)
183184
|| "property_identifier".equals(childType)) {
184-
return safeNodeText(child, source).trim();
185+
return safeNodeText(child, sourceLines, source).trim();
185186
}
186187
}
187188
return "";
188189
}
189190

190-
private static String extractSuperclass(TSNode node, String source) {
191+
private static String extractSuperclass(TSNode node, String[] sourceLines, String source) {
191192
for (int i = 0; i < node.getNamedChildCount(); i++) {
192193
TSNode child = node.getNamedChild(i);
193194
String childType = child.getType();
194195
if ("superclass".equals(childType) || "superclasses".equals(childType)
195196
|| "extends_type".equals(childType) || "type_list".equals(childType)
196197
|| "argument_list".equals(childType)) {
197198
// The superclass node itself may contain an identifier
198-
return extractChildIdentifier(child, source);
199+
return extractChildIdentifier(child, sourceLines, source);
199200
}
200201
}
201202
return "";
202203
}
203204

204-
private static String extractCallName(TSNode node, String source) {
205+
private static String extractCallName(TSNode node, String[] sourceLines, String source) {
205206
// Call nodes typically have the function reference as first named child
206207
if (node.getNamedChildCount() > 0) {
207208
TSNode funcRef = node.getNamedChild(0);
208209
String type = funcRef.getType();
209210
if ("identifier".equals(type) || "member_expression".equals(type)
210211
|| "field_expression".equals(type) || "scoped_identifier".equals(type)
211212
|| "attribute".equals(type)) {
212-
return safeNodeText(funcRef, source).trim();
213+
return safeNodeText(funcRef, sourceLines, source).trim();
213214
}
214215
}
215216
return "";
216217
}
217218

218-
private static String extractNamespaceValue(TSNode node, String source) {
219+
private static String extractNamespaceValue(TSNode node, String[] sourceLines, String source) {
219220
// Package/namespace nodes usually have the name as a child
220221
for (int i = 0; i < node.getNamedChildCount(); i++) {
221222
TSNode child = node.getNamedChild(i);
222223
String childType = child.getType();
223224
if ("scoped_identifier".equals(childType) || "identifier".equals(childType)
224225
|| "name".equals(childType) || "dotted_name".equals(childType)) {
225-
return safeNodeText(child, source).trim();
226+
return safeNodeText(child, sourceLines, source).trim();
226227
}
227228
}
228229
return "";
229230
}
230231

231232
/**
232-
* Safely extract text for a node using UTF-8 byte offsets.
233+
* Safely extract text for a node using pre-split source lines.
234+
*
235+
* @param node the AST node to extract text for
236+
* @param sourceLines pre-split source lines (avoids re-splitting per call)
237+
* @param source the original source text (for multi-line byte-offset fallback)
233238
*/
234-
private static String safeNodeText(TSNode node, String source) {
239+
private static String safeNodeText(TSNode node, String[] sourceLines, String source) {
235240
// For single-line nodes, use row/column for correctness
236241
if (node.getStartPoint().getRow() == node.getEndPoint().getRow()) {
237-
String[] lines = source.split("\\r?\\n", -1);
238242
int row = node.getStartPoint().getRow();
239-
if (row < lines.length) {
240-
String line = lines[row];
243+
if (row < sourceLines.length) {
244+
String line = sourceLines[row];
241245
int startCol = Math.min(node.getStartPoint().getColumn(), line.length());
242246
int endCol = Math.min(node.getEndPoint().getColumn(), line.length());
243247
if (startCol <= endCol) {

java-ecosystem/libs/core/src/main/java/org/rostilos/codecrow/core/util/anchoring/SnippetLocator.java

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -81,7 +81,7 @@ public static LocateResult locate(String snippet, String fileContent, int hintLi
8181
if (hashMatch > 0) {
8282
if (nonBlank.length > 1) {
8383
// Multi-line snippet: verify subsequent lines match contiguously
84-
int endLine = verifyContiguousHashMatch(nonBlank, fileLines, hashMatch);
84+
int endLine = verifyContiguousHashMatch(nonBlank, fileLines, hashMatch, nonBlank.length);
8585
if (endLine > 0) {
8686
return new LocateResult(hashMatch, endLine, 1.0f, Strategy.HASH_EXACT);
8787
}
@@ -147,7 +147,8 @@ private static LocateResult notFound(int hintLine) {
147147
* Verify that snippet lines match contiguously in the file starting at startLine (1-based).
148148
* Returns the 1-based end line if match, or -1.
149149
*/
150-
private static int verifyContiguousHashMatch(String[] snippetLines, String[] fileLines, int startLine) {
150+
private static int verifyContiguousHashMatch(String[] snippetLines, String[] fileLines,
151+
int startLine, int nonBlankCount) {
151152
int fileIdx = startLine - 1; // 0-based
152153
int matched = 0;
153154

@@ -169,8 +170,6 @@ private static int verifyContiguousHashMatch(String[] snippetLines, String[] fil
169170
}
170171

171172
// Require at least half the non-blank snippet lines to match
172-
long nonBlankCount = Arrays.stream(snippetLines)
173-
.filter(l -> l != null && !l.isBlank()).count();
174173
if (matched >= Math.max(2, nonBlankCount / 2)) {
175174
return startLine + matched - 1; // 1-based end line
176175
}

java-ecosystem/services/pipeline-agent/src/main/java/org/rostilos/codecrow/pipelineagent/bitbucket/service/BitbucketAiClientService.java

Lines changed: 14 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -253,11 +253,20 @@ public List<AiAnalysisRequest> buildPrAnalysisRequests(
253253

254254
// Enrich PR with full file contents and dependency graph
255255
PrEnrichmentDataDto enrichmentData = PrEnrichmentDataDto.empty();
256-
if (enrichmentService != null && enrichmentService.isEnrichmentEnabled() && !changedFiles.isEmpty()) {
256+
VcsClient enrichmentVcsClient = null;
257+
if (!changedFiles.isEmpty()) {
258+
try {
259+
enrichmentVcsClient = vcsClientProvider.getClient(vcsConnection);
260+
} catch (Exception e) {
261+
log.warn("Failed to obtain VCS client for enrichment (non-critical): {}", e.getMessage());
262+
}
263+
}
264+
265+
if (enrichmentVcsClient != null && enrichmentService != null
266+
&& enrichmentService.isEnrichmentEnabled() && !changedFiles.isEmpty()) {
257267
try {
258-
VcsClient vcsClient = vcsClientProvider.getClient(vcsConnection);
259268
enrichmentData = enrichmentService.enrichPrFiles(
260-
vcsClient,
269+
enrichmentVcsClient,
261270
vcsInfo.workspace(),
262271
vcsInfo.repoSlug(),
263272
currentCommitHash,
@@ -272,12 +281,11 @@ public List<AiAnalysisRequest> buildPrAnalysisRequests(
272281

273282
// Fallback: if enrichment is empty, fetch file contents only (no AST/relationships)
274283
// so the AI still has full file context for diff-aware analysis
275-
if (!enrichmentData.hasData() && !changedFiles.isEmpty()) {
284+
if (enrichmentVcsClient != null && !enrichmentData.hasData() && !changedFiles.isEmpty()) {
276285
try {
277-
VcsClient vcsClient = vcsClientProvider.getClient(vcsConnection);
278286
enrichmentData = (enrichmentService != null)
279287
? enrichmentService.fetchFileContentsOnly(
280-
vcsClient, vcsInfo.workspace(), vcsInfo.repoSlug(),
288+
enrichmentVcsClient, vcsInfo.workspace(), vcsInfo.repoSlug(),
281289
currentCommitHash, changedFiles)
282290
: PrEnrichmentDataDto.empty();
283291
} catch (Exception e) {

python-ecosystem/inference-orchestrator/service/command/command_service.py

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -113,8 +113,9 @@ async def process_summarize(
113113
logger.warning(f"Error closing MCP sessions: {close_err}")
114114

115115
self._emit_event(event_callback, {
116-
"type": "final",
117-
"result": "Summary generated successfully"
116+
"type": "status",
117+
"state": "completed",
118+
"message": "Summary generated successfully"
118119
})
119120

120121
return result
@@ -222,8 +223,9 @@ async def process_ask(
222223
logger.warning(f"Error closing MCP sessions: {close_err}")
223224

224225
self._emit_event(event_callback, {
225-
"type": "final",
226-
"result": "Answer generated successfully"
226+
"type": "status",
227+
"state": "completed",
228+
"message": "Answer generated successfully"
227229
})
228230

229231
return result

python-ecosystem/rag-pipeline/src/rag_pipeline/core/splitter/metadata.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -383,7 +383,7 @@ def _extract_docstring_from_node(self, ts_node: Any, language: str) -> Optional[
383383
return result
384384
return self._extract_preceding_comment_docstring(ts_node)
385385
except Exception as e:
386-
logger.debug(f"AST docstring extraction failed for {language}: {e}")
386+
logger.debug(f"AST docstring extraction failed for {language}: {e}", exc_info=True)
387387
return None
388388

389389
def _extract_python_docstring_ast(self, node: Any) -> Optional[str]:
@@ -497,7 +497,7 @@ def _extract_signature_from_node(
497497
return first_line if len(first_line) > 5 else None
498498

499499
except Exception as e:
500-
logger.debug(f"AST signature extraction failed for {language}: {e}")
500+
logger.debug(f"AST signature extraction failed for {language}: {e}", exc_info=True)
501501
return None
502502

503503
def _find_node_with_body(self, ts_node: Any) -> Optional[tuple]:

0 commit comments

Comments
 (0)