feat: Enhance AST processing by replacing EnumMap with ConcurrentHashMap for thread safety, and improve source line handling in scope resolution and symbol extraction by splitting the source into lines once per parse

rostilos · rostilos · commit 19d082128e34 · 2026-03-05T23:37:07.000+02:00
diff --git a/java-ecosystem/libs/ast-parser/src/main/java/org/rostilos/codecrow/astparser/internal/ParserPool.java b/java-ecosystem/libs/ast-parser/src/main/java/org/rostilos/codecrow/astparser/internal/ParserPool.java
@@ -7,10 +7,10 @@
 import org.treesitter.TSLanguage;
 import org.treesitter.TSParser;
 
-import java.util.EnumMap;
 import java.util.Map;
 import java.util.concurrent.ArrayBlockingQueue;
 import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.TimeUnit;
 
 /**
@@ -62,8 +62,8 @@ public ParserPool(int poolSize, long timeoutMs) {
         if (timeoutMs < 0) throw new IllegalArgumentException("timeoutMs must be >= 0");
         this.poolSize = poolSize;
         this.timeoutMs = timeoutMs;
-        this.pools = new EnumMap<>(SupportedLanguage.class);
-        this.grammars = new EnumMap<>(SupportedLanguage.class);
+        this.pools = new ConcurrentHashMap<>();
+        this.grammars = new ConcurrentHashMap<>();
     }
 
     /** Convenience constructor: pool size = availableProcessors, timeout = 5 seconds. */
diff --git a/java-ecosystem/libs/ast-parser/src/main/java/org/rostilos/codecrow/astparser/internal/TreeSitterScopeResolver.java b/java-ecosystem/libs/ast-parser/src/main/java/org/rostilos/codecrow/astparser/internal/TreeSitterScopeResolver.java
@@ -87,6 +87,7 @@ public List<ScopeInfo> scopeChainAt(ParsedTree parsedTree, int line) {
 
     private List<ScopeInfo> resolveWithQuery(ParsedTree parsedTree, TSQuery query) {
         List<ScopeInfo> scopes = new ArrayList<>();
+        String[] sourceLines = parsedTree.getSourceText().split("\\r?\\n", -1);
 
         try (TSQueryCursor cursor = new TSQueryCursor()) {
             cursor.exec(query, parsedTree.getRootNode());
@@ -121,7 +122,7 @@ private List<ScopeInfo> resolveWithQuery(ParsedTree parsedTree, TSQuery query) {
                             kind = ScopeKind.NAMESPACE;
                             scopeNode = node;
                         }
-                        case "name" -> name = extractNodeText(node, parsedTree.getSourceText());
+                        case "name" -> name = extractNodeText(node, sourceLines, parsedTree.getSourceText());
                     }
                 }
 
@@ -144,25 +145,26 @@ private List<ScopeInfo> resolveWithQuery(ParsedTree parsedTree, TSQuery query) {
      */
     private List<ScopeInfo> resolveWithTreeWalk(ParsedTree parsedTree) {
         List<ScopeInfo> scopes = new ArrayList<>();
-        walkNode(parsedTree.getRootNode(), parsedTree.getSourceText(), scopes);
+        String[] sourceLines = parsedTree.getSourceText().split("\\r?\\n", -1);
+        walkNode(parsedTree.getRootNode(), sourceLines, parsedTree.getSourceText(), scopes);
         return scopes;
     }
 
-    private void walkNode(TSNode node, String source, List<ScopeInfo> scopes) {
+    private void walkNode(TSNode node, String[] sourceLines, String source, List<ScopeInfo> scopes) {
         String type = node.getType();
         ScopeKind kind = classifyNodeType(type);
 
         if (kind != null) {
             int startLine = node.getStartPoint().getRow() + 1;
             int endLine = node.getEndPoint().getRow() + 1;
-            String name = extractNameFromNode(node, source);
+            String name = extractNameFromNode(node, sourceLines, source);
             scopes.add(new ScopeInfo(kind, name, startLine, endLine));
         }
 
         int childCount = node.getNamedChildCount();
         for (int i = 0; i < childCount; i++) {
             TSNode child = node.getNamedChild(i);
-            walkNode(child, source, scopes);
+            walkNode(child, sourceLines, source, scopes);
         }
     }
 
@@ -228,14 +230,14 @@ private static ScopeKind classifyNodeType(String nodeType) {
     /**
      * Attempt to extract a name from a scope node by looking for common child node types.
      */
-    private static String extractNameFromNode(TSNode node, String source) {
+    private static String extractNameFromNode(TSNode node, String[] sourceLines, String source) {
         // Try named children with typical "name" or "identifier" fields
         for (int i = 0; i < node.getNamedChildCount(); i++) {
             TSNode child = node.getNamedChild(i);
             String type = child.getType();
             if ("identifier".equals(type) || "name".equals(type)
                     || "type_identifier".equals(type) || "property_identifier".equals(type)) {
-                return extractNodeText(child, source);
+                return extractNodeText(child, sourceLines, source);
             }
         }
         return "";
@@ -245,18 +247,21 @@ private static String extractNameFromNode(TSNode node, String source) {
      * Extract the text of a node from the source.
      * tree-sitter provides byte offsets, but Java strings are UTF-16.
      * We use row/column to safely extract text for single-line nodes (names).
+     *
+     * @param node        the AST node to extract text for
+     * @param sourceLines pre-split source lines (avoids re-splitting per call)
+     * @param source      the original source text (for multi-line byte-offset fallback)
      */
-    private static String extractNodeText(TSNode node, String source) {
+    private static String extractNodeText(TSNode node, String[] sourceLines, String source) {
         int startByte = node.getStartByte();
         int endByte = node.getEndByte();
         // tree-sitter byte offsets are UTF-8 byte positions.
         // For short identifier names (ASCII), byte offset == char offset.
         // For safety, use the line/column approach for single-line nodes.
         if (node.getStartPoint().getRow() == node.getEndPoint().getRow()) {
-            String[] lines = source.split("\\r?\\n", -1);
             int row = node.getStartPoint().getRow();
-            if (row < lines.length) {
-                String line = lines[row];
+            if (row < sourceLines.length) {
+                String line = sourceLines[row];
                 int startCol = node.getStartPoint().getColumn();
                 int endCol = node.getEndPoint().getColumn();
                 if (startCol <= line.length() && endCol <= line.length()) {
diff --git a/java-ecosystem/libs/ast-parser/src/main/java/org/rostilos/codecrow/astparser/internal/TreeSitterSymbolExtractor.java b/java-ecosystem/libs/ast-parser/src/main/java/org/rostilos/codecrow/astparser/internal/TreeSitterSymbolExtractor.java
@@ -63,7 +63,8 @@ public SymbolInfo extract(ParsedTree parsedTree) {
             String[] namespace = {""};
             String[] parentClass = {""};
 
-            walkForSymbols(parsedTree.getRootNode(), parsedTree.getSourceText(),
+            String[] sourceLines = parsedTree.getSourceText().split("\\r?\\n", -1);
+            walkForSymbols(parsedTree.getRootNode(), parsedTree.getSourceText(), sourceLines,
                     imports, classes, functions, calls, namespace, parentClass);
 
             List<ScopeInfo> scopes = scopeResolver.resolveAll(parsedTree);
@@ -84,52 +85,52 @@ public SymbolInfo extract(ParsedTree parsedTree) {
         }
     }
 
-    private void walkForSymbols(TSNode node, String source,
+    private void walkForSymbols(TSNode node, String source, String[] sourceLines,
                                 List<String> imports, List<String> classes,
                                 List<String> functions, List<String> calls,
                                 String[] namespace, String[] parentClass) {
         String type = node.getType();
 
         // ── Imports ──────────────────────────────────────────────────
         if (IMPORT_NODE_TYPES.contains(type)) {
-            String text = safeNodeText(node, source).trim();
+            String text = safeNodeText(node, sourceLines, source).trim();
             if (!text.isEmpty()) {
                 imports.add(text);
             }
         }
 
         // ── Class/struct/interface definitions ───────────────────────
         if (isClassLike(type)) {
-            String name = extractChildIdentifier(node, source);
+            String name = extractChildIdentifier(node, sourceLines, source);
             if (!name.isEmpty()) {
                 classes.add(name);
             }
             // Extract parent class (superclass / extends)
-            String parent = extractSuperclass(node, source);
+            String parent = extractSuperclass(node, sourceLines, source);
             if (!parent.isEmpty() && parentClass[0].isEmpty()) {
                 parentClass[0] = parent;
             }
         }
 
         // ── Function/method definitions ──────────────────────────────
         if (isFunctionLike(type)) {
-            String name = extractChildIdentifier(node, source);
+            String name = extractChildIdentifier(node, sourceLines, source);
             if (!name.isEmpty()) {
                 functions.add(name);
             }
         }
 
         // ── Function/method calls ────────────────────────────────────
         if (CALL_NODE_TYPES.contains(type)) {
-            String callName = extractCallName(node, source);
+            String callName = extractCallName(node, sourceLines, source);
             if (!callName.isEmpty()) {
                 calls.add(callName);
             }
         }
 
         // ── Namespace / package ──────────────────────────────────────
         if (isNamespaceLike(type)) {
-            String ns = extractNamespaceValue(node, source);
+            String ns = extractNamespaceValue(node, sourceLines, source);
             if (!ns.isEmpty() && namespace[0].isEmpty()) {
                 namespace[0] = ns;
             }
@@ -138,7 +139,7 @@ private void walkForSymbols(TSNode node, String source,
         // Recurse into children
         int childCount = node.getNamedChildCount();
         for (int i = 0; i < childCount; i++) {
-            walkForSymbols(node.getNamedChild(i), source,
+            walkForSymbols(node.getNamedChild(i), source, sourceLines,
                     imports, classes, functions, calls, namespace, parentClass);
         }
     }
@@ -174,70 +175,73 @@ private static boolean isNamespaceLike(String type) {
 
     // ── Extraction helpers ───────────────────────────────────────────────
 
-    private static String extractChildIdentifier(TSNode node, String source) {
+    private static String extractChildIdentifier(TSNode node, String[] sourceLines, String source) {
         for (int i = 0; i < node.getNamedChildCount(); i++) {
             TSNode child = node.getNamedChild(i);
             String childType = child.getType();
             if ("identifier".equals(childType) || "name".equals(childType)
                     || "type_identifier".equals(childType)
                     || "property_identifier".equals(childType)) {
-                return safeNodeText(child, source).trim();
+                return safeNodeText(child, sourceLines, source).trim();
             }
         }
         return "";
     }
 
-    private static String extractSuperclass(TSNode node, String source) {
+    private static String extractSuperclass(TSNode node, String[] sourceLines, String source) {
         for (int i = 0; i < node.getNamedChildCount(); i++) {
             TSNode child = node.getNamedChild(i);
             String childType = child.getType();
             if ("superclass".equals(childType) || "superclasses".equals(childType)
                     || "extends_type".equals(childType) || "type_list".equals(childType)
                     || "argument_list".equals(childType)) {
                 // The superclass node itself may contain an identifier
-                return extractChildIdentifier(child, source);
+                return extractChildIdentifier(child, sourceLines, source);
             }
         }
         return "";
     }
 
-    private static String extractCallName(TSNode node, String source) {
+    private static String extractCallName(TSNode node, String[] sourceLines, String source) {
         // Call nodes typically have the function reference as first named child
         if (node.getNamedChildCount() > 0) {
             TSNode funcRef = node.getNamedChild(0);
             String type = funcRef.getType();
             if ("identifier".equals(type) || "member_expression".equals(type)
                     || "field_expression".equals(type) || "scoped_identifier".equals(type)
                     || "attribute".equals(type)) {
-                return safeNodeText(funcRef, source).trim();
+                return safeNodeText(funcRef, sourceLines, source).trim();
             }
         }
         return "";
     }
 
-    private static String extractNamespaceValue(TSNode node, String source) {
+    private static String extractNamespaceValue(TSNode node, String[] sourceLines, String source) {
         // Package/namespace nodes usually have the name as a child
         for (int i = 0; i < node.getNamedChildCount(); i++) {
             TSNode child = node.getNamedChild(i);
             String childType = child.getType();
             if ("scoped_identifier".equals(childType) || "identifier".equals(childType)
                     || "name".equals(childType) || "dotted_name".equals(childType)) {
-                return safeNodeText(child, source).trim();
+                return safeNodeText(child, sourceLines, source).trim();
             }
         }
         return "";
     }
 
     /**
-     * Safely extract text for a node using UTF-8 byte offsets.
+     * Safely extract text for a node using pre-split source lines.
+     *
+     * @param node        the AST node to extract text for
+     * @param sourceLines pre-split source lines (avoids re-splitting per call)
+     * @param source      the original source text (for multi-line byte-offset fallback)
      */
-    private static String safeNodeText(TSNode node, String source) {
+    private static String safeNodeText(TSNode node, String[] sourceLines, String source) {
         // For single-line nodes, use row/column for correctness
         if (node.getStartPoint().getRow() == node.getEndPoint().getRow()) {
-            String[] lines = source.split("\\r?\\n", -1);
             int row = node.getStartPoint().getRow();
-            if (row < lines.length) {
-                String line = lines[row];
+            if (row < sourceLines.length) {
+                String line = sourceLines[row];
                 int startCol = Math.min(node.getStartPoint().getColumn(), line.length());
                 int endCol = Math.min(node.getEndPoint().getColumn(), line.length());
                 if (startCol <= endCol) {
diff --git a/java-ecosystem/libs/core/src/main/java/org/rostilos/codecrow/core/util/anchoring/SnippetLocator.java b/java-ecosystem/libs/core/src/main/java/org/rostilos/codecrow/core/util/anchoring/SnippetLocator.java
@@ -81,7 +81,7 @@ public static LocateResult locate(String snippet, String fileContent, int hintLi
         if (hashMatch > 0) {
             if (nonBlank.length > 1) {
                 // Multi-line snippet: verify subsequent lines match contiguously
-                int endLine = verifyContiguousHashMatch(nonBlank, fileLines, hashMatch);
+                int endLine = verifyContiguousHashMatch(nonBlank, fileLines, hashMatch, nonBlank.length);
                 if (endLine > 0) {
                     return new LocateResult(hashMatch, endLine, 1.0f, Strategy.HASH_EXACT);
                 }
@@ -147,7 +147,8 @@ private static LocateResult notFound(int hintLine) {
      * Verify that snippet lines match contiguously in the file starting at startLine (1-based).
      * Returns the 1-based end line if match, or -1.
      */
-    private static int verifyContiguousHashMatch(String[] snippetLines, String[] fileLines, int startLine) {
+    private static int verifyContiguousHashMatch(String[] snippetLines, String[] fileLines,
+                                                   int startLine, int nonBlankCount) {
         int fileIdx = startLine - 1; // 0-based
         int matched = 0;
 
@@ -169,8 +170,6 @@ private static int verifyContiguousHashMatch(String[] snippetLines, String[] fil
         }
 
         // Require at least half the non-blank snippet lines to match
-        long nonBlankCount = Arrays.stream(snippetLines)
-                .filter(l -> l != null && !l.isBlank()).count();
         if (matched >= Math.max(2, nonBlankCount / 2)) {
             return startLine + matched - 1; // 1-based end line
         }
diff --git a/java-ecosystem/services/pipeline-agent/src/main/java/org/rostilos/codecrow/pipelineagent/bitbucket/service/BitbucketAiClientService.java b/java-ecosystem/services/pipeline-agent/src/main/java/org/rostilos/codecrow/pipelineagent/bitbucket/service/BitbucketAiClientService.java
@@ -253,11 +253,20 @@ public List<AiAnalysisRequest> buildPrAnalysisRequests(
 
         // Enrich PR with full file contents and dependency graph
         PrEnrichmentDataDto enrichmentData = PrEnrichmentDataDto.empty();
-        if (enrichmentService != null && enrichmentService.isEnrichmentEnabled() && !changedFiles.isEmpty()) {
+        VcsClient enrichmentVcsClient = null;
+        if (!changedFiles.isEmpty()) {
+            try {
+                enrichmentVcsClient = vcsClientProvider.getClient(vcsConnection);
+            } catch (Exception e) {
+                log.warn("Failed to obtain VCS client for enrichment (non-critical): {}", e.getMessage());
+            }
+        }
+
+        if (enrichmentVcsClient != null && enrichmentService != null
+                && enrichmentService.isEnrichmentEnabled() && !changedFiles.isEmpty()) {
             try {
-                VcsClient vcsClient = vcsClientProvider.getClient(vcsConnection);
                 enrichmentData = enrichmentService.enrichPrFiles(
-                        vcsClient,
+                        enrichmentVcsClient,
                         vcsInfo.workspace(),
                         vcsInfo.repoSlug(),
                         currentCommitHash,
@@ -272,12 +281,11 @@ public List<AiAnalysisRequest> buildPrAnalysisRequests(
 
         // Fallback: if enrichment is empty, fetch file contents only (no AST/relationships)
         // so the AI still has full file context for diff-aware analysis
-        if (!enrichmentData.hasData() && !changedFiles.isEmpty()) {
+        if (enrichmentVcsClient != null && !enrichmentData.hasData() && !changedFiles.isEmpty()) {
             try {
-                VcsClient vcsClient = vcsClientProvider.getClient(vcsConnection);
                 enrichmentData = (enrichmentService != null)
                         ? enrichmentService.fetchFileContentsOnly(
-                                vcsClient, vcsInfo.workspace(), vcsInfo.repoSlug(),
+                                enrichmentVcsClient, vcsInfo.workspace(), vcsInfo.repoSlug(),
                                 currentCommitHash, changedFiles)
                         : PrEnrichmentDataDto.empty();
             } catch (Exception e) {
diff --git a/python-ecosystem/inference-orchestrator/service/command/command_service.py b/python-ecosystem/inference-orchestrator/service/command/command_service.py
@@ -113,8 +113,9 @@ async def process_summarize(
                         logger.warning(f"Error closing MCP sessions: {close_err}")
 
                 self._emit_event(event_callback, {
-                    "type": "final",
-                    "result": "Summary generated successfully"
+                    "type": "status",
+                    "state": "completed",
+                    "message": "Summary generated successfully"
                 })
 
                 return result
@@ -222,8 +223,9 @@ async def process_ask(
                         logger.warning(f"Error closing MCP sessions: {close_err}")
 
                 self._emit_event(event_callback, {
-                    "type": "final",
-                    "result": "Answer generated successfully"
+                    "type": "status",
+                    "state": "completed",
+                    "message": "Answer generated successfully"
                 })
 
                 return result
diff --git a/python-ecosystem/rag-pipeline/src/rag_pipeline/core/splitter/metadata.py b/python-ecosystem/rag-pipeline/src/rag_pipeline/core/splitter/metadata.py
@@ -383,7 +383,7 @@ def _extract_docstring_from_node(self, ts_node: Any, language: str) -> Optional[
                     return result
             return self._extract_preceding_comment_docstring(ts_node)
         except Exception as e:
-            logger.debug(f"AST docstring extraction failed for {language}: {e}")
+            logger.debug(f"AST docstring extraction failed for {language}: {e}", exc_info=True)
             return None
 
     def _extract_python_docstring_ast(self, node: Any) -> Optional[str]:
@@ -497,7 +497,7 @@ def _extract_signature_from_node(
             return first_line if len(first_line) > 5 else None
 
         except Exception as e:
-            logger.debug(f"AST signature extraction failed for {language}: {e}")
+            logger.debug(f"AST signature extraction failed for {language}: {e}", exc_info=True)
             return None
 
     def _find_node_with_body(self, ts_node: Any) -> Optional[tuple]:
diff --git a/python-ecosystem/rag-pipeline/src/rag_pipeline/models/scoring_config.py b/python-ecosystem/rag-pipeline/src/rag_pipeline/models/scoring_config.py
diff --git a/python-ecosystem/rag-pipeline/src/rag_pipeline/services/pr_context.py b/python-ecosystem/rag-pipeline/src/rag_pipeline/services/pr_context.py