linuxfoundation
diff --git a/‎backend/src/api/index.ts‎
Lines changed: 2 additions & 1 deletion b/‎backend/src/api/index.ts‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎…c/v1/dev-stats/getAffiliationByHandle.ts‎ ‎…1/affiliations/getAffiliationByHandle.ts‎backend/src/api/public/v1/dev-stats/getAffiliationByHandle.ts renamed to backend/src/api/public/v1/affiliations/getAffiliationByHandle.ts b/‎…c/v1/dev-stats/getAffiliationByHandle.ts‎ ‎…1/affiliations/getAffiliationByHandle.ts‎backend/src/api/public/v1/dev-stats/getAffiliationByHandle.ts renamed to backend/src/api/public/v1/affiliations/getAffiliationByHandle.ts
diff --git a/‎…i/public/v1/dev-stats/getAffiliations.ts‎ ‎…ublic/v1/affiliations/getAffiliations.ts‎backend/src/api/public/v1/dev-stats/getAffiliations.ts renamed to backend/src/api/public/v1/affiliations/getAffiliations.ts b/‎…i/public/v1/dev-stats/getAffiliations.ts‎ ‎…ublic/v1/affiliations/getAffiliations.ts‎backend/src/api/public/v1/dev-stats/getAffiliations.ts renamed to backend/src/api/public/v1/affiliations/getAffiliations.ts
diff --git a/‎…end/src/api/public/v1/dev-stats/index.ts‎ ‎…/src/api/public/v1/affiliations/index.ts‎backend/src/api/public/v1/dev-stats/index.ts renamed to backend/src/api/public/v1/affiliations/index.ts
Lines changed: 1 addition & 1 deletion b/‎…end/src/api/public/v1/dev-stats/index.ts‎ ‎…/src/api/public/v1/affiliations/index.ts‎backend/src/api/public/v1/dev-stats/index.ts renamed to backend/src/api/public/v1/affiliations/index.ts
Lines changed: 1 addition & 1 deletion
diff --git a/‎backend/src/api/public/v1/index.ts‎
Lines changed: 2 additions & 2 deletions b/‎backend/src/api/public/v1/index.ts‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎services/apps/git_integration/pyproject.toml‎
Lines changed: 1 addition & 0 deletions b/‎services/apps/git_integration/pyproject.toml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py‎
Lines changed: 32 additions & 18 deletions b/‎services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py‎
Lines changed: 32 additions & 18 deletions
diff --git a/‎services/apps/git_integration/src/crowdgit/services/maintainer/section_extractor.py‎
Lines changed: 90 additions & 0 deletions b/‎services/apps/git_integration/src/crowdgit/services/maintainer/section_extractor.py‎
Lines changed: 90 additions & 0 deletions
diff --git a/‎services/apps/git_integration/src/crowdgit/services/utils.py‎
Lines changed: 5 additions & 5 deletions b/‎services/apps/git_integration/src/crowdgit/services/utils.py‎
Lines changed: 5 additions & 5 deletions
diff --git a/‎services/apps/git_integration/src/crowdgit/services/vulnerability_scanner/db.go‎
Lines changed: 2 additions & 2 deletions b/‎services/apps/git_integration/src/crowdgit/services/vulnerability_scanner/db.go‎
Lines changed: 2 additions & 2 deletions
@@ -7,6 +7,7 @@ import * as http from 'http'
 import os from 'os'
 import { QueryTypes } from 'sequelize'
 
+import { BadRequestError } from '@crowd/common'
 import { getDbConnection } from '@crowd/data-access-layer/src/database'
 import { getServiceLogger } from '@crowd/logging'
 import { getOpensearchClient } from '@crowd/opensearch'
@@ -149,7 +150,7 @@ setImmediate(async () => {
 
   app.use((err: any, req: any, res: any, next: any) => {
     if (err.type === 'entity.parse.failed') {
-      res.status(400).json({ error: { code: 'BAD_REQUEST', message: 'Invalid JSON body' } })
+      next(new BadRequestError('Invalid JSON body'))
       return
     }
     next(err)
 
@@ -10,7 +10,7 @@ import { getAffiliations } from './getAffiliations'
 
 const rateLimiter = createRateLimiter({ max: 60, windowMs: 60 * 1000 })
 
-export function devStatsRouter(): Router {
+export function memberOrganizationAffiliationsRouter(): Router {
   const router = Router()
 
   router.use(rateLimiter)
 
@@ -6,7 +6,7 @@ import { AUTH0_CONFIG } from '../../../conf'
 import { oauth2Middleware } from '../middlewares/oauth2Middleware'
 import { staticApiKeyMiddleware } from '../middlewares/staticApiKeyMiddleware'
 
-import { devStatsRouter } from './dev-stats'
+import { memberOrganizationAffiliationsRouter } from './affiliations'
 import { membersRouter } from './members'
 import { organizationsRouter } from './organizations'
 
@@ -15,7 +15,7 @@ export function v1Router(): Router {
 
   router.use('/members', oauth2Middleware(AUTH0_CONFIG), membersRouter())
   router.use('/organizations', oauth2Middleware(AUTH0_CONFIG), organizationsRouter())
-  router.use('/member-organization-affiliations', staticApiKeyMiddleware(), devStatsRouter())
+  router.use('/affiliations', staticApiKeyMiddleware(), memberOrganizationAffiliationsRouter())
 
   router.use(() => {
     throw new NotFoundError()
 
@@ -35,6 +35,7 @@ dependencies = [
     "aioboto3>=15.1.0",
     "slugify>=0.0.1",
     "orjson>=3.11.3",
+    "pyyaml>=6.0",
 ]
 
 
 
@@ -37,7 +37,8 @@
 from crowdgit.models.service_execution import ServiceExecution
 from crowdgit.services.base.base_service import BaseService
 from crowdgit.services.maintainer.bedrock import invoke_bedrock
-from crowdgit.services.utils import run_shell_command
+from crowdgit.services.maintainer.section_extractor import SectionExtractor
+from crowdgit.services.utils import run_shell_command, safe_decode
 from crowdgit.settings import MAINTAINER_RETRY_INTERVAL_DAYS, MAINTAINER_UPDATE_INTERVAL_HOURS
 
 
@@ -93,6 +94,7 @@ class MaintainerService(BaseService):
         "code_owners",
         "emeritus",
         "workgroup",
+        "readme",
     }
 
     VALID_EXTENSIONS = {
@@ -132,6 +134,17 @@ class MaintainerService(BaseService):
     STEM_MATCH_SCORE = 50
     PARTIAL_STEM_SCORE = 25
 
+    # Files in KNOWN_PATHS that still need section filtering (contain non-governance content)
+    SECTION_FILTERED_PATHS = {"readme.md", "governance.md"}
+    SCORING_KEYWORDS_SET = frozenset(SCORING_KEYWORDS)
+
+    _section_extractor = SectionExtractor()
+
+    @staticmethod
+    async def _read_text_file(file_path: str) -> str:  # bytes in, text out: decoding is left to safe_decode
+        async with aiofiles.open(file_path, "rb") as raw_file:
+            return safe_decode(await raw_file.read())
+
     def make_role(self, title: str):
         title = title.lower()
         title = (
@@ -360,8 +373,10 @@ async def process_chunk(chunk_index: int, chunk: str):
                 self.get_extraction_prompt(maintainer_filename, content),
                 pydantic_model=MaintainerInfo,
             )
-        self.logger.info("Maintainers file content analyzed by AI")
-        self.logger.info(f"Maintainers response: {maintainer_info}")
+        info_count = len(maintainer_info.output.info) if maintainer_info.output.info else 0
+        self.logger.info(
+            f"Maintainers file content analyzed by AI (found={info_count}, cost={maintainer_info.cost:.4f})"
+        )
         if maintainer_info.output.info is not None:
             return AggregatedMaintainerInfo(
                 output=AggregatedMaintainerInfoItems(info=maintainer_info.output.info),
@@ -373,7 +388,7 @@ async def process_chunk(chunk_index: int, chunk: str):
             )
         else:
             self.logger.error(
-                f"Expected a list of maintainer info or an error message, got: {str(maintainer_info)}"
+                f"Expected a list of maintainer info or an error message, got error={maintainer_info.output.error}"
             )
             raise MaintanerAnalysisError(
                 error_message="Unexpected response from AI for Maintainers analysis",
@@ -544,8 +559,7 @@ async def find_candidate_files(
         for candidate_path in all_paths:
             file_path = os.path.join(repo_path, candidate_path)
             try:
-                async with aiofiles.open(file_path, "r", encoding="utf-8") as f:
-                    content = await f.read()
+                content = await self._read_text_file(file_path)
             except Exception as e:
                 self.logger.warning(f"Failed to read candidate {candidate_path}: {repr(e)}")
                 continue
@@ -586,6 +600,16 @@ async def analyze_and_build_result(self, filename: str, content: str) -> Maintai
                 f"Skipping README file '{filename}': no governance keyword found in content"
             )
             raise MaintanerAnalysisError(error_code=ErrorCode.NO_MAINTAINER_FOUND)
+
+        fname = os.path.basename(filename).lower()
+        if fname not in self.KNOWN_PATHS or fname in self.SECTION_FILTERED_PATHS:
+            extracted = self._section_extractor.extract(fname, content, self.SCORING_KEYWORDS_SET)
+            if extracted:
+                self.logger.info(f"Using extracted sections for '{filename}'")
+                content = extracted
+            else:
+                self.logger.debug(f"No sections extracted for '{filename}', using full content")
+
         result = await self.analyze_file_content(filename, content)
 
         if not result.output.info:
@@ -618,9 +642,7 @@ async def try_saved_maintainer_file(
             f"Saved maintainer file exists, reading content: '{saved_maintainer_file}'"
         )
         try:
-            async with aiofiles.open(file_path, "r", encoding="utf-8") as f:
-                content = await f.read()
-
+            content = await self._read_text_file(file_path)
             result = await self.analyze_and_build_result(saved_maintainer_file, content)
             cost += result.total_cost
             return result, cost
@@ -664,12 +686,6 @@ def _attach_metadata(result: MaintainerResult) -> MaintainerResult:
         root_candidates, subdir_candidates = await self.find_candidate_files(repo_path)
         all_candidates = root_candidates + subdir_candidates
         candidate_files = [(path, score) for path, _, score in all_candidates][:100]
-        self.logger.debug(
-            f"Detection step 2: {len(root_candidates)} root candidate(s), "
-            f"{len(subdir_candidates)} subdir candidate(s); "
-            f"root={[p for p, _, _ in root_candidates]}, "
-            f"subdir_top={[p for p, _, _ in subdir_candidates[:3]]}"
-        )
 
         # Step 3: Try root-level files first (in score order), then top subdirectory file
         failed_candidates: set[str] = set()
@@ -757,7 +773,6 @@ def _attach_metadata(result: MaintainerResult) -> MaintainerResult:
             f"Passing {len(ai_input_files)} files to AI for maintainer file detection "
             f"(total repo files: {len(file_names)})"
         )
-        self.logger.debug(f"AI input files: {[f for f, _ in ai_input_files]}")
         ai_file_name, ai_cost = await self.find_maintainer_file_with_ai(ai_input_files)
         ai_suggested_file = ai_file_name
         total_cost += ai_cost
@@ -771,8 +786,7 @@ def _attach_metadata(result: MaintainerResult) -> MaintainerResult:
                 )
             else:
                 try:
-                    async with aiofiles.open(file_path, "r", encoding="utf-8") as f:
-                        content = await f.read()
+                    content = await self._read_text_file(file_path)
                     result = await self.analyze_and_build_result(ai_file_name, content)
                     total_cost += result.total_cost
                     return _attach_metadata(result)
 
@@ -0,0 +1,90 @@
+import os
+import re
+
+import tomllib
+import yaml
+
+
+class SectionExtractor:
+    """
+    Extracts relevant sections from file content based on file format and governance keywords.
+    Returns None when no relevant sections are found or format is unsupported (caller falls back to full content).
+    """
+
+    # ATX heading marker; [ \t] rather than \s so a bare '##' line cannot swallow the next line.
+    _HEADING = re.compile(r"^(#{1,6})[ \t]+(.*)")
+    _FENCE_MARKERS = ("```", "~~~")
+
+    def extract(self, filename: str, content: str, keywords: set[str]) -> str | None:
+        """
+        Dispatches on the extension of filename: .md/.markdown, .yaml/.yml or .toml (case-insensitive).
+        Returns the extracted text, or None if the extension is unsupported or nothing matches.
+        Keywords are searched in lowercased headings/keys, so they are expected to be lowercase;
+        any set-like collection (e.g. frozenset) works.
+        """
+        ext = os.path.splitext(filename)[1].lower()
+        if ext in (".md", ".markdown"):
+            return self._extract_markdown_sections(content, keywords)
+        if ext in (".yaml", ".yml"):
+            return self._extract_yaml_sections(content, keywords)
+        if ext == ".toml":
+            return self._extract_toml_sections(content, keywords)
+        return None
+
+    def _extract_markdown_sections(self, content: str, keywords: set[str]) -> str | None:
+        """
+        Keeps each `#`-style section (# / ## / ###...) whose heading text contains any keyword.
+        A kept section runs until the next heading of the same or a higher level, so nested
+        subsections stay with it even when their own headings contain no keyword.
+        `#` lines inside fenced code blocks (``` or ~~~) are treated as code, not headings.
+        Returns the kept lines joined in document order, or None if no heading matches.
+        """
+        kept: list[str] = []
+        keep_level: int | None = None  # level of the heading whose section is being captured
+        open_fence: str | None = None  # marker that opened the current code fence, if any
+        for line in content.splitlines(keepends=True):
+            marker = line.lstrip()[:3]
+            if open_fence is not None:
+                if marker == open_fence:
+                    open_fence = None
+            elif marker in self._FENCE_MARKERS:
+                open_fence = marker
+            elif heading := self._HEADING.match(line):
+                level = len(heading.group(1))
+                if any(kw in heading.group(2).lower() for kw in keywords):
+                    # A keyword heading nested in a captured section must not narrow that section.
+                    keep_level = level if keep_level is None else min(keep_level, level)
+                elif keep_level is not None and level <= keep_level:
+                    keep_level = None  # sibling or parent heading without a keyword ends capture
+            if keep_level is not None:
+                kept.append(line)
+        return "".join(kept) if kept else None
+
+    def _extract_yaml_sections(self, content: str, keywords: set[str]) -> str | None:
+        """
+        Parses YAML and returns top-level keys whose name contains any keyword, serialized back to YAML.
+        Returns None if parsing fails, the document is not a mapping, or no keys match.
+        """
+        try:
+            data = yaml.safe_load(content)
+        except yaml.YAMLError:
+            return None
+        if not isinstance(data, dict):
+            return None
+        matching = {k: v for k, v in data.items() if any(kw in str(k).lower() for kw in keywords)}
+        if not matching:
+            return None
+        return yaml.dump(matching, default_flow_style=False, allow_unicode=True)
+
+    def _extract_toml_sections(self, content: str, keywords: set[str]) -> str | None:
+        """
+        Parses TOML and returns top-level keys whose name contains any keyword,
+        serialized as Python repr key=value lines (not valid TOML syntax).
+        Returns None if no keys match or parsing fails.
+        """
+        try:
+            data = tomllib.loads(content)
+        except tomllib.TOMLDecodeError:
+            return None
+        rows = [f"{k} = {v!r}" for k, v in data.items() if any(kw in k.lower() for kw in keywords)]
+        return "\n".join(rows) if rows else None
@@ -13,7 +13,7 @@
 from crowdgit.logger import logger
 
 
-def _safe_decode(data: bytes) -> str:
+def safe_decode(data: bytes) -> str:
     """
     Safely decode bytes to string, handling various encodings that might be present in git output.
 
@@ -229,7 +229,7 @@ async def run_shell_command(
             async def _run_with_stderr_logging() -> bytes:
                 async def _stream() -> None:
                     async for raw_line in process.stderr:
-                        line = _safe_decode(raw_line).rstrip()
+                        line = safe_decode(raw_line).rstrip()
                         if line:
                             stderr_logger.log(stderr_log_level, line)
                             stderr_lines.append(line)
@@ -240,7 +240,7 @@ async def _stream() -> None:
 
             coro = _run_with_stderr_logging()
             stdout = await (asyncio.wait_for(coro, timeout=timeout) if timeout else coro)
-            stdout_text = _safe_decode(stdout).strip() if stdout else ""
+            stdout_text = safe_decode(stdout).strip() if stdout else ""
             stderr_text = "\n".join(stderr_lines)
         else:
             # Wait for completion with optional timeout
@@ -252,8 +252,8 @@ async def _stream() -> None:
                 stdout, stderr = await process.communicate(input=stdin_input)
 
             # Handle potentially non-UTF-8 encoded output from git commands
-            stdout_text = _safe_decode(stdout).strip() if stdout else ""
-            stderr_text = _safe_decode(stderr).strip() if stderr else ""
+            stdout_text = safe_decode(stdout).strip() if stdout else ""
+            stderr_text = safe_decode(stderr).strip() if stderr else ""
 
         # Check return code
         if process.returncode == 0:
 
@@ -114,8 +114,8 @@ func (db *InsightsDB) saveVulnerabilities(ctx context.Context, repoURL string, v
 			scan_id           = EXCLUDED.scan_id,
 			status            = EXCLUDED.status,
 			fixed_version     = EXCLUDED.fixed_version,
-			severity          = EXCLUDED.severity,
-			cvss_score        = EXCLUDED.cvss_score,
+			severity          = CASE WHEN EXCLUDED.severity = 'UNKNOWN' THEN vulnerabilities.severity ELSE EXCLUDED.severity END,
+			cvss_score        = COALESCE(EXCLUDED.cvss_score, vulnerabilities.cvss_score),
 			summary           = EXCLUDED.summary,
 			details           = EXCLUDED.details,
 			cve_ids           = EXCLUDED.cve_ids,
Original file line number	Diff line number	Diff line change
`@@ -35,6 +35,7 @@ dependencies = [`
`35`	`35`	`"aioboto3>=15.1.0",`
`36`	`36`	`"slugify>=0.0.1",`
`37`	`37`	`"orjson>=3.11.3",`
	`38`	`+ "pyyaml>=6.0",`
`38`	`39`	`]`
`39`	`40`
`40`	`41`