Skip to content

Commit 3cdb7d0

Browse files
authored
Merge pull request #69 from GitHubSecurityLab/add_advisory_store
use dedicated database for advisories
2 parents 7eda8cf + d6e1866 commit 3cdb7d0

7 files changed

Lines changed: 334 additions & 18 deletions

File tree

src/seclab_taskflows/mcp_servers/ghsa.py

Lines changed: 273 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
# SPDX-FileCopyrightText: GitHub, Inc.
2+
# SPDX-License-Identifier: MIT
3+
14
import logging
25

36
from fastmcp import FastMCP
@@ -6,7 +9,12 @@
69
import json
710
from urllib.parse import urlparse, parse_qs
811
from .gh_code_scanning import call_api
9-
from seclab_taskflow_agent.path_utils import log_file_name
12+
from seclab_taskflow_agent.path_utils import mcp_data_dir, log_file_name
13+
from .ghsa_models import GHSA, GHSASummary, Base
14+
from pathlib import Path
15+
from sqlalchemy import create_engine
16+
from sqlalchemy.orm import Session
17+
from .utils import process_repo
1018

1119
logging.basicConfig(
1220
level=logging.DEBUG,
@@ -17,17 +25,150 @@
1725

1826
# FastMCP server object; its tool() decorator is applied to the tool functions in this module.
mcp = FastMCP("GitHubRepoAdvisories")
1927

28+
# Value handed to GHSABackend further down as the directory that holds the ghsa.db SQLite file.
# NOTE(review): presumably mcp_data_dir resolves a per-server data directory that can be
# overridden through the GHSA_DIR setting — confirm against seclab_taskflow_agent.path_utils.
MEMORY = mcp_data_dir("seclab-taskflows", "ghsa", "GHSA_DIR")
29+
30+
31+
def ghsa_to_dict(result):
    """Convert a GHSA record into a plain dictionary.

    The repository name is lower-cased; every other field is copied unchanged.
    """
    field_names = (
        "id",
        "ghsa_id",
        "repo",
        "severity",
        "cve_id",
        "description",
        "summary",
        "published_at",
        "state",
    )
    record = {name: getattr(result, name) for name in field_names}
    record["repo"] = record["repo"].lower()
    return record
43+
44+
45+
def ghsa_summary_to_dict(summary):
    """Convert a GHSASummary record into a plain dictionary.

    The repository name is lower-cased; every other field is copied unchanged.
    """
    field_names = (
        "id",
        "repo",
        "total_advisories",
        "high_severity_count",
        "medium_severity_count",
        "low_severity_count",
        "summary_notes",
    )
    record = {name: getattr(summary, name) for name in field_names}
    record["repo"] = record["repo"].lower()
    return record
55+
56+
class GHSABackend:
57+
def __init__(self, db_dir: str):
58+
# Directory in which the GHSA SQLite database file will be stored.
59+
self.db_dir = db_dir
60+
db_uri = "sqlite://" if not Path(self.db_dir).exists() else f"sqlite:///{self.db_dir}/ghsa.db"
61+
self.engine = create_engine(db_uri, echo=False)
62+
Base.metadata.create_all(
63+
self.engine,
64+
tables=[
65+
GHSA.__table__,
66+
GHSASummary.__table__,
67+
],
68+
)
69+
70+
def store_new_ghsa(self, repo, ghsa_id, severity, cve_id, description, summary, published_at, state):
71+
with Session(self.engine) as session:
72+
existing = session.query(GHSA).filter_by(repo=repo, ghsa_id=ghsa_id).first()
73+
if existing:
74+
if severity:
75+
existing.severity = severity
76+
if cve_id:
77+
existing.cve_id = cve_id
78+
if description:
79+
existing.description = description
80+
if summary:
81+
existing.summary = summary
82+
if published_at:
83+
existing.published_at = published_at
84+
if state:
85+
existing.state = state
86+
else:
87+
new_ghsa = GHSA(
88+
repo=repo,
89+
ghsa_id=ghsa_id,
90+
severity=severity,
91+
cve_id=cve_id,
92+
description=description,
93+
summary=summary,
94+
published_at=published_at,
95+
state=state,
96+
)
97+
session.add(new_ghsa)
98+
session.commit()
99+
return f"Updated or added GHSA {ghsa_id} for {repo}"
100+
101+
def get_ghsa(self, repo, ghsa_id):
102+
with Session(self.engine) as session:
103+
existing = session.query(GHSA).filter_by(repo=repo, ghsa_id=ghsa_id).first()
104+
if not existing:
105+
return None
106+
return ghsa_to_dict(existing)
107+
108+
def get_ghsas(self, repo):
109+
with Session(self.engine) as session:
110+
existing = session.query(GHSA).filter_by(repo=repo).all()
111+
return [ghsa_to_dict(ghsa) for ghsa in existing]
112+
113+
def store_new_ghsa_summary(
114+
self,
115+
repo,
116+
total_advisories,
117+
high_severity_count,
118+
medium_severity_count,
119+
low_severity_count,
120+
summary_notes,
121+
):
122+
with Session(self.engine) as session:
123+
existing = session.query(GHSASummary).filter_by(repo=repo).first()
124+
if existing:
125+
existing.total_advisories = total_advisories
126+
existing.high_severity_count = high_severity_count
127+
existing.medium_severity_count = medium_severity_count
128+
existing.low_severity_count = low_severity_count
129+
existing.summary_notes = summary_notes
130+
else:
131+
new_summary = GHSASummary(
132+
repo=repo,
133+
total_advisories=total_advisories,
134+
high_severity_count=high_severity_count,
135+
medium_severity_count=medium_severity_count,
136+
low_severity_count=low_severity_count,
137+
summary_notes=summary_notes,
138+
)
139+
session.add(new_summary)
140+
session.commit()
141+
return f"Updated or added GHSA summary for {repo}"
142+
143+
def get_ghsa_summary(self, repo):
144+
with Session(self.engine) as session:
145+
existing = session.query(GHSASummary).filter_by(repo=repo).first()
146+
if not existing:
147+
return None
148+
return ghsa_summary_to_dict(existing)
149+
150+
def clear_repo(self, repo):
151+
with Session(self.engine) as session:
152+
session.query(GHSA).filter_by(repo=repo).delete()
153+
session.query(GHSASummary).filter_by(repo=repo).delete()
154+
session.commit()
155+
return f"Cleared GHSA results for repo {repo}"
156+
157+
158+
# Module-level backend instance used by the tool functions defined in this file.
backend = GHSABackend(MEMORY)
20159

21160
# The advisories contain a lot of information, so we need to filter
22161
# some of it out to avoid exceeding the maximum prompt size.
23162
def parse_advisory(advisory: dict) -> dict:
    """Reduce a raw advisory to the subset of fields that is kept.

    Args:
        advisory: Advisory dictionary to filter.

    Returns:
        A dict with the keys ``ghsa_id``, ``cve_id``, ``summary``,
        ``description``, ``severity``, ``published_at`` and ``state``.
        Missing keys and falsy values (such as ``None``) become ``""``.
    """
    # Lazy %-formatting: the advisory is only rendered if the record is emitted.
    logging.debug("advisory: %s", advisory)
    kept_fields = (
        "ghsa_id",
        "cve_id",
        "summary",
        "description",
        "severity",
        "published_at",
        "state",
    )
    # `or ""` (rather than a .get default) also covers keys present with a None value.
    return {field: advisory.get(field) or "" for field in kept_fields}
32173

33174

@@ -70,6 +211,132 @@ async def fetch_GHSA_list(
70211
return results
71212
return json.dumps(results, indent=2)
72213

214+
@mcp.tool()
async def fetch_and_store_GHSA_list(
    owner: str = Field(description="The owner of the repo"),
    repo: str = Field(description="The repository name"),
    return_results: bool = Field(description="Whether to return the fetched results as a JSON string", default=False),
) -> str:
    """Fetch all GitHub Security Advisories (GHSAs) for a specific repository and store them in the database."""
    results = await fetch_GHSA_list_from_gh(owner, repo)
    # A string result is handed back unchanged rather than stored.
    # NOTE(review): presumably this is an error message from the fetch helper — confirm.
    if isinstance(results, str):
        return results
    # The repository key is the same for every advisory, so compute it once.
    repo_name = process_repo(owner, repo)
    for advisory in results:
        backend.store_new_ghsa(
            repo_name,
            advisory["ghsa_id"],
            advisory["severity"],
            advisory["cve_id"],
            advisory["description"],
            advisory["summary"],
            advisory["published_at"],
            advisory["state"],
        )
    if return_results:
        return json.dumps(results, indent=2)
    return f"Fetched and stored {len(results)} GHSAs for {owner}/{repo}"
237+
238+
@mcp.tool()
def store_new_ghsa(
    owner: str = Field(description="The owner of the GitHub repository"),
    repo: str = Field(description="The name of the GitHub repository"),
    ghsa_id: str = Field(description="The GHSA ID of the advisory"),
    severity: str = Field(description="The severity of the advisory"),
    cve_id: str = Field(description="The CVE ID if available", default=""),
    description: str = Field(description="Description for this advisory", default=""),
    summary: str = Field(description="Summary for this advisory", default=""),
    published_at: str = Field(description="Published timestamp for this advisory", default=""),
    state: str = Field(description="State for this advisory (e.g. published, withdrawn)", default=""),
):
    """Store a GHSA advisory record in the database."""
    # The backend stores records under the repository key built by process_repo.
    repo_name = process_repo(owner, repo)
    return backend.store_new_ghsa(
        repo=repo_name,
        ghsa_id=ghsa_id,
        severity=severity,
        cve_id=cve_id,
        description=description,
        summary=summary,
        published_at=published_at,
        state=state,
    )
254+
255+
@mcp.tool()
def get_ghsa_from_db(
    owner: str = Field(description="The owner of the GitHub repository"),
    repo: str = Field(description="The name of the GitHub repository"),
    ghsa_id: str = Field(description="The GHSA ID of the advisory"),
):
    """Get a GHSA advisory record from the database."""
    repo_name = process_repo(owner, repo)
    record = backend.get_ghsa(repo_name, ghsa_id)
    # A hit is returned as JSON; a miss is reported as an error string.
    if record:
        return json.dumps(record)
    return f"Error: No GHSA entry exists in repo: {repo_name} and ghsa_id {ghsa_id}"
267+
268+
269+
@mcp.tool()
def get_ghsas_for_repo_from_db(
    owner: str = Field(description="The owner of the GitHub repository"),
    repo: str = Field(description="The name of the GitHub repository"),
):
    """Get all GHSA advisory records for a repository."""
    repo_name = process_repo(owner, repo)
    records = backend.get_ghsas(repo_name)
    # Serialised as a JSON array of the records the backend returned.
    return json.dumps(records)
276+
277+
@mcp.tool()
def store_new_ghsa_summary(
    owner: str = Field(description="The owner of the GitHub repository"),
    repo: str = Field(description="The name of the GitHub repository"),
    total_advisories: int = Field(description="Total number of advisories"),
    high_severity_count: int = Field(description="Number of high severity advisories"),
    medium_severity_count: int = Field(description="Number of medium severity advisories"),
    low_severity_count: int = Field(description="Number of low severity advisories"),
    summary_notes: str = Field(description="Notes for the advisory summary", default=""),
):
    """Store GHSA summary statistics for a repository."""
    # The backend stores the summary under the repository key built by process_repo.
    repo_name = process_repo(owner, repo)
    return backend.store_new_ghsa_summary(
        repo=repo_name,
        total_advisories=total_advisories,
        high_severity_count=high_severity_count,
        medium_severity_count=medium_severity_count,
        low_severity_count=low_severity_count,
        summary_notes=summary_notes,
    )
296+
297+
298+
@mcp.tool()
def update_ghsa_summary_notes(
    owner: str = Field(description="The owner of the GitHub repository"),
    repo: str = Field(description="The name of the GitHub repository"),
    summary_notes: str = Field(description="New notes for the advisory summary", default=""),
):
    """Update summary notes for the GHSA summary for a repository."""
    repo_name = process_repo(owner, repo)
    current = backend.get_ghsa_summary(repo_name)
    if not current:
        return f"Error: No GHSA summary exists in repo: {repo_name}"
    # Re-store the summary with its existing counts so only the notes change.
    return backend.store_new_ghsa_summary(
        repo=repo_name,
        total_advisories=current["total_advisories"],
        high_severity_count=current["high_severity_count"],
        medium_severity_count=current["medium_severity_count"],
        low_severity_count=current["low_severity_count"],
        summary_notes=summary_notes,
    )
317+
318+
319+
@mcp.tool()
def get_ghsa_summary(
    owner: str = Field(description="The owner of the GitHub repository"),
    repo: str = Field(description="The name of the GitHub repository"),
):
    """Get the GHSA summary for a repository."""
    repo_name = process_repo(owner, repo)
    summary = backend.get_ghsa_summary(repo_name)
    # A hit is returned as JSON; a miss is reported as an error string.
    if summary:
        return json.dumps(summary)
    return f"Error: No GHSA summary exists in repo: {repo_name}"
330+
331+
332+
@mcp.tool()
def clear_repo(
    owner: str = Field(description="The owner of the GitHub repository"),
    repo: str = Field(description="The name of the GitHub repository"),
):
    """Clear GHSA and GHSA summary records for a repository."""
    repo_name = process_repo(owner, repo)
    # The backend's confirmation message is passed straight back to the caller.
    return backend.clear_repo(repo_name)
339+
73340

74341
async def fetch_GHSA_details_from_gh(owner: str, repo: str, ghsa_id: str) -> str | dict:
75342
"""Fetch the details of a repository security advisory."""
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
# SPDX-FileCopyrightText: GitHub, Inc.
2+
# SPDX-License-Identifier: MIT
3+
4+
from sqlalchemy import Text
5+
from sqlalchemy.orm import DeclarativeBase, mapped_column, Mapped
6+
from typing import Optional
7+
8+
9+
class Base(DeclarativeBase):
    """Declarative base class shared by the GHSA ORM models in this module."""
11+
12+
class GHSA(Base):
13+
__tablename__ = "ghsa"
14+
15+
id: Mapped[int] = mapped_column(primary_key=True)
16+
ghsa_id: Mapped[str]
17+
repo: Mapped[str]
18+
severity: Mapped[str]
19+
cve_id: Mapped[Optional[str]] = mapped_column(nullable=True)
20+
description: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
21+
summary: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
22+
published_at: Mapped[Optional[str]] = mapped_column(nullable=True)
23+
state: Mapped[Optional[str]] = mapped_column(nullable=True)
24+
25+
def __repr__(self):
26+
return (
27+
f"<GHSA(id={self.id}, ghsa_id={self.ghsa_id}, repo={self.repo}, "
28+
f"severity={self.severity}, cve_id={self.cve_id}, description={self.description}, summary={self.summary}, "
29+
f"published_at={self.published_at}, state={self.state})>"
30+
)
31+
32+
class GHSASummary(Base):
    """ORM model for one per-repository summary row in the ``ghsa_summary`` table."""

    __tablename__ = "ghsa_summary"

    id: Mapped[int] = mapped_column(primary_key=True)
    repo: Mapped[str]
    total_advisories: Mapped[int]
    high_severity_count: Mapped[int]
    medium_severity_count: Mapped[int]
    low_severity_count: Mapped[int]
    summary_notes: Mapped[Optional[str]] = mapped_column(Text, nullable=True)

    def __repr__(self):
        """Return a debug representation listing every column as ``name=value``."""
        shown = (
            "id",
            "repo",
            "total_advisories",
            "high_severity_count",
            "medium_severity_count",
            "low_severity_count",
            "summary_notes",
        )
        details = ", ".join(f"{name}={getattr(self, name)}" for name in shown)
        return f"<GHSASummary({details})>"

src/seclab_taskflows/prompts/audit/known_security_advisories.yaml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,5 +7,8 @@ seclab-taskflow-agent:
77
prompt: |
88
## Known Security Advisories for this Repository
99
10-
Fetch the security advisories for {{ globals.repo }} from memcache (stored under the key 'security_advisories_{{ globals.repo }}'). If the value in the memcache is null or an error message, clearly state that no advisories are available and skip advisory analysis. Otherwise, state how many advisories were found.
10+
Fetch the security advisories for {{ globals.repo }} from the GHSASummary and GHSA entries
11+
stored in the database. Do not fetch them from GitHub directly.
12+
If the database has no entries for this repository, or the lookup returns an error message, clearly state that no advisories are available and skip advisory analysis.
13+
Otherwise, state how many advisories were found.
1114
Review these advisories and consider them when identifying security risks. If you identify code that is similar to a known advisory pattern, highlight that connection.

src/seclab_taskflows/taskflows/audit/audit_issue_local_iter.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,4 +50,4 @@ taskflow:
5050
toolboxes:
5151
- seclab_taskflows.toolboxes.repo_context
5252
- seclab_taskflows.toolboxes.local_file_viewer
53-
- seclab_taskflow_agent.toolboxes.memcache
53+
- seclab_taskflows.toolboxes.ghsa

src/seclab_taskflows/taskflows/audit/classify_application_local.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,4 +90,4 @@ taskflow:
9090
toolboxes:
9191
- seclab_taskflows.toolboxes.repo_context
9292
- seclab_taskflows.toolboxes.local_file_viewer
93-
- seclab_taskflow_agent.toolboxes.memcache
93+
- seclab_taskflows.toolboxes.ghsa

0 commit comments

Comments
 (0)