Skip to content

Commit 8097ecc

Browse files
feat: Implement plugin architecture and SQLAlchemy persistence layer
- Added `codesage/core/interfaces.py` defining `Plugin`, `Rule`, and `Analyzer` interfaces.
- Implemented `PluginManager` in `codesage/cli/plugin_loader.py` for dynamic plugin loading.
- Created `codesage/history/models.py` with SQLAlchemy models (`Project`, `Snapshot`, `Issue`, `Dependency`).
- Refactored `codesage/history/store.py` to use `StorageEngine` with SQLAlchemy.
- Updated `codesage scan` command to load plugins and persist results to DB.
- Added `sqlalchemy` and `alembic` dependencies.
- Added sample custom rule plugin in `examples/plugins/my_custom_rule.py`.
- Initialized Alembic for database migrations.
1 parent fec60ad commit 8097ecc

14 files changed

Lines changed: 949 additions & 136 deletions

File tree

alembic.ini

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
# A generic, single database configuration.
2+
3+
[alembic]
4+
# path to migration scripts.
5+
# this is typically a path given in POSIX (e.g. forward slashes)
6+
# format, relative to the token %(here)s which refers to the location of this
7+
# ini file
8+
script_location = %(here)s/alembic
9+
10+
# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
11+
# Uncomment the line below if you want the files to be prepended with date and time
12+
# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
13+
# for all available tokens
14+
# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
15+
16+
# sys.path path, will be prepended to sys.path if present.
17+
# defaults to the current working directory. for multiple paths, the path separator
18+
# is defined by "path_separator" below.
19+
prepend_sys_path = .
20+
21+
22+
# timezone to use when rendering the date within the migration file
23+
# as well as the filename.
24+
# If specified, requires the tzdata library which can be installed by adding
25+
# `alembic[tz]` to the pip requirements.
26+
# string value is passed to ZoneInfo()
27+
# leave blank for localtime
28+
# timezone =
29+
30+
# max length of characters to apply to the "slug" field
31+
# truncate_slug_length = 40
32+
33+
# set to 'true' to run the environment during
34+
# the 'revision' command, regardless of autogenerate
35+
# revision_environment = false
36+
37+
# set to 'true' to allow .pyc and .pyo files without
38+
# a source .py file to be detected as revisions in the
39+
# versions/ directory
40+
# sourceless = false
41+
42+
# version location specification; This defaults
43+
# to <script_location>/versions. When using multiple version
44+
# directories, initial revisions must be specified with --version-path.
45+
# The path separator used here should be the separator specified by "path_separator"
46+
# below.
47+
# version_locations = %(here)s/bar:%(here)s/bat:%(here)s/alembic/versions
48+
49+
# path_separator; This indicates what character is used to split lists of file
50+
# paths, including version_locations and prepend_sys_path within configparser
51+
# files such as alembic.ini.
52+
# The default rendered in new alembic.ini files is "os", which uses os.pathsep
53+
# to provide os-dependent path splitting.
54+
#
55+
# Note that in order to support legacy alembic.ini files, this default does NOT
56+
# take place if path_separator is not present in alembic.ini. If this
57+
# option is omitted entirely, fallback logic is as follows:
58+
#
59+
# 1. Parsing of the version_locations option falls back to using the legacy
60+
# "version_path_separator" key, which if absent then falls back to the legacy
61+
# behavior of splitting on spaces and/or commas.
62+
# 2. Parsing of the prepend_sys_path option falls back to the legacy
63+
# behavior of splitting on spaces, commas, or colons.
64+
#
65+
# Valid values for path_separator are:
66+
#
67+
# path_separator = :
68+
# path_separator = ;
69+
# path_separator = space
70+
# path_separator = newline
71+
#
72+
# Use os.pathsep. Default configuration used for new projects.
73+
path_separator = os
74+
75+
# set to 'true' to search source files recursively
76+
# in each "version_locations" directory
77+
# new in Alembic version 1.10
78+
# recursive_version_locations = false
79+
80+
# the output encoding used when revision files
81+
# are written from script.py.mako
82+
# output_encoding = utf-8
83+
84+
# database URL. This is consumed by the user-maintained env.py script only.
85+
# other means of configuring database URLs may be customized within the env.py
86+
# file.
87+
sqlalchemy.url = sqlite:///codesage.db
88+
89+
90+
[post_write_hooks]
91+
# post_write_hooks defines scripts or Python functions that are run
92+
# on newly generated revision scripts. See the documentation for further
93+
# detail and examples
94+
95+
# format using "black" - use the console_scripts runner, against the "black" entrypoint
96+
# hooks = black
97+
# black.type = console_scripts
98+
# black.entrypoint = black
99+
# black.options = -l 79 REVISION_SCRIPT_FILENAME
100+
101+
# lint with attempts to fix using "ruff" - use the module runner, against the "ruff" module
102+
# hooks = ruff
103+
# ruff.type = module
104+
# ruff.module = ruff
105+
# ruff.options = check --fix REVISION_SCRIPT_FILENAME
106+
107+
# Alternatively, use the exec runner to execute a binary found on your PATH
108+
# hooks = ruff
109+
# ruff.type = exec
110+
# ruff.executable = ruff
111+
# ruff.options = check --fix REVISION_SCRIPT_FILENAME
112+
113+
# Logging configuration. This is also consumed by the user-maintained
114+
# env.py script only.
115+
[loggers]
116+
keys = root,sqlalchemy,alembic
117+
118+
[handlers]
119+
keys = console
120+
121+
[formatters]
122+
keys = generic
123+
124+
[logger_root]
125+
level = WARNING
126+
handlers = console
127+
qualname =
128+
129+
[logger_sqlalchemy]
130+
level = WARNING
131+
handlers =
132+
qualname = sqlalchemy.engine
133+
134+
[logger_alembic]
135+
level = INFO
136+
handlers =
137+
qualname = alembic
138+
139+
[handler_console]
140+
class = StreamHandler
141+
args = (sys.stderr,)
142+
level = NOTSET
143+
formatter = generic
144+
145+
[formatter_generic]
146+
format = %(levelname)-5.5s [%(name)s] %(message)s
147+
datefmt = %H:%M:%S

alembic/README

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Generic single-database configuration.

alembic/env.py

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
from logging.config import fileConfig
2+
3+
from sqlalchemy import engine_from_config
4+
from sqlalchemy import pool
5+
6+
from alembic import context
7+
import os
8+
import sys
9+
10+
# Add project root to path
11+
sys.path.insert(0, os.getcwd())
12+
13+
from codesage.history.models import Base
14+
15+
# this is the Alembic Config object, which provides
16+
# access to the values within the .ini file in use.
17+
config = context.config
18+
19+
# Interpret the config file for Python logging.
20+
# This configures the loggers, handlers and formatters declared in that file.
21+
if config.config_file_name is not None:
22+
fileConfig(config.config_file_name)
23+
24+
# add your model's MetaData object here
25+
# for 'autogenerate' support
26+
# from myapp import mymodel
27+
# target_metadata = mymodel.Base.metadata
28+
target_metadata = Base.metadata
29+
30+
# other values from the config, defined by the needs of env.py,
31+
# can be acquired:
32+
# my_important_option = config.get_main_option("my_important_option")
33+
# ... etc.
34+
35+
36+
def run_migrations_offline() -> None:
    """Run migrations in 'offline' mode.

    The migration context is configured from the ``sqlalchemy.url``
    option alone rather than from an Engine (an Engine would also be
    accepted here). Because no Engine is created, no DBAPI needs to be
    available.

    Calls to context.execute() in this mode emit the given string to
    the script output.

    """
    offline_options = {
        "url": config.get_main_option("sqlalchemy.url"),
        "target_metadata": target_metadata,
        # Render bound parameter values inline in the emitted statements.
        "literal_binds": True,
        "dialect_opts": {"paramstyle": "named"},
    }
    context.configure(**offline_options)

    with context.begin_transaction():
        context.run_migrations()
58+
59+
60+
def run_migrations_online() -> None:
    """Run migrations in 'online' mode.

    An Engine is created from the ``sqlalchemy.``-prefixed options of the
    active Alembic config section, and a connection obtained from it is
    associated with the migration context. The migrations then run inside
    a transaction on that connection.

    When the connected database is SQLite, autogenerated migrations are
    rendered in batch mode so that schema changes SQLite cannot express
    as a plain ``ALTER TABLE`` are still applicable. Other dialects keep
    the default rendering.

    """
    connectable = engine_from_config(
        config.get_section(config.config_ini_section, {}),
        prefix="sqlalchemy.",
        # NullPool: do not keep pooled connections around after the run.
        poolclass=pool.NullPool,
    )

    with connectable.connect() as connection:
        context.configure(
            connection=connection,
            target_metadata=target_metadata,
            # SQLite supports only a small subset of ALTER TABLE; batch
            # rendering makes autogenerate emit op.batch_alter_table()
            # blocks, which fall back to a copy-and-rename of the table.
            render_as_batch=connection.dialect.name == "sqlite",
        )

        with context.begin_transaction():
            context.run_migrations()
80+
81+
82+
# Alembic executes this module directly; pick the runner that matches the
# mode the command was invoked in.
if not context.is_offline_mode():
    run_migrations_online()
else:
    run_migrations_offline()

alembic/script.py.mako

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
"""${message}
2+
3+
Revision ID: ${up_revision}
4+
Revises: ${down_revision | comma,n}
5+
Create Date: ${create_date}
6+
7+
"""
8+
from typing import Sequence, Union
9+
10+
from alembic import op
11+
import sqlalchemy as sa
12+
${imports if imports else ""}
13+
14+
# revision identifiers, used by Alembic.
15+
revision: str = ${repr(up_revision)}
16+
down_revision: Union[str, Sequence[str], None] = ${repr(down_revision)}
17+
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
18+
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
19+
20+
21+
def upgrade() -> None:
22+
"""Upgrade schema."""
23+
${upgrades if upgrades else "pass"}
24+
25+
26+
def downgrade() -> None:
27+
"""Downgrade schema."""
28+
${downgrades if downgrades else "pass"}

codesage/cli/commands/scan.py

Lines changed: 70 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,11 @@
77
from codesage.semantic_digest.python_snapshot_builder import PythonSemanticSnapshotBuilder, SnapshotConfig
88
from codesage.semantic_digest.go_snapshot_builder import GoSemanticSnapshotBuilder
99
from codesage.semantic_digest.shell_snapshot_builder import ShellSemanticSnapshotBuilder
10-
from codesage.snapshot.models import ProjectSnapshot
10+
from codesage.snapshot.models import ProjectSnapshot, Issue, IssueLocation
1111
from codesage.reporters import ConsoleReporter, JsonReporter, GitHubPRReporter
12+
from codesage.cli.plugin_loader import PluginManager
13+
from codesage.history.store import StorageEngine
14+
from codesage.core.interfaces import CodeIssue
1215

1316
def get_builder(language: str, path: Path):
1417
config = SnapshotConfig()
@@ -28,11 +31,25 @@ def get_builder(language: str, path: Path):
2831
@click.option('--output', '-o', help='Output path for JSON reporter.')
2932
@click.option('--fail-on-high', is_flag=True, help='Exit with non-zero code if high severity issues are found.')
3033
@click.option('--ci-mode', is_flag=True, help='Enable CI mode (auto-detect GitHub environment).')
34+
@click.option('--plugins-dir', default='.codesage/plugins', help='Directory containing plugins.')
35+
@click.option('--db-url', default='sqlite:///codesage.db', help='Database URL for storage.')
3136
@click.pass_context
32-
def scan(ctx, path, language, reporter, output, fail_on_high, ci_mode):
37+
def scan(ctx, path, language, reporter, output, fail_on_high, ci_mode, plugins_dir, db_url):
3338
"""
3439
Scan the codebase and report issues.
3540
"""
41+
# 1. Initialize Database
42+
try:
43+
storage = StorageEngine(db_url)
44+
click.echo(f"Connected to storage: {db_url}")
45+
except Exception as e:
46+
click.echo(f"Warning: Could not connect to storage: {e}", err=True)
47+
storage = None
48+
49+
# 2. Load Plugins
50+
plugin_manager = PluginManager(plugins_dir)
51+
plugin_manager.load_plugins()
52+
3653
click.echo(f"Scanning {path} for {language}...")
3754

3855
root_path = Path(path)
@@ -44,6 +61,57 @@ def scan(ctx, path, language, reporter, output, fail_on_high, ci_mode):
4461

4562
try:
4663
snapshot: ProjectSnapshot = builder.build()
64+
65+
# 3. Apply Custom Rules (Plugins)
66+
for rule in plugin_manager.rules:
67+
for file_path, file_snapshot in snapshot.files.items():
68+
try:
69+
content = ""
70+
full_path = root_path / file_path
71+
if full_path.exists():
72+
content = full_path.read_text(errors='ignore')
73+
74+
issues = rule.check(str(file_path), content, {})
75+
if issues:
76+
for i in issues:
77+
# Convert plugin CodeIssue to standard Issue model
78+
79+
# Map severity to Issue severity Literal
80+
severity = "warning"
81+
if i.severity.lower() in ["info", "warning", "error"]:
82+
severity = i.severity.lower()
83+
elif i.severity.lower() == "high":
84+
severity = "error"
85+
elif i.severity.lower() == "low":
86+
severity = "info"
87+
88+
new_issue = Issue(
89+
rule_id=rule.id,
90+
severity=severity,
91+
message=i.description,
92+
location=IssueLocation(
93+
file_path=str(file_path),
94+
line=i.line_number
95+
),
96+
symbol=None,
97+
tags=["custom-rule"]
98+
)
99+
100+
if file_snapshot.issues is None:
101+
file_snapshot.issues = []
102+
file_snapshot.issues.append(new_issue)
103+
104+
except Exception as e:
105+
click.echo(f"Error running rule {rule.id} on {file_path}: {e}", err=True)
106+
107+
# 4. Save to Storage
108+
if storage:
109+
try:
110+
storage.save_snapshot(snapshot.metadata.project_name, snapshot)
111+
click.echo("Snapshot saved to database.")
112+
except Exception as e:
113+
click.echo(f"Failed to save snapshot: {e}", err=True)
114+
47115
except Exception as e:
48116
click.echo(f"Scan failed: {e}", err=True)
49117
ctx.exit(1)

0 commit comments

Comments
 (0)