Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,14 @@ a deprecation window (see `GOVERNANCE.md` § Scope discipline).

### Added

- **W015 `join-function-on-column`** - warns when a function wraps a
column inside a `JOIN ... ON` predicate, the JOIN-side companion to W003.
`JOIN customers c ON UPPER(o.email) = UPPER(c.email)` defeats every
index on the joined column. The pattern stops at the next clause keyword
(`WHERE`, `GROUP BY`, `ORDER BY`, `HAVING`, the next `JOIN`, or `UNION`)
so a clean JOIN with a dirty WHERE leaves W015 quiet and lets W003 own
that case. Contributed by [@mvanhorn](https://github.com/mvanhorn)
([#33](https://github.com/Pawansingh3889/sql-guard/pull/33)).
- W023 `scalar-udf-in-where`: warns on `<schema>.<name>(...)` calls in
`WHERE`/`HAVING`/`ON` clauses, the canonical T-SQL scalar-UDF
anti-pattern. Built-ins (no schema prefix) are unaffected.
Expand Down
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ One bad SQL query can delete production data, expose customer records, or bring

| | |
|---|---|
| Rules | 40 (10 errors, 25 warnings, 5 Python-source) |
| Rules | 41 (10 errors, 26 warnings, 5 Python-source) |
| Tests | 152 |
| Coverage | 86% |
| Scan speed | 0.08s across 200 files |
Expand All @@ -43,7 +43,7 @@ print(result.summary()) # "1 error, 0 warnings in 1 statement"

---

Fast, rule-based SQL linter. 40 rules (35 SQL + 5 Python), including SQL Server-focused rules for T-SQL shops. Inline disable, project config, git-changed-only mode, and SARIF output for GitHub Code Scanning. 500+ monthly downloads on PyPI.
Fast, rule-based SQL linter. 41 rules (36 SQL + 5 Python), including SQL Server-focused rules for T-SQL shops. Inline disable, project config, git-changed-only mode, and SARIF output for GitHub Code Scanning. 500+ monthly downloads on PyPI.

Catches dangerous SQL before it reaches production -- DELETE without WHERE, UPDATE without WHERE, SQL injection patterns, SELECT *, and 20 more. Runs as a **CLI tool**, **pre-commit hook**, and **GitHub Action**.

Expand Down Expand Up @@ -227,6 +227,7 @@ sql-sop list-rules # show every registered rule
| W009 | `missing-semicolon` | Statement not terminated with `;` |
| W010 | `commented-out-code` | `-- SELECT * FROM old_table` -- use version control |
| W013 | `window-missing-partition` | `OVER ()` -- unpredictable results and unclear intent |
| W015 | `join-function-on-column` | `JOIN customers c ON UPPER(o.email) = UPPER(c.email)` -- kills index seek |
| W016 | `not-in-with-subquery` | `WHERE id NOT IN (SELECT ...)` -- silently returns 0 rows on NULL |
| W017 | `leading-wildcard-like` | `WHERE name LIKE '%smith'` -- non-SARGable, full scan |
| W018 | `or-across-columns` | `WHERE a = 1 OR b = 2` -- defeats single-column indexes |
Expand Down
2 changes: 2 additions & 0 deletions sql_guard/rules/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
FunctionOnIndexedColumn,
GroupByOrdinal,
HardcodedValues,
JoinFunctionOnColumn,
LeadingWildcardLike,
MissingLimit,
MissingSemicolon,
Expand Down Expand Up @@ -73,6 +74,7 @@
GroupByOrdinal(),
NotInWithSubquery(),
LeadingWildcardLike(),
JoinFunctionOnColumn(),
OrAcrossColumns(),
TruncateTable(),
CountDistinctUnbounded(),
Expand Down
33 changes: 33 additions & 0 deletions sql_guard/rules/warnings.py
Original file line number Diff line number Diff line change
Expand Up @@ -555,3 +555,36 @@ def check_statement(self, statement: str, start_line: int, file: str) -> Finding
),
)
return None


class JoinFunctionOnColumn(Rule):
    """W015: warn when a known function call appears in a ``JOIN ... ON`` predicate.

    This is the JOIN-side counterpart of the WHERE-clause check (W003): a
    function wrapped around a join column prevents index usage.
    """

    id = "W015"
    name = "join-function-on-column"
    severity = "warning"
    description = "Function on column in JOIN ... ON prevents index usage"
    multiline = False

    # The pattern is built from three concatenated pieces:
    #   1. ``JOIN`` followed (lazily, never crossing a ``;``) by ``ON``.
    #   2. A tempered scan of the ON predicate: a character is consumed only
    #      when no clause keyword (WHERE, GROUP BY, ORDER BY, HAVING, JOIN,
    #      UNION) begins at that position. A function that only shows up in
    #      a later WHERE clause is therefore not reported here; W003 owns
    #      that case.
    #   3. One of the listed function names followed by an opening paren.
    # NOTE(review): matching flags (e.g. case-insensitivity) come from
    # Rule._compile, which is defined elsewhere -- confirm there.
    _pattern = Rule._compile(
        r"\bJOIN\b[^;]*?\bON\b"
        r"(?:(?!\b(?:WHERE|GROUP\s+BY|ORDER\s+BY|HAVING|JOIN|UNION)\b).)*?"
        r"\b(YEAR|MONTH|DAY|DATE|UPPER|LOWER|TRIM|CAST|CONVERT|SUBSTRING|COALESCE)\s*\("
    )

    def check_line(self, line: str, line_number: int, file: str) -> Finding | None:
        """Return a W015 finding when *line* matches the pattern, else ``None``."""
        # Guard clause: nothing to report for lines the pattern does not hit.
        if not self._pattern.search(line):
            return None

        return Finding(
            rule_id=self.id,
            severity=self.severity,
            file=file,
            line=line_number,
            message="Function on column in JOIN ... ON -- kills index usage",
            suggestion="Materialize the function into a stored column on both sides: JOIN customers c ON o.email_lower = c.email_lower",
        )
5 changes: 5 additions & 0 deletions tests/fixtures/warnings.sql
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,8 @@ WHERE id NOT IN (SELECT customer_id FROM orders);
SELECT order_id, total
FROM orders
WHERE dbo.fn_IsHighValue(total) = 1;

-- W015: Function on column in JOIN ... ON
SELECT *
FROM orders o
JOIN customers c ON UPPER(o.email) = UPPER(c.email);
55 changes: 55 additions & 0 deletions tests/test_new_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,3 +275,58 @@ def test_w023_passes_plain_where():
def test_w023_passes_table_column_reference():
    """A plain ``table.column`` reference in WHERE must not trigger the rule."""
    sql = "SELECT id FROM t WHERE x.y = 1;"
    outcome = _stmt(ScalarUdfInWhere(), sql)
    assert outcome is None



# W015 join-function-on-column ------------------------------------------------


def test_w015_flags_upper_function_in_join_on():
    """W015 reports UPPER() used inside a JOIN ... ON predicate as a warning."""
    from sql_guard.rules.warnings import JoinFunctionOnColumn

    sql = "JOIN customers c ON UPPER(o.email) = UPPER(c.email)"
    result = _line(JoinFunctionOnColumn(), sql)

    assert result is not None
    assert result.rule_id == "W015"
    assert result.severity == "warning"


def test_w015_flags_year_in_join_on():
    """W015 reports YEAR() used inside a JOIN ... ON predicate."""
    from sql_guard.rules.warnings import JoinFunctionOnColumn

    sql = "JOIN events e ON YEAR(o.created_at) = YEAR(e.day)"
    result = _line(JoinFunctionOnColumn(), sql)

    assert result is not None
    assert result.rule_id == "W015"


def test_w015_passes_when_join_uses_materialized_columns():
    """A JOIN ... ON that compares bare columns produces no W015 finding."""
    from sql_guard.rules.warnings import JoinFunctionOnColumn

    sql = "JOIN customers c ON o.email_lower = c.email_lower"
    result = _line(JoinFunctionOnColumn(), sql)
    assert result is None


def test_w015_does_not_flag_function_in_where_only():
    """A function in a bare WHERE line is W003's case; W015 stays quiet."""
    from sql_guard.rules.warnings import JoinFunctionOnColumn

    where_only = "WHERE UPPER(email) = 'A@B.COM'"
    result = _line(JoinFunctionOnColumn(), where_only)
    assert result is None


def test_w015_does_not_flag_clean_join_with_dirty_where():
    """A clean JOIN ... ON followed by a function in WHERE yields no W015.

    The rule must stop scanning at the next clause keyword so the WHERE
    function is left to W003.
    """
    from sql_guard.rules.warnings import JoinFunctionOnColumn

    # Single-line statement: clean ON predicate, function only in WHERE.
    query = (
        "SELECT * FROM orders o "
        "JOIN customers c ON o.customer_id = c.id "
        "WHERE UPPER(o.email) = 'A@B.COM'"
    )
    result = _line(JoinFunctionOnColumn(), query)
    assert result is None
19 changes: 15 additions & 4 deletions tests/test_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,18 @@

class TestRuleRegistry:
def test_all_rules_loaded(self) -> None:
assert len(ALL_RULES) == 35
assert len(ALL_RULES) == 36

def test_10_errors(self) -> None:
    """The registry contains exactly ten rules with severity ``error``."""
    # 8 E-series + 2 T-series (T002 xp-cmdshell, T004 deprecated-outer-join).
    error_rules = [rule for rule in ALL_RULES if rule.severity == "error"]
    expected_count = 10
    assert len(error_rules) == expected_count

def test_24_warnings(self) -> None:
# 19 W-series + 3 S-series + 3 T-series (T001 with-nolock,
def test_26_warnings(self) -> None:
# 20 W-series + 3 S-series + 3 T-series (T001 with-nolock,
# T003 cursor-declaration, T005 create-index-without-online).
warnings = [r for r in ALL_RULES if r.severity == "warning"]
assert len(warnings) == 25
assert len(warnings) == 26

def test_unique_ids(self) -> None:
ids = [r.id for r in ALL_RULES]
Expand Down Expand Up @@ -243,3 +243,14 @@ def test_fail_fast_stops_early(self) -> None:
def test_nonexistent_path(self) -> None:
    """Checking a path that does not exist reports zero files checked."""
    missing_paths = ["nonexistent_dir/"]
    outcome = check(missing_paths)
    assert outcome.files_checked == 0

def test_w015_join_function_on_column(self) -> None:
    """W015 is registered and fires at least once on the warnings fixture."""
    from sql_guard.rules import get_rules
    from sql_guard.rules.warnings import JoinFunctionOnColumn

    # Registration: get_rules() must yield a JoinFunctionOnColumn instance.
    is_registered = any(
        isinstance(rule, JoinFunctionOnColumn) for rule in get_rules()
    )
    assert is_registered

    # End-to-end: running check() on the fixture yields a W015 finding.
    fixture_path = str(FIXTURES / "warnings.sql")
    result = check([fixture_path])
    w015_hits = [hit for hit in result.findings if hit.rule_id == "W015"]
    assert len(w015_hits) >= 1
Loading