Skip to content

Commit ed2dcdc

Browse files
committed
initial commit
0 parents  commit ed2dcdc

15 files changed

Lines changed: 2184 additions & 0 deletions

pyproject.toml

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
[project]
name = "sql-redis"
version = "0.1.0"
description = "SQL to Redis command translation utility"
requires-python = ">=3.11"
# sqlglot is imported at module level by sql_redis/parser.py, so it is a
# runtime requirement of the package, not a dev-only extra.
dependencies = [
    "sqlglot",
]

[project.optional-dependencies]
# Tooling needed only to run the test suite.
dev = [
    "pytest>=8.0.0",
    "testcontainers[redis]>=4.0.0",
    "redis>=5.0.0",
]

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[tool.pytest.ini_options]
testpaths = ["tests"]
21+

sql_redis/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
"""SQL to Redis command translation utility."""
2+
3+
from sql_redis.translator import translate_sql
4+
5+
__all__ = ["translate_sql"]
6+

sql_redis/analyzer.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
"""SQL analyzer component - resolves field types from schema."""
2+
3+
from dataclasses import dataclass, field
4+
5+
from sql_redis.parser import ParsedQuery, AggregationSpec, ComputedField, Condition
6+
7+
8+
@dataclass
class VectorSearchAnalysis:
    """Vector search parameters attached to an analyzed query.

    Attributes:
        field: Name of the field the vector search targets.
        k: Integer search parameter; presumably the number of nearest
            neighbours requested (TODO confirm against the query builder).
        alias: Alias associated with the search; presumably the name under
            which the distance score is exposed (TODO confirm).
    """

    field: str
    k: int
    alias: str
14+
15+
16+
@dataclass
class AnalyzedQuery:
    """A parsed SQL query together with the schema details resolved for it.

    Every attribute has a default, so an empty instance can be created and
    filled in incrementally.

    Attributes:
        parsed: The parser output this analysis is based on.
        field_types: Mapping of field name to its type name.
        aggregations: Aggregation specifications for the query.
        computed_fields: Computed/APPLY field specifications for the query.
        groupby_fields: Names of the fields the query groups by.
        is_global_aggregation: Flag for a global aggregation; presumably an
            aggregation without GROUP BY (TODO confirm once the analyzer is
            implemented).
        vector_search: Vector search details, or ``None`` when absent.
        has_prefilter: Flag for a pre-filter; presumably set when a vector
            search is combined with other conditions (TODO confirm).
    """

    parsed: ParsedQuery = field(default_factory=ParsedQuery)
    field_types: dict[str, str] = field(default_factory=dict)
    aggregations: list[AggregationSpec] = field(default_factory=list)
    computed_fields: list[ComputedField] = field(default_factory=list)
    groupby_fields: list[str] = field(default_factory=list)
    is_global_aggregation: bool = False
    vector_search: VectorSearchAnalysis | None = None
    has_prefilter: bool = False

    def get_field_type(self, field_name: str) -> str | None:
        """Return the type recorded for ``field_name``, or ``None`` if absent."""
        return self.field_types.get(field_name)

    def get_conditions_by_type(self, field_type: str) -> list[Condition]:
        """Return the parsed conditions whose field has type ``field_type``.

        Conditions on fields missing from ``field_types`` never match,
        because their lookup yields ``None``.
        """
        type_of = self.field_types.get
        return [
            condition
            for condition in self.parsed.conditions
            if type_of(condition.field) == field_type
        ]
39+
40+
41+
class Analyzer:
    """Resolves parsed SQL queries against a registry of index schemas."""

    def __init__(self, schemas: dict[str, dict[str, str]]):
        """Keep a reference to the schema registry for later analysis.

        Args:
            schemas: Mapping of index name to a ``{field name: type}`` dict.
                The mapping is stored as-is, not copied.
        """
        self._schemas = schemas

    def analyze(self, parsed: ParsedQuery) -> AnalyzedQuery:
        """Resolve field types for ``parsed`` (placeholder).

        This method is not implemented yet and unconditionally raises
        ``NotImplementedError``. The intended contract is recorded below.

        Args:
            parsed: The parsed SQL query to analyze.

        Returns:
            Intended result: an ``AnalyzedQuery`` with field types resolved.

        Raises:
            NotImplementedError: Always, in the current implementation.
            ValueError: Intended for an unknown index or field once the
                method is implemented.
        """
        raise NotImplementedError("Analyzer.analyze is not yet implemented")
65+

sql_redis/parser.py

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
"""SQL parser component using sqlglot."""
2+
3+
from dataclasses import dataclass, field
4+
5+
import sqlglot
6+
from sqlglot import exp
7+
8+
9+
@dataclass
10+
class AggregationSpec:
11+
"""Specification for an aggregation function."""
12+
function: str
13+
field: str | None = None
14+
alias: str | None = None
15+
16+
17+
@dataclass
class ComputedField:
    """Describes a computed (APPLY) field.

    Attributes:
        expression: The expression text that produces the value.
        alias: Name given to the computed result.
    """

    expression: str
    alias: str
22+
23+
24+
@dataclass
25+
class VectorSearchSpec:
26+
"""Specification for vector search."""
27+
field: str
28+
alias: str
29+
k: int | None = None
30+
31+
32+
@dataclass
class Condition:
    """One condition from a WHERE clause.

    Attributes:
        field: Name of the field the condition tests.
        operator: Comparison operator, stored as text.
        value: Operand to compare against; its type is not constrained.
        negated: Whether the condition is negated. Defaults to ``False``.
    """

    field: str
    operator: str
    value: object
    negated: bool = False
39+
40+
41+
@dataclass
class ParsedQuery:
    """Components extracted from a SQL statement by the parser.

    All attributes have defaults, so ``ParsedQuery()`` is an empty query
    that can be filled in step by step.

    Attributes:
        index: Index (table) name; empty string when not set.
        fields: Selected field names.
        conditions: WHERE conditions.
        boolean_operator: Boolean operator for the conditions; defaults to
            ``"AND"``.
        aggregations: Aggregation specifications.
        computed_fields: Computed/APPLY field specifications.
        vector_search: Vector search specification, or ``None``.
        groupby_fields: Names of the fields grouped by.
        orderby_fields: ``(field, direction)`` pairs, where direction is
            ``ASC`` or ``DESC``.
        limit: Row limit, or ``None`` when unspecified.
        offset: Row offset, or ``None`` when unspecified.
    """

    index: str = ""
    fields: list[str] = field(default_factory=list)
    conditions: list[Condition] = field(default_factory=list)
    boolean_operator: str = "AND"
    aggregations: list[AggregationSpec] = field(default_factory=list)
    computed_fields: list[ComputedField] = field(default_factory=list)
    vector_search: VectorSearchSpec | None = None
    groupby_fields: list[str] = field(default_factory=list)
    # Each entry is a (field, ASC|DESC) pair.
    orderby_fields: list[tuple[str, str]] = field(default_factory=list)
    limit: int | None = None
    offset: int | None = None
55+
56+
57+
class SQLParser:
    """Turns a SQL string into a ``ParsedQuery`` using sqlglot."""

    def parse(self, sql: str) -> ParsedQuery:
        """Parse ``sql`` and extract the index name and selected fields.

        Only the FROM table and the SELECT list are inspected; every other
        ``ParsedQuery`` attribute keeps its default value. In the SELECT
        list, plain columns contribute their name and a star contributes
        ``"*"``; any other kind of expression is ignored.

        Exceptions raised by ``sqlglot.parse_one`` are not caught here and
        propagate to the caller.

        Args:
            sql: The SQL statement to parse.

        Returns:
            A ``ParsedQuery`` holding the extracted components.
        """
        tree = sqlglot.parse_one(sql)
        query = ParsedQuery()

        # The table named in the FROM clause is used as the index name.
        from_node = tree.find(exp.From)
        table_node = from_node.find(exp.Table) if from_node else None
        if table_node:
            query.index = table_node.name

        # Collect the projected columns in their original order.
        select_node = tree.find(exp.Select)
        projections = select_node.expressions if select_node else []
        for item in projections:
            if isinstance(item, exp.Column):
                query.fields.append(item.name)
            elif isinstance(item, exp.Star):
                query.fields.append("*")

        return query
89+

sql_redis/query_builder.py

Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
"""RediSearch query builder - generates query syntax from analyzed queries."""
2+
3+
4+
class QueryBuilder:
5+
"""Builds RediSearch query syntax from conditions."""
6+
7+
def build_text_condition(
8+
self,
9+
field: str | list[str],
10+
operator: str,
11+
value: str,
12+
negated: bool = False
13+
) -> str:
14+
"""Build query syntax for TEXT field conditions.
15+
16+
Args:
17+
field: Field name or list of field names for multi-field search.
18+
operator: One of =, MATCH, LIKE, FUZZY.
19+
value: The search term or pattern.
20+
negated: If True, prefix with - for negation.
21+
22+
Returns:
23+
RediSearch query syntax like @field:term or @field:"phrase".
24+
"""
25+
raise NotImplementedError("build_text_condition is not yet implemented")
26+
27+
def build_tag_condition(
28+
self,
29+
field: str,
30+
operator: str,
31+
value: str | list[str]
32+
) -> str:
33+
"""Build query syntax for TAG field conditions.
34+
35+
Args:
36+
field: Field name.
37+
operator: One of =, !=, IN.
38+
value: Tag value or list of values for IN.
39+
40+
Returns:
41+
RediSearch query syntax like @field:{value} or @field:{v1|v2}.
42+
"""
43+
raise NotImplementedError("build_tag_condition is not yet implemented")
44+
45+
def build_numeric_condition(
46+
self,
47+
field: str,
48+
operator: str,
49+
value: int | float | tuple[int | float, int | float]
50+
) -> str:
51+
"""Build query syntax for NUMERIC field conditions.
52+
53+
Args:
54+
field: Field name.
55+
operator: One of =, !=, <, <=, >, >=, BETWEEN.
56+
value: Numeric value or (min, max) tuple for BETWEEN.
57+
58+
Returns:
59+
RediSearch query syntax like @field:[min max].
60+
"""
61+
raise NotImplementedError("build_numeric_condition is not yet implemented")
62+
63+
def build_vector_condition(
64+
self,
65+
field: str,
66+
k: int,
67+
alias: str,
68+
prefilter: str | None = None
69+
) -> str:
70+
"""Build query syntax for VECTOR KNN search.
71+
72+
Args:
73+
field: Vector field name.
74+
k: Number of nearest neighbors.
75+
alias: Alias for the distance score.
76+
prefilter: Optional pre-filter query string.
77+
78+
Returns:
79+
RediSearch query syntax like =>[KNN k @field $BLOB AS alias].
80+
"""
81+
raise NotImplementedError("build_vector_condition is not yet implemented")
82+
83+
def build_geo_filter(
84+
self,
85+
field: str,
86+
lon: float,
87+
lat: float,
88+
radius: float,
89+
unit: str = "km"
90+
) -> str:
91+
"""Build GEOFILTER clause for GEO fields.
92+
93+
Args:
94+
field: GEO field name.
95+
lon: Longitude.
96+
lat: Latitude.
97+
radius: Search radius.
98+
unit: Distance unit (km, m, mi, ft).
99+
100+
Returns:
101+
GEOFILTER clause like "GEOFILTER field lon lat radius unit".
102+
"""
103+
raise NotImplementedError("build_geo_filter is not yet implemented")
104+
105+
def build_geo_distance_apply(
106+
self,
107+
field: str,
108+
lon: float,
109+
lat: float,
110+
alias: str,
111+
unit: str = "m"
112+
) -> str:
113+
"""Build APPLY geodistance expression.
114+
115+
Args:
116+
field: GEO field name.
117+
lon: Longitude.
118+
lat: Latitude.
119+
alias: Alias for the distance result.
120+
unit: Distance unit for conversion.
121+
122+
Returns:
123+
APPLY clause like 'APPLY "geodistance(@field, lon, lat)" AS alias'.
124+
"""
125+
raise NotImplementedError("build_geo_distance_apply is not yet implemented")
126+
127+
def combine_conditions(
128+
self,
129+
conditions: list[str],
130+
operator: str = "AND"
131+
) -> str:
132+
"""Combine multiple condition strings with boolean operator.
133+
134+
Args:
135+
conditions: List of query condition strings.
136+
operator: Boolean operator (AND, OR).
137+
138+
Returns:
139+
Combined query string.
140+
"""
141+
raise NotImplementedError("combine_conditions is not yet implemented")
142+
143+
def build_query_string(
144+
self,
145+
text_conditions: list[tuple] | None = None,
146+
numeric_conditions: list[tuple] | None = None,
147+
tag_conditions: list[tuple] | None = None,
148+
field_types: dict[str, str] | None = None
149+
) -> str:
150+
"""Build complete query string from conditions.
151+
152+
Args:
153+
text_conditions: List of (field, operator, value) tuples.
154+
numeric_conditions: List of (field, operator, value) tuples.
155+
tag_conditions: List of (field, operator, value) tuples.
156+
field_types: Dict mapping field names to types.
157+
158+
Returns:
159+
Complete RediSearch query string.
160+
"""
161+
raise NotImplementedError("build_query_string is not yet implemented")
162+

0 commit comments

Comments
 (0)