77//! META-AGENT (follow-up wiring, do NOT do here):
88//! - gate behind feature `postgrest`; add `pub mod postgrest;` to lib.rs;
99//! - add `postgrest = ["dep:serde", "dep:serde_json"]` to Cargo.toml [features].
10+ //! - epiphany E4 outlook (PR #278): add an OPTIONAL feature
11+ //! `datafusion-dispatch = ["dep:datafusion"]` that compiles the stub
12+ //! `parsed_query_to_plan` at the bottom of this file into a real
13+ //! PostgREST → DataFusion → RlsRewriter dispatcher. Keep it optional
14+ //! so the zero-dep posture of the crate is preserved by default.
1015//!
1116//! This module is dependency-free: it uses only `std`, and emits JSON via
1217//! a small hand-rolled writer so it compiles under the crate's default
@@ -188,8 +193,25 @@ pub struct ParseError {
188193}
189194
190195impl ParseError {
191- fn new ( msg : impl Into < String > ) -> Self {
192- Self { message : msg. into ( ) }
196+ /// Construct a `ParseError` from any string-ish value.
197+ ///
198+ /// Public so downstream crates (e.g. an upcoming `PostgRestDispatcher`
199+ /// that wires PostgREST → DataFusion → RlsRewriter) can surface their
200+ /// own parse-time errors through this same type.
201+ pub fn new ( message : impl Into < String > ) -> Self {
202+ Self { message : message. into ( ) }
203+ }
204+ }
205+
206+ impl From < String > for ParseError {
207+ fn from ( message : String ) -> Self {
208+ Self { message }
209+ }
210+ }
211+
212+ impl From < & str > for ParseError {
213+ fn from ( message : & str ) -> Self {
214+ Self { message : message. to_string ( ) }
193215 }
194216}
195217
@@ -201,6 +223,51 @@ impl std::fmt::Display for ParseError {
201223
202224impl std:: error:: Error for ParseError { }
203225
226+ // ── URL decoding (HIGH) ──────────────────────────────────────────────────────
227+
228+ /// Percent-decode a URL component. Handles `%XX` and `+` → space (form-encoding).
229+ /// Returns None if malformed (incomplete %XX or invalid hex).
230+ fn percent_decode ( s : & str ) -> Option < String > {
231+ let mut out = Vec :: with_capacity ( s. len ( ) ) ;
232+ let bytes = s. as_bytes ( ) ;
233+ let mut i = 0 ;
234+ while i < bytes. len ( ) {
235+ match bytes[ i] {
236+ b'+' => {
237+ out. push ( b' ' ) ;
238+ i += 1 ;
239+ }
240+ b'%' if i + 2 < bytes. len ( ) => {
241+ let hex = std:: str:: from_utf8 ( & bytes[ i + 1 ..i + 3 ] ) . ok ( ) ?;
242+ let v = u8:: from_str_radix ( hex, 16 ) . ok ( ) ?;
243+ out. push ( v) ;
244+ i += 3 ;
245+ }
246+ b'%' => return None , // truncated escape
247+ c => {
248+ out. push ( c) ;
249+ i += 1 ;
250+ }
251+ }
252+ }
253+ String :: from_utf8 ( out) . ok ( )
254+ }
255+
256+ // ── Table-name validation (MEDIUM) ───────────────────────────────────────────
257+
258+ /// Validate a PostgREST table name: ASCII alphanumeric + underscore, not
259+ /// starting with `_` (rejects magic / reserved names), not empty.
260+ ///
261+ /// This rejects path-traversal (`..`), dotted paths (`users.json`), and any
262+ /// non-ASCII identifier. PostgREST's real surface is more permissive (quoted
263+ /// identifiers can contain anything), but the SMB+MedCare subset only ever
264+ /// uses simple snake_case names; tighter validation is the safer default.
265+ fn is_valid_table_name ( s : & str ) -> bool {
266+ !s. is_empty ( )
267+ && s. chars ( ) . all ( |c| c. is_ascii_alphanumeric ( ) || c == '_' )
268+ && !s. starts_with ( '_' )
269+ }
270+
204271// ── Path parser ──────────────────────────────────────────────────────────────
205272
206273/// Pure function — no I/O. Parses path query string into [`ParsedQuery`].
@@ -230,6 +297,14 @@ pub fn parse_path(path: &str) -> Result<ParsedQuery, ParseError> {
230297 ) ) ) ;
231298 }
232299
300+ // Reject path-traversal (`..`), dotted paths (`users.json`), magic
301+ // names (`_internal`), and any non-ASCII-identifier table label.
302+ if !is_valid_table_name ( table_part) {
303+ return Err ( ParseError :: new ( format ! (
304+ "invalid table name: {table_part}"
305+ ) ) ) ;
306+ }
307+
233308 let mut parsed = ParsedQuery {
234309 table : table_part. to_string ( ) ,
235310 ..Default :: default ( )
@@ -261,8 +336,18 @@ pub fn parse_path(path: &str) -> Result<ParsedQuery, ParseError> {
261336 }
262337
263338 match key {
264- "select" => parsed. select = Some ( value. to_string ( ) ) ,
265- "order" => parsed. order = Some ( value. to_string ( ) ) ,
339+ "select" => {
340+ let decoded = percent_decode ( value) . ok_or_else ( || {
341+ ParseError :: new ( format ! ( "malformed urlencoding in select: {value}" ) )
342+ } ) ?;
343+ parsed. select = Some ( decoded) ;
344+ }
345+ "order" => {
346+ let decoded = percent_decode ( value) . ok_or_else ( || {
347+ ParseError :: new ( format ! ( "malformed urlencoding in order: {value}" ) )
348+ } ) ?;
349+ parsed. order = Some ( decoded) ;
350+ }
266351 "limit" => {
267352 parsed. limit = Some ( value. parse :: < u64 > ( ) . map_err ( |_| {
268353 ParseError :: new ( format ! ( "bad limit: {value}" ) )
@@ -286,10 +371,19 @@ pub fn parse_path(path: &str) -> Result<ParsedQuery, ParseError> {
286371 let op = FilterOp :: parse ( op_str) . ok_or_else ( || {
287372 ParseError :: new ( format ! ( "unknown filter op: {op_str}" ) )
288373 } ) ?;
374+ // URL-decode AFTER `op.` split so e.g. `eq.foo%40bar.com`
375+ // yields `foo@bar.com` (and not a confused dot-split on a
376+ // decoded `.`). Op tokens themselves are ASCII and never
377+ // need decoding.
378+ let decoded = percent_decode ( val) . ok_or_else ( || {
379+ ParseError :: new ( format ! (
380+ "malformed urlencoding in filter value: {val}"
381+ ) )
382+ } ) ?;
289383 parsed. filters . push ( Filter {
290384 column : key. to_string ( ) ,
291385 op,
292- value : val . to_string ( ) ,
386+ value : decoded ,
293387 } ) ;
294388 }
295389 }
@@ -400,6 +494,11 @@ fn push_optional_string(out: &mut String, v: Option<&str>) {
400494}
401495
402496/// Encode a Rust `&str` as a JSON string literal (with surrounding quotes).
497+ ///
498+ /// Supplementary-plane characters (U+10000 and above) are emitted as a
499+ /// UTF-16 surrogate pair `\uXXXX\uXXXX` per RFC 8259 §7. BMP characters
500+ /// pass through as-is (UTF-8 in the JSON text is fine; we only escape
501+ /// when we have to).
403502fn json_string ( s : & str ) -> String {
404503 let mut out = String :: with_capacity ( s. len ( ) + 2 ) ;
405504 out. push ( '"' ) ;
@@ -415,6 +514,15 @@ fn json_string(s: &str) -> String {
415514 c if ( c as u32 ) < 0x20 => {
416515 out. push_str ( & format ! ( "\\ u{:04x}" , c as u32 ) ) ;
417516 }
517+ c if ( c as u32 ) > 0xFFFF => {
518+ // Supplementary plane: encode as UTF-16 surrogate pair.
519+ // high = 0xD800 + ((cp - 0x10000) >> 10)
520+ // low = 0xDC00 + ((cp - 0x10000) & 0x3FF)
521+ let cp = c as u32 - 0x10000 ;
522+ let high = 0xD800 + ( cp >> 10 ) ;
523+ let low = 0xDC00 + ( cp & 0x3FF ) ;
524+ out. push_str ( & format ! ( "\\ u{:04x}\\ u{:04x}" , high, low) ) ;
525+ }
418526 c => out. push ( c) ,
419527 }
420528 }
@@ -656,4 +764,191 @@ mod tests {
656764 assert ! ( s. contains( "\" offset\" :null" ) , "{s}" ) ;
657765 assert ! ( s. contains( "\" filters\" :[]" ) , "{s}" ) ;
658766 }
767+
768+ // ── Filter-op coverage (LOW) — ilike / in / is / like ────────────────
769+
770+ #[ test]
771+ fn parse_ilike_filter ( ) {
772+ let q = parse_path ( "users?email=ilike.*@example.com" ) . expect ( "parse ok" ) ;
773+ assert_eq ! ( q. table, "users" ) ;
774+ assert_eq ! ( q. filters. len( ) , 1 ) ;
775+ let f = & q. filters [ 0 ] ;
776+ assert_eq ! ( f. column, "email" ) ;
777+ assert_eq ! ( f. op, FilterOp :: ILike ) ;
778+ assert_eq ! ( f. value, "*@example.com" ) ;
779+ }
780+
781+ #[ test]
782+ fn parse_in_filter ( ) {
783+ let q = parse_path ( "users?id=in.(1,2,3)" ) . expect ( "parse ok" ) ;
784+ assert_eq ! ( q. filters. len( ) , 1 ) ;
785+ let f = & q. filters [ 0 ] ;
786+ assert_eq ! ( f. column, "id" ) ;
787+ assert_eq ! ( f. op, FilterOp :: In ) ;
788+ assert_eq ! ( f. value, "(1,2,3)" ) ;
789+ }
790+
791+ #[ test]
792+ fn parse_is_null_filter ( ) {
793+ let q = parse_path ( "users?status=is.null" ) . expect ( "parse ok" ) ;
794+ let f = & q. filters [ 0 ] ;
795+ assert_eq ! ( f. column, "status" ) ;
796+ assert_eq ! ( f. op, FilterOp :: Is ) ;
797+ assert_eq ! ( f. value, "null" ) ;
798+ }
799+
800+ #[ test]
801+ fn parse_like_filter ( ) {
802+ let q = parse_path ( "users?name=like.J*" ) . expect ( "parse ok" ) ;
803+ let f = & q. filters [ 0 ] ;
804+ assert_eq ! ( f. column, "name" ) ;
805+ assert_eq ! ( f. op, FilterOp :: Like ) ;
806+ assert_eq ! ( f. value, "J*" ) ;
807+ }
808+
809+ // ── URL-decode tests (HIGH) ──────────────────────────────────────────
810+
811+ #[ test]
812+ fn parse_url_decode_space ( ) {
813+ let q = parse_path ( "users?name=eq.John%20Doe" ) . expect ( "parse ok" ) ;
814+ assert_eq ! ( q. filters[ 0 ] . value, "John Doe" ) ;
815+ }
816+
817+ #[ test]
818+ fn parse_url_decode_at_sign ( ) {
819+ let q = parse_path ( "users?email=eq.foo%40bar.com" ) . expect ( "parse ok" ) ;
820+ assert_eq ! ( q. filters[ 0 ] . value, "foo@bar.com" ) ;
821+ }
822+
823+ #[ test]
824+ fn parse_url_decode_plus_sign ( ) {
825+ // %2B → '+' (literal plus, NOT a space — that's only bare `+`).
826+ let q = parse_path ( "users?notes=eq.Hello%2BWorld" ) . expect ( "parse ok" ) ;
827+ assert_eq ! ( q. filters[ 0 ] . value, "Hello+World" ) ;
828+ }
829+
830+ #[ test]
831+ fn parse_url_decode_bare_plus_is_space ( ) {
832+ // form-encoding convention: bare `+` decodes to space.
833+ let q = parse_path ( "users?name=eq.John+Doe" ) . expect ( "parse ok" ) ;
834+ assert_eq ! ( q. filters[ 0 ] . value, "John Doe" ) ;
835+ }
836+
837+ #[ test]
838+ fn parse_url_decode_select_and_order ( ) {
839+ let q = parse_path ( "users?select=id%2Cname&order=created.desc" )
840+ . expect ( "parse ok" ) ;
841+ assert_eq ! ( q. select. as_deref( ) , Some ( "id,name" ) ) ;
842+ assert_eq ! ( q. order. as_deref( ) , Some ( "created.desc" ) ) ;
843+ }
844+
845+ #[ test]
846+ fn parse_url_decode_truncated_escape_errors ( ) {
847+ let err = parse_path ( "users?name=eq.foo%2" ) . expect_err ( "must error" ) ;
848+ assert ! ( err. message. contains( "malformed urlencoding" ) , "{}" , err. message) ;
849+ }
850+
851+ #[ test]
852+ fn parse_url_decode_bad_hex_errors ( ) {
853+ let err = parse_path ( "users?name=eq.foo%ZZbar" ) . expect_err ( "must error" ) ;
854+ assert ! ( err. message. contains( "malformed urlencoding" ) , "{}" , err. message) ;
855+ }
856+
857+ // ── Table-validation tests (MEDIUM) ──────────────────────────────────
858+
859+ #[ test]
860+ fn parse_table_traversal_errors ( ) {
861+ // `..` contains `.`, plus `/` → caught by either nested-path or
862+ // invalid-table-name guard. Either error message is acceptable.
863+ let err = parse_path ( "../../../etc/passwd" ) . expect_err ( "must error" ) ;
864+ assert ! (
865+ err. message. contains( "nested path" ) || err. message. contains( "invalid table name" ) ,
866+ "{}" ,
867+ err. message
868+ ) ;
869+ }
870+
871+ #[ test]
872+ fn parse_table_with_period_errors ( ) {
873+ let err = parse_path ( "users.json?id=eq.1" ) . expect_err ( "must error" ) ;
874+ assert ! ( err. message. contains( "invalid table name" ) , "{}" , err. message) ;
875+ }
876+
877+ #[ test]
878+ fn parse_table_underscore_prefix_errors ( ) {
879+ let err = parse_path ( "_internal?id=eq.1" ) . expect_err ( "must error" ) ;
880+ assert ! ( err. message. contains( "invalid table name" ) , "{}" , err. message) ;
881+ }
882+
883+ #[ test]
884+ fn parse_table_unicode_errors ( ) {
885+ let err = parse_path ( "ünits?id=eq.1" ) . expect_err ( "must error" ) ;
886+ assert ! ( err. message. contains( "invalid table name" ) , "{}" , err. message) ;
887+ }
888+
889+ #[ test]
890+ fn parse_table_leading_slash_still_works ( ) {
891+ // Existing behaviour preserved.
892+ let q = parse_path ( "/users" ) . expect ( "parse ok" ) ;
893+ assert_eq ! ( q. table, "users" ) ;
894+ }
895+
896+ // ── ParseError public surface ────────────────────────────────────────
897+
898+ #[ test]
899+ fn parse_error_public_constructors ( ) {
900+ let a = ParseError :: new ( "a" ) ;
901+ let b: ParseError = "b" . into ( ) ;
902+ let c: ParseError = String :: from ( "c" ) . into ( ) ;
903+ assert_eq ! ( a. message, "a" ) ;
904+ assert_eq ! ( b. message, "b" ) ;
905+ assert_eq ! ( c. message, "c" ) ;
906+ // Display impl is reachable.
907+ assert ! ( format!( "{a}" ) . contains( "a" ) ) ;
908+ }
909+
910+ // ── Surrogate-pair JSON escaping (LOW) ───────────────────────────────
911+
912+ #[ test]
913+ fn json_string_supplementary_plane_emits_surrogate_pair ( ) {
914+ // U+1F600 GRINNING FACE → 😀
915+ let out = json_string ( "\u{1F600} " ) ;
916+ assert_eq ! ( out, "\" \\ ud83d\\ ude00\" " , "{out}" ) ;
917+ }
918+
919+ #[ test]
920+ fn json_string_bmp_passes_through ( ) {
921+ // BMP chars are emitted as UTF-8, not escaped.
922+ let out = json_string ( "café" ) ;
923+ assert_eq ! ( out, "\" café\" " ) ;
924+ }
925+ }
926+
927+ // ── EPIPHANY E4 SEED — PostgREST → DataFusion dispatch stub ──────────────────
928+ //
929+ // Doc-only stub for the upcoming `PostgRestDispatcher` (PR #278 outlook E4)
930+ // that wires PostgREST → DataFusion → RlsRewriter. Compiled only under the
931+ // optional `datafusion-dispatch` feature so the zero-dep default posture of
932+ // the crate is preserved.
933+ //
934+ // META-AGENT (follow-up wiring, do NOT do here):
935+ // - add `datafusion-dispatch = ["dep:datafusion"]` to Cargo.toml [features];
936+ // - add `datafusion = { version = "...", optional = true }` to dependencies;
937+ // - flesh out `parsed_query_to_plan` to translate a `ParsedQuery` into a
938+ // `LogicalPlan` (TableScan → Filter → Projection → Sort → Limit), then
939+ // hand it to `crate::rls::RlsRewriter` for row-level-security overlay.
940+
941+ /// Convert a [`ParsedQuery`] to a DataFusion `LogicalPlan`.
942+ ///
943+ /// **NOT IMPLEMENTED** in this PR. Stub interface for the upcoming
944+ /// `PostgRestDispatcher` that wires PostgREST → DataFusion → RlsRewriter.
945+ /// See PR #278 outlook epiphany E4.
946+ #[ cfg( feature = "datafusion-dispatch" ) ]
947+ pub fn parsed_query_to_plan (
948+ _query : & ParsedQuery ,
949+ _ctx : & datafusion:: execution:: context:: SessionContext ,
950+ ) -> Result < datafusion:: logical_expr:: LogicalPlan , ParseError > {
951+ Err ( ParseError :: new (
952+ "parsed_query_to_plan: not yet implemented (PR #278 outlook E4)" ,
953+ ) )
659954}
0 commit comments