|
| 1 | +//! Normalize Coverage Test |
| 2 | +//! |
| 3 | +//! This test verifies that all AST node types with `location` fields are handled |
| 4 | +//! in the `normalize.rs` clear_location function. |
| 5 | +//! |
| 6 | +//! When `libpg_query` is upgraded and new node types are added that have location |
| 7 | +//! fields, this test will fail and list exactly which types need to be added to |
| 8 | +//! the normalization code. |
| 9 | +//! |
| 10 | +//! ## When this test fails: |
| 11 | +//! 1. Look at the "Missing handlers" list |
| 12 | +//! 2. Add each missing type to `clear_location` in `normalize.rs` |
| 13 | +//! 3. For each type, set `(*n).location = 0;` at minimum |
| 14 | +//! 4. Check if the type needs additional normalization beyond location clearing |
| 15 | +
|
| 16 | +use regex::Regex; |
| 17 | +use std::collections::HashSet; |
| 18 | +use std::fs; |
| 19 | +use std::path::PathBuf; |
| 20 | + |
| 21 | +/// Extract all struct names that have a `location: i32` field from protobuf.rs |
| 22 | +fn extract_types_with_location() -> HashSet<String> { |
| 23 | + let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")); |
| 24 | + let protobuf_path = manifest_dir |
| 25 | + .parent() |
| 26 | + .unwrap() |
| 27 | + .join("pgls_query/src/protobuf.rs"); |
| 28 | + |
| 29 | + let content = fs::read_to_string(&protobuf_path) |
| 30 | + .expect("Failed to read protobuf.rs - has it been generated?"); |
| 31 | + |
| 32 | + let mut types_with_location = HashSet::new(); |
| 33 | + |
| 34 | + // Match struct definitions and their fields |
| 35 | + // Pattern: `pub struct TypeName {` followed by fields until `}` |
| 36 | + let struct_re = Regex::new(r"pub struct (\w+)\s*\{([^}]+)\}").unwrap(); |
| 37 | + let location_field_re = Regex::new(r"pub location:\s*i32").unwrap(); |
| 38 | + |
| 39 | + for cap in struct_re.captures_iter(&content) { |
| 40 | + let struct_name = cap[1].to_string(); |
| 41 | + let struct_body = &cap[2]; |
| 42 | + |
| 43 | + if location_field_re.is_match(struct_body) { |
| 44 | + types_with_location.insert(struct_name); |
| 45 | + } |
| 46 | + } |
| 47 | + |
| 48 | + types_with_location |
| 49 | +} |
| 50 | + |
| 51 | +/// Extract all NodeMut variants handled in clear_location from normalize.rs |
| 52 | +fn extract_handled_types() -> HashSet<String> { |
| 53 | + let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")); |
| 54 | + let normalize_path = manifest_dir.join("src/normalize.rs"); |
| 55 | + |
| 56 | + let content = fs::read_to_string(&normalize_path).expect("Failed to read normalize.rs"); |
| 57 | + |
| 58 | + // Find the clear_location function |
| 59 | + let start = content |
| 60 | + .find("fn clear_location") |
| 61 | + .expect("Could not find clear_location function"); |
| 62 | + let end = content[start..] |
| 63 | + .find("\n}\n\n") |
| 64 | + .map(|i| start + i) |
| 65 | + .unwrap_or(content.len()); |
| 66 | + |
| 67 | + let clear_location_body = &content[start..end]; |
| 68 | + |
| 69 | + // Extract NodeMut::TypeName patterns |
| 70 | + let variant_re = Regex::new(r"NodeMut::(\w+)\(").unwrap(); |
| 71 | + |
| 72 | + variant_re |
| 73 | + .captures_iter(clear_location_body) |
| 74 | + .map(|cap| cap[1].to_string()) |
| 75 | + .collect() |
| 76 | +} |
| 77 | + |
| 78 | +/// Types that are known to not need location clearing in `clear_location`. |
| 79 | +/// These are either: |
| 80 | +/// - Internal types not directly in the AST (e.g., ParseResult, ScanResult) |
| 81 | +/// - Planner/executor nodes that don't appear in parsed SQL ASTs |
| 82 | +/// - Types handled in separate normalization functions |
| 83 | +/// - Types where location is handled via parent node traversal |
| 84 | +fn known_exceptions() -> HashSet<&'static str> { |
| 85 | + [ |
| 86 | + // ===== Internal/container types ===== |
| 87 | + "ParseResult", |
| 88 | + "ScanResult", |
| 89 | + "ScanToken", |
| 90 | + "Node", |
| 91 | + "RawStmt", // location is stmt_location, handled separately |
| 92 | + "Integer", |
| 93 | + "Float", |
| 94 | + "Boolean", |
| 95 | + "List", |
| 96 | + "IntList", |
| 97 | + "OidList", |
| 98 | + // ===== Planner/executor nodes (not in parser output) ===== |
| 99 | + // These are internal representations created during query planning, |
| 100 | + // not during parsing. They won't appear in parsed SQL ASTs. |
| 101 | + "Aggref", // aggregate reference - planner node |
| 102 | + "ArrayCoerceExpr", // array coercion - planner node |
| 103 | + "ArrayExpr", // constructed array - planner node |
| 104 | + "CoerceToDomain", // domain coercion - planner node |
| 105 | + "CoerceToDomainValue", // domain value coercion - planner node |
| 106 | + "CoerceViaIo", // I/O coercion - planner node |
| 107 | + "CollateExpr", // collation expression - planner node |
| 108 | + "ConvertRowtypeExpr", // row type conversion - planner node |
| 109 | + "DistinctExpr", // DISTINCT expression - planner node |
| 110 | + "FuncExpr", // function expression - planner node (FuncCall is parser) |
| 111 | + "NullIfExpr", // NULLIF expression - planner node |
| 112 | + "OpExpr", // operator expression - planner node (AExpr is parser) |
| 113 | + "Param", // query parameter - planner node (ParamRef is parser) |
| 114 | + "RelabelType", // type relabeling - planner node |
| 115 | + "ScalarArrayOpExpr", // scalar array op - planner node |
| 116 | + "TableFunc", // table function - planner node |
| 117 | + "Var", // variable reference - planner node (ColumnRef is parser) |
| 118 | + "WindowFunc", // window function - planner node (FuncCall is parser) |
| 119 | + // ===== Handled in separate normalization functions ===== |
| 120 | + "MergeSupportFunc", // handled in normalize_merge_support_func |
| 121 | + "SqlValueFunction", // handled in normalize_sql_value_function |
| 122 | + "WithClause", // handled in normalize_merge_support_func_recursive |
| 123 | + // ===== Handled via parent node ===== |
| 124 | + "JsonFormat", // handled via JsonFuncExpr, JsonArrayConstructor, etc. |
| 125 | + "JsonConstructorExpr", // internal JSON constructor - planner node |
| 126 | + "JsonExpr", // internal JSON expression - planner node |
| 127 | + // ===== Parser nodes that need review ===== |
| 128 | + // These appear in parsed ASTs but may not need explicit handling |
| 129 | + // if the pretty-print tests pass without them |
| 130 | + "CteCycleClause", // CTE CYCLE clause - rarely used |
| 131 | + "CteSearchClause", // CTE SEARCH clause - rarely used |
| 132 | + "PartitionRangeDatum", // partition range datum - handled via PartitionBoundSpec |
| 133 | + "PlAssignStmt", // PL/pgSQL assignment - not standard SQL |
| 134 | + ] |
| 135 | + .into_iter() |
| 136 | + .collect() |
| 137 | +} |
| 138 | + |
| 139 | +#[test] |
| 140 | +fn all_location_fields_are_normalized() { |
| 141 | + let types_with_location = extract_types_with_location(); |
| 142 | + let handled_types = extract_handled_types(); |
| 143 | + let exceptions = known_exceptions(); |
| 144 | + |
| 145 | + let mut missing: Vec<_> = types_with_location |
| 146 | + .iter() |
| 147 | + .filter(|t| !handled_types.contains(*t) && !exceptions.contains(t.as_str())) |
| 148 | + .collect(); |
| 149 | + |
| 150 | + missing.sort(); |
| 151 | + |
| 152 | + if !missing.is_empty() { |
| 153 | + panic!( |
| 154 | + "\n\n\ |
| 155 | + ========================================\n\ |
| 156 | + AST NORMALIZATION COVERAGE INCOMPLETE\n\ |
| 157 | + ========================================\n\n\ |
| 158 | + The following types have `location` fields but are NOT handled\n\ |
| 159 | + in the `clear_location` function in normalize.rs:\n\n\ |
| 160 | + {}\n\n\ |
| 161 | + To fix:\n\ |
| 162 | + 1. Add a handler for each type in the `clear_location` match\n\ |
| 163 | + 2. At minimum: `NodeMut::TypeName(n) => {{ (*n).location = 0; }}`\n\ |
| 164 | + 3. Check if additional normalization is needed\n\n\ |
| 165 | + If a type should NOT be normalized, add it to `known_exceptions()`\n\ |
| 166 | + in this test file with a comment explaining why.\n\ |
| 167 | + ========================================\n", |
| 168 | + missing |
| 169 | + .iter() |
| 170 | + .map(|t| format!(" - {}", t)) |
| 171 | + .collect::<Vec<_>>() |
| 172 | + .join("\n") |
| 173 | + ); |
| 174 | + } |
| 175 | +} |
| 176 | + |
| 177 | +#[test] |
| 178 | +fn no_obsolete_handlers() { |
| 179 | + let types_with_location = extract_types_with_location(); |
| 180 | + let handled_types = extract_handled_types(); |
| 181 | + |
| 182 | + // Check for handlers that reference types that don't exist or don't have location |
| 183 | + let mut obsolete: Vec<_> = handled_types |
| 184 | + .iter() |
| 185 | + .filter(|t| !types_with_location.contains(*t)) |
| 186 | + .collect(); |
| 187 | + |
| 188 | + obsolete.sort(); |
| 189 | + |
| 190 | + if !obsolete.is_empty() { |
| 191 | + println!( |
| 192 | + "\nNote: The following handlers in clear_location reference types \ |
| 193 | + that don't have a `location` field (they may have other fields being normalized):\n\ |
| 194 | + {}\n\ |
| 195 | + This is fine if the handler normalizes other fields, but worth reviewing.\n", |
| 196 | + obsolete |
| 197 | + .iter() |
| 198 | + .map(|t| format!(" - {}", t)) |
| 199 | + .collect::<Vec<_>>() |
| 200 | + .join("\n") |
| 201 | + ); |
| 202 | + } |
| 203 | + // This is just informational, not a failure |
| 204 | +} |
0 commit comments