diff --git a/Cargo.lock b/Cargo.lock index 828fa4ae..6235cdc2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2417,8 +2417,14 @@ dependencies = [ name = "squawk-fmt" version = "2.48.0" dependencies = [ + "anyhow", + "camino", + "clap", + "dir-test", "insta", "itertools", + "rowan", + "squawk-lexer", "squawk-syntax", "tiny_pretty", ] diff --git a/crates/squawk_fmt/Cargo.toml b/crates/squawk_fmt/Cargo.toml index 216f23ef..9fe1da29 100644 --- a/crates/squawk_fmt/Cargo.toml +++ b/crates/squawk_fmt/Cargo.toml @@ -13,13 +13,23 @@ repository.workspace = true [lib] doctest = false +[[bin]] +name = "squawk-fmt" +path = "src/main.rs" + [dependencies] tiny_pretty.workspace = true itertools.workspace = true squawk-syntax.workspace = true +rowan.workspace = true +clap.workspace = true +anyhow.workspace = true [dev-dependencies] insta.workspace = true +dir-test.workspace = true +camino.workspace = true +squawk-lexer.workspace = true [lints] workspace = true diff --git a/crates/squawk_fmt/src/fmt.rs b/crates/squawk_fmt/src/fmt.rs new file mode 100644 index 00000000..de1d8873 --- /dev/null +++ b/crates/squawk_fmt/src/fmt.rs @@ -0,0 +1,546 @@ +use itertools::Itertools; +use rowan::Direction; +use squawk_syntax::ast::{self, AstNode}; +use squawk_syntax::{SyntaxKind, SyntaxNode, SyntaxToken}; +use tiny_pretty::Doc; +use tiny_pretty::{PrintOptions, print}; + +// TODO: anytime we have `syntax().to_string()`, it means we have to do more to +// actually convert the data into the IR. to_string() is a temp hack + +fn build_source_file(source_file: &ast::SourceFile) -> Doc<'_> { + let mut doc = Doc::nil(); + for el in source_file.syntax().children_with_tokens() { + match el { + rowan::NodeOrToken::Node(node) => { + if let Some(stmt) = ast::Stmt::cast(node) { + match stmt { + ast::Stmt::Select(select) => { + doc = doc.append(build_select_doc(&select)); + } + ast::Stmt::CreateTable(create_table) => { + doc = doc.append(build_create_table(&create_table)); + } + _ => (), + } + } + } + rowan::NodeOrToken::Token(token) => { + if token.kind() == SyntaxKind::COMMENT { + doc = doc.append(Doc::text(token.text().to_string())); + } else if token.kind() == SyntaxKind::WHITESPACE { + // TODO: I think we can improve this + let lines = token.text().lines().count(); + if lines >= 2 { + doc = doc.append(Doc::empty_line()).append(Doc::empty_line()); + } else { + doc = doc.append(Doc::empty_line()); + } + } else if token.kind() == SyntaxKind::SEMICOLON { + doc = doc.append(Doc::text(";")); + } + } + } + } + doc +} + +fn build_create_table<'a>(create_table: &ast::CreateTable) -> Doc<'a> { + Doc::text("create") + .append(Doc::space()) + .append(Doc::text("table")) + .append(Doc::space()) + .append(Doc::text( + create_table.path().map(|x| x.syntax().to_string()).unwrap(), + )) + .append(Doc::text("(")) + .append( + Doc::line_or_nil() + .append(Doc::list( + Itertools::intersperse( + create_table + .table_arg_list() + .unwrap() + .args() + .map(build_table_arg), + Doc::text(",").append(Doc::hard_line()), + ) + .collect(), + )) + .nest(2) + .append(Doc::line_or_nil()) + .group(), + ) + .append(Doc::text(")")) +} + +fn build_table_arg<'a>(create_table: ast::TableArg) -> Doc<'a> { + match create_table { + ast::TableArg::Column(column) => Doc::text(column.name().unwrap().syntax().to_string()) + .append(Doc::space()) + .append(Doc::text(column.ty().unwrap().syntax().to_string())), + ast::TableArg::LikeClause(_like_clause) => todo!(), + ast::TableArg::TableConstraint(_table_constraint) => todo!(), + } +} + +fn build_select_doc<'a>(select: &ast::Select) -> Doc<'a> { + let mut doc = Doc::text("select").append(Doc::line_or_space()); + + if let Some(select_clause) = select.select_clause() { + if let Some(distinct_clause) = select_clause.distinct_clause() { + doc = doc.append(leading_comments(distinct_clause.syntax())); + doc = doc.append(Doc::text("distinct")).append(Doc::space()); + } + if let Some(all_token) = select_clause.all_token() { + doc = doc.append(leading_comments_token(&all_token)); + doc = doc.append(Doc::text("all")).append(Doc::space()); + } + if let Some(target_list) = select_clause.target_list() { + doc = doc.append(leading_comments(target_list.syntax())); + doc = doc + .append(Doc::list( + Itertools::intersperse( + target_list.targets().flat_map(build_target), + Doc::text(",").append(Doc::line_or_space()), + ) + .collect(), + )) + .nest(2); + } + } + + if let Some(from) = &select.from_clause() { + doc = doc.append( + Doc::line_or_space() + .append(Doc::text("from")) + .append(Doc::space()) + .append(Doc::text( + from.from_items().next().unwrap().syntax().to_string(), + )), + ); + } + + if let Some(group) = &select.group_by_clause() { + doc = doc.append( + Doc::line_or_space() + .append(Doc::text("group by")) + .append(Doc::space()) + .append(Doc::text( + group.group_by_list().unwrap().syntax().to_string(), + )), + ); + } + + doc.group() +} + +fn build_expr<'a>(expr: ast::Expr) -> Doc<'a> { + match expr { + ast::Expr::ArrayExpr(array_expr) => { + let mut doc = Doc::nil(); + + // nested parts of array expressions don't require the array token + if array_expr.array_token().is_some() { + doc = doc.append(Doc::text("array")); + }; + + if let Some(select) = array_expr.select() { + doc = doc + .append(Doc::text("(")) + .append(build_select_doc(&select)) + .append(Doc::text(")")) + } else { + doc = doc + .append(Doc::text("[")) + .append(Doc::list( + Itertools::intersperse( + array_expr.exprs().map(build_expr), + Doc::text(",").append(Doc::space()), + ) + .collect(), + )) + .append(Doc::text("]")); + } + + doc + } + ast::Expr::BetweenExpr(between_expr) => { + let mut doc = build_expr(between_expr.target().unwrap()); + if between_expr.not_token().is_some() { + doc = doc.append(Doc::space()).append(Doc::text("not")); + } + doc = doc.append(Doc::space()).append(Doc::text("between")); + if between_expr.symmetric_token().is_some() { + doc = doc.append(Doc::space()).append(Doc::text("symmetric")); + } + doc.append(Doc::space()) + .append(build_expr(between_expr.start().unwrap())) + .append(Doc::space()) + .append(Doc::text("and")) + .append(Doc::space()) + .append(build_expr(between_expr.end().unwrap())) + } + ast::Expr::BinExpr(bin_expr) => build_expr(bin_expr.lhs().unwrap()) + .append(Doc::space()) + .append(build_op(bin_expr.op().unwrap())) + .append(Doc::space()) + .append(build_expr(bin_expr.rhs().unwrap())), + // ast::Expr::CallExpr(call_expr) => todo!(), + // ast::Expr::CaseExpr(case_expr) => todo!(), + ast::Expr::CastExpr(cast_expr) => { + let mut doc = Doc::nil(); + if cast_expr.colon_colon().is_some() { + doc = doc + .append(build_expr(cast_expr.expr().unwrap())) + .append(Doc::text("::")) + .append(build_type(cast_expr.ty().unwrap())) + } else if cast_expr.as_token().is_some() { + if cast_expr.cast_token().is_some() { + doc = doc.append(Doc::text("cast")) + } else if cast_expr.treat_token().is_some() { + doc = doc.append(Doc::text("treat")) + } + doc = doc + .append(Doc::text("(")) + .append(build_expr(cast_expr.expr().unwrap())) + .append(Doc::space()) + .append(Doc::text("as")) + .append(Doc::space()) + .append(build_type(cast_expr.ty().unwrap())) + .append(Doc::text(")")) + } else { + doc = doc + .append(build_type(cast_expr.ty().unwrap())) + .append(Doc::space()) + .append(build_literal(cast_expr.literal().unwrap())) + } + doc + } + // ast::Expr::FieldExpr(field_expr) => todo!(), + // ast::Expr::IndexExpr(index_expr) => todo!(), + // ast::Expr::Literal(literal) => todo!(), + // ast::Expr::NameRef(name_ref) => todo!(), + // ast::Expr::ParenExpr(paren_expr) => todo!(), + ast::Expr::PostfixExpr(postfix_expr) => { + let expr = build_expr(postfix_expr.expr().unwrap()); + let op = match postfix_expr.op().unwrap() { + ast::PostfixOp::AtLocal(_) => Doc::text("at local"), + ast::PostfixOp::IsNull(_) => Doc::text("isnull"), + ast::PostfixOp::NotNull(_) => Doc::text("notnull"), + ast::PostfixOp::IsJson(n) => { + let mut doc = Doc::text("is json"); + if let Some(clause) = n.json_keys_unique_clause() { + doc = doc + .append(Doc::space()) + .append(build_json_keys_unique_clause(clause)); + } + doc + } + ast::PostfixOp::IsJsonArray(n) => { + let mut doc = Doc::text("is json array"); + if let Some(clause) = n.json_keys_unique_clause() { + doc = doc + .append(Doc::space()) + .append(build_json_keys_unique_clause(clause)); + } + doc + } + ast::PostfixOp::IsJsonObject(n) => { + let mut doc = Doc::text("is json object"); + if let Some(clause) = n.json_keys_unique_clause() { + doc = doc + .append(Doc::space()) + .append(build_json_keys_unique_clause(clause)); + } + doc + } + ast::PostfixOp::IsJsonScalar(n) => { + let mut doc = Doc::text("is json scalar"); + if let Some(clause) = n.json_keys_unique_clause() { + doc = doc + .append(Doc::space()) + .append(build_json_keys_unique_clause(clause)); + } + doc + } + ast::PostfixOp::IsJsonValue(n) => { + let mut doc = Doc::text("is json value"); + if let Some(clause) = n.json_keys_unique_clause() { + doc = doc + .append(Doc::space()) + .append(build_json_keys_unique_clause(clause)); + } + doc + } + ast::PostfixOp::IsNormalized(n) => { + let mut doc = Doc::text("is"); + if let Some(form) = n.unicode_normal_form() { + doc = doc + .append(Doc::space()) + .append(build_unicode_normal_form(form)); + } + doc.append(Doc::space()).append(Doc::text("normalized")) + } + ast::PostfixOp::IsNotJson(n) => { + let mut doc = Doc::text("is not json"); + if let Some(clause) = n.json_keys_unique_clause() { + doc = doc + .append(Doc::space()) + .append(build_json_keys_unique_clause(clause)); + } + doc + } + ast::PostfixOp::IsNotJsonArray(n) => { + let mut doc = Doc::text("is not json array"); + if let Some(clause) = n.json_keys_unique_clause() { + doc = doc + .append(Doc::space()) + .append(build_json_keys_unique_clause(clause)); + } + doc + } + ast::PostfixOp::IsNotJsonObject(n) => { + let mut doc = Doc::text("is not json object"); + if let Some(clause) = n.json_keys_unique_clause() { + doc = doc + .append(Doc::space()) + .append(build_json_keys_unique_clause(clause)); + } + doc + } + ast::PostfixOp::IsNotJsonScalar(n) => { + let mut doc = Doc::text("is not json scalar"); + if let Some(clause) = n.json_keys_unique_clause() { + doc = doc + .append(Doc::space()) + .append(build_json_keys_unique_clause(clause)); + } + doc + } + ast::PostfixOp::IsNotJsonValue(n) => { + let mut doc = Doc::text("is not json value"); + if let Some(clause) = n.json_keys_unique_clause() { + doc = doc + .append(Doc::space()) + .append(build_json_keys_unique_clause(clause)); + } + doc + } + ast::PostfixOp::IsNotNormalized(n) => { + let mut doc = Doc::text("is not"); + if let Some(form) = n.unicode_normal_form() { + doc = doc + .append(Doc::space()) + .append(build_unicode_normal_form(form)); + } + doc.append(Doc::space()).append(Doc::text("normalized")) + } + }; + expr.append(Doc::space()).append(op) + } + // ast::Expr::PrefixExpr(prefix_expr) => todo!(), + // ast::Expr::SliceExpr(slice_expr) => todo!(), + // ast::Expr::TupleExpr(tuple_expr) => todo!(), + _ => Doc::text(expr.syntax().to_string()), + } +} + +fn build_json_keys_unique_clause<'a>(clause: ast::JsonKeysUniqueClause) -> Doc<'a> { + let prefix = if clause.with_token().is_some() { + "with" + } else { + "without" + }; + Doc::text(prefix) + .append(Doc::space()) + .append(Doc::text("unique")) + .append(Doc::space()) + .append(Doc::text("keys")) +} + +fn build_unicode_normal_form<'a>(form: ast::UnicodeNormalForm) -> Doc<'a> { + if form.nfc_token().is_some() { + Doc::text("nfc") + } else if form.nfd_token().is_some() { + Doc::text("nfd") + } else if form.nfkc_token().is_some() { + Doc::text("nfkc") + } else { + Doc::text("nfkd") + } +} + +fn build_op<'a>(op: ast::BinOp) -> Doc<'a> { + match op { + ast::BinOp::And(_) => todo!(), + ast::BinOp::AtTimeZone(_) => todo!(), + ast::BinOp::Caret(_) => todo!(), + ast::BinOp::Collate(_) => todo!(), + ast::BinOp::ColonColon(_) => todo!(), + ast::BinOp::ColonEq(_) => todo!(), + ast::BinOp::CustomOp(custom_op) => Doc::text(custom_op.syntax().to_string()), + ast::BinOp::Eq(_) => todo!(), + ast::BinOp::FatArrow(_) => todo!(), + ast::BinOp::Gteq(_) => todo!(), + ast::BinOp::Ilike(_) => todo!(), + ast::BinOp::In(_) => todo!(), + ast::BinOp::Is(_) => todo!(), + ast::BinOp::IsDistinctFrom(_) => todo!(), + ast::BinOp::IsNot(_) => todo!(), + ast::BinOp::IsNotDistinctFrom(_) => todo!(), + ast::BinOp::LAngle(_) => todo!(), + ast::BinOp::Like(_) => todo!(), + ast::BinOp::Lteq(_) => todo!(), + ast::BinOp::Minus(_) => todo!(), + ast::BinOp::Neq(_) => todo!(), + ast::BinOp::Neqb(_) => todo!(), + ast::BinOp::NotIlike(_) => todo!(), + ast::BinOp::NotIn(_) => todo!(), + ast::BinOp::NotLike(_) => todo!(), + ast::BinOp::NotSimilarTo(_) => todo!(), + ast::BinOp::OperatorCall(_) => todo!(), + ast::BinOp::Or(_) => todo!(), + ast::BinOp::Overlaps(_) => todo!(), + ast::BinOp::Percent(_) => todo!(), + ast::BinOp::Plus(_) => Doc::text("+"), + ast::BinOp::RAngle(_) => todo!(), + ast::BinOp::SimilarTo(_) => todo!(), + ast::BinOp::Slash(_) => todo!(), + ast::BinOp::Star(_) => todo!(), + } +} + +fn build_literal<'a>(lit: ast::Literal) -> Doc<'a> { + Doc::text(lit.syntax().to_string()) +} + +fn build_type<'a>(ty: ast::Type) -> Doc<'a> { + Doc::text(ty.syntax().to_string()) +} + +fn leading_comments_token<'a>(node: &SyntaxToken) -> Doc<'a> { + let mut doc = Doc::nil(); + for next in node.siblings_with_tokens(Direction::Prev).skip(1) { + println!("prev"); + match next { + rowan::NodeOrToken::Node(node) => { + println!("before node {:?}", node); + break; + } + rowan::NodeOrToken::Token(token) => { + println!("before token {:?}", token); + if token.kind() == SyntaxKind::COMMENT { + doc = doc + .append(Doc::text(token.text().to_string())) + .append(Doc::space()); + } else if token.kind() == SyntaxKind::WHITESPACE { + continue; + } else { + break; + } + } + } + } + doc +} + +fn leading_comments<'a>(node: &SyntaxNode) -> Doc<'a> { + let mut doc = Doc::nil(); + for next in node.siblings_with_tokens(Direction::Prev).skip(1) { + println!("prev"); + match next { + rowan::NodeOrToken::Node(node) => { + println!("before node {:?}", node); + break; + } + rowan::NodeOrToken::Token(token) => { + println!("before token {:?}", token); + if token.kind() == SyntaxKind::COMMENT { + let is_block = token.text().starts_with("--"); + doc = doc + .append(Doc::text(token.text().to_string())) + .append(if is_block { + Doc::hard_line() + } else { + Doc::space() + }); + } else if token.kind() == SyntaxKind::WHITESPACE { + continue; + } else { + break; + } + } + } + } + doc +} + +fn trailing_comments<'a>(node: &SyntaxNode) -> Doc<'a> { + let mut doc = Doc::nil(); + for next in node.siblings_with_tokens(Direction::Next).skip(1) { + println!("after"); + match next { + rowan::NodeOrToken::Node(node) => { + println!("after node {:?}", node); + break; + } + rowan::NodeOrToken::Token(token) => { + println!("after token {:?}", token); + if token.kind() == SyntaxKind::COMMENT { + doc = doc + .append(Doc::space()) + .append(Doc::text(token.text().to_string())); + } else if token.kind() == SyntaxKind::WHITESPACE { + continue; + } else { + break; + } + } + } + } + doc +} + +fn build_target<'a>(target: ast::Target) -> Option> { + let mut doc = leading_comments(target.syntax()); + + if target.star_token().is_some() { + return Some(doc.append(Doc::text("*"))); + } + let expr = target.expr()?; + doc = doc.append(build_expr(expr)); + + if let Some(as_name) = target.as_name() { + if as_name.as_token().is_some() { + doc = doc.append(Doc::space()).append(Doc::text("as")) + } + + if let Some(name) = as_name.name() { + // TODO: quoting or not? + doc = doc + .append(Doc::space()) + .append(Doc::text(name.syntax().to_string())); + } + } + + doc = doc.append(trailing_comments(target.syntax())); + + Some(doc) +} + +pub fn fmt(text: &str) -> String { + let parse = ast::SourceFile::parse(text); + let file = parse.tree(); + println!("{}", text); + println!("---"); + println!("{:#?}", file.syntax()); + println!("---"); + debug_assert_eq!( + parse.errors(), + vec![], + "should bail out when there's parse errors" + ); + let doc = build_source_file(&file); + print(&doc, &PrintOptions::default()) +} diff --git a/crates/squawk_fmt/src/lib.rs b/crates/squawk_fmt/src/lib.rs index 8971ab22..c665cfd6 100644 --- a/crates/squawk_fmt/src/lib.rs +++ b/crates/squawk_fmt/src/lib.rs @@ -1,143 +1,3 @@ -use itertools::Itertools; -use squawk_syntax::ast::{self, AstNode}; -use tiny_pretty::Doc; -use tiny_pretty::{PrintOptions, print}; +mod fmt; -fn build_source_file(source_file: &ast::SourceFile) -> Doc<'_> { - let mut doc = Doc::nil(); - for stmt in source_file.stmts() { - match stmt { - ast::Stmt::Select(select) => { - doc = doc.append(build_select_doc(select)); - } - ast::Stmt::CreateTable(create_table) => { - doc = doc.append(build_create_table(create_table)) - } - _ => (), - } - doc = doc - .append(Doc::text(";")) - .append(Doc::empty_line()) - .append(Doc::empty_line()); - } - doc -} - -fn build_create_table<'a>(create_table: ast::CreateTable) -> Doc<'a> { - Doc::text("create") - .append(Doc::space()) - .append(Doc::text("table")) - .append(Doc::space()) - .append(Doc::text( - create_table.path().map(|x| x.syntax().to_string()).unwrap(), - )) - .append(Doc::text("(")) - .append( - Doc::line_or_nil() - .append(Doc::list( - Itertools::intersperse( - create_table - .table_arg_list() - .unwrap() - .args() - .map(build_table_arg), - Doc::text(",").append(Doc::line_or_space()), - ) - .collect(), - )) - .nest(2) - .append(Doc::line_or_nil()) - .group(), - ) - .append(Doc::text(")")) -} - -fn build_table_arg<'a>(create_table: ast::TableArg) -> Doc<'a> { - match create_table { - ast::TableArg::Column(column) => Doc::text(column.name().unwrap().syntax().to_string()) - .append(Doc::space()) - .append(Doc::text(column.ty().unwrap().syntax().to_string())), - ast::TableArg::LikeClause(_like_clause) => todo!(), - ast::TableArg::TableConstraint(_table_constraint) => todo!(), - } -} - -fn build_select_doc<'a>(select: ast::Select) -> Doc<'a> { - let mut doc = Doc::text("select").append(Doc::space()); - - if let Some(targets) = select - .select_clause() - .and_then(|x| x.target_list()) - .map(|x| x.targets()) - { - doc = doc - .append( - Doc::line_or_nil().append(Doc::list( - Itertools::intersperse( - targets.flat_map(|x| Some(Doc::text(x.expr()?.syntax().to_string()))), - Doc::text(",").append(Doc::line_or_space()), - ) - .collect(), - )), - ) - .nest(2); - } - - if let Some(from) = &select.from_clause() { - doc = doc.append( - Doc::line_or_space() - .append(Doc::text("from")) - .append(Doc::space()) - .append(Doc::text( - from.from_items().next().unwrap().syntax().to_string(), - )), - ); - } - - if let Some(group) = &select.group_by_clause() { - doc = doc.append( - Doc::line_or_space() - .append(Doc::text("group by")) - .append(Doc::space()) - .append(Doc::text( - group.group_by_list().unwrap().syntax().to_string(), - )), - ); - } - - doc.group() -} - -pub fn fmt(text: &str) -> String { - let parse = ast::SourceFile::parse(text); - let file = parse.tree(); - let doc = build_source_file(&file); - print(&doc, &PrintOptions::default()) -} - -#[cfg(test)] -mod tests { - use super::*; - use insta::assert_snapshot; - - #[test] - fn select() { - assert_snapshot!(fmt(" -select a(), date_trunc(1, 2), foo(), avg(a - b), bar(carrot), buzz(potato), foo.b from t group by c; -create table t(a int, b text); -"), @r" - select - a(), - date_trunc(1, 2), - foo(), - avg(a - b), - bar(carrot), - buzz(potato), - foo.b - from t - group by c; - - create table t(a int, b text); - "); - } -} +pub use fmt::fmt; diff --git a/crates/squawk_fmt/src/main.rs b/crates/squawk_fmt/src/main.rs new file mode 100644 index 00000000..5c550ef2 --- /dev/null +++ b/crates/squawk_fmt/src/main.rs @@ -0,0 +1,28 @@ +use std::io::{self, Read}; +use std::path::PathBuf; + +use anyhow::Result; +use clap::Parser; + +#[derive(Parser)] +#[command(name = "squawk-fmt")] +struct Cli { + /// File to format; reads from stdin if omitted + file: Option, +} + +fn main() -> Result<()> { + let cli = Cli::parse(); + + let input = match cli.file { + Some(path) => std::fs::read_to_string(&path)?, + None => { + let mut buf = String::new(); + io::stdin().read_to_string(&mut buf)?; + buf + } + }; + + print!("{}", squawk_fmt::fmt(&input)); + Ok(()) +} diff --git a/crates/squawk_fmt/tests/after/comments.snap b/crates/squawk_fmt/tests/after/comments.snap new file mode 100644 index 00000000..12d1228b --- /dev/null +++ b/crates/squawk_fmt/tests/after/comments.snap @@ -0,0 +1,22 @@ +--- +source: crates/squawk_fmt/tests/tests.rs +input_file: crates/squawk_fmt/tests/before/comments.sql +--- +/** + * some license header maybe + * + * + */ + +-- intentional new line follows, we should keep that + +/* bar */ +/* followed by another comment */ + +/* /* */ */ + +-- -- -- + +-- we have a big gap here, but we should only output a couple lines at most + +/* end */ diff --git a/crates/squawk_fmt/tests/after/create_table.snap b/crates/squawk_fmt/tests/after/create_table.snap new file mode 100644 index 00000000..fc097621 --- /dev/null +++ b/crates/squawk_fmt/tests/after/create_table.snap @@ -0,0 +1,10 @@ +--- +source: crates/squawk_fmt/tests/tests.rs +input_file: crates/squawk_fmt/tests/before/create_table.sql +--- +create table u(); + +create table t( + a int, + b text +); diff --git a/crates/squawk_fmt/tests/after/select.snap b/crates/squawk_fmt/tests/after/select.snap new file mode 100644 index 00000000..d1440041 --- /dev/null +++ b/crates/squawk_fmt/tests/after/select.snap @@ -0,0 +1,11 @@ +--- +source: crates/squawk_fmt/tests/tests.rs +input_file: crates/squawk_fmt/tests/before/select.sql +--- +select 1;select 2;select 3; +select 'hello'; +select now(); + +select + 'really long string ', + 'another really long string'; diff --git a/crates/squawk_fmt/tests/after/select_comments.snap b/crates/squawk_fmt/tests/after/select_comments.snap new file mode 100644 index 00000000..4765b473 --- /dev/null +++ b/crates/squawk_fmt/tests/after/select_comments.snap @@ -0,0 +1,19 @@ +--- +source: crates/squawk_fmt/tests/tests.rs +input_file: crates/squawk_fmt/tests/before/select_comments.sql +--- +-- intentional new line follows, we should keep that + +/* bar */ +select + /*a*/ 1 /*b*/, + /*c*/ 2/*d*/; + +select /*z*/; + +select /*a*/ *; + +select /*a*/ all /*b*/ 1; + +select + /*a*/ distinct /*b*/ 1; diff --git a/crates/squawk_fmt/tests/after/select_expr.snap b/crates/squawk_fmt/tests/after/select_expr.snap new file mode 100644 index 00000000..e0793356 --- /dev/null +++ b/crates/squawk_fmt/tests/after/select_expr.snap @@ -0,0 +1,61 @@ +--- +source: crates/squawk_fmt/tests/tests.rs +input_file: crates/squawk_fmt/tests/before/select_expr.sql +--- +select + -- array expr + array[1, 2], + array(select 1), + array[[1, 2], [3, 4]], + -- between expr + 2 between 1 and 3, + 2 not between 1 and 3, + 2 between symmetric 1 and 3, + -- bin expr + 1 + 1, + 2 @@@ 2, + -- call expr + date_trunc('month', now()), + -- case expr + case when x > 1 then 1 else 0 end, + -- cast expr + cast(1 as int8), + treat(2 as bigint), + 1::int8, + int8 '1', + -- field expr + foo.bar, + -- index expr + a[1], + -- literal + 42, + -- name ref + foo, + -- paren expr + (1), + -- postfix expr + 1 isnull, + 2 notnull, + x is json, + x is json with unique keys, + x is json without unique keys, + x is json array, + x is json array with unique keys, + x is json object, + x is json scalar, + x is json value, + x is normalized, + x is nfc normalized, + x is not json, + x is not json array, + x is not json object, + x is not json scalar, + x is not json value, + x is not normalized, + x is not nfkd normalized, + -- prefix expr + @-@ 10, + -- slice expr + c[:2][2:], + -- tuple expr + (1, 2, 3); diff --git a/crates/squawk_fmt/tests/before/comments.sql b/crates/squawk_fmt/tests/before/comments.sql new file mode 100644 index 00000000..ad00ec82 --- /dev/null +++ b/crates/squawk_fmt/tests/before/comments.sql @@ -0,0 +1,27 @@ +/** + * some license header maybe + * + * + */ + +-- intentional new line follows, we should keep that + +/* bar */ +/* followed by another comment */ + + +/* /* */ */ + + +-- -- -- + +-- we have a big gap here, but we should only output a couple lines at most + + + + + + + + +/* end */ diff --git a/crates/squawk_fmt/tests/before/create_table.sql b/crates/squawk_fmt/tests/before/create_table.sql new file mode 100644 index 00000000..27dfa7df --- /dev/null +++ b/crates/squawk_fmt/tests/before/create_table.sql @@ -0,0 +1,3 @@ +create table u(); + +create table t(a int,b text); diff --git a/crates/squawk_fmt/tests/before/select.sql b/crates/squawk_fmt/tests/before/select.sql new file mode 100644 index 00000000..d3b14bd4 --- /dev/null +++ b/crates/squawk_fmt/tests/before/select.sql @@ -0,0 +1,5 @@ +select 1;select 2;select 3; +select 'hello'; +select now(); + +select 'really long string ', 'another really long string'; diff --git a/crates/squawk_fmt/tests/before/select_comments.sql b/crates/squawk_fmt/tests/before/select_comments.sql new file mode 100644 index 00000000..632b7ff3 --- /dev/null +++ b/crates/squawk_fmt/tests/before/select_comments.sql @@ -0,0 +1,12 @@ +-- intentional new line follows, we should keep that + +/* bar */ +select/*a*/1/*b*/,/*c*/2/*d*/; + +select/*z*/; + +select/*a*/*; + +select/*a*/all/*b*/1; + +select/*a*/distinct/*b*/1; diff --git a/crates/squawk_fmt/tests/before/select_expr.sql b/crates/squawk_fmt/tests/before/select_expr.sql new file mode 100644 index 00000000..96629a03 --- /dev/null +++ b/crates/squawk_fmt/tests/before/select_expr.sql @@ -0,0 +1,57 @@ +select + -- array expr + array[1,2], + array(select 1), + array[[1,2],[3,4]], + -- between expr + 2 between 1 and 3, + 2 not between 1 and 3, + 2 between symmetric 1 and 3, + -- bin expr + 1 + 1, + 2@@@2, + -- call expr + date_trunc('month', now()), + -- case expr + case when x > 1 then 1 else 0 end , + -- cast expr + cast(1 as int8), + treat(2 as bigint), + 1::int8, + int8 '1', + -- field expr + foo.bar, + -- index expr + a[1], + -- literal + 42, + -- name ref + foo, + -- paren expr + (1), + -- postfix expr + 1 isnull, + 2 notnull, + x is json, + x is json with unique keys, + x is json without unique keys, + x is json array, + x is json array with unique keys, + x is json object, + x is json scalar, + x is json value, + x is normalized, + x is nfc normalized, + x is not json, + x is not json array, + x is not json object, + x is not json scalar, + x is not json value, + x is not normalized, + x is not nfkd normalized, + -- prefix expr + @-@ 10, + -- slice expr + c[:2][2:], + -- tuple expr + (1, 2, 3); diff --git a/crates/squawk_fmt/tests/tests.rs b/crates/squawk_fmt/tests/tests.rs new file mode 100644 index 00000000..fa15d63d --- /dev/null +++ b/crates/squawk_fmt/tests/tests.rs @@ -0,0 +1,88 @@ +use camino::Utf8Path; +use dir_test::{Fixture, dir_test}; +use insta::{assert_snapshot, with_settings}; +use squawk_lexer::{Token, TokenKind, tokenize}; + +#[dir_test( + dir: "$CARGO_MANIFEST_DIR/tests/before", + glob: "*.sql", +)] +fn fmt(fixture: Fixture<&str>) { + let content = fixture.content(); + let absolute_fixture_path = Utf8Path::new(fixture.path()); + let test_name = absolute_fixture_path + .file_name() + .and_then(|x| x.strip_suffix(".sql")) + .unwrap(); + + let formatted = squawk_fmt::fmt(content); + + assert_no_dropped_tokens(content, &formatted); + + with_settings!({ + omit_expression => true, + input_file => absolute_fixture_path, + snapshot_path => "after", + prepend_module_to_snapshot => false, + }, { + assert_snapshot!(test_name, formatted); + }); +} + +fn meaningful_tokens(text: &str) -> Vec<(TokenKind, &str)> { + let mut tokens: Vec<(TokenKind, &str)> = vec![]; + let mut offset = 0; + for Token { kind, len } in tokenize(text) { + let len = len as usize; + if kind != TokenKind::Eof && kind != TokenKind::Whitespace { + tokens.push((kind, &text[offset..offset + len])); + } + offset += len; + } + tokens +} + +fn assert_no_dropped_tokens(before: &str, after: &str) { + let before_tokens = meaningful_tokens(before); + let after_tokens = meaningful_tokens(after); + + let before_len = before_tokens.len(); + let after_len = after_tokens.len(); + + for (i, ((bkind, btext), (akind, atext))) in + before_tokens.iter().zip(after_tokens.iter()).enumerate() + { + assert!( + bkind == akind && btext.eq_ignore_ascii_case(atext), + "token mismatch at position {i}:\n before: {bkind:?} {btext:?}\n after: {akind:?} {atext:?}" + ); + } + + assert!( + before_len == after_len, + "token count mismatch: before has {before_len} tokens, after has {after_len} tokens\n {}", + if before_len > after_len { + let dropped = &before_tokens[after_len..]; + format!( + "dropped {} token(s): {}", + dropped.len(), + dropped + .iter() + .map(|(k, t)| format!("{k:?} {t:?}")) + .collect::>() + .join(", ") + ) + } else { + let extra = &after_tokens[before_len..]; + format!( + "extra {} token(s): {}", + extra.len(), + extra + .iter() + .map(|(k, t)| format!("{k:?} {t:?}")) + .collect::>() + .join(", ") + ) + } + ); +} diff --git a/crates/squawk_parser/src/lib.rs b/crates/squawk_parser/src/lib.rs index 7de8de2f..cc45f73f 100644 --- a/crates/squawk_parser/src/lib.rs +++ b/crates/squawk_parser/src/lib.rs @@ -221,7 +221,9 @@ impl<'t> Parser<'t> { | SyntaxKind::NFKC_KW | SyntaxKind::NFKD_KW ) { + let fm = self.start(); self.bump_any(); + fm.complete(self, SyntaxKind::UNICODE_NORMAL_FORM); } self.bump(SyntaxKind::NORMALIZED_KW); m.complete(self, SyntaxKind::IS_NOT_NORMALIZED); @@ -237,7 +239,9 @@ impl<'t> Parser<'t> { | SyntaxKind::NFKC_KW | SyntaxKind::NFKD_KW ) { + let fm = self.start(); self.bump_any(); + fm.complete(self, SyntaxKind::UNICODE_NORMAL_FORM); } self.bump(SyntaxKind::NORMALIZED_KW); m.complete(self, SyntaxKind::IS_NORMALIZED); @@ -742,17 +746,11 @@ impl<'t> Parser<'t> { | SyntaxKind::NFKC_KW | SyntaxKind::NFKD_KW ) { - if self.nth_at(3, SyntaxKind::NOT_KW) - && self.nth_at(4, SyntaxKind::NORMALIZED_KW) - { - return true; - } - } else { - if self.nth_at(2, SyntaxKind::NOT_KW) - && self.nth_at(3, SyntaxKind::NORMALIZED_KW) - { + if self.nth_at(3, SyntaxKind::NORMALIZED_KW) { return true; } + } else if self.nth_at(2, SyntaxKind::NORMALIZED_KW) { + return true; } } return false; diff --git a/crates/squawk_parser/tests/snapshots/tests__select_operators_ok.snap b/crates/squawk_parser/tests/snapshots/tests__select_operators_ok.snap index 2c6c1f6c..c07e478e 100644 --- a/crates/squawk_parser/tests/snapshots/tests__select_operators_ok.snap +++ b/crates/squawk_parser/tests/snapshots/tests__select_operators_ok.snap @@ -1765,7 +1765,8 @@ SOURCE_FILE IS_NORMALIZED IS_KW "is" WHITESPACE " " - NFC_KW "nfc" + UNICODE_NORMAL_FORM + NFC_KW "nfc" WHITESPACE " " NORMALIZED_KW "normalized" SEMICOLON ";" @@ -1783,7 +1784,8 @@ SOURCE_FILE IS_NORMALIZED IS_KW "is" WHITESPACE " " - NFD_KW "nfd" + UNICODE_NORMAL_FORM + NFD_KW "nfd" WHITESPACE " " NORMALIZED_KW "normalized" SEMICOLON ";" @@ -1801,7 +1803,8 @@ SOURCE_FILE IS_NORMALIZED IS_KW "is" WHITESPACE " " - NFKC_KW "nfkc" + UNICODE_NORMAL_FORM + NFKC_KW "nfkc" WHITESPACE " " NORMALIZED_KW "normalized" SEMICOLON ";" @@ -1819,7 +1822,8 @@ SOURCE_FILE IS_NORMALIZED IS_KW "is" WHITESPACE " " - NFKD_KW "nfkd" + UNICODE_NORMAL_FORM + NFKD_KW "nfkd" WHITESPACE " " NORMALIZED_KW "normalized" SEMICOLON ";" @@ -1830,20 +1834,18 @@ SOURCE_FILE WHITESPACE " " TARGET_LIST TARGET - BIN_EXPR + POSTFIX_EXPR LITERAL UNICODE_ESC_STRING "U&'\\0061\\0308bc'" WHITESPACE " " - IS_NOT + IS_NOT_NORMALIZED IS_KW "is" WHITESPACE " " NOT_KW "not" - WHITESPACE " " - NAME_REF - NFD_KW "nfd" - WHITESPACE " " - AS_NAME - NAME + WHITESPACE " " + UNICODE_NORMAL_FORM + NFD_KW "nfd" + WHITESPACE " " NORMALIZED_KW "normalized" SEMICOLON ";" WHITESPACE "\n\n"