Skip to content

Commit 0c19e08

Browse files
authored
MySQL: Add support for DEFAULT CHARACTER SET in CREATE DATABASE (apache#2182)
1 parent 5e5c16c commit 0c19e08

File tree

4 files changed

+165
-0
lines changed

4 files changed

+165
-0
lines changed

src/ast/helpers/stmt_create_database.rs

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,14 @@ pub struct CreateDatabaseBuilder {
8585
pub storage_serialization_policy: Option<StorageSerializationPolicy>,
8686
/// Optional comment attached to the database.
8787
pub comment: Option<String>,
88+
/// Optional default character set (MySQL).
89+
///
90+
/// <https://dev.mysql.com/doc/refman/8.4/en/create-database.html>
91+
pub default_charset: Option<String>,
92+
/// Optional default collation (MySQL).
93+
///
94+
/// <https://dev.mysql.com/doc/refman/8.4/en/create-database.html>
95+
pub default_collation: Option<String>,
8896
/// Optional catalog sync configuration.
8997
pub catalog_sync: Option<String>,
9098
/// Optional catalog sync namespace mode.
@@ -120,6 +128,8 @@ impl CreateDatabaseBuilder {
120128
default_ddl_collation: None,
121129
storage_serialization_policy: None,
122130
comment: None,
131+
default_charset: None,
132+
default_collation: None,
123133
catalog_sync: None,
124134
catalog_sync_namespace_mode: None,
125135
catalog_sync_namespace_flatten_delimiter: None,
@@ -218,6 +228,18 @@ impl CreateDatabaseBuilder {
218228
self
219229
}
220230

231+
/// Set the default character set for the database.
232+
pub fn default_charset(mut self, default_charset: Option<String>) -> Self {
233+
self.default_charset = default_charset;
234+
self
235+
}
236+
237+
/// Set the default collation for the database.
238+
pub fn default_collation(mut self, default_collation: Option<String>) -> Self {
239+
self.default_collation = default_collation;
240+
self
241+
}
242+
221243
/// Set the catalog sync for the database.
222244
pub fn catalog_sync(mut self, catalog_sync: Option<String>) -> Self {
223245
self.catalog_sync = catalog_sync;
@@ -272,6 +294,8 @@ impl CreateDatabaseBuilder {
272294
default_ddl_collation: self.default_ddl_collation,
273295
storage_serialization_policy: self.storage_serialization_policy,
274296
comment: self.comment,
297+
default_charset: self.default_charset,
298+
default_collation: self.default_collation,
275299
catalog_sync: self.catalog_sync,
276300
catalog_sync_namespace_mode: self.catalog_sync_namespace_mode,
277301
catalog_sync_namespace_flatten_delimiter: self.catalog_sync_namespace_flatten_delimiter,
@@ -302,6 +326,8 @@ impl TryFrom<Statement> for CreateDatabaseBuilder {
302326
default_ddl_collation,
303327
storage_serialization_policy,
304328
comment,
329+
default_charset,
330+
default_collation,
305331
catalog_sync,
306332
catalog_sync_namespace_mode,
307333
catalog_sync_namespace_flatten_delimiter,
@@ -323,6 +349,8 @@ impl TryFrom<Statement> for CreateDatabaseBuilder {
323349
default_ddl_collation,
324350
storage_serialization_policy,
325351
comment,
352+
default_charset,
353+
default_collation,
326354
catalog_sync,
327355
catalog_sync_namespace_mode,
328356
catalog_sync_namespace_flatten_delimiter,

src/ast/mod.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4227,6 +4227,10 @@ pub enum Statement {
42274227
storage_serialization_policy: Option<StorageSerializationPolicy>,
42284228
/// Optional comment.
42294229
comment: Option<String>,
4230+
/// Optional default character set (MySQL).
4231+
default_charset: Option<String>,
4232+
/// Optional default collation (MySQL).
4233+
default_collation: Option<String>,
42304234
/// Optional catalog sync identifier.
42314235
catalog_sync: Option<String>,
42324236
/// Catalog sync namespace mode.
@@ -5081,6 +5085,8 @@ impl fmt::Display for Statement {
50815085
default_ddl_collation,
50825086
storage_serialization_policy,
50835087
comment,
5088+
default_charset,
5089+
default_collation,
50845090
catalog_sync,
50855091
catalog_sync_namespace_mode,
50865092
catalog_sync_namespace_flatten_delimiter,
@@ -5140,6 +5146,14 @@ impl fmt::Display for Statement {
51405146
write!(f, " COMMENT = '{comment}'")?;
51415147
}
51425148

5149+
if let Some(charset) = default_charset {
5150+
write!(f, " DEFAULT CHARACTER SET {charset}")?;
5151+
}
5152+
5153+
if let Some(collation) = default_collation {
5154+
write!(f, " DEFAULT COLLATE {collation}")?;
5155+
}
5156+
51435157
if let Some(sync) = catalog_sync {
51445158
write!(f, " CATALOG_SYNC = '{sync}'")?;
51455159
}

src/parser/mod.rs

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5341,6 +5341,34 @@ impl<'a> Parser<'a> {
53415341
None
53425342
};
53435343

5344+
// Parse MySQL-style [DEFAULT] CHARACTER SET and [DEFAULT] COLLATE options
5345+
//
5346+
// Note: The docs only mention `CHARACTER SET`, but `CHARSET` is also supported.
5347+
// Furthermore, MySQL will only accept one character set, raising an error if there is more
5348+
// than one, but will accept multiple collations and use the last one.
5349+
//
5350+
// <https://dev.mysql.com/doc/refman/8.4/en/create-database.html>
5351+
let mut default_charset = None;
5352+
let mut default_collation = None;
5353+
loop {
5354+
let has_default = self.parse_keyword(Keyword::DEFAULT);
5355+
if default_charset.is_none() && self.parse_keywords(&[Keyword::CHARACTER, Keyword::SET])
5356+
|| self.parse_keyword(Keyword::CHARSET)
5357+
{
5358+
let _ = self.consume_token(&Token::Eq);
5359+
default_charset = Some(self.parse_identifier()?.value);
5360+
} else if self.parse_keyword(Keyword::COLLATE) {
5361+
let _ = self.consume_token(&Token::Eq);
5362+
default_collation = Some(self.parse_identifier()?.value);
5363+
} else if has_default {
5364+
// DEFAULT keyword not followed by CHARACTER SET, CHARSET, or COLLATE
5365+
self.prev_token();
5366+
break;
5367+
} else {
5368+
break;
5369+
}
5370+
}
5371+
53445372
Ok(Statement::CreateDatabase {
53455373
db_name,
53465374
if_not_exists: ine,
@@ -5357,6 +5385,8 @@ impl<'a> Parser<'a> {
53575385
default_ddl_collation: None,
53585386
storage_serialization_policy: None,
53595387
comment: None,
5388+
default_charset,
5389+
default_collation,
53605390
catalog_sync: None,
53615391
catalog_sync_namespace_mode: None,
53625392
catalog_sync_namespace_flatten_delimiter: None,

tests/sqlparser_mysql.rs

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4621,3 +4621,96 @@ fn test_optimizer_hints() {
46214621
DELETE /*+ foobar */ FROM table_name",
46224622
);
46234623
}
4624+
4625+
#[test]
4626+
fn parse_create_database_with_charset() {
4627+
// Test DEFAULT CHARACTER SET without = sign (the normalized form)
4628+
mysql_and_generic().verified_stmt("CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4");
4629+
4630+
// Test DEFAULT CHARACTER SET with = sign (normalized to the form without it)
4631+
mysql_and_generic().one_statement_parses_to(
4632+
"CREATE DATABASE mydb DEFAULT CHARACTER SET = utf8mb4",
4633+
"CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4",
4634+
);
4635+
4636+
// Test CHARACTER SET without DEFAULT
4637+
mysql_and_generic().one_statement_parses_to(
4638+
"CREATE DATABASE mydb CHARACTER SET utf8mb4",
4639+
"CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4",
4640+
);
4641+
4642+
// Test CHARSET shorthand
4643+
mysql_and_generic().one_statement_parses_to(
4644+
"CREATE DATABASE mydb CHARSET utf8mb4",
4645+
"CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4",
4646+
);
4647+
4648+
// Test DEFAULT CHARSET shorthand
4649+
mysql_and_generic().one_statement_parses_to(
4650+
"CREATE DATABASE mydb DEFAULT CHARSET utf8mb4",
4651+
"CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4",
4652+
);
4653+
4654+
// Test DEFAULT COLLATE
4655+
mysql_and_generic().verified_stmt("CREATE DATABASE mydb DEFAULT COLLATE utf8mb4_unicode_ci");
4656+
4657+
// Test COLLATE without DEFAULT
4658+
mysql_and_generic().one_statement_parses_to(
4659+
"CREATE DATABASE mydb COLLATE utf8mb4_unicode_ci",
4660+
"CREATE DATABASE mydb DEFAULT COLLATE utf8mb4_unicode_ci",
4661+
);
4662+
4663+
// Test both CHARACTER SET and COLLATE together
4664+
mysql_and_generic().verified_stmt(
4665+
"CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4 DEFAULT COLLATE utf8mb4_unicode_ci",
4666+
);
4667+
4668+
// Test IF NOT EXISTS with CHARACTER SET
4669+
mysql_and_generic()
4670+
.verified_stmt("CREATE DATABASE IF NOT EXISTS mydb DEFAULT CHARACTER SET utf16");
4671+
4672+
// Test the exact syntax from the issue
4673+
mysql_and_generic().one_statement_parses_to(
4674+
"CREATE DATABASE IF NOT EXISTS noria DEFAULT CHARACTER SET = utf16",
4675+
"CREATE DATABASE IF NOT EXISTS noria DEFAULT CHARACTER SET utf16",
4676+
);
4677+
}
4678+
4679+
#[test]
4680+
fn parse_create_database_with_charset_errors() {
4681+
// Missing charset name after CHARACTER SET
4682+
assert!(mysql_and_generic()
4683+
.parse_sql_statements("CREATE DATABASE mydb DEFAULT CHARACTER SET")
4684+
.is_err());
4685+
4686+
// Missing charset name after CHARSET
4687+
assert!(mysql_and_generic()
4688+
.parse_sql_statements("CREATE DATABASE mydb CHARSET")
4689+
.is_err());
4690+
4691+
// Missing collation name after COLLATE
4692+
assert!(mysql_and_generic()
4693+
.parse_sql_statements("CREATE DATABASE mydb DEFAULT COLLATE")
4694+
.is_err());
4695+
4696+
// Equals sign but no value
4697+
assert!(mysql_and_generic()
4698+
.parse_sql_statements("CREATE DATABASE mydb CHARACTER SET =")
4699+
.is_err());
4700+
}
4701+
4702+
#[test]
4703+
fn parse_create_database_with_charset_option_ordering() {
4704+
// MySQL allows COLLATE before CHARACTER SET - output is normalized to CHARACTER SET first
4705+
// (matches MySQL's own SHOW CREATE DATABASE output order)
4706+
mysql_and_generic().one_statement_parses_to(
4707+
"CREATE DATABASE mydb DEFAULT COLLATE utf8mb4_unicode_ci DEFAULT CHARACTER SET utf8mb4",
4708+
"CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4 DEFAULT COLLATE utf8mb4_unicode_ci",
4709+
);
4710+
4711+
// COLLATE first without DEFAULT keywords
4712+
mysql_and_generic().one_statement_parses_to(
4713+
"CREATE DATABASE mydb COLLATE utf8mb4_unicode_ci CHARACTER SET utf8mb4",
4714+
"CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4 DEFAULT COLLATE utf8mb4_unicode_ci",
4715+
);
4716+
}

0 commit comments

Comments
 (0)