@@ -46,13 +46,15 @@ use crate::{
4646
4747use super :: dml:: InsertOp ;
4848use super :: plan:: ColumnUnnestList ;
49+ use arrow:: compute:: can_cast_types;
4950use arrow:: datatypes:: { DataType , Field , Fields , Schema , SchemaRef } ;
5051use datafusion_common:: display:: ToStringifiedPlan ;
5152use datafusion_common:: file_options:: file_type:: FileType ;
5253use datafusion_common:: {
53- get_target_functional_dependencies, internal_err, not_impl_err, plan_datafusion_err,
54- plan_err, Column , DFSchema , DFSchemaRef , DataFusionError , FunctionalDependencies ,
55- Result , ScalarValue , TableReference , ToDFSchema , UnnestOptions ,
54+ exec_err, get_target_functional_dependencies, internal_err, not_impl_err,
55+ plan_datafusion_err, plan_err, Column , DFSchema , DFSchemaRef , DataFusionError ,
56+ FunctionalDependencies , Result , ScalarValue , TableReference , ToDFSchema ,
57+ UnnestOptions ,
5658} ;
5759use datafusion_expr_common:: type_coercion:: binary:: type_union_resolution;
5860
@@ -172,12 +174,45 @@ impl LogicalPlanBuilder {
172174 /// `value`. See the [Postgres VALUES](https://www.postgresql.org/docs/current/queries-values.html)
173175 /// documentation for more details.
174176 ///
/// By default, it assigns the names column1, column2, etc. to the columns of a VALUES table,
/// so it's usually better to override the default names with a table alias list.
178+ ///
179+ /// If the values include params/binders such as $1, $2, $3, etc, then the `param_data_types` should be provided.
180+ pub fn values ( values : Vec < Vec < Expr > > ) -> Result < Self > {
181+ if values. is_empty ( ) {
182+ return plan_err ! ( "Values list cannot be empty" ) ;
183+ }
184+ let n_cols = values[ 0 ] . len ( ) ;
185+ if n_cols == 0 {
186+ return plan_err ! ( "Values list cannot be zero length" ) ;
187+ }
188+ for ( i, row) in values. iter ( ) . enumerate ( ) {
189+ if row. len ( ) != n_cols {
190+ return plan_err ! (
191+ "Inconsistent data length across values list: got {} values in row {} but expected {}" ,
192+ row. len( ) ,
193+ i,
194+ n_cols
195+ ) ;
196+ }
197+ }
198+
199+ // Infer from data itself
200+ Self :: infer_data ( values)
201+ }
202+
203+ /// Create a values list based relation, and the schema is inferred from data itself or table schema if provided, consuming
204+ /// `value`. See the [Postgres VALUES](https://www.postgresql.org/docs/current/queries-values.html)
205+ /// documentation for more details.
206+ ///
175207 /// By default, it assigns the names column1, column2, etc. to the columns of a VALUES table.
176208 /// The column names are not specified by the SQL standard and different database systems do it differently,
177209 /// so it's usually better to override the default names with a table alias list.
178210 ///
179211 /// If the values include params/binders such as $1, $2, $3, etc, then the `param_data_types` should be provided.
180- pub fn values ( mut values : Vec < Vec < Expr > > ) -> Result < Self > {
212+ pub fn values_with_schema (
213+ values : Vec < Vec < Expr > > ,
214+ schema : & DFSchemaRef ,
215+ ) -> Result < Self > {
181216 if values. is_empty ( ) {
182217 return plan_err ! ( "Values list cannot be empty" ) ;
183218 }
@@ -196,16 +231,53 @@ impl LogicalPlanBuilder {
196231 }
197232 }
198233
199- let empty_schema = DFSchema :: empty ( ) ;
234+ // Check the type of value against the schema
235+ Self :: infer_values_from_schema ( values, schema)
236+ }
237+
238+ fn infer_values_from_schema (
239+ values : Vec < Vec < Expr > > ,
240+ schema : & DFSchema ,
241+ ) -> Result < Self > {
242+ let n_cols = values[ 0 ] . len ( ) ;
243+ let mut field_types: Vec < DataType > = Vec :: with_capacity ( n_cols) ;
244+ for j in 0 ..n_cols {
245+ let field_type = schema. field ( j) . data_type ( ) ;
246+ for row in values. iter ( ) {
247+ let value = & row[ j] ;
248+ let data_type = value. get_type ( schema) ?;
249+
250+ if !data_type. equals_datatype ( field_type) {
251+ if can_cast_types ( & data_type, field_type) {
252+ } else {
253+ return exec_err ! (
254+ "type mistmatch and can't cast to got {} and {}" ,
255+ data_type,
256+ field_type
257+ ) ;
258+ }
259+ }
260+ }
261+ field_types. push ( field_type. to_owned ( ) ) ;
262+ }
263+
264+ Self :: infer_inner ( values, & field_types, schema)
265+ }
266+
267+ fn infer_data ( values : Vec < Vec < Expr > > ) -> Result < Self > {
268+ let n_cols = values[ 0 ] . len ( ) ;
269+ let schema = DFSchema :: empty ( ) ;
270+
200271 let mut field_types: Vec < DataType > = Vec :: with_capacity ( n_cols) ;
201272 for j in 0 ..n_cols {
202273 let mut common_type: Option < DataType > = None ;
203274 for ( i, row) in values. iter ( ) . enumerate ( ) {
204275 let value = & row[ j] ;
205- let data_type = value. get_type ( & empty_schema ) ?;
276+ let data_type = value. get_type ( & schema ) ?;
206277 if data_type == DataType :: Null {
207278 continue ;
208279 }
280+
209281 if let Some ( prev_type) = common_type {
210282 // get common type of each column values.
211283 let data_types = vec ! [ prev_type. clone( ) , data_type. clone( ) ] ;
@@ -221,14 +293,22 @@ impl LogicalPlanBuilder {
221293 // since the code loop skips NULL
222294 field_types. push ( common_type. unwrap_or ( DataType :: Null ) ) ;
223295 }
296+
297+ Self :: infer_inner ( values, & field_types, & schema)
298+ }
299+
300+ fn infer_inner (
301+ mut values : Vec < Vec < Expr > > ,
302+ field_types : & [ DataType ] ,
303+ schema : & DFSchema ,
304+ ) -> Result < Self > {
224305 // wrap cast if data type is not same as common type.
225306 for row in & mut values {
226307 for ( j, field_type) in field_types. iter ( ) . enumerate ( ) {
227308 if let Expr :: Literal ( ScalarValue :: Null ) = row[ j] {
228309 row[ j] = Expr :: Literal ( ScalarValue :: try_from ( field_type) ?) ;
229310 } else {
230- row[ j] =
231- std:: mem:: take ( & mut row[ j] ) . cast_to ( field_type, & empty_schema) ?;
311+ row[ j] = std:: mem:: take ( & mut row[ j] ) . cast_to ( field_type, schema) ?;
232312 }
233313 }
234314 }
@@ -243,6 +323,7 @@ impl LogicalPlanBuilder {
243323 . collect :: < Vec < _ > > ( ) ;
244324 let dfschema = DFSchema :: from_unqualified_fields ( fields. into ( ) , HashMap :: new ( ) ) ?;
245325 let schema = DFSchemaRef :: new ( dfschema) ;
326+
246327 Ok ( Self :: new ( LogicalPlan :: Values ( Values { schema, values } ) ) )
247328 }
248329
0 commit comments