@@ -637,6 +637,32 @@ def test_cast_on_column_alias_still_requires_describe(session):
637637 _ = df2 ._plan .attributes
638638
639639
def test_select_inference_skips_on_duplicate_parent_keys_and_missing_alias_name(
    session,
):
    """Cover two SelectStatement.select paths where schema inference is skipped.

    Scenario A: the parent frame projects two expressions under the same
    output alias ("b"). Resolving the schema of that duplicate-alias frame is
    checked to go through exactly one DESCRIBE (and no regular query), i.e.
    DESCRIBE is not skipped there. A follow-up select is then stacked on top
    of it and its plan attributes are resolved.

    Scenario B: an Alias whose output name is missing (forced to None) is
    passed to SelectStatement.select; the defensive guard is expected to
    abort inference, leaving the new statement without attributes.
    """
    # --- Scenario A: duplicate output aliases on the parent -----------------
    base = session.create_dataframe([[1, 2, 3]], schema=["a", "b", "c"])
    _ = base.schema  # resolve the parent schema before building on top of it

    first_b = (col("a") + 1).as_("b")
    second_b = (col("c") + 1).as_("b")
    collided = base.select(first_b, second_b)
    with SqlCounter(query_count=0, describe_count=1):
        _ = collided.schema

    # Follow-up select on the duplicate-alias frame; only resolution of the
    # plan attributes is exercised here, no counts are asserted.
    literal_x = lit(1).alias("x")
    on_top = collided.select(literal_x)
    _ = on_top._plan.attributes

    # --- Scenario B: alias with a missing projected name --------------------
    # Go through SelectStatement.select directly rather than
    # DataFrame.resolve, which would call quote_name(None) on the Alias.
    single = session.create_dataframe([[1]], schema=["a"])
    _ = single.schema

    nameless = col("a").alias("out")
    # Bypass the expression's normal attribute handling to blank the name.
    object.__setattr__(nameless._expression, "name", None)

    parent_statement = single._select_statement
    projection = [nameless._named()]
    derived_statement = parent_statement.select(projection)
    assert derived_statement.attributes is None
664+
665+
640666def test_select_star_after_cached_parent (session ):
641667 """SELECT * after parent schema is cached: infer_metadata can copy child attributes when reduce_describe is on."""
642668 df = session .create_dataframe ([[1 , 2 ]], schema = ["a" , "b" ])
0 commit comments