@@ -202,6 +202,73 @@ class ResolveBinBySuite extends AnalysisTest {
202202 assert(bi.distributeColumns.map(_.exprId) == Seq (value.exprId))
203203 }
204204
// Checks that resolving a BinBy replaces the DISTRIBUTE input column in the node's output with a
// same-name attribute that has a new exprId and is listed among the produced attributes, while
// the original input attribute stays referenced through `distributeColumns`.
test("rescaled DISTRIBUTE columns are produced attributes that shadow the input") {
  val bi = ResolveBinBy.apply(unresolved()).asInstanceOf[BinBy]

  // The input column is still read (held in distributeColumns) but is not forwarded by identity.
  assert(bi.distributeColumns.map(_.exprId) == Seq(value.exprId))
  assert(!bi.output.exists(_.exprId == value.exprId))

  // Find the slot `value` occupies in the child output. Guard the lookup explicitly so a fixture
  // change fails with a readable message instead of an IndexOutOfBoundsException at index -1.
  val valuePos = ltzChild.output.indexWhere(_.exprId == value.exprId)
  assert(valuePos >= 0, "fixture column `value` is expected to be part of ltzChild.output")

  // The output `value` keeps its name, type, and position, but has a fresh exprId, and it is a
  // produced attribute. This is what prevents the rescaled value from being confused with the
  // input by any rule that reasons on exprId (predicate pushdown, constraints, CSE).
  val outValue = bi.output(valuePos)
  assert(outValue.name == "value")
  assert(outValue.dataType == DoubleType)
  assert(outValue.exprId != value.exprId)
  assert(bi.scaledDistributeColumns.map(_.exprId) == Seq(outValue.exprId))
  assert(bi.producedAttributes.contains(outValue))

  // Forwarded (non-distribute) columns keep their identity.
  assert(bi.output.exists(_.exprId == label.exprId))
  assert(bi.output.exists(_.exprId == tsStart.exprId))
}
226+
// Checks the multi-column case: every DISTRIBUTE input is swapped, at its own output position,
// for a same-name attribute with a different exprId, and the replacements do not share an id.
test("each of multiple DISTRIBUTE columns is replaced in place with a distinct fresh id") {
  val value2 = $"value2".double
  val child = LocalRelation(tsStart, tsEnd, value, value2, label)
  val distributeInputs = Seq(value, value2)
  val bi = ResolveBinBy.apply(
    unresolved(child = child, distribute = distributeInputs)).asInstanceOf[BinBy]

  assert(bi.distributeColumns.map(_.exprId) == distributeInputs.map(_.exprId))
  assert(bi.scaledDistributeColumns.length == 2)

  // Each input column is replaced at its own position by a fresh-id, same-name attribute.
  distributeInputs.foreach { input =>
    val pos = child.output.indexWhere(_.exprId == input.exprId)
    // Fail with a readable message rather than indexing bi.output at -1.
    assert(pos >= 0, s"input column `${input.name}` is expected to be part of the child output")

    val replaced = bi.output(pos)
    assert(replaced.name == input.name)
    assert(replaced.exprId != input.exprId)
    assert(!bi.output.exists(_.exprId == input.exprId))
  }

  // The scaled columns are pairwise distinct; non-distribute columns keep their identity.
  val scaledIds = bi.scaledDistributeColumns.map(_.exprId)
  assert(scaledIds.distinct.length == scaledIds.length)
  assert(bi.output.exists(_.exprId == label.exprId))
  assert(bi.output.exists(_.exprId == tsStart.exprId))
}
250+
// Checks that the output attribute standing in for a DISTRIBUTE column carries neither the
// qualifier nor the metadata of the input attribute it replaces, and has a different exprId.
test("rescaled DISTRIBUTE column drops the input qualifier and metadata (computed value)") {
  // The rescaled column is a computed value, not a rename, so it must not inherit the input's
  // qualifier or metadata (else stale value-derived metadata such as ML min/max could ride along).
  val md = new MetadataBuilder().putString("comment", "a measure").build()
  val qualifiedValue = AttributeReference("value", DoubleType, nullable = true, md)()
  val child = SubqueryAlias("m", LocalRelation(tsStart, tsEnd, qualifiedValue))
  val bi = ResolveBinBy.apply(
    unresolved(child = child, distribute = Seq(UnresolvedAttribute(Seq("m", "value")))))
    .asInstanceOf[BinBy]

  // The resolved input carries the qualifier and metadata...
  val resolvedInput = bi.distributeColumns.headOption.getOrElse(
    fail("expected the DISTRIBUTE column to be resolved into distributeColumns"))
  assert(resolvedInput.qualifier == Seq("m"))
  assert(resolvedInput.metadata == md)

  // ...but the produced output column drops both and has a fresh id.
  val outValue = bi.output.find(_.name == "value").getOrElse(
    fail("expected an output attribute named `value`"))
  assert(outValue.exprId != qualifiedValue.exprId)
  assert(outValue.qualifier.isEmpty)
  assert(outValue.metadata == Metadata.empty)
}
271+
205272 test(" multipart identifiers disambiguate same-name columns across a JOIN" ) {
206273 val t1Start = AttributeReference (" ts_start" , TimestampType , nullable = true )()
207274 val t1End = AttributeReference (" ts_end" , TimestampType , nullable = true )()
@@ -330,9 +397,12 @@ class ResolveBinBySuite extends AnalysisTest {
330397
331398 val binBys = analyzed.collect { case b : BinBy => b }
332399 assert(binBys.size == 2 , s " expected two BinBy nodes, got ${binBys.size}" )
333- val appendedExprIds = binBys.flatMap(_.appendedAttributes.map(_.exprId))
334- assert(appendedExprIds.distinct.size == appendedExprIds.size,
335- " appended BinBy attributes must have distinct exprIds across the two join sides" )
400+ // All produced attributes (the scaled DISTRIBUTE columns plus the three appended ones) must be
401+ // renewed on one side, so both dedup phases have to cover them.
402+ val producedExprIds = binBys.flatMap(b =>
403+ (b.scaledDistributeColumns ++ b.appendedAttributes).map(_.exprId))
404+ assert(producedExprIds.distinct.size == producedExprIds.size,
405+ " produced BinBy attributes must have distinct exprIds across the two join sides" )
336406 }
337407
338408 // `super.test` escapes the suite-wide flag-on wrapper; pin the flag off explicitly.
0 commit comments