@@ -34,8 +34,7 @@ import org.apache.spark.sql.execution.streaming.operators.stateful.{StatefulOper
3434import org .apache .spark .sql .execution .streaming .operators .stateful .join .StreamingSymmetricHashJoinHelper ._
3535import org .apache .spark .sql .execution .streaming .state .{DropLastNFieldsStatePartitionKeyExtractor , KeyStateEncoderSpec , NoopStatePartitionKeyExtractor , NoPrefixKeyStateEncoderSpec , StatePartitionKeyExtractor , StateSchemaBroadcast , StateStore , StateStoreCheckpointInfo , StateStoreColFamilySchema , StateStoreConf , StateStoreErrors , StateStoreId , StateStoreMetrics , StateStoreProvider , StateStoreProviderId , SupportsFineGrainedReplay , TimestampAsPostfixKeyStateEncoderSpec , TimestampAsPrefixKeyStateEncoderSpec , TimestampKeyStateEncoder }
3636import org .apache .spark .sql .internal .SQLConf
37- import org .apache .spark .sql .types .{BinaryType , BooleanType , ByteType , DataType , DateType , DoubleType , FloatType , IntegerType , LongType , NullType , ShortType , StringType , StructField , StructType , TimestampNTZType , TimestampType }
38- import org .apache .spark .unsafe .types .UTF8String
37+ import org .apache .spark .sql .types .{BooleanType , DataType , LongType , NullType , StructField , StructType }
3938import org .apache .spark .util .NextIterator
4039
4140/**
@@ -702,6 +701,7 @@ class SymmetricHashJoinStateManagerV4(
702701 }
703702
// Timestamp of the group currently being accumulated; getNext treats -1L as
// "no timestamp recorded yet" and overwrites it with the first in-range ts.
704703 private var currentTs = - 1L
// Set to true by getNext the first time it reads an entry with ts > maxTs. Once set,
// getNext goes straight to flushAccumulated() and never calls iter.hasNext/next again.
// NOTE(review): this early exit is only correct if iter yields entries in
// non-decreasing timestamp order. The comment removed by this change said prefixScan
// returns all timestamps for the key, so confirm the ordering for every scan type
// that can back this iterator.
704+ private var pastUpperBound = false
// Buffer of ValueAndMatchPair entries; presumably handed out by flushAccumulated,
// whose body is not visible in this excerpt.
705705 private val valueAndMatchPairs = scala.collection.mutable.ArrayBuffer [ValueAndMatchPair ]()
706706
707707 private def flushAccumulated (): GetValuesResult = {
@@ -719,16 +719,16 @@ class SymmetricHashJoinStateManagerV4(
719719
720720 @ tailrec
721721 override protected def getNext (): GetValuesResult = {
722- if (! iter.hasNext) {
722+ if (pastUpperBound || ! iter.hasNext) {
723723 flushAccumulated()
724724 } else {
725725 val unsafeRowPair = iter.next()
726726 val ts = TimestampKeyStateEncoder .extractTimestamp(unsafeRowPair.key)
727727
728- // Filter out entries outside [minTs, maxTs]. This is essential when using
729- // prefixScan (which returns all timestamps for the key) and serves as a
730- // safety guard for rangeScan as well.
731- if (ts < minTs || ts > maxTs ) {
728+ if (ts > maxTs) {
729+ pastUpperBound = true
730+ getNext()
731+ } else if (ts < minTs) {
732732 getNext()
733733 } else if (currentTs == - 1L || currentTs == ts) {
734734 currentTs = ts
@@ -819,22 +819,16 @@ class SymmetricHashJoinStateManagerV4(
819819 case class EvictedKeysResult (key : UnsafeRow , timestamp : Long , numValues : Int )
820820
// Builds an InternalRow holding one default value per field of `schema`, in field order.
// This change replaces the hand-written defaultValueForType helper with
// Literal.default(dataType).value.
// NOTE(review): Literal must be in scope; its import is not visible in this excerpt.
// NOTE(review): the removed helper returned null for any type it did not list; confirm
// that Literal.default behaves acceptably for every type that can appear in a key schema.
821821 private def defaultInternalRow (schema : StructType ): InternalRow = {
822- InternalRow .fromSeq(schema.map(f => defaultValueForType (f.dataType)))
822+ InternalRow .fromSeq(schema.map(f => Literal .default (f.dataType).value ))
823823 }
824824
825- private def defaultValueForType (dt : DataType ): Any = dt match {
826- case BooleanType => false
827- case ByteType => 0 .toByte
828- case ShortType => 0 .toShort
829- case IntegerType | DateType => 0
830- case LongType | TimestampType | TimestampNTZType => 0L
831- case FloatType => 0.0f
832- case DoubleType => 0.0
833- case StringType => UTF8String .EMPTY_UTF8
834- case BinaryType => Array .emptyByteArray
835- case st : StructType => defaultInternalRow(st)
836- case _ => null
837- }
// Declared as a lazy val so the projection over keySchema is created and applied once,
// on first access, and the resulting row is then shared by every later use.
825+ /**
826+ * Reusable default key row for scan boundary construction. Safe to reuse because
827+ * createKeyRow only reads this row (via BoundReference evaluations) and writes to
828+ * the projection's own internal buffer.
829+ */
830+ private lazy val defaultKey : UnsafeRow = UnsafeProjection .create(keySchema)
831+ .apply(defaultInternalRow(keySchema))
838832
839833 /**
840834 * Build a scan boundary row for rangeScan. The TsWithKeyTypeStore uses
@@ -844,8 +838,6 @@ class SymmetricHashJoinStateManagerV4(
844838 * timestamp matters for ordering in the prefix encoder.
845839 */
846840 private def createScanBoundaryRow (timestamp : Long ): UnsafeRow = {
847- val defaultKey = UnsafeProjection .create(keySchema)
848- .apply(defaultInternalRow(keySchema))
// defaultKey is now the shared class-level row rather than one rebuilt on every call.
// The result is copied before returning; presumably createKeyRow hands back a row
// backed by a reused buffer, so the copy gives the caller a row that later
// createKeyRow calls cannot overwrite (confirm against createKeyRow).
849841 createKeyRow(defaultKey, timestamp).copy()
850842 }
851843
0 commit comments