diff --git a/.scalafmt.conf b/.scalafmt.conf
index 771bfd31a..a6f90c806 100644
--- a/.scalafmt.conf
+++ b/.scalafmt.conf
@@ -1,28 +1,32 @@
-version = 3.8.6
+version = 3.11.1
runner.dialect = scala213
-newlines.beforeMultilineDef = keep
-newlines.topLevelStatements = [before]
-newlines.beforeCurlyLambdaParams = multilineWithCaseOnly
-newlines.afterCurlyLambdaParams = squash
-newlines.implicitParamListModifierForce = [after]
-newlines.avoidForSimpleOverflow = [tooLong]
-newlines.avoidInResultType = true
-newlines.sometimesBeforeColonInMethodReturnType = false
-newlines.beforeTypeBounds = keep
+align.openParenCallSite = true
+align.openParenDefnSite = true
-verticalMultiline.atDefnSite = true
-verticalMultiline.arityThreshold = 10
+maxColumn = 150
+continuationIndent.defnSite = 2
-spaces.inImportCurlyBraces = true
+assumeStandardLibraryStripMargin = true
+danglingParentheses.preset = true
+
+rewrite.rules = [SortImports, RedundantParens, SortModifiers]
+
+newlines.source = keep
+newlines.afterCurlyLambda = preserve
+
+spaces.inImportCurlyBraces = false
includeCurlyBraceInSelectChains = false
includeNoParensInSelectChains = false
optIn.breakChainOnFirstMethodDot = false
-docstrings.style = Asterisk
-docstrings.wrap = no
+literals.long = Upper
+literals.float = Upper
+literals.double = Upper
-literals.long=Upper
-literals.float=Upper
-literals.double=Upper
+docstrings = JavaDoc
+docstrings.style = keep
+docstrings.wrap = no
+docstrings.oneline = keep
+docstrings.blankFirstLine = keep
diff --git a/build.sbt b/build.sbt
index afa1fe404..9f4e2d2e8 100644
--- a/build.sbt
+++ b/build.sbt
@@ -204,9 +204,9 @@ lazy val docs = project
.dependsOn(dataset, cats, ml)
def sparkDependencies(
- sparkVersion: String,
- scope: Configuration = Provided
- ) = Seq(
+ sparkVersion: String,
+ scope: Configuration = Provided
+) = Seq(
libraryDependencies ++= Seq(
"org.apache.spark" %% "spark-core" % sparkVersion % scope,
"org.apache.spark" %% "spark-sql" % sparkVersion % scope
@@ -378,10 +378,11 @@ lazy val spark40Settings = Seq[Setting[_]](
lazy val spark34Settings = Seq[Setting[_]](
tlVersionIntroduced := Map("2.12" -> "0.14.1", "2.13" -> "0.14.1"),
mimaPreviousArtifacts := Set(
- organization.value %% moduleName.value
- .split("-")
- .dropRight(1)
- .mkString("-") % "0.14.1"
+ organization.value %%
+ moduleName.value
+ .split("-")
+ .dropRight(1)
+ .mkString("-") % "0.14.1"
)
)
diff --git a/cats/src/main/scala/frameless/cats/FramelessSyntax.scala b/cats/src/main/scala/frameless/cats/FramelessSyntax.scala
index 663ae5958..7dabb4ff3 100644
--- a/cats/src/main/scala/frameless/cats/FramelessSyntax.scala
+++ b/cats/src/main/scala/frameless/cats/FramelessSyntax.scala
@@ -13,8 +13,8 @@ trait FramelessSyntax extends frameless.FramelessSyntax {
def withLocalProperty(key: String, value: String): F[A] =
for {
session <- ask
- _ <- delay(session.sparkContext.setLocalProperty(key, value))
- a <- fa
+ _ <- delay(session.sparkContext.setLocalProperty(key, value))
+ a <- fa
} yield a
def withGroupId(groupId: String): F[A] = withLocalProperty("spark.jobGroup.id", groupId)
diff --git a/cats/src/main/scala/frameless/cats/implicits.scala b/cats/src/main/scala/frameless/cats/implicits.scala
index 1fa869a7f..7b084adfc 100644
--- a/cats/src/main/scala/frameless/cats/implicits.scala
+++ b/cats/src/main/scala/frameless/cats/implicits.scala
@@ -66,9 +66,9 @@ object outer {
def combine(lhs: RDD[(K, V)], rhs: RDD[(K, V)]): RDD[(K, V)] =
lhs.fullOuterJoin(rhs).mapValues {
case (Some(x), Some(y)) => x |+| y
- case (None, Some(y)) => y
- case (Some(x), None) => x
- case (None, None) => m.empty
+ case (None, Some(y)) => y
+ case (Some(x), None) => x
+ case (None, None) => m.empty
}
}
}
diff --git a/cats/src/test/scala/frameless/cats/test.scala b/cats/src/test/scala/frameless/cats/test.scala
index d75bc3bfd..7780647e2 100644
--- a/cats/src/test/scala/frameless/cats/test.scala
+++ b/cats/src/test/scala/frameless/cats/test.scala
@@ -21,7 +21,7 @@ import org.scalatest.matchers.should.Matchers
import org.scalatest.propspec.AnyPropSpec
trait SparkTests {
- val appID: String = new java.util.Date().toString + math.floor(math.random() * 10E4).toLong.toString
+ val appID: String = new java.util.Date().toString + math.floor(math.random() * 10e4).toLong.toString
val conf: SparkConf = new SparkConf()
.setMaster("local[*]")
@@ -68,7 +68,7 @@ class Test extends AnyPropSpec with Matchers with ScalaCheckPropertyChecks with
PropertyCheckConfiguration(minSize = PosInt(10))
property("spark is working") {
- sc.parallelize(Seq(1, 2, 3)).collect() shouldBe Array(1,2,3)
+ sc.parallelize(Seq(1, 2, 3)).collect() shouldBe Array(1, 2, 3)
}
property("inner pairwise monoid") {
@@ -120,10 +120,10 @@ class Test extends AnyPropSpec with Matchers with ScalaCheckPropertyChecks with
property("pair rdd numeric commutative semigroup example") {
import frameless.cats.implicits._
- val seq = Seq( ("a",2), ("b",3), ("d",6), ("b",2), ("d",1) )
+ val seq = Seq(("a", 2), ("b", 3), ("d", 6), ("b", 2), ("d", 1))
val rdd = seq.toRdd
- rdd.cminByKey.collect().toSeq should contain theSameElementsAs Seq( ("a",2), ("b",2), ("d",1) )
- rdd.cmaxByKey.collect().toSeq should contain theSameElementsAs Seq( ("a",2), ("b",3), ("d",6) )
- rdd.csumByKey.collect().toSeq should contain theSameElementsAs Seq( ("a",2), ("b",5), ("d",7) )
+ rdd.cminByKey.collect().toSeq should contain theSameElementsAs Seq(("a", 2), ("b", 2), ("d", 1))
+ rdd.cmaxByKey.collect().toSeq should contain theSameElementsAs Seq(("a", 2), ("b", 3), ("d", 6))
+ rdd.csumByKey.collect().toSeq should contain theSameElementsAs Seq(("a", 2), ("b", 5), ("d", 7))
}
}
diff --git a/core/src/main/scala/frameless/CatalystAverageable.scala b/core/src/main/scala/frameless/CatalystAverageable.scala
index 401ed65fc..cb0c8504f 100644
--- a/core/src/main/scala/frameless/CatalystAverageable.scala
+++ b/core/src/main/scala/frameless/CatalystAverageable.scala
@@ -19,8 +19,8 @@ object CatalystAverageable {
private[this] def of[In, Out]: CatalystAverageable[In, Out] = theInstance.asInstanceOf[CatalystAverageable[In, Out]]
implicit val framelessAverageableBigDecimal: CatalystAverageable[BigDecimal, BigDecimal] = of[BigDecimal, BigDecimal]
- implicit val framelessAverageableDouble: CatalystAverageable[Double, Double] = of[Double, Double]
- implicit val framelessAverageableLong: CatalystAverageable[Long, Double] = of[Long, Double]
- implicit val framelessAverageableInt: CatalystAverageable[Int, Double] = of[Int, Double]
- implicit val framelessAverageableShort: CatalystAverageable[Short, Double] = of[Short, Double]
+ implicit val framelessAverageableDouble: CatalystAverageable[Double, Double] = of[Double, Double]
+ implicit val framelessAverageableLong: CatalystAverageable[Long, Double] = of[Long, Double]
+ implicit val framelessAverageableInt: CatalystAverageable[Int, Double] = of[Int, Double]
+ implicit val framelessAverageableShort: CatalystAverageable[Short, Double] = of[Short, Double]
}
diff --git a/core/src/main/scala/frameless/CatalystBitShift.scala b/core/src/main/scala/frameless/CatalystBitShift.scala
index 753a61907..67aaccc62 100644
--- a/core/src/main/scala/frameless/CatalystBitShift.scala
+++ b/core/src/main/scala/frameless/CatalystBitShift.scala
@@ -12,9 +12,9 @@ object CatalystBitShift {
private[this] val theInstance = new CatalystBitShift[Any, Any] {}
private[this] def of[In, Out]: CatalystBitShift[In, Out] = theInstance.asInstanceOf[CatalystBitShift[In, Out]]
- implicit val framelessBitShiftBigDecimal: CatalystBitShift[BigDecimal, Int] = of[BigDecimal, Int]
- implicit val framelessBitShiftDouble : CatalystBitShift[Byte, Int] = of[Byte, Int]
- implicit val framelessBitShiftInt : CatalystBitShift[Short, Int] = of[Short, Int]
- implicit val framelessBitShiftLong : CatalystBitShift[Int, Int] = of[Int, Int]
- implicit val framelessBitShiftShort : CatalystBitShift[Long, Long] = of[Long, Long]
+ implicit val framelessBitShiftBigDecimal: CatalystBitShift[BigDecimal, Int] = of[BigDecimal, Int]
+ implicit val framelessBitShiftDouble: CatalystBitShift[Byte, Int] = of[Byte, Int]
+ implicit val framelessBitShiftInt: CatalystBitShift[Short, Int] = of[Short, Int]
+ implicit val framelessBitShiftLong: CatalystBitShift[Int, Int] = of[Int, Int]
+ implicit val framelessBitShiftShort: CatalystBitShift[Long, Long] = of[Long, Long]
}
diff --git a/core/src/main/scala/frameless/CatalystCast.scala b/core/src/main/scala/frameless/CatalystCast.scala
index 1a8a21573..7ceb563f7 100644
--- a/core/src/main/scala/frameless/CatalystCast.scala
+++ b/core/src/main/scala/frameless/CatalystCast.scala
@@ -8,25 +8,25 @@ object CatalystCast {
implicit def framelessCastToString[T]: CatalystCast[T, String] = of[T, String]
- implicit def framelessNumericToLong [A: CatalystNumeric]: CatalystCast[A, Long] = of[A, Long]
- implicit def framelessNumericToInt [A: CatalystNumeric]: CatalystCast[A, Int] = of[A, Int]
- implicit def framelessNumericToShort [A: CatalystNumeric]: CatalystCast[A, Short] = of[A, Short]
- implicit def framelessNumericToByte [A: CatalystNumeric]: CatalystCast[A, Byte] = of[A, Byte]
+ implicit def framelessNumericToLong[A: CatalystNumeric]: CatalystCast[A, Long] = of[A, Long]
+ implicit def framelessNumericToInt[A: CatalystNumeric]: CatalystCast[A, Int] = of[A, Int]
+ implicit def framelessNumericToShort[A: CatalystNumeric]: CatalystCast[A, Short] = of[A, Short]
+ implicit def framelessNumericToByte[A: CatalystNumeric]: CatalystCast[A, Byte] = of[A, Byte]
implicit def framelessNumericToDecimal[A: CatalystNumeric]: CatalystCast[A, BigDecimal] = of[A, BigDecimal]
- implicit def framelessNumericToDouble [A: CatalystNumeric]: CatalystCast[A, Double] = of[A, Double]
+ implicit def framelessNumericToDouble[A: CatalystNumeric]: CatalystCast[A, Double] = of[A, Double]
implicit def framelessBooleanToNumeric[A: CatalystNumeric]: CatalystCast[Boolean, A] = of[Boolean, A]
// doesn't make any sense to include:
// - sqlDateToBoolean: always None
// - sqlTimestampToBoolean: compares us to 0
- implicit val framelessStringToBoolean : CatalystCast[String, Option[Boolean]] = of[String, Option[Boolean]]
- implicit val framelessLongToBoolean : CatalystCast[Long, Boolean] = of[Long, Boolean]
- implicit val framelessIntToBoolean : CatalystCast[Int, Boolean] = of[Int, Boolean]
- implicit val framelessShortToBoolean : CatalystCast[Short, Boolean] = of[Short, Boolean]
- implicit val framelessByteToBoolean : CatalystCast[Byte, Boolean] = of[Byte, Boolean]
- implicit val framelessBigDecimalToBoolean: CatalystCast[BigDecimal, Boolean] = of[BigDecimal, Boolean]
- implicit val framelessDoubleToBoolean : CatalystCast[Double, Boolean] = of[Double, Boolean]
+ implicit val framelessStringToBoolean: CatalystCast[String, Option[Boolean]] = of[String, Option[Boolean]]
+ implicit val framelessLongToBoolean: CatalystCast[Long, Boolean] = of[Long, Boolean]
+ implicit val framelessIntToBoolean: CatalystCast[Int, Boolean] = of[Int, Boolean]
+ implicit val framelessShortToBoolean: CatalystCast[Short, Boolean] = of[Short, Boolean]
+ implicit val framelessByteToBoolean: CatalystCast[Byte, Boolean] = of[Byte, Boolean]
+ implicit val framelessBigDecimalToBoolean: CatalystCast[BigDecimal, Boolean] = of[BigDecimal, Boolean]
+ implicit val framelessDoubleToBoolean: CatalystCast[Double, Boolean] = of[Double, Boolean]
// TODO
@@ -38,9 +38,8 @@ object CatalystCast {
// implicit object stringToLong extends CatalystCast[String, Option[Long]]
// implicit object stringToSqlDate extends CatalystCast[String, Option[SQLDate]]
-
// needs verification:
- //implicit object sqlTimestampToSqlDate extends CatalystCast[SQLTimestamp, SQLDate]
+ // implicit object sqlTimestampToSqlDate extends CatalystCast[SQLTimestamp, SQLDate]
// needs verification:
// implicit object sqlTimestampToDecimal extends CatalystCast[SQLTimestamp, BigDecimal]
diff --git a/core/src/main/scala/frameless/CatalystCollection.scala b/core/src/main/scala/frameless/CatalystCollection.scala
index 3456869a0..9fd4f6ba1 100644
--- a/core/src/main/scala/frameless/CatalystCollection.scala
+++ b/core/src/main/scala/frameless/CatalystCollection.scala
@@ -9,8 +9,8 @@ object CatalystCollection {
private[this] val theInstance = new CatalystCollection[Any] {}
private[this] def of[A[_]]: CatalystCollection[A] = theInstance.asInstanceOf[CatalystCollection[A]]
- implicit val arrayObject : CatalystCollection[Array] = of[Array]
- implicit val seqObject : CatalystCollection[Seq] = of[Seq]
- implicit val listObject : CatalystCollection[List] = of[List]
+ implicit val arrayObject: CatalystCollection[Array] = of[Array]
+ implicit val seqObject: CatalystCollection[Seq] = of[Seq]
+ implicit val listObject: CatalystCollection[List] = of[List]
implicit val vectorObject: CatalystCollection[Vector] = of[Vector]
}
diff --git a/core/src/main/scala/frameless/CatalystDivisible.scala b/core/src/main/scala/frameless/CatalystDivisible.scala
index c9080a5d8..c78622df2 100644
--- a/core/src/main/scala/frameless/CatalystDivisible.scala
+++ b/core/src/main/scala/frameless/CatalystDivisible.scala
@@ -13,9 +13,9 @@ object CatalystDivisible {
private[this] def of[In, Out]: CatalystDivisible[In, Out] = theInstance.asInstanceOf[CatalystDivisible[In, Out]]
implicit val framelessDivisibleBigDecimal: CatalystDivisible[BigDecimal, BigDecimal] = of[BigDecimal, BigDecimal]
- implicit val framelessDivisibleDouble : CatalystDivisible[Double, Double] = of[Double, Double]
- implicit val framelessDivisibleInt : CatalystDivisible[Int, Double] = of[Int, Double]
- implicit val framelessDivisibleLong : CatalystDivisible[Long, Double] = of[Long, Double]
- implicit val framelessDivisibleByte : CatalystDivisible[Byte, Double] = of[Byte, Double]
- implicit val framelessDivisibleShort : CatalystDivisible[Short, Double] = of[Short, Double]
+ implicit val framelessDivisibleDouble: CatalystDivisible[Double, Double] = of[Double, Double]
+ implicit val framelessDivisibleInt: CatalystDivisible[Int, Double] = of[Int, Double]
+ implicit val framelessDivisibleLong: CatalystDivisible[Long, Double] = of[Long, Double]
+ implicit val framelessDivisibleByte: CatalystDivisible[Byte, Double] = of[Byte, Double]
+ implicit val framelessDivisibleShort: CatalystDivisible[Short, Double] = of[Short, Double]
}
diff --git a/core/src/main/scala/frameless/CatalystIsin.scala b/core/src/main/scala/frameless/CatalystIsin.scala
index f630a7155..fe12ab622 100644
--- a/core/src/main/scala/frameless/CatalystIsin.scala
+++ b/core/src/main/scala/frameless/CatalystIsin.scala
@@ -8,11 +8,11 @@ trait CatalystIsin[A]
object CatalystIsin {
implicit object framelessBigDecimal extends CatalystIsin[BigDecimal]
- implicit object framelessByte extends CatalystIsin[Byte]
- implicit object framelessDouble extends CatalystIsin[Double]
- implicit object framelessFloat extends CatalystIsin[Float]
- implicit object framelessInt extends CatalystIsin[Int]
- implicit object framelessLong extends CatalystIsin[Long]
- implicit object framelessShort extends CatalystIsin[Short]
- implicit object framelesssString extends CatalystIsin[String]
+ implicit object framelessByte extends CatalystIsin[Byte]
+ implicit object framelessDouble extends CatalystIsin[Double]
+ implicit object framelessFloat extends CatalystIsin[Float]
+ implicit object framelessInt extends CatalystIsin[Int]
+ implicit object framelessLong extends CatalystIsin[Long]
+ implicit object framelessShort extends CatalystIsin[Short]
+ implicit object framelesssString extends CatalystIsin[String]
}
diff --git a/core/src/main/scala/frameless/CatalystNaN.scala b/core/src/main/scala/frameless/CatalystNaN.scala
index 3e7be8263..16db67e4a 100644
--- a/core/src/main/scala/frameless/CatalystNaN.scala
+++ b/core/src/main/scala/frameless/CatalystNaN.scala
@@ -10,7 +10,6 @@ object CatalystNaN {
private[this] val theInstance = new CatalystNaN[Any] {}
private[this] def of[A]: CatalystNaN[A] = theInstance.asInstanceOf[CatalystNaN[A]]
- implicit val framelessFloatNaN : CatalystNaN[Float] = of[Float]
- implicit val framelessDoubleNaN : CatalystNaN[Double] = of[Double]
+ implicit val framelessFloatNaN: CatalystNaN[Float] = of[Float]
+ implicit val framelessDoubleNaN: CatalystNaN[Double] = of[Double]
}
-
diff --git a/core/src/main/scala/frameless/CatalystNumeric.scala b/core/src/main/scala/frameless/CatalystNumeric.scala
index c819ba2ae..74b399f56 100644
--- a/core/src/main/scala/frameless/CatalystNumeric.scala
+++ b/core/src/main/scala/frameless/CatalystNumeric.scala
@@ -11,9 +11,9 @@ object CatalystNumeric {
private[this] def of[A]: CatalystNumeric[A] = theInstance.asInstanceOf[CatalystNumeric[A]]
implicit val framelessbigDecimalNumeric: CatalystNumeric[BigDecimal] = of[BigDecimal]
- implicit val framelessbyteNumeric : CatalystNumeric[Byte] = of[Byte]
- implicit val framelessdoubleNumeric : CatalystNumeric[Double] = of[Double]
- implicit val framelessintNumeric : CatalystNumeric[Int] = of[Int]
- implicit val framelesslongNumeric : CatalystNumeric[Long] = of[Long]
- implicit val framelessshortNumeric : CatalystNumeric[Short] = of[Short]
+ implicit val framelessbyteNumeric: CatalystNumeric[Byte] = of[Byte]
+ implicit val framelessdoubleNumeric: CatalystNumeric[Double] = of[Double]
+ implicit val framelessintNumeric: CatalystNumeric[Int] = of[Int]
+ implicit val framelesslongNumeric: CatalystNumeric[Long] = of[Long]
+ implicit val framelessshortNumeric: CatalystNumeric[Short] = of[Short]
}
diff --git a/core/src/main/scala/frameless/CatalystNumericWithJavaBigDecimal.scala b/core/src/main/scala/frameless/CatalystNumericWithJavaBigDecimal.scala
index 8fee63be2..79d61d965 100644
--- a/core/src/main/scala/frameless/CatalystNumericWithJavaBigDecimal.scala
+++ b/core/src/main/scala/frameless/CatalystNumericWithJavaBigDecimal.scala
@@ -11,11 +11,11 @@ object CatalystNumericWithJavaBigDecimal {
private[this] val theInstance = new CatalystNumericWithJavaBigDecimal[Any, Any] {}
private[this] def of[In, Out]: CatalystNumericWithJavaBigDecimal[In, Out] = theInstance.asInstanceOf[CatalystNumericWithJavaBigDecimal[In, Out]]
- implicit val framelessAbsoluteBigDecimal: CatalystNumericWithJavaBigDecimal[BigDecimal, java.math.BigDecimal] = of[BigDecimal, java.math.BigDecimal]
- implicit val framelessAbsoluteDouble : CatalystNumericWithJavaBigDecimal[Double, Double] = of[Double, Double]
- implicit val framelessAbsoluteInt : CatalystNumericWithJavaBigDecimal[Int, Int] = of[Int, Int]
- implicit val framelessAbsoluteLong : CatalystNumericWithJavaBigDecimal[Long, Long] = of[Long, Long]
- implicit val framelessAbsoluteShort : CatalystNumericWithJavaBigDecimal[Short, Short] = of[Short, Short]
- implicit val framelessAbsoluteByte : CatalystNumericWithJavaBigDecimal[Byte, Byte] = of[Byte, Byte]
+ implicit val framelessAbsoluteBigDecimal: CatalystNumericWithJavaBigDecimal[BigDecimal, java.math.BigDecimal] = of[BigDecimal, java.math.BigDecimal]
+ implicit val framelessAbsoluteDouble: CatalystNumericWithJavaBigDecimal[Double, Double] = of[Double, Double]
+ implicit val framelessAbsoluteInt: CatalystNumericWithJavaBigDecimal[Int, Int] = of[Int, Int]
+ implicit val framelessAbsoluteLong: CatalystNumericWithJavaBigDecimal[Long, Long] = of[Long, Long]
+ implicit val framelessAbsoluteShort: CatalystNumericWithJavaBigDecimal[Short, Short] = of[Short, Short]
+ implicit val framelessAbsoluteByte: CatalystNumericWithJavaBigDecimal[Byte, Byte] = of[Byte, Byte]
-}
\ No newline at end of file
+}
diff --git a/core/src/main/scala/frameless/CatalystOrdered.scala b/core/src/main/scala/frameless/CatalystOrdered.scala
index e73604909..bdf361c1b 100644
--- a/core/src/main/scala/frameless/CatalystOrdered.scala
+++ b/core/src/main/scala/frameless/CatalystOrdered.scala
@@ -13,30 +13,28 @@ object CatalystOrdered {
private[this] val theInstance = new CatalystOrdered[Any] {}
private[this] def of[A]: CatalystOrdered[A] = theInstance.asInstanceOf[CatalystOrdered[A]]
- implicit val framelessIntOrdered : CatalystOrdered[Int] = of[Int]
- implicit val framelessBooleanOrdered : CatalystOrdered[Boolean] = of[Boolean]
- implicit val framelessByteOrdered : CatalystOrdered[Byte] = of[Byte]
- implicit val framelessShortOrdered : CatalystOrdered[Short] = of[Short]
- implicit val framelessLongOrdered : CatalystOrdered[Long] = of[Long]
- implicit val framelessFloatOrdered : CatalystOrdered[Float] = of[Float]
- implicit val framelessDoubleOrdered : CatalystOrdered[Double] = of[Double]
- implicit val framelessBigDecimalOrdered : CatalystOrdered[BigDecimal] = of[BigDecimal]
- implicit val framelessSQLDateOrdered : CatalystOrdered[SQLDate] = of[SQLDate]
+ implicit val framelessIntOrdered: CatalystOrdered[Int] = of[Int]
+ implicit val framelessBooleanOrdered: CatalystOrdered[Boolean] = of[Boolean]
+ implicit val framelessByteOrdered: CatalystOrdered[Byte] = of[Byte]
+ implicit val framelessShortOrdered: CatalystOrdered[Short] = of[Short]
+ implicit val framelessLongOrdered: CatalystOrdered[Long] = of[Long]
+ implicit val framelessFloatOrdered: CatalystOrdered[Float] = of[Float]
+ implicit val framelessDoubleOrdered: CatalystOrdered[Double] = of[Double]
+ implicit val framelessBigDecimalOrdered: CatalystOrdered[BigDecimal] = of[BigDecimal]
+ implicit val framelessSQLDateOrdered: CatalystOrdered[SQLDate] = of[SQLDate]
implicit val framelessSQLTimestampOrdered: CatalystOrdered[SQLTimestamp] = of[SQLTimestamp]
- implicit val framelessStringOrdered : CatalystOrdered[String] = of[String]
- implicit val framelessInstantOrdered : CatalystOrdered[Instant] = of[Instant]
- implicit val framelessDurationOrdered : CatalystOrdered[Duration] = of[Duration]
- implicit val framelessPeriodOrdered : CatalystOrdered[Period] = of[Period]
+ implicit val framelessStringOrdered: CatalystOrdered[String] = of[String]
+ implicit val framelessInstantOrdered: CatalystOrdered[Instant] = of[Instant]
+ implicit val framelessDurationOrdered: CatalystOrdered[Duration] = of[Duration]
+ implicit val framelessPeriodOrdered: CatalystOrdered[Period] = of[Period]
- implicit def injectionOrdered[A, B]
- (implicit
- i0: Injection[A, B],
- i1: CatalystOrdered[B]
- ): CatalystOrdered[A] = of[A]
+ implicit def injectionOrdered[A, B](implicit
+ i0: Injection[A, B],
+ i1: CatalystOrdered[B]
+ ): CatalystOrdered[A] = of[A]
- implicit def deriveGeneric[G, H <: HList]
- (implicit
- i0: Generic.Aux[G, H],
- i1: Lazy[LiftAll[CatalystOrdered, H]]
- ): CatalystOrdered[G] = of[G]
+ implicit def deriveGeneric[G, H <: HList](implicit
+ i0: Generic.Aux[G, H],
+ i1: Lazy[LiftAll[CatalystOrdered, H]]
+ ): CatalystOrdered[G] = of[G]
}
diff --git a/core/src/main/scala/frameless/CatalystPivotable.scala b/core/src/main/scala/frameless/CatalystPivotable.scala
index a7b34da64..2ff69b033 100644
--- a/core/src/main/scala/frameless/CatalystPivotable.scala
+++ b/core/src/main/scala/frameless/CatalystPivotable.scala
@@ -9,8 +9,8 @@ object CatalystPivotable {
private[this] val theInstance = new CatalystPivotable[Any] {}
private[this] def of[A]: CatalystPivotable[A] = theInstance.asInstanceOf[CatalystPivotable[A]]
- implicit val framelessIntPivotable : CatalystPivotable[Int] = of[Int]
- implicit val framelessLongPivotable : CatalystPivotable[Long] = of[Long]
+ implicit val framelessIntPivotable: CatalystPivotable[Int] = of[Int]
+ implicit val framelessLongPivotable: CatalystPivotable[Long] = of[Long]
implicit val framelessBooleanPivotable: CatalystPivotable[Boolean] = of[Boolean]
- implicit val framelessStringPivotable : CatalystPivotable[String] = of[String]
+ implicit val framelessStringPivotable: CatalystPivotable[String] = of[String]
}
diff --git a/core/src/main/scala/frameless/CatalystRound.scala b/core/src/main/scala/frameless/CatalystRound.scala
index ee50b794a..8205945d4 100644
--- a/core/src/main/scala/frameless/CatalystRound.scala
+++ b/core/src/main/scala/frameless/CatalystRound.scala
@@ -12,8 +12,8 @@ object CatalystRound {
private[this] def of[In, Out]: CatalystRound[In, Out] = theInstance.asInstanceOf[CatalystRound[In, Out]]
implicit val framelessBigDecimal: CatalystRound[BigDecimal, java.math.BigDecimal] = of[BigDecimal, java.math.BigDecimal]
- implicit val framelessDouble : CatalystRound[Double, Long] = of[Double, Long]
- implicit val framelessInt : CatalystRound[Int, Long] = of[Int, Long]
- implicit val framelessLong : CatalystRound[Long, Long] = of[Long, Long]
- implicit val framelessShort : CatalystRound[Short, Long] = of[Short, Long]
-}
\ No newline at end of file
+ implicit val framelessDouble: CatalystRound[Double, Long] = of[Double, Long]
+ implicit val framelessInt: CatalystRound[Int, Long] = of[Int, Long]
+ implicit val framelessLong: CatalystRound[Long, Long] = of[Long, Long]
+ implicit val framelessShort: CatalystRound[Short, Long] = of[Short, Long]
+}
diff --git a/core/src/main/scala/frameless/CatalystSummable.scala b/core/src/main/scala/frameless/CatalystSummable.scala
index 94010505e..def5d514a 100644
--- a/core/src/main/scala/frameless/CatalystSummable.scala
+++ b/core/src/main/scala/frameless/CatalystSummable.scala
@@ -23,9 +23,9 @@ object CatalystSummable {
new CatalystSummable[In, Out] { val zero: In = _zero }
}
- implicit val framelessSummableLong : CatalystSummable[Long, Long] = CatalystSummable(zero = 0L)
+ implicit val framelessSummableLong: CatalystSummable[Long, Long] = CatalystSummable(zero = 0L)
implicit val framelessSummableBigDecimal: CatalystSummable[BigDecimal, BigDecimal] = CatalystSummable(zero = BigDecimal(0))
- implicit val framelessSummableDouble : CatalystSummable[Double, Double] = CatalystSummable(zero = 0.0)
- implicit val framelessSummableInt : CatalystSummable[Int, Long] = CatalystSummable(zero = 0)
- implicit val framelessSummableShort : CatalystSummable[Short, Long] = CatalystSummable(zero = 0)
+ implicit val framelessSummableDouble: CatalystSummable[Double, Double] = CatalystSummable(zero = 0.0)
+ implicit val framelessSummableInt: CatalystSummable[Int, Long] = CatalystSummable(zero = 0)
+ implicit val framelessSummableShort: CatalystSummable[Short, Long] = CatalystSummable(zero = 0)
}
diff --git a/core/src/main/scala/frameless/CatalystVariance.scala b/core/src/main/scala/frameless/CatalystVariance.scala
index 9e843fa70..ef91b4c00 100644
--- a/core/src/main/scala/frameless/CatalystVariance.scala
+++ b/core/src/main/scala/frameless/CatalystVariance.scala
@@ -12,9 +12,9 @@ object CatalystVariance {
private[this] val theInstance = new CatalystVariance[Any] {}
private[this] def of[A]: CatalystVariance[A] = theInstance.asInstanceOf[CatalystVariance[A]]
- implicit val framelessIntVariance : CatalystVariance[Int] = of[Int]
- implicit val framelessLongVariance : CatalystVariance[Long] = of[Long]
- implicit val framelessShortVariance : CatalystVariance[Short] = of[Short]
+ implicit val framelessIntVariance: CatalystVariance[Int] = of[Int]
+ implicit val framelessLongVariance: CatalystVariance[Long] = of[Long]
+ implicit val framelessShortVariance: CatalystVariance[Short] = of[Short]
implicit val framelessBigDecimalVariance: CatalystVariance[BigDecimal] = of[BigDecimal]
- implicit val framelessDoubleVariance : CatalystVariance[Double] = of[Double]
+ implicit val framelessDoubleVariance: CatalystVariance[Double] = of[Double]
}
diff --git a/dataset/src/main/scala/frameless/FramelessSyntax.scala b/dataset/src/main/scala/frameless/FramelessSyntax.scala
index 5ba294921..d97fe88ed 100644
--- a/dataset/src/main/scala/frameless/FramelessSyntax.scala
+++ b/dataset/src/main/scala/frameless/FramelessSyntax.scala
@@ -12,7 +12,7 @@ trait FramelessSyntax {
def typed: TypedDataset[T] = TypedDataset.create[T](self)
}
- implicit class DataframeSyntax(self: DataFrame){
+ implicit class DataframeSyntax(self: DataFrame) {
def unsafeTyped[T: TypedEncoder]: TypedDataset[T] = TypedDataset.createUnsafe(self)
}
}
diff --git a/dataset/src/main/scala/frameless/InjectionEnum.scala b/dataset/src/main/scala/frameless/InjectionEnum.scala
index 4ed1006e3..243cb91ac 100644
--- a/dataset/src/main/scala/frameless/InjectionEnum.scala
+++ b/dataset/src/main/scala/frameless/InjectionEnum.scala
@@ -16,10 +16,10 @@ trait InjectionEnum {
implicit def coproductInjectionEnum[H, T <: Coproduct](
implicit
- typeable: Typeable[H] ,
+ typeable: Typeable[H],
gen: Generic.Aux[H, HNil],
tInjectionEnum: Injection[T, String]
- ): Injection[H :+: T, String] = {
+ ): Injection[H :+: T, String] = {
val dataConstructorName = typeable.describe.takeWhile(_ != '.')
Injection(
@@ -40,7 +40,7 @@ trait InjectionEnum {
implicit
gen: Generic.Aux[A, R],
rInjectionEnum: Injection[R, String]
- ): Injection[A, String] =
+ ): Injection[A, String] =
Injection(
value => rInjectionEnum(gen.to(value)),
name => gen.from(rInjectionEnum.invert(name))
diff --git a/dataset/src/main/scala/frameless/IsValueClass.scala b/dataset/src/main/scala/frameless/IsValueClass.scala
index 78605c130..7d5a838e1 100644
--- a/dataset/src/main/scala/frameless/IsValueClass.scala
+++ b/dataset/src/main/scala/frameless/IsValueClass.scala
@@ -5,13 +5,15 @@ import shapeless.labelled.FieldType
/** Evidence that `T` is a Value class */
@annotation.implicitNotFound(msg = "${T} is not a Value class")
-final class IsValueClass[T] private() {}
+final class IsValueClass[T] private () {}
object IsValueClass {
+
/** Provides an evidence `A` is a Value class */
implicit def apply[A <: AnyVal, G <: ::[_, HNil], H <: ::[_ <: FieldType[_ <: Symbol, _], HNil]](
implicit
- i0: LabelledGeneric.Aux[A, G],
- i1: DropUnitValues.Aux[G, H]): IsValueClass[A] = new IsValueClass[A]
+ i0: LabelledGeneric.Aux[A, G],
+ i1: DropUnitValues.Aux[G, H]
+ ): IsValueClass[A] = new IsValueClass[A]
}
diff --git a/dataset/src/main/scala/frameless/Job.scala b/dataset/src/main/scala/frameless/Job.scala
index 40931b8b4..85e92ed3e 100644
--- a/dataset/src/main/scala/frameless/Job.scala
+++ b/dataset/src/main/scala/frameless/Job.scala
@@ -3,6 +3,7 @@ package frameless
import org.apache.spark.sql.SparkSession
sealed abstract class Job[A](implicit spark: SparkSession) { self =>
+
/** Runs a new Spark job. */
def run(): A
@@ -32,7 +33,6 @@ sealed abstract class Job[A](implicit spark: SparkSession) { self =>
}
}
-
object Job {
def apply[A](a: => A)(implicit spark: SparkSession): Job[A] = new Job[A] {
def run(): A = a
diff --git a/dataset/src/main/scala/frameless/RecordEncoder.scala b/dataset/src/main/scala/frameless/RecordEncoder.scala
index 7427d9de0..e0ecdd472 100644
--- a/dataset/src/main/scala/frameless/RecordEncoder.scala
+++ b/dataset/src/main/scala/frameless/RecordEncoder.scala
@@ -3,9 +3,7 @@ package frameless
import org.apache.spark.sql.FramelessInternals
import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.catalyst.expressions.objects.{
- Invoke, NewInstance, UnwrapOption, WrapOption
-}
+import org.apache.spark.sql.catalyst.expressions.objects.{Invoke, NewInstance, UnwrapOption, WrapOption}
import org.apache.spark.sql.types._
import shapeless._
@@ -30,25 +28,24 @@ trait RecordEncoderFields[T <: HList] extends Serializable {
object RecordEncoderFields {
- implicit def deriveRecordLast[K <: Symbol, H]
- (implicit
- key: Witness.Aux[K],
- head: RecordFieldEncoder[H]
- ): RecordEncoderFields[FieldType[K, H] :: HNil] = new RecordEncoderFields[FieldType[K, H] :: HNil] {
- def value: List[RecordEncoderField] = fieldEncoder[K, H] :: Nil
- }
+ implicit def deriveRecordLast[K <: Symbol, H](implicit
+ key: Witness.Aux[K],
+ head: RecordFieldEncoder[H]
+ ): RecordEncoderFields[FieldType[K, H] :: HNil] = new RecordEncoderFields[FieldType[K, H] :: HNil] {
+ def value: List[RecordEncoderField] = fieldEncoder[K, H] :: Nil
+ }
- implicit def deriveRecordCons[K <: Symbol, H, T <: HList]
- (implicit
- key: Witness.Aux[K],
- head: RecordFieldEncoder[H],
- tail: RecordEncoderFields[T]
- ): RecordEncoderFields[FieldType[K, H] :: T] = new RecordEncoderFields[FieldType[K, H] :: T] {
- def value: List[RecordEncoderField] =
- fieldEncoder[K, H] :: tail.value.map(x => x.copy(ordinal = x.ordinal + 1))
+ implicit def deriveRecordCons[K <: Symbol, H, T <: HList](implicit
+ key: Witness.Aux[K],
+ head: RecordFieldEncoder[H],
+ tail: RecordEncoderFields[T]
+ ): RecordEncoderFields[FieldType[K, H] :: T] = new RecordEncoderFields[FieldType[K, H] :: T] {
+ def value: List[RecordEncoderField] =
+ fieldEncoder[K, H] :: tail.value.map(x => x.copy(ordinal = x.ordinal + 1))
}
- private def fieldEncoder[K <: Symbol, H](implicit key: Witness.Aux[K], e: RecordFieldEncoder[H]): RecordEncoderField = RecordEncoderField(0, key.value.name, e.encoder)
+ private def fieldEncoder[K <: Symbol, H](implicit key: Witness.Aux[K], e: RecordFieldEncoder[H]): RecordEncoderField =
+ RecordEncoderField(0, key.value.name, e.encoder)
}
/**
@@ -67,21 +64,19 @@ object NewInstanceExprs {
def from(exprs: List[Expression]): Seq[Expression] = Nil
}
- implicit def deriveUnit[K <: Symbol, T <: HList]
- (implicit
- tail: NewInstanceExprs[T]
- ): NewInstanceExprs[FieldType[K, Unit] :: T] = new NewInstanceExprs[FieldType[K, Unit] :: T] {
- def from(exprs: List[Expression]): Seq[Expression] =
- Literal.fromObject(()) +: tail.from(exprs)
- }
+ implicit def deriveUnit[K <: Symbol, T <: HList](implicit
+ tail: NewInstanceExprs[T]
+ ): NewInstanceExprs[FieldType[K, Unit] :: T] = new NewInstanceExprs[FieldType[K, Unit] :: T] {
+ def from(exprs: List[Expression]): Seq[Expression] =
+ Literal.fromObject(()) +: tail.from(exprs)
+ }
- implicit def deriveNonUnit[K <: Symbol, V, T <: HList]
- (implicit
- notUnit: V =:!= Unit,
- tail: NewInstanceExprs[T]
- ): NewInstanceExprs[FieldType[K, V] :: T] = new NewInstanceExprs[FieldType[K, V] :: T] {
- def from(exprs: List[Expression]): Seq[Expression] = exprs.head +: tail.from(exprs.tail)
- }
+ implicit def deriveNonUnit[K <: Symbol, V, T <: HList](implicit
+ notUnit: V =:!= Unit,
+ tail: NewInstanceExprs[T]
+ ): NewInstanceExprs[FieldType[K, V] :: T] = new NewInstanceExprs[FieldType[K, V] :: T] {
+ def from(exprs: List[Expression]): Seq[Expression] = exprs.head +: tail.from(exprs.tail)
+ }
}
/**
@@ -101,85 +96,87 @@ object DropUnitValues {
def apply(l: HNil): Out = HNil
}
- implicit def deriveUnit[K <: Symbol, T <: HList, OutT <: HList]
- (implicit
- dropUnitValues : DropUnitValues.Aux[T, OutT]
- ): Aux[FieldType[K, Unit] :: T, OutT] = new DropUnitValues[FieldType[K, Unit] :: T] {
- type Out = OutT
- def apply(l : FieldType[K, Unit] :: T): Out = dropUnitValues(l.tail)
- }
+ implicit def deriveUnit[K <: Symbol, T <: HList, OutT <: HList](implicit
+ dropUnitValues: DropUnitValues.Aux[T, OutT]
+ ): Aux[FieldType[K, Unit] :: T, OutT] = new DropUnitValues[FieldType[K, Unit] :: T] {
+ type Out = OutT
+ def apply(l: FieldType[K, Unit] :: T): Out = dropUnitValues(l.tail)
+ }
- implicit def deriveNonUnit[K <: Symbol, V, T <: HList, OutH, OutT <: HList]
- (implicit
- nonUnit: V =:!= Unit,
- dropUnitValues : DropUnitValues.Aux[T, OutT]
- ): Aux[FieldType[K, V] :: T, FieldType[K, V] :: OutT] = new DropUnitValues[FieldType[K, V] :: T] {
- type Out = FieldType[K, V] :: OutT
- def apply(l : FieldType[K, V] :: T): Out = l.head :: dropUnitValues(l.tail)
- }
+ implicit def deriveNonUnit[K <: Symbol, V, T <: HList, OutH, OutT <: HList](implicit
+ nonUnit: V =:!= Unit,
+ dropUnitValues: DropUnitValues.Aux[T, OutT]
+ ): Aux[FieldType[K, V] :: T, FieldType[K, V] :: OutT] = new DropUnitValues[FieldType[K, V] :: T] {
+ type Out = FieldType[K, V] :: OutT
+ def apply(l: FieldType[K, V] :: T): Out = l.head :: dropUnitValues(l.tail)
+ }
}
-class RecordEncoder[F, G <: HList, H <: HList]
- (implicit
- i0: LabelledGeneric.Aux[F, G],
- i1: DropUnitValues.Aux[G, H],
- i2: IsHCons[H],
- fields: Lazy[RecordEncoderFields[H]],
- newInstanceExprs: Lazy[NewInstanceExprs[G]],
- classTag: ClassTag[F]
- ) extends TypedEncoder[F] {
- def nullable: Boolean = false
-
- def jvmRepr: DataType = FramelessInternals.objectTypeFor[F]
-
- def catalystRepr: DataType = {
- val structFields = fields.value.value.map { field =>
- StructField(
- name = field.name,
- dataType = field.encoder.catalystRepr,
- nullable = field.encoder.nullable,
- metadata = Metadata.empty
- )
- }
+class RecordEncoder[F, G <: HList, H <: HList](implicit
+ i0: LabelledGeneric.Aux[F, G],
+ i1: DropUnitValues.Aux[G, H],
+ i2: IsHCons[H],
+ fields: Lazy[RecordEncoderFields[H]],
+ newInstanceExprs: Lazy[NewInstanceExprs[G]],
+ classTag: ClassTag[F]
+) extends TypedEncoder[F] {
+ def nullable: Boolean = false
+
+ def jvmRepr: DataType = FramelessInternals.objectTypeFor[F]
+
+ def catalystRepr: DataType = {
+ val structFields = fields.value.value.map { field =>
+ StructField(
+ name = field.name,
+ dataType = field.encoder.catalystRepr,
+ nullable = field.encoder.nullable,
+ metadata = Metadata.empty
+ )
+ }
- StructType(structFields)
+ StructType(structFields)
+ }
+
+ def toCatalyst(path: Expression): Expression = {
+ val nameExprs = fields.value.value.map { field =>
+ Literal(field.name)
}
- def toCatalyst(path: Expression): Expression = {
- val nameExprs = fields.value.value.map { field =>
- Literal(field.name)
- }
+ val valueExprs = fields.value.value.map { field =>
+ val fieldPath = Invoke(path, field.name, field.encoder.jvmRepr, Nil)
+ field.encoder.toCatalyst(fieldPath)
+ }
- val valueExprs = fields.value.value.map { field =>
- val fieldPath = Invoke(path, field.name, field.encoder.jvmRepr, Nil)
- field.encoder.toCatalyst(fieldPath)
- }
+ // the way exprs are encoded in CreateNamedStruct
+ val exprs = nameExprs.zip(valueExprs).flatMap {
+ case (nameExpr, valueExpr) => nameExpr :: valueExpr :: Nil
+ }
- // the way exprs are encoded in CreateNamedStruct
- val exprs = nameExprs.zip(valueExprs).flatMap {
- case (nameExpr, valueExpr) => nameExpr :: valueExpr :: Nil
- }
+ val createExpr = CreateNamedStruct(exprs)
+ val nullExpr = Literal.create(null, createExpr.dataType)
- val createExpr = CreateNamedStruct(exprs)
- val nullExpr = Literal.create(null, createExpr.dataType)
+ If(IsNull(path), nullExpr, createExpr)
+ }
- If(IsNull(path), nullExpr, createExpr)
+ def fromCatalyst(path: Expression): Expression = {
+ val exprs = fields.value.value.map { field =>
+ field.encoder.fromCatalyst(
+ GetStructField(path, field.ordinal, Some(field.name))
+ )
}
- def fromCatalyst(path: Expression): Expression = {
- val exprs = fields.value.value.map { field =>
- field.encoder.fromCatalyst(
- GetStructField(path, field.ordinal, Some(field.name)))
- }
-
- val newArgs = newInstanceExprs.value.from(exprs)
- val newExpr = NewInstance(
- classTag.runtimeClass, newArgs, jvmRepr, propagateNull = true)
+ val newArgs = newInstanceExprs.value.from(exprs)
+ val newExpr = NewInstance(
+ classTag.runtimeClass,
+ newArgs,
+ jvmRepr,
+ propagateNull = true
+ )
- val nullExpr = Literal.create(null, jvmRepr)
+ val nullExpr = Literal.create(null, jvmRepr)
- If(IsNull(path), nullExpr, newExpr)
- }
+ If(IsNull(path), nullExpr, newExpr)
+ }
}
final class RecordFieldEncoder[T](
@@ -198,59 +195,70 @@ object RecordFieldEncoder extends RecordFieldEncoderLowPriority {
* @tparam K the key type for the fields
* @tparam V the inner value type
*/
- implicit def optionValueClass[F : IsValueClass, G <: ::[_, HNil], H <: ::[_ <: FieldType[_ <: Symbol, _], HNil], K <: Symbol, V, KS <: ::[_ <: Symbol, HNil]]
- (implicit
- i0: LabelledGeneric.Aux[F, G],
- i1: DropUnitValues.Aux[G, H],
- i2: IsHCons.Aux[H, _ <: FieldType[K, V], HNil],
- i3: Keys.Aux[H, KS],
- i4: IsHCons.Aux[KS, K, HNil],
- i5: TypedEncoder[V],
- i6: ClassTag[F]
- ): RecordFieldEncoder[Option[F]] = {
- val fieldName = i4.head(i3()).name
- val innerJvmRepr = ObjectType(i6.runtimeClass)
-
- val catalyst: Expression => Expression = { path =>
- val value = UnwrapOption(innerJvmRepr, path)
- val javaValue = Invoke(value, fieldName, i5.jvmRepr, Nil)
-
- i5.toCatalyst(javaValue)
- }
+ implicit def optionValueClass[F: IsValueClass,
+ G <: ::[
+ _,
+ HNil
+ ],
+ H <: ::[_ <: FieldType[_ <: Symbol, _], HNil],
+ K <: Symbol,
+ V,
+ KS <: ::[_ <: Symbol, HNil]
+ ](implicit
+ i0: LabelledGeneric.Aux[F, G],
+ i1: DropUnitValues.Aux[G, H],
+ i2: IsHCons.Aux[H, _ <: FieldType[K, V], HNil],
+ i3: Keys.Aux[H, KS],
+ i4: IsHCons.Aux[KS, K, HNil],
+ i5: TypedEncoder[V],
+ i6: ClassTag[F]
+ ): RecordFieldEncoder[Option[F]] = {
+ val fieldName = i4.head(i3()).name
+ val innerJvmRepr = ObjectType(i6.runtimeClass)
+
+ val catalyst: Expression => Expression = { path =>
+ val value = UnwrapOption(innerJvmRepr, path)
+ val javaValue = Invoke(value, fieldName, i5.jvmRepr, Nil)
+
+ i5.toCatalyst(javaValue)
+ }
- val fromCatalyst: Expression => Expression = { path =>
- val javaValue = i5.fromCatalyst(path)
- val value = NewInstance(i6.runtimeClass, Seq(javaValue), innerJvmRepr)
+ val fromCatalyst: Expression => Expression = { path =>
+ val javaValue = i5.fromCatalyst(path)
+ val value = NewInstance(i6.runtimeClass, Seq(javaValue), innerJvmRepr)
- WrapOption(value, innerJvmRepr)
- }
+ WrapOption(value, innerJvmRepr)
+ }
- val jvmr = ObjectType(classOf[Option[F]])
+ val jvmr = ObjectType(classOf[Option[F]])
- new RecordFieldEncoder[Option[F]](
- encoder = new TypedEncoder[Option[F]] {
- val nullable = true
+ new RecordFieldEncoder[Option[F]](
+ encoder = new TypedEncoder[Option[F]] {
+ val nullable = true
- val jvmRepr = jvmr
+ val jvmRepr = jvmr
- @inline def catalystRepr: DataType = i5.catalystRepr
+ @inline def catalystRepr: DataType = i5.catalystRepr
- def fromCatalyst(path: Expression): Expression = {
- val javaValue = i5.fromCatalyst(path)
- val value = NewInstance(
- i6.runtimeClass, Seq(javaValue), innerJvmRepr)
+ def fromCatalyst(path: Expression): Expression = {
+ val javaValue = i5.fromCatalyst(path)
+ val value = NewInstance(
+ i6.runtimeClass,
+ Seq(javaValue),
+ innerJvmRepr
+ )
- WrapOption(value, innerJvmRepr)
- }
+ WrapOption(value, innerJvmRepr)
+ }
- def toCatalyst(path: Expression): Expression = catalyst(path)
+ def toCatalyst(path: Expression): Expression = catalyst(path)
- override def toString: String = s"RecordFieldEncoder.optionValueClass[${i6.runtimeClass.getName}]('${fieldName}', $i5)"
- },
- jvmRepr = jvmr,
- fromCatalyst = fromCatalyst,
- toCatalyst = catalyst
- )
+ override def toString: String = s"RecordFieldEncoder.optionValueClass[${i6.runtimeClass.getName}]('${fieldName}', $i5)"
+ },
+ jvmRepr = jvmr,
+ fromCatalyst = fromCatalyst,
+ toCatalyst = catalyst
+ )
}
/**
@@ -259,50 +267,59 @@ object RecordFieldEncoder extends RecordFieldEncoderLowPriority {
* @tparam H the single field of the value class (with guarantee it's not a `Unit` value)
* @tparam V the inner value type
*/
- implicit def valueClass[F : IsValueClass, G <: ::[_, HNil], H <: ::[_ <: FieldType[_ <: Symbol, _], HNil], K <: Symbol, V, KS <: ::[_ <: Symbol, HNil]]
- (implicit
- i0: LabelledGeneric.Aux[F, G],
- i1: DropUnitValues.Aux[G, H],
- i2: IsHCons.Aux[H, _ <: FieldType[K, V], HNil],
- i3: Keys.Aux[H, KS],
- i4: IsHCons.Aux[KS, K, HNil],
- i5: TypedEncoder[V],
- i6: ClassTag[F]
- ): RecordFieldEncoder[F] = {
- val cls = i6.runtimeClass
- val jvmr = i5.jvmRepr
- val fieldName = i4.head(i3()).name
-
- new RecordFieldEncoder[F](
- encoder = new TypedEncoder[F] {
- def nullable = i5.nullable
-
- def jvmRepr = jvmr
-
- def catalystRepr: DataType = i5.catalystRepr
-
- def fromCatalyst(path: Expression): Expression =
- i5.fromCatalyst(path)
-
- @inline def toCatalyst(path: Expression): Expression =
- i5.toCatalyst(path)
-
- override def toString: String = s"RecordFieldEncoder.valueClass[${cls.getName}]('${fieldName}', ${i5})"
- },
- jvmRepr = FramelessInternals.objectTypeFor[F],
- fromCatalyst = { expr: Expression =>
- NewInstance(
- i6.runtimeClass,
- i5.fromCatalyst(expr) :: Nil,
- ObjectType(i6.runtimeClass))
- },
- toCatalyst = { expr: Expression =>
- i5.toCatalyst(Invoke(expr, fieldName, jvmr))
- }
- )
+ implicit def valueClass[F: IsValueClass,
+ G <: ::[
+ _,
+ HNil
+ ],
+ H <: ::[_ <: FieldType[_ <: Symbol, _], HNil],
+ K <: Symbol,
+ V,
+ KS <: ::[_ <: Symbol, HNil]
+ ](implicit
+ i0: LabelledGeneric.Aux[F, G],
+ i1: DropUnitValues.Aux[G, H],
+ i2: IsHCons.Aux[H, _ <: FieldType[K, V], HNil],
+ i3: Keys.Aux[H, KS],
+ i4: IsHCons.Aux[KS, K, HNil],
+ i5: TypedEncoder[V],
+ i6: ClassTag[F]
+ ): RecordFieldEncoder[F] = {
+ val cls = i6.runtimeClass
+ val jvmr = i5.jvmRepr
+ val fieldName = i4.head(i3()).name
+
+ new RecordFieldEncoder[F](
+ encoder = new TypedEncoder[F] {
+ def nullable = i5.nullable
+
+ def jvmRepr = jvmr
+
+ def catalystRepr: DataType = i5.catalystRepr
+
+ def fromCatalyst(path: Expression): Expression =
+ i5.fromCatalyst(path)
+
+ @inline def toCatalyst(path: Expression): Expression =
+ i5.toCatalyst(path)
+
+ override def toString: String = s"RecordFieldEncoder.valueClass[${cls.getName}]('${fieldName}', ${i5})"
+ },
+ jvmRepr = FramelessInternals.objectTypeFor[F],
+ fromCatalyst = { expr: Expression =>
+ NewInstance(
+ i6.runtimeClass,
+ i5.fromCatalyst(expr) :: Nil,
+ ObjectType(i6.runtimeClass)
+ )
+ },
+ toCatalyst = { expr: Expression =>
+ i5.toCatalyst(Invoke(expr, fieldName, jvmr))
+ }
+ )
}
}
-private[frameless] sealed trait RecordFieldEncoderLowPriority {
+sealed private[frameless] trait RecordFieldEncoderLowPriority {
implicit def apply[T](implicit e: TypedEncoder[T]): RecordFieldEncoder[T] = new RecordFieldEncoder[T](e, e.jvmRepr, e.fromCatalyst, e.toCatalyst)
}
diff --git a/dataset/src/main/scala/frameless/TypedColumn.scala b/dataset/src/main/scala/frameless/TypedColumn.scala
index 2888d8608..4140cabe8 100644
--- a/dataset/src/main/scala/frameless/TypedColumn.scala
+++ b/dataset/src/main/scala/frameless/TypedColumn.scala
@@ -1,16 +1,16 @@
package frameless
-import frameless.functions.{ litAggr, lit => flit }
+import frameless.functions.{lit => flit, litAggr}
import frameless.syntax._
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.types.DecimalType
-import org.apache.spark.sql.{ Column, FramelessInternals }
+import org.apache.spark.sql.{Column, FramelessInternals}
// Spark 4 added org.apache.spark.sql.catalyst.expressions.With, which the wildcard import
// above would otherwise bind in preference to frameless.With. Alias frameless.With so its
// references resolve consistently on every supported Spark version.
-import frameless.{ With => FWith }
+import frameless.{With => FWith}
import shapeless._
import shapeless.ops.record.Selector
@@ -30,18 +30,17 @@ sealed trait UntypedExpression[T] {
* Expression used in `select`-like constructions.
*/
sealed class TypedColumn[T, U](
- expr: Expression
- )(implicit
- val uenc: TypedEncoder[U])
+ expr: Expression
+)(implicit val uenc: TypedEncoder[U])
extends AbstractTypedColumn[T, U](expr) {
type ThisType[A, B] = TypedColumn[A, B]
def this(
- column: Column
- )(implicit
- uencoder: TypedEncoder[U]
- ) =
+ column: Column
+ )(implicit
+ uencoder: TypedEncoder[U]
+ ) =
this(FramelessInternals.expr(column))
override def typed[W, U1: TypedEncoder](c: Column): TypedColumn[W, U1] =
@@ -54,18 +53,17 @@ sealed class TypedColumn[T, U](
* Expression used in `agg`-like constructions.
*/
sealed class TypedAggregate[T, U](
- expr: Expression
- )(implicit
- val uenc: TypedEncoder[U])
+ expr: Expression
+)(implicit val uenc: TypedEncoder[U])
extends AbstractTypedColumn[T, U](expr) {
type ThisType[A, B] = TypedAggregate[A, B]
def this(
- column: Column
- )(implicit
- uencoder: TypedEncoder[U]
- ) = {
+ column: Column
+ )(implicit
+ uencoder: TypedEncoder[U]
+ ) = {
this(FramelessInternals.expr(column))
}
@@ -88,9 +86,8 @@ sealed class TypedAggregate[T, U](
* @tparam U type of column
*/
abstract class AbstractTypedColumn[T, U](
- val expr: Expression
- )(implicit
- val uencoder: TypedEncoder[U])
+ val expr: Expression
+)(implicit val uencoder: TypedEncoder[U])
extends UntypedExpression[T] { self =>
type ThisType[A, B] <: AbstractTypedColumn[A, B]
@@ -109,10 +106,10 @@ abstract class AbstractTypedColumn[T, U](
trait Mapper[X] {
def map[G, OutputType[_, _]](
- u: ThisType[T, X] => OutputType[T, G]
- )(implicit
- ev: OutputType[T, G] <:< AbstractTypedColumn[T, G]
- ): OutputType[T, Option[G]] = {
+ u: ThisType[T, X] => OutputType[T, G]
+ )(implicit
+ ev: OutputType[T, G] <:< AbstractTypedColumn[T, G]
+ ): OutputType[T, Option[G]] = {
u(self.asInstanceOf[ThisType[T, X]])
.asInstanceOf[OutputType[T, Option[G]]]
}
@@ -129,18 +126,17 @@ abstract class AbstractTypedColumn[T, U](
* }}}
*/
def opt[X](
- implicit
- x: U <:< Option[X]
- ): Mapper[X] = new Mapper[X] {}
+ implicit x: U <:< Option[X]
+ ): Mapper[X] = new Mapper[X] {}
/** Fall back to an untyped Column */
def untyped: Column = FramelessInternals.column(expr)
private def equalsTo[TT, W](
- other: ThisType[TT, U]
- )(implicit
- w: FWith.Aux[T, TT, W]
- ): ThisType[W, Boolean] = typed {
+ other: ThisType[TT, U]
+ )(implicit
+ w: FWith.Aux[T, TT, W]
+ ): ThisType[W, Boolean] = typed {
if (uencoder.nullable) EqualNullSafe(self.expr, other.expr)
else EqualTo(self.expr, other.expr)
}
@@ -175,10 +171,10 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def ===[TT, W](
- other: ThisType[TT, U]
- )(implicit
- w: FWith.Aux[T, TT, W]
- ): ThisType[W, Boolean] =
+ other: ThisType[TT, U]
+ )(implicit
+ w: FWith.Aux[T, TT, W]
+ ): ThisType[W, Boolean] =
equalsTo(other)
/**
@@ -191,10 +187,10 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def =!=[TT, W](
- other: ThisType[TT, U]
- )(implicit
- w: FWith.Aux[T, TT, W]
- ): ThisType[W, Boolean] =
+ other: ThisType[TT, U]
+ )(implicit
+ w: FWith.Aux[T, TT, W]
+ ): ThisType[W, Boolean] =
typed(Not(equalsTo(other).expr))
/**
@@ -214,9 +210,8 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def isNone(
- implicit
- i0: U <:< Option[_]
- ): ThisType[T, Boolean] =
+ implicit i0: U <:< Option[_]
+ ): ThisType[T, Boolean] =
typed(IsNull(expr))
/**
@@ -225,9 +220,8 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def isNotNone(
- implicit
- i0: U <:< Option[_]
- ): ThisType[T, Boolean] =
+ implicit i0: U <:< Option[_]
+ ): ThisType[T, Boolean] =
typed(IsNotNull(expr))
/**
@@ -236,9 +230,8 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def isNaN(
- implicit
- n: CatalystNaN[U]
- ): ThisType[T, Boolean] =
+ implicit n: CatalystNaN[U]
+ ): ThisType[T, Boolean] =
typed(self.untyped.isNaN)
/**
@@ -250,10 +243,10 @@ abstract class AbstractTypedColumn[T, U](
* }}}
*/
def isSome[V](
- exists: ThisType[T, V] => ThisType[T, Boolean]
- )(implicit
- i0: U <:< Option[V]
- ): ThisType[T, Boolean] = someOr[V](exists, false)
+ exists: ThisType[T, V] => ThisType[T, Boolean]
+ )(implicit
+ i0: U <:< Option[V]
+ ): ThisType[T, Boolean] = someOr[V](exists, false)
/**
* True if the value for this optional column `exists` as expected,
@@ -264,17 +257,17 @@ abstract class AbstractTypedColumn[T, U](
* }}}
*/
def isSomeOrNone[V](
- exists: ThisType[T, V] => ThisType[T, Boolean]
- )(implicit
- i0: U <:< Option[V]
- ): ThisType[T, Boolean] = someOr[V](exists, true)
+ exists: ThisType[T, V] => ThisType[T, Boolean]
+ )(implicit
+ i0: U <:< Option[V]
+ ): ThisType[T, Boolean] = someOr[V](exists, true)
private def someOr[V](
- exists: ThisType[T, V] => ThisType[T, Boolean],
- default: Boolean
- )(implicit
- i0: U <:< Option[V]
- ): ThisType[T, Boolean] = {
+ exists: ThisType[T, V] => ThisType[T, Boolean],
+ default: Boolean
+ )(implicit
+ i0: U <:< Option[V]
+ ): ThisType[T, Boolean] = {
val defaultExpr = if (default) Literal.TrueLiteral else Literal.FalseLiteral
typed(Coalesce(Seq(opt(i0).map(exists).expr, defaultExpr)))
@@ -288,11 +281,11 @@ abstract class AbstractTypedColumn[T, U](
* }}}
*/
def getOrElse[TT, W, Out](
- default: ThisType[TT, Out]
- )(implicit
- i0: U =:= Option[Out],
- i1: FWith.Aux[T, TT, W]
- ): ThisType[W, Out] =
+ default: ThisType[TT, Out]
+ )(implicit
+ i0: U =:= Option[Out],
+ i1: FWith.Aux[T, TT, W]
+ ): ThisType[W, Out] =
typed(Coalesce(Seq(expr, default.expr)))(default.uencoder)
/**
@@ -303,10 +296,10 @@ abstract class AbstractTypedColumn[T, U](
* }}}
*/
def getOrElse[Out: TypedEncoder](
- default: Out
- )(implicit
- i0: U =:= Option[Out]
- ): ThisType[T, Out] =
+ default: Out
+ )(implicit
+ i0: U =:= Option[Out]
+ ): ThisType[T, Out] =
getOrElse(lit[Out](default))
/**
@@ -320,11 +313,11 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def plus[TT, W](
- other: ThisType[TT, U]
- )(implicit
- n: CatalystNumeric[U],
- w: FWith.Aux[T, TT, W]
- ): ThisType[W, U] =
+ other: ThisType[TT, U]
+ )(implicit
+ n: CatalystNumeric[U],
+ w: FWith.Aux[T, TT, W]
+ ): ThisType[W, U] =
typed(self.untyped.plus(other.untyped))
/**
@@ -337,11 +330,11 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def +[TT, W](
- other: ThisType[TT, U]
- )(implicit
- n: CatalystNumeric[U],
- w: FWith.Aux[T, TT, W]
- ): ThisType[W, U] =
+ other: ThisType[TT, U]
+ )(implicit
+ n: CatalystNumeric[U],
+ w: FWith.Aux[T, TT, W]
+ ): ThisType[W, U] =
plus(other)
/**
@@ -355,10 +348,10 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def +(
- u: U
- )(implicit
- n: CatalystNumeric[U]
- ): ThisType[T, U] =
+ u: U
+ )(implicit
+ n: CatalystNumeric[U]
+ ): ThisType[T, U] =
typed(self.untyped.plus(u))
/**
@@ -371,9 +364,8 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def unary_!(
- implicit
- i0: U <:< Boolean
- ): ThisType[T, Boolean] =
+ implicit i0: U <:< Boolean
+ ): ThisType[T, Boolean] =
typed(!untyped)
/**
@@ -386,9 +378,8 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def unary_-(
- implicit
- n: CatalystNumeric[U]
- ): ThisType[T, U] =
+ implicit n: CatalystNumeric[U]
+ ): ThisType[T, U] =
typed(-self.untyped)
/**
@@ -401,11 +392,11 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def minus[TT, W](
- other: ThisType[TT, U]
- )(implicit
- n: CatalystNumeric[U],
- w: FWith.Aux[T, TT, W]
- ): ThisType[W, U] =
+ other: ThisType[TT, U]
+ )(implicit
+ n: CatalystNumeric[U],
+ w: FWith.Aux[T, TT, W]
+ ): ThisType[W, U] =
typed(self.untyped.minus(other.untyped))
/**
@@ -418,11 +409,11 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def -[TT, W](
- other: ThisType[TT, U]
- )(implicit
- n: CatalystNumeric[U],
- w: FWith.Aux[T, TT, W]
- ): ThisType[W, U] =
+ other: ThisType[TT, U]
+ )(implicit
+ n: CatalystNumeric[U],
+ w: FWith.Aux[T, TT, W]
+ ): ThisType[W, U] =
minus(other)
/**
@@ -436,10 +427,10 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def -(
- u: U
- )(implicit
- n: CatalystNumeric[U]
- ): ThisType[T, U] =
+ u: U
+ )(implicit
+ n: CatalystNumeric[U]
+ ): ThisType[T, U] =
typed(self.untyped.minus(u))
/**
@@ -452,12 +443,12 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def multiply[TT, W](
- other: ThisType[TT, U]
- )(implicit
- n: CatalystNumeric[U],
- w: FWith.Aux[T, TT, W],
- t: ClassTag[U]
- ): ThisType[W, U] = typed {
+ other: ThisType[TT, U]
+ )(implicit
+ n: CatalystNumeric[U],
+ w: FWith.Aux[T, TT, W],
+ t: ClassTag[U]
+ ): ThisType[W, U] = typed {
if (t.runtimeClass == BigDecimal(0).getClass) {
// That's apparently the only way to get sound multiplication.
// See https://issues.apache.org/jira/browse/SPARK-22036
@@ -478,12 +469,12 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def *[TT, W](
- other: ThisType[TT, U]
- )(implicit
- n: CatalystNumeric[U],
- w: FWith.Aux[T, TT, W],
- t: ClassTag[U]
- ): ThisType[W, U] =
+ other: ThisType[TT, U]
+ )(implicit
+ n: CatalystNumeric[U],
+ w: FWith.Aux[T, TT, W],
+ t: ClassTag[U]
+ ): ThisType[W, U] =
multiply(other)
/**
@@ -496,10 +487,10 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def *(
- u: U
- )(implicit
- n: CatalystNumeric[U]
- ): ThisType[T, U] =
+ u: U
+ )(implicit
+ n: CatalystNumeric[U]
+ ): ThisType[T, U] =
typed(self.untyped.multiply(u))
/**
@@ -508,11 +499,11 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def mod[Out: TypedEncoder, TT, W](
- other: ThisType[TT, U]
- )(implicit
- n: CatalystNumeric[U],
- w: FWith.Aux[T, TT, W]
- ): ThisType[W, Out] =
+ other: ThisType[TT, U]
+ )(implicit
+ n: CatalystNumeric[U],
+ w: FWith.Aux[T, TT, W]
+ ): ThisType[W, Out] =
typed(self.untyped.mod(other.untyped))
/**
@@ -521,11 +512,11 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def %[TT, W](
- other: ThisType[TT, U]
- )(implicit
- n: CatalystNumeric[U],
- w: FWith.Aux[T, TT, W]
- ): ThisType[W, U] =
+ other: ThisType[TT, U]
+ )(implicit
+ n: CatalystNumeric[U],
+ w: FWith.Aux[T, TT, W]
+ ): ThisType[W, U] =
mod(other)
/**
@@ -534,10 +525,10 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def %(
- u: U
- )(implicit
- n: CatalystNumeric[U]
- ): ThisType[T, U] =
+ u: U
+ )(implicit
+ n: CatalystNumeric[U]
+ ): ThisType[T, U] =
typed(self.untyped.mod(u))
/**
@@ -551,11 +542,11 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def divide[Out: TypedEncoder, TT, W](
- other: ThisType[TT, U]
- )(implicit
- n: CatalystDivisible[U, Out],
- w: FWith.Aux[T, TT, W]
- ): ThisType[W, Out] =
+ other: ThisType[TT, U]
+ )(implicit
+ n: CatalystDivisible[U, Out],
+ w: FWith.Aux[T, TT, W]
+ ): ThisType[W, Out] =
typed(self.untyped.divide(other.untyped))
/**
@@ -569,12 +560,12 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def /[Out, TT, W](
- other: ThisType[TT, U]
- )(implicit
- n: CatalystDivisible[U, Out],
- e: TypedEncoder[Out],
- w: FWith.Aux[T, TT, W]
- ): ThisType[W, Out] =
+ other: ThisType[TT, U]
+ )(implicit
+ n: CatalystDivisible[U, Out],
+ e: TypedEncoder[Out],
+ w: FWith.Aux[T, TT, W]
+ ): ThisType[W, Out] =
divide(other)
/**
@@ -588,10 +579,10 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def /(
- u: U
- )(implicit
- n: CatalystNumeric[U]
- ): ThisType[T, Double] =
+ u: U
+ )(implicit
+ n: CatalystNumeric[U]
+ ): ThisType[T, Double] =
typed(self.untyped.divide(u))
/**
@@ -600,9 +591,8 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def desc(
- implicit
- catalystOrdered: CatalystOrdered[U]
- ): SortedTypedColumn[T, U] =
+ implicit catalystOrdered: CatalystOrdered[U]
+ ): SortedTypedColumn[T, U] =
new SortedTypedColumn[T, U](untyped.desc)
/**
@@ -611,9 +601,8 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def asc(
- implicit
- catalystOrdered: CatalystOrdered[U]
- ): SortedTypedColumn[T, U] =
+ implicit catalystOrdered: CatalystOrdered[U]
+ ): SortedTypedColumn[T, U] =
new SortedTypedColumn[T, U](untyped.asc)
/**
@@ -626,10 +615,10 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def bitwiseAND(
- u: U
- )(implicit
- n: CatalystBitwise[U]
- ): ThisType[T, U] =
+ u: U
+ )(implicit
+ n: CatalystBitwise[U]
+ ): ThisType[T, U] =
typed(self.untyped.bitwiseAND(u))
/**
@@ -642,11 +631,11 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def bitwiseAND[TT, W](
- other: ThisType[TT, U]
- )(implicit
- n: CatalystBitwise[U],
- w: FWith.Aux[T, TT, W]
- ): ThisType[W, U] =
+ other: ThisType[TT, U]
+ )(implicit
+ n: CatalystBitwise[U],
+ w: FWith.Aux[T, TT, W]
+ ): ThisType[W, U] =
typed(self.untyped.bitwiseAND(other.untyped))
/**
@@ -659,10 +648,10 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def &(
- u: U
- )(implicit
- n: CatalystBitwise[U]
- ): ThisType[T, U] =
+ u: U
+ )(implicit
+ n: CatalystBitwise[U]
+ ): ThisType[T, U] =
bitwiseAND(u)
/**
@@ -675,11 +664,11 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def &[TT, W](
- other: ThisType[TT, U]
- )(implicit
- n: CatalystBitwise[U],
- w: FWith.Aux[T, TT, W]
- ): ThisType[W, U] =
+ other: ThisType[TT, U]
+ )(implicit
+ n: CatalystBitwise[U],
+ w: FWith.Aux[T, TT, W]
+ ): ThisType[W, U] =
bitwiseAND(other)
/**
@@ -692,10 +681,10 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def bitwiseOR(
- u: U
- )(implicit
- n: CatalystBitwise[U]
- ): ThisType[T, U] =
+ u: U
+ )(implicit
+ n: CatalystBitwise[U]
+ ): ThisType[T, U] =
typed(self.untyped.bitwiseOR(u))
/**
@@ -708,11 +697,11 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def bitwiseOR[TT, W](
- other: ThisType[TT, U]
- )(implicit
- n: CatalystBitwise[U],
- w: FWith.Aux[T, TT, W]
- ): ThisType[W, U] =
+ other: ThisType[TT, U]
+ )(implicit
+ n: CatalystBitwise[U],
+ w: FWith.Aux[T, TT, W]
+ ): ThisType[W, U] =
typed(self.untyped.bitwiseOR(other.untyped))
/**
@@ -725,10 +714,10 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def |(
- u: U
- )(implicit
- n: CatalystBitwise[U]
- ): ThisType[T, U] =
+ u: U
+ )(implicit
+ n: CatalystBitwise[U]
+ ): ThisType[T, U] =
bitwiseOR(u)
/**
@@ -741,11 +730,11 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def |[TT, W](
- other: ThisType[TT, U]
- )(implicit
- n: CatalystBitwise[U],
- w: FWith.Aux[T, TT, W]
- ): ThisType[W, U] =
+ other: ThisType[TT, U]
+ )(implicit
+ n: CatalystBitwise[U],
+ w: FWith.Aux[T, TT, W]
+ ): ThisType[W, U] =
bitwiseOR(other)
/**
@@ -758,10 +747,10 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def bitwiseXOR(
- u: U
- )(implicit
- n: CatalystBitwise[U]
- ): ThisType[T, U] =
+ u: U
+ )(implicit
+ n: CatalystBitwise[U]
+ ): ThisType[T, U] =
typed(self.untyped.bitwiseXOR(u))
/**
@@ -774,11 +763,11 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def bitwiseXOR[TT, W](
- other: ThisType[TT, U]
- )(implicit
- n: CatalystBitwise[U],
- w: FWith.Aux[T, TT, W]
- ): ThisType[W, U] =
+ other: ThisType[TT, U]
+ )(implicit
+ n: CatalystBitwise[U],
+ w: FWith.Aux[T, TT, W]
+ ): ThisType[W, U] =
typed(self.untyped.bitwiseXOR(other.untyped))
/**
@@ -791,10 +780,10 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def ^(
- u: U
- )(implicit
- n: CatalystBitwise[U]
- ): ThisType[T, U] =
+ u: U
+ )(implicit
+ n: CatalystBitwise[U]
+ ): ThisType[T, U] =
bitwiseXOR(u)
/**
@@ -807,11 +796,11 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def ^[TT, W](
- other: ThisType[TT, U]
- )(implicit
- n: CatalystBitwise[U],
- w: FWith.Aux[T, TT, W]
- ): ThisType[W, U] =
+ other: ThisType[TT, U]
+ )(implicit
+ n: CatalystBitwise[U],
+ w: FWith.Aux[T, TT, W]
+ ): ThisType[W, U] =
bitwiseXOR(other)
/**
@@ -821,9 +810,8 @@ abstract class AbstractTypedColumn[T, U](
* }}}
*/
def cast[A: TypedEncoder](
- implicit
- c: CatalystCast[U, A]
- ): ThisType[T, A] =
+ implicit c: CatalystCast[U, A]
+ ): ThisType[T, A] =
typed(self.untyped.cast(TypedEncoder[A].catalystRepr))
/**
@@ -836,11 +824,11 @@ abstract class AbstractTypedColumn[T, U](
* @param len length of the substring
*/
def substr(
- startPos: Int,
- len: Int
- )(implicit
- ev: U =:= String
- ): ThisType[T, String] =
+ startPos: Int,
+ len: Int
+ )(implicit
+ ev: U =:= String
+ ): ThisType[T, String] =
typed(self.untyped.substr(startPos, len))
/**
@@ -853,13 +841,13 @@ abstract class AbstractTypedColumn[T, U](
* @param len expression for the length of the substring
*/
def substr[TT1, TT2, W1, W2](
- startPos: ThisType[TT1, Int],
- len: ThisType[TT2, Int]
- )(implicit
- ev: U =:= String,
- w1: FWith.Aux[T, TT1, W1],
- w2: FWith.Aux[W1, TT2, W2]
- ): ThisType[W2, String] =
+ startPos: ThisType[TT1, Int],
+ len: ThisType[TT2, Int]
+ )(implicit
+ ev: U =:= String,
+ w1: FWith.Aux[T, TT1, W1],
+ w2: FWith.Aux[W1, TT2, W2]
+ ): ThisType[W2, String] =
typed(self.untyped.substr(startPos.untyped, len.untyped))
/**
@@ -875,10 +863,10 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def like(
- literal: String
- )(implicit
- ev: U =:= String
- ): ThisType[T, Boolean] =
+ literal: String
+ )(implicit
+ ev: U =:= String
+ ): ThisType[T, Boolean] =
typed(self.untyped.like(literal))
/**
@@ -894,10 +882,10 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def rlike(
- literal: String
- )(implicit
- ev: U =:= String
- ): ThisType[T, Boolean] =
+ literal: String
+ )(implicit
+ ev: U =:= String
+ ): ThisType[T, Boolean] =
typed(self.untyped.rlike(literal))
/**
@@ -910,10 +898,10 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def contains(
- other: String
- )(implicit
- ev: U =:= String
- ): ThisType[T, Boolean] =
+ other: String
+ )(implicit
+ ev: U =:= String
+ ): ThisType[T, Boolean] =
typed(self.untyped.contains(other))
/**
@@ -926,11 +914,11 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def contains[TT, W](
- other: ThisType[TT, U]
- )(implicit
- ev: U =:= String,
- w: FWith.Aux[T, TT, W]
- ): ThisType[W, Boolean] =
+ other: ThisType[TT, U]
+ )(implicit
+ ev: U =:= String,
+ w: FWith.Aux[T, TT, W]
+ ): ThisType[W, Boolean] =
typed(self.untyped.contains(other.untyped))
/**
@@ -943,10 +931,10 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def startsWith(
- other: String
- )(implicit
- ev: U =:= String
- ): ThisType[T, Boolean] =
+ other: String
+ )(implicit
+ ev: U =:= String
+ ): ThisType[T, Boolean] =
typed(self.untyped.startsWith(other))
/**
@@ -959,11 +947,11 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def startsWith[TT, W](
- other: ThisType[TT, U]
- )(implicit
- ev: U =:= String,
- w: FWith.Aux[T, TT, W]
- ): ThisType[W, Boolean] =
+ other: ThisType[TT, U]
+ )(implicit
+ ev: U =:= String,
+ w: FWith.Aux[T, TT, W]
+ ): ThisType[W, Boolean] =
typed(self.untyped.startsWith(other.untyped))
/**
@@ -976,10 +964,10 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def endsWith(
- other: String
- )(implicit
- ev: U =:= String
- ): ThisType[T, Boolean] =
+ other: String
+ )(implicit
+ ev: U =:= String
+ ): ThisType[T, Boolean] =
typed(self.untyped.endsWith(other))
/**
@@ -992,11 +980,11 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def endsWith[TT, W](
- other: ThisType[TT, U]
- )(implicit
- ev: U =:= String,
- w: FWith.Aux[T, TT, W]
- ): ThisType[W, Boolean] =
+ other: ThisType[TT, U]
+ )(implicit
+ ev: U =:= String,
+ w: FWith.Aux[T, TT, W]
+ ): ThisType[W, Boolean] =
typed(self.untyped.endsWith(other.untyped))
/**
@@ -1006,10 +994,10 @@ abstract class AbstractTypedColumn[T, U](
* }}}
*/
def and[TT, W](
- other: ThisType[TT, Boolean]
- )(implicit
- w: FWith.Aux[T, TT, W]
- ): ThisType[W, Boolean] =
+ other: ThisType[TT, Boolean]
+ )(implicit
+ w: FWith.Aux[T, TT, W]
+ ): ThisType[W, Boolean] =
typed(self.untyped.and(other.untyped))
/**
@@ -1019,10 +1007,10 @@ abstract class AbstractTypedColumn[T, U](
* }}}
*/
def &&[TT, W](
- other: ThisType[TT, Boolean]
- )(implicit
- w: FWith.Aux[T, TT, W]
- ): ThisType[W, Boolean] =
+ other: ThisType[TT, Boolean]
+ )(implicit
+ w: FWith.Aux[T, TT, W]
+ ): ThisType[W, Boolean] =
and(other)
/**
@@ -1032,10 +1020,10 @@ abstract class AbstractTypedColumn[T, U](
* }}}
*/
def or[TT, W](
- other: ThisType[TT, Boolean]
- )(implicit
- w: FWith.Aux[T, TT, W]
- ): ThisType[W, Boolean] =
+ other: ThisType[TT, Boolean]
+ )(implicit
+ w: FWith.Aux[T, TT, W]
+ ): ThisType[W, Boolean] =
typed(self.untyped.or(other.untyped))
/**
@@ -1045,10 +1033,10 @@ abstract class AbstractTypedColumn[T, U](
* }}}
*/
def ||[TT, W](
- other: ThisType[TT, Boolean]
- )(implicit
- w: FWith.Aux[T, TT, W]
- ): ThisType[W, Boolean] =
+ other: ThisType[TT, Boolean]
+ )(implicit
+ w: FWith.Aux[T, TT, W]
+ ): ThisType[W, Boolean] =
or(other)
/**
@@ -1063,11 +1051,11 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def <[TT, W](
- other: ThisType[TT, U]
- )(implicit
- i0: CatalystOrdered[U],
- w: FWith.Aux[T, TT, W]
- ): ThisType[W, Boolean] =
+ other: ThisType[TT, U]
+ )(implicit
+ i0: CatalystOrdered[U],
+ w: FWith.Aux[T, TT, W]
+ ): ThisType[W, Boolean] =
typed(self.untyped < other.untyped)
/**
@@ -1082,11 +1070,11 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def <=[TT, W](
- other: ThisType[TT, U]
- )(implicit
- i0: CatalystOrdered[U],
- w: FWith.Aux[T, TT, W]
- ): ThisType[W, Boolean] =
+ other: ThisType[TT, U]
+ )(implicit
+ i0: CatalystOrdered[U],
+ w: FWith.Aux[T, TT, W]
+ ): ThisType[W, Boolean] =
typed(self.untyped <= other.untyped)
/**
@@ -1100,11 +1088,11 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def >[TT, W](
- other: ThisType[TT, U]
- )(implicit
- i0: CatalystOrdered[U],
- w: FWith.Aux[T, TT, W]
- ): ThisType[W, Boolean] =
+ other: ThisType[TT, U]
+ )(implicit
+ i0: CatalystOrdered[U],
+ w: FWith.Aux[T, TT, W]
+ ): ThisType[W, Boolean] =
typed(self.untyped > other.untyped)
/**
@@ -1118,11 +1106,11 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def >=[TT, W](
- other: ThisType[TT, U]
- )(implicit
- i0: CatalystOrdered[U],
- w: FWith.Aux[T, TT, W]
- ): ThisType[W, Boolean] =
+ other: ThisType[TT, U]
+ )(implicit
+ i0: CatalystOrdered[U],
+ w: FWith.Aux[T, TT, W]
+ ): ThisType[W, Boolean] =
typed(self.untyped >= other.untyped)
/**
@@ -1136,10 +1124,10 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def <(
- u: U
- )(implicit
- i0: CatalystOrdered[U]
- ): ThisType[T, Boolean] =
+ u: U
+ )(implicit
+ i0: CatalystOrdered[U]
+ ): ThisType[T, Boolean] =
typed(self.untyped < lit(u)(self.uencoder).untyped)
/**
@@ -1153,10 +1141,10 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def <=(
- u: U
- )(implicit
- i0: CatalystOrdered[U]
- ): ThisType[T, Boolean] =
+ u: U
+ )(implicit
+ i0: CatalystOrdered[U]
+ ): ThisType[T, Boolean] =
typed(self.untyped <= lit(u)(self.uencoder).untyped)
/**
@@ -1170,10 +1158,10 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def >(
- u: U
- )(implicit
- i0: CatalystOrdered[U]
- ): ThisType[T, Boolean] =
+ u: U
+ )(implicit
+ i0: CatalystOrdered[U]
+ ): ThisType[T, Boolean] =
typed(self.untyped > lit(u)(self.uencoder).untyped)
/**
@@ -1187,10 +1175,10 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def >=(
- u: U
- )(implicit
- i0: CatalystOrdered[U]
- ): ThisType[T, Boolean] =
+ u: U
+ )(implicit
+ i0: CatalystOrdered[U]
+ ): ThisType[T, Boolean] =
typed(self.untyped >= lit(u)(self.uencoder).untyped)
/**
@@ -1204,10 +1192,10 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def isin(
- values: U*
- )(implicit
- e: CatalystIsin[U]
- ): ThisType[T, Boolean] =
+ values: U*
+ )(implicit
+ e: CatalystIsin[U]
+ ): ThisType[T, Boolean] =
typed(self.untyped.isin(values: _*))
/**
@@ -1218,11 +1206,11 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def between(
- lowerBound: U,
- upperBound: U
- )(implicit
- i0: CatalystOrdered[U]
- ): ThisType[T, Boolean] =
+ lowerBound: U,
+ upperBound: U
+ )(implicit
+ i0: CatalystOrdered[U]
+ ): ThisType[T, Boolean] =
typed(
self.untyped.between(
lit(lowerBound)(self.uencoder).untyped,
@@ -1238,13 +1226,13 @@ abstract class AbstractTypedColumn[T, U](
* apache/spark
*/
def between[TT1, TT2, W1, W2](
- lowerBound: ThisType[TT1, U],
- upperBound: ThisType[TT2, U]
- )(implicit
- i0: CatalystOrdered[U],
- w0: FWith.Aux[T, TT1, W1],
- w1: FWith.Aux[TT2, W1, W2]
- ): ThisType[W2, Boolean] =
+ lowerBound: ThisType[TT1, U],
+ upperBound: ThisType[TT2, U]
+ )(implicit
+ i0: CatalystOrdered[U],
+ w0: FWith.Aux[T, TT1, W1],
+ w1: FWith.Aux[TT2, W1, W2]
+ ): ThisType[W2, Boolean] =
typed(self.untyped.between(lowerBound.untyped, upperBound.untyped))
/**
@@ -1254,26 +1242,25 @@ abstract class AbstractTypedColumn[T, U](
* @tparam V the type of the nested field
*/
def field[V](
- symbol: Witness.Lt[Symbol]
- )(implicit
- i0: TypedColumn.Exists[U, symbol.T, V],
- i1: TypedEncoder[V]
- ): ThisType[T, V] =
+ symbol: Witness.Lt[Symbol]
+ )(implicit
+ i0: TypedColumn.Exists[U, symbol.T, V],
+ i1: TypedEncoder[V]
+ ): ThisType[T, V] =
typed(self.untyped.getField(symbol.value.name))
}
sealed class SortedTypedColumn[T, U](
- val expr: Expression
- )(implicit
- val uencoder: TypedEncoder[U])
+ val expr: Expression
+)(implicit val uencoder: TypedEncoder[U])
extends UntypedExpression[T] {
def this(
- column: Column
- )(implicit
- e: TypedEncoder[U]
- ) = {
+ column: Column
+ )(implicit
+ e: TypedEncoder[U]
+ ) = {
this(FramelessInternals.expr(column))
}
@@ -1283,8 +1270,8 @@ sealed class SortedTypedColumn[T, U](
object SortedTypedColumn {
implicit def defaultAscending[T, U: CatalystOrdered](
- typedColumn: TypedColumn[T, U]
- ): SortedTypedColumn[T, U] =
+ typedColumn: TypedColumn[T, U]
+ ): SortedTypedColumn[T, U] =
new SortedTypedColumn[T, U](typedColumn.untyped.asc)(typedColumn.uencoder)
object defaultAscendingPoly extends Poly1 {
@@ -1309,32 +1296,31 @@ object TypedColumn {
object ExistsMany {
implicit def deriveCons[T, KH, KT <: HList, V0, V1](
- implicit
- head: Exists[T, KH, V0],
- tail: ExistsMany[V0, KT, V1]
- ): ExistsMany[T, KH :: KT, V1] =
+ implicit
+ head: Exists[T, KH, V0],
+ tail: ExistsMany[V0, KT, V1]
+ ): ExistsMany[T, KH :: KT, V1] =
new ExistsMany[T, KH :: KT, V1] {}
implicit def deriveHNil[T, K, V](
- implicit
- head: Exists[T, K, V]
- ): ExistsMany[T, K :: HNil, V] =
+ implicit head: Exists[T, K, V]
+ ): ExistsMany[T, K :: HNil, V] =
new ExistsMany[T, K :: HNil, V] {}
}
object Exists {
def apply[T, V](
- column: Witness
- )(implicit
- e: Exists[T, column.T, V]
- ): Exists[T, column.T, V] = e
+ column: Witness
+ )(implicit
+ e: Exists[T, column.T, V]
+ ): Exists[T, column.T, V] = e
implicit def deriveRecord[T, H <: HList, K, V](
- implicit
- i0: LabelledGeneric.Aux[T, H],
- i1: Selector.Aux[H, K, V]
- ): Exists[T, K, V] = new Exists[T, K, V] {}
+ implicit
+ i0: LabelledGeneric.Aux[T, H],
+ i1: Selector.Aux[H, K, V]
+ ): Exists[T, K, V] = new Exists[T, K, V] {}
}
/**
diff --git a/dataset/src/main/scala/frameless/TypedColumnMacroImpl.scala b/dataset/src/main/scala/frameless/TypedColumnMacroImpl.scala
index 23502ef3b..49bffdf87 100644
--- a/dataset/src/main/scala/frameless/TypedColumnMacroImpl.scala
+++ b/dataset/src/main/scala/frameless/TypedColumnMacroImpl.scala
@@ -5,9 +5,8 @@ import scala.reflect.macros.whitebox
private[frameless] object TypedColumnMacroImpl {
def applyImpl[T: c.WeakTypeTag, U: c.WeakTypeTag](
- c: whitebox.Context
- )(x: c.Tree
- ): c.Expr[TypedColumn[T, U]] = {
+ c: whitebox.Context
+ )(x: c.Tree): c.Expr[TypedColumn[T, U]] = {
import c.universe._
val t = c.weakTypeOf[T]
@@ -71,7 +70,7 @@ private[frameless] object TypedColumnMacroImpl {
path(select, List.empty) match {
case root :: tail
- if (expectedRoot.forall(_ == root) && check(t, tail)) => {
+ if expectedRoot.forall(_ == root) && check(t, tail) => {
val colPath = tail.mkString(".")
c.Expr[TypedColumn[T, U]](
diff --git a/dataset/src/main/scala/frameless/TypedDataset.scala b/dataset/src/main/scala/frameless/TypedDataset.scala
index 82a016a3a..6a7780bde 100644
--- a/dataset/src/main/scala/frameless/TypedDataset.scala
+++ b/dataset/src/main/scala/frameless/TypedDataset.scala
@@ -4,32 +4,15 @@ import java.util
import frameless.functions.CatalystExplodableCollection
import frameless.ops._
import org.apache.spark.rdd.RDD
-import org.apache.spark.sql.{
- Column,
- DataFrame,
- Dataset,
- FramelessInternals,
- SparkSession
-}
-import org.apache.spark.sql.catalyst.expressions.{
- Attribute,
- AttributeReference,
- Literal
-}
-import org.apache.spark.sql.catalyst.plans.logical.{ Join, JoinHint }
+import org.apache.spark.sql.{Column, DataFrame, Dataset, FramelessInternals, SparkSession}
+import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Literal}
+import org.apache.spark.sql.catalyst.plans.logical.{Join, JoinHint}
import org.apache.spark.sql.catalyst.plans.Inner
import org.apache.spark.sql.types.StructType
import shapeless._
import shapeless.labelled.FieldType
-import shapeless.ops.hlist.{
- Diff,
- IsHCons,
- Mapper,
- Prepend,
- ToTraversable,
- Tupler
-}
-import shapeless.ops.record.{ Keys, Modifier, Remover, Values }
+import shapeless.ops.hlist.{Diff, IsHCons, Mapper, Prepend, ToTraversable, Tupler}
+import shapeless.ops.record.{Keys, Modifier, Remover, Values}
import scala.language.experimental.macros
@@ -44,12 +27,11 @@ import scala.language.experimental.macros
* http://www.apache.org/licenses/LICENSE-2.0
*/
class TypedDataset[T] protected[frameless] (
- val dataset: Dataset[T]
- )(implicit
- val encoder: TypedEncoder[T])
+ val dataset: Dataset[T]
+)(implicit val encoder: TypedEncoder[T])
extends TypedDatasetForwarded[T] { self =>
- private implicit val spark: SparkSession = dataset.sparkSession
+ implicit private val spark: SparkSession = dataset.sparkSession
/**
* Aggregates on the entire Dataset without groups.
@@ -80,9 +62,9 @@ class TypedDataset[T] protected[frameless] (
* apache/spark
*/
def agg[A, B](
- ca: TypedAggregate[T, A],
- cb: TypedAggregate[T, B]
- ): TypedDataset[(A, B)] = {
+ ca: TypedAggregate[T, A],
+ cb: TypedAggregate[T, B]
+ ): TypedDataset[(A, B)] = {
implicit val (ea, eb) = (ca.uencoder, cb.uencoder)
aggMany(ca, cb)
}
@@ -93,10 +75,10 @@ class TypedDataset[T] protected[frameless] (
* apache/spark
*/
def agg[A, B, C](
- ca: TypedAggregate[T, A],
- cb: TypedAggregate[T, B],
- cc: TypedAggregate[T, C]
- ): TypedDataset[(A, B, C)] = {
+ ca: TypedAggregate[T, A],
+ cb: TypedAggregate[T, B],
+ cc: TypedAggregate[T, C]
+ ): TypedDataset[(A, B, C)] = {
implicit val (ea, eb, ec) = (ca.uencoder, cb.uencoder, cc.uencoder)
aggMany(ca, cb, cc)
}
@@ -107,11 +89,11 @@ class TypedDataset[T] protected[frameless] (
* apache/spark
*/
def agg[A, B, C, D](
- ca: TypedAggregate[T, A],
- cb: TypedAggregate[T, B],
- cc: TypedAggregate[T, C],
- cd: TypedAggregate[T, D]
- ): TypedDataset[(A, B, C, D)] = {
+ ca: TypedAggregate[T, A],
+ cb: TypedAggregate[T, B],
+ cc: TypedAggregate[T, C],
+ cd: TypedAggregate[T, D]
+ ): TypedDataset[(A, B, C, D)] = {
implicit val (ea, eb, ec, ed) =
(ca.uencoder, cb.uencoder, cc.uencoder, cd.uencoder)
aggMany(ca, cb, cc, cd)
@@ -125,13 +107,13 @@ class TypedDataset[T] protected[frameless] (
object aggMany extends ProductArgs {
def applyProduct[U <: HList, Out0 <: HList, Out](
- columns: U
- )(implicit
- i0: AggregateTypes.Aux[T, U, Out0],
- i1: ToTraversable.Aux[U, List, UntypedExpression[T]],
- i2: Tupler.Aux[Out0, Out],
- i3: TypedEncoder[Out]
- ): TypedDataset[Out] = {
+ columns: U
+ )(implicit
+ i0: AggregateTypes.Aux[T, U, Out0],
+ i1: ToTraversable.Aux[U, List, UntypedExpression[T]],
+ i2: Tupler.Aux[Out0, Out],
+ i3: TypedEncoder[Out]
+ ): TypedDataset[Out] = {
val underlyingColumns = columns.toList[UntypedExpression[T]]
val cols: Seq[Column] = for {
@@ -159,9 +141,9 @@ class TypedDataset[T] protected[frameless] (
/** Returns a new [[TypedDataset]] where each record has been mapped on to the specified type. */
def as[U](
- )(implicit
- as: As[T, U]
- ): TypedDataset[U] = {
+ )(implicit
+ as: As[T, U]
+ ): TypedDataset[U] = {
implicit val uencoder = as.encoder
TypedDataset.create(dataset.as[U](TypedExpressionEncoder[U]))
}
@@ -177,10 +159,10 @@ class TypedDataset[T] protected[frameless] (
* apache/spark
*/
def checkpoint[F[_]](
- eager: Boolean
- )(implicit
- F: SparkDelay[F]
- ): F[TypedDataset[T]] =
+ eager: Boolean
+ )(implicit
+ F: SparkDelay[F]
+ ): F[TypedDataset[T]] =
F.delay(TypedDataset.create[T](dataset.checkpoint(eager)))
/**
@@ -200,9 +182,8 @@ class TypedDataset[T] protected[frameless] (
* }}}
*/
def project[U](
- implicit
- projector: SmartProject[T, U]
- ): TypedDataset[U] = projector.apply(this)
+ implicit projector: SmartProject[T, U]
+ ): TypedDataset[U] = projector.apply(this)
/**
* Returns a new [[TypedDataset]] that contains the elements of both this and the `other` [[TypedDataset]]
@@ -234,10 +215,10 @@ class TypedDataset[T] protected[frameless] (
* apache/spark
*/
def union[U: TypedEncoder](
- other: TypedDataset[U]
- )(implicit
- projector: SmartProject[U, T]
- ): TypedDataset[T] =
+ other: TypedDataset[U]
+ )(implicit
+ projector: SmartProject[U, T]
+ ): TypedDataset[T] =
TypedDataset.create(dataset.union(other.project[T].dataset))
/**
@@ -259,9 +240,9 @@ class TypedDataset[T] protected[frameless] (
* Differs from `Dataset#count` by wrapping its result into an effect-suspending `F[_]`.
*/
def count[F[_]](
- )(implicit
- F: SparkDelay[F]
- ): F[Long] =
+ )(implicit
+ F: SparkDelay[F]
+ ): F[Long] =
F.delay(dataset.count())
/**
@@ -274,11 +255,11 @@ class TypedDataset[T] protected[frameless] (
* It is statically checked that column with such name exists and has type `A`.
*/
def apply[A](
- column: Witness.Lt[Symbol]
- )(implicit
- i0: TypedColumn.Exists[T, column.T, A],
- i1: TypedEncoder[A]
- ): TypedColumn[T, A] = col(column)
+ column: Witness.Lt[Symbol]
+ )(implicit
+ i0: TypedColumn.Exists[T, column.T, A],
+ i1: TypedEncoder[A]
+ ): TypedColumn[T, A] = col(column)
/**
* Returns `TypedColumn` of type `A` given its name.
@@ -290,11 +271,11 @@ class TypedDataset[T] protected[frameless] (
* It is statically checked that column with such name exists and has type `A`.
*/
def col[A](
- column: Witness.Lt[Symbol]
- )(implicit
- i0: TypedColumn.Exists[T, column.T, A],
- i1: TypedEncoder[A]
- ): TypedColumn[T, A] =
+ column: Witness.Lt[Symbol]
+ )(implicit
+ i0: TypedColumn.Exists[T, column.T, A],
+ i1: TypedEncoder[A]
+ ): TypedColumn[T, A] =
new TypedColumn[T, A](
dataset(column.value.name).as[A](TypedExpressionEncoder[A])
)
@@ -341,9 +322,8 @@ class TypedDataset[T] protected[frameless] (
* }}}
*/
def asJoinColValue(
- implicit
- i0: IsValueClass[T]
- ): TypedColumn[T, T] = {
+ implicit i0: IsValueClass[T]
+ ): TypedColumn[T, T] = {
import _root_.frameless.syntax._
dataset.col("value").typedColumn
@@ -352,12 +332,12 @@ class TypedDataset[T] protected[frameless] (
object colMany extends SingletonProductArgs {
def applyProduct[U <: HList, Out](
- columns: U
- )(implicit
- i0: TypedColumn.ExistsMany[T, U, Out],
- i1: TypedEncoder[Out],
- i2: ToTraversable.Aux[U, List, Symbol]
- ): TypedColumn[T, Out] = {
+ columns: U
+ )(implicit
+ i0: TypedColumn.ExistsMany[T, U, Out],
+ i1: TypedEncoder[Out],
+ i2: ToTraversable.Aux[U, List, Symbol]
+ ): TypedColumn[T, Out] = {
val names = columns.toList[Symbol].map(_.name)
val colExpr = FramelessInternals.resolveExpr(dataset, names)
new TypedColumn[T, Out](colExpr)
@@ -372,11 +352,11 @@ class TypedDataset[T] protected[frameless] (
* String based aliases, which is obviously unsafe.
*/
def colRight[A](
- column: Witness.Lt[Symbol]
- )(implicit
- i0: TypedColumn.Exists[T, column.T, A],
- i1: TypedEncoder[A]
- ): TypedColumn[T, A] =
+ column: Witness.Lt[Symbol]
+ )(implicit
+ i0: TypedColumn.Exists[T, column.T, A],
+ i1: TypedEncoder[A]
+ ): TypedColumn[T, A] =
new TypedColumn[T, A](
FramelessInternals.DisambiguateRight(col(column).expr)
)
@@ -389,11 +369,11 @@ class TypedDataset[T] protected[frameless] (
* String based aliases, which is obviously unsafe.
*/
def colLeft[A](
- column: Witness.Lt[Symbol]
- )(implicit
- i0: TypedColumn.Exists[T, column.T, A],
- i1: TypedEncoder[A]
- ): TypedColumn[T, A] =
+ column: Witness.Lt[Symbol]
+ )(implicit
+ i0: TypedColumn.Exists[T, column.T, A],
+ i1: TypedEncoder[A]
+ ): TypedColumn[T, A] =
new TypedColumn[T, A](FramelessInternals.DisambiguateLeft(col(column).expr))
/**
@@ -405,9 +385,9 @@ class TypedDataset[T] protected[frameless] (
* Differs from `Dataset#collect` by wrapping its result into an effect-suspending `F[_]`.
*/
def collect[F[_]](
- )(implicit
- F: SparkDelay[F]
- ): F[Seq[T]] =
+ )(implicit
+ F: SparkDelay[F]
+ ): F[Seq[T]] =
F.delay(dataset.collect().toSeq)
/**
@@ -416,9 +396,9 @@ class TypedDataset[T] protected[frameless] (
* Differs from `Dataset#first` by wrapping its result into an `Option` and an effect-suspending `F[_]`.
*/
def firstOption[F[_]](
- )(implicit
- F: SparkDelay[F]
- ): F[Option[T]] =
+ )(implicit
+ F: SparkDelay[F]
+ ): F[Option[T]] =
F.delay {
try {
Option(dataset.first())
@@ -438,10 +418,10 @@ class TypedDataset[T] protected[frameless] (
* apache/spark
*/
def take[F[_]](
- num: Int
- )(implicit
- F: SparkDelay[F]
- ): F[Seq[T]] =
+ num: Int
+ )(implicit
+ F: SparkDelay[F]
+ ): F[Seq[T]] =
F.delay(dataset.take(num).toSeq)
/**
@@ -458,27 +438,27 @@ class TypedDataset[T] protected[frameless] (
* apache/spark
*/
def toLocalIterator[F[_]](
- )(implicit
- F: SparkDelay[F]
- ): F[util.Iterator[T]] =
+ )(implicit
+ F: SparkDelay[F]
+ ): F[util.Iterator[T]] =
F.delay(dataset.toLocalIterator())
/**
* Alias for firstOption().
*/
def headOption[F[_]](
- )(implicit
- F: SparkDelay[F]
- ): F[Option[T]] = firstOption()
+ )(implicit
+ F: SparkDelay[F]
+ ): F[Option[T]] = firstOption()
/**
* Alias for take().
*/
def head[F[_]](
- num: Int
- )(implicit
- F: SparkDelay[F]
- ): F[Seq[T]] = take(num)
+ num: Int
+ )(implicit
+ F: SparkDelay[F]
+ ): F[Seq[T]] = take(num)
// $COVERAGE-OFF$
/**
@@ -520,11 +500,11 @@ class TypedDataset[T] protected[frameless] (
* apache/spark
*/
def show[F[_]](
- numRows: Int = 20,
- truncate: Boolean = true
- )(implicit
- F: SparkDelay[F]
- ): F[Unit] =
+ numRows: Int = 20,
+ truncate: Boolean = true
+ )(implicit
+ F: SparkDelay[F]
+ ): F[Unit] =
F.delay(dataset.show(numRows, truncate))
/**
@@ -547,10 +527,10 @@ class TypedDataset[T] protected[frameless] (
* Differs from `Dataset#foreach` by wrapping its result into an effect-suspending `F[_]`.
*/
def foreach[F[_]](
- func: T => Unit
- )(implicit
- F: SparkDelay[F]
- ): F[Unit] =
+ func: T => Unit
+ )(implicit
+ F: SparkDelay[F]
+ ): F[Unit] =
F.delay(dataset.foreach(func))
/**
@@ -559,10 +539,10 @@ class TypedDataset[T] protected[frameless] (
* Differs from `Dataset#foreachPartition` by wrapping its result into an effect-suspending `F[_]`.
*/
def foreachPartition[F[_]](
- func: Iterator[T] => Unit
- )(implicit
- F: SparkDelay[F]
- ): F[Unit] =
+ func: Iterator[T] => Unit
+ )(implicit
+ F: SparkDelay[F]
+ ): F[Unit] =
F.delay(dataset.foreachPartition(func))
/**
@@ -575,8 +555,8 @@ class TypedDataset[T] protected[frameless] (
* apache/spark
*/
def cube[K1](
- c1: TypedColumn[T, K1]
- ): Cube1Ops[K1, T] = new Cube1Ops[K1, T](this, c1)
+ c1: TypedColumn[T, K1]
+ ): Cube1Ops[K1, T] = new Cube1Ops[K1, T](this, c1)
/**
* Create a multi-dimensional cube for the current [[TypedDataset]] using the specified columns,
@@ -588,9 +568,9 @@ class TypedDataset[T] protected[frameless] (
* apache/spark
*/
def cube[K1, K2](
- c1: TypedColumn[T, K1],
- c2: TypedColumn[T, K2]
- ): Cube2Ops[K1, K2, T] = new Cube2Ops[K1, K2, T](this, c1, c2)
+ c1: TypedColumn[T, K1],
+ c2: TypedColumn[T, K2]
+ ): Cube2Ops[K1, K2, T] = new Cube2Ops[K1, K2, T](this, c1, c2)
/**
* Create a multi-dimensional cube for the current [[TypedDataset]] using the specified columns,
@@ -628,12 +608,12 @@ class TypedDataset[T] protected[frameless] (
object cubeMany extends ProductArgs {
def applyProduct[TK <: HList, K <: HList, KT](
- groupedBy: TK
- )(implicit
- i0: ColumnTypes.Aux[T, TK, K],
- i1: Tupler.Aux[K, KT],
- i2: ToTraversable.Aux[TK, List, UntypedExpression[T]]
- ): CubeManyOps[T, TK, K, KT] =
+ groupedBy: TK
+ )(implicit
+ i0: ColumnTypes.Aux[T, TK, K],
+ i1: Tupler.Aux[K, KT],
+ i2: ToTraversable.Aux[TK, List, UntypedExpression[T]]
+ ): CubeManyOps[T, TK, K, KT] =
new CubeManyOps[T, TK, K, KT](self, groupedBy)
}
@@ -644,8 +624,8 @@ class TypedDataset[T] protected[frameless] (
* apache/spark
*/
def groupBy[K1](
- c1: TypedColumn[T, K1]
- ): GroupedBy1Ops[K1, T] = new GroupedBy1Ops[K1, T](this, c1)
+ c1: TypedColumn[T, K1]
+ ): GroupedBy1Ops[K1, T] = new GroupedBy1Ops[K1, T](this, c1)
/**
* Groups the [[TypedDataset]] using the specified columns, so that we can run aggregation on them.
@@ -654,9 +634,9 @@ class TypedDataset[T] protected[frameless] (
* apache/spark
*/
def groupBy[K1, K2](
- c1: TypedColumn[T, K1],
- c2: TypedColumn[T, K2]
- ): GroupedBy2Ops[K1, K2, T] = new GroupedBy2Ops[K1, K2, T](this, c1, c2)
+ c1: TypedColumn[T, K1],
+ c2: TypedColumn[T, K2]
+ ): GroupedBy2Ops[K1, K2, T] = new GroupedBy2Ops[K1, K2, T](this, c1, c2)
/**
* Groups the [[TypedDataset]] using the specified columns, so that we can run aggregation on them.
@@ -686,12 +666,12 @@ class TypedDataset[T] protected[frameless] (
object groupByMany extends ProductArgs {
def applyProduct[TK <: HList, K <: HList, KT](
- groupedBy: TK
- )(implicit
- i0: ColumnTypes.Aux[T, TK, K],
- i1: Tupler.Aux[K, KT],
- i2: ToTraversable.Aux[TK, List, UntypedExpression[T]]
- ): GroupedByManyOps[T, TK, K, KT] =
+ groupedBy: TK
+ )(implicit
+ i0: ColumnTypes.Aux[T, TK, K],
+ i1: Tupler.Aux[K, KT],
+ i2: ToTraversable.Aux[TK, List, UntypedExpression[T]]
+ ): GroupedByManyOps[T, TK, K, KT] =
new GroupedByManyOps[T, TK, K, KT](self, groupedBy)
}
@@ -705,8 +685,8 @@ class TypedDataset[T] protected[frameless] (
* apache/spark
*/
def rollup[K1](
- c1: TypedColumn[T, K1]
- ): Rollup1Ops[K1, T] = new Rollup1Ops[K1, T](this, c1)
+ c1: TypedColumn[T, K1]
+ ): Rollup1Ops[K1, T] = new Rollup1Ops[K1, T](this, c1)
/**
* Create a multi-dimensional rollup for the current [[TypedDataset]] using the specified columns,
@@ -718,9 +698,9 @@ class TypedDataset[T] protected[frameless] (
* apache/spark
*/
def rollup[K1, K2](
- c1: TypedColumn[T, K1],
- c2: TypedColumn[T, K2]
- ): Rollup2Ops[K1, K2, T] = new Rollup2Ops[K1, K2, T](this, c1, c2)
+ c1: TypedColumn[T, K1],
+ c2: TypedColumn[T, K2]
+ ): Rollup2Ops[K1, K2, T] = new Rollup2Ops[K1, K2, T](this, c1, c2)
/**
* Create a multi-dimensional rollup for the current [[TypedDataset]] using the specified columns,
@@ -756,21 +736,21 @@ class TypedDataset[T] protected[frameless] (
object rollupMany extends ProductArgs {
def applyProduct[TK <: HList, K <: HList, KT](
- groupedBy: TK
- )(implicit
- i0: ColumnTypes.Aux[T, TK, K],
- i1: Tupler.Aux[K, KT],
- i2: ToTraversable.Aux[TK, List, UntypedExpression[T]]
- ): RollupManyOps[T, TK, K, KT] =
+ groupedBy: TK
+ )(implicit
+ i0: ColumnTypes.Aux[T, TK, K],
+ i1: Tupler.Aux[K, KT],
+ i2: ToTraversable.Aux[TK, List, UntypedExpression[T]]
+ ): RollupManyOps[T, TK, K, KT] =
new RollupManyOps[T, TK, K, KT](self, groupedBy)
}
/** Computes the cartesian project of `this` `Dataset` with the `other` `Dataset` */
def joinCross[U](
- other: TypedDataset[U]
- )(implicit
- e: TypedEncoder[(T, U)]
- ): TypedDataset[(T, U)] =
+ other: TypedDataset[U]
+ )(implicit
+ e: TypedEncoder[(T, U)]
+ ): TypedDataset[(T, U)] =
new TypedDataset(
self.dataset
.joinWith(
@@ -786,11 +766,10 @@ class TypedDataset[T] protected[frameless] (
* returning a `Tuple2` for each pair where condition evaluates to true.
*/
def joinFull[U](
- other: TypedDataset[U]
- )(condition: TypedColumn[T with U, Boolean]
- )(implicit
- e: TypedEncoder[(Option[T], Option[U])]
- ): TypedDataset[(Option[T], Option[U])] =
+ other: TypedDataset[U]
+ )(condition: TypedColumn[T with U, Boolean])(implicit
+ e: TypedEncoder[(Option[T], Option[U])]
+ ): TypedDataset[(Option[T], Option[U])] =
new TypedDataset(
self.dataset
.joinWith(other.dataset, condition.untyped, "full")
@@ -804,11 +783,10 @@ class TypedDataset[T] protected[frameless] (
* returning a `Tuple2` for each pair where condition evaluates to true.
*/
def joinInner[U](
- other: TypedDataset[U]
- )(condition: TypedColumn[T with U, Boolean]
- )(implicit
- e: TypedEncoder[(T, U)]
- ): TypedDataset[(T, U)] = {
+ other: TypedDataset[U]
+ )(condition: TypedColumn[T with U, Boolean])(implicit
+ e: TypedEncoder[(T, U)]
+ ): TypedDataset[(T, U)] = {
import FramelessInternals._
val leftPlan = logicalPlan(dataset)
@@ -828,11 +806,10 @@ class TypedDataset[T] protected[frameless] (
* returning a `Tuple2` for each pair where condition evaluates to true.
*/
def joinLeft[U](
- other: TypedDataset[U]
- )(condition: TypedColumn[T with U, Boolean]
- )(implicit
- e: TypedEncoder[(T, Option[U])]
- ): TypedDataset[(T, Option[U])] =
+ other: TypedDataset[U]
+ )(condition: TypedColumn[T with U, Boolean])(implicit
+ e: TypedEncoder[(T, Option[U])]
+ ): TypedDataset[(T, Option[U])] =
new TypedDataset(
self.dataset
.joinWith(other.dataset, condition.untyped, "left_outer")
@@ -844,9 +821,8 @@ class TypedDataset[T] protected[frameless] (
* returning a `Tuple2` for each pair where condition evaluates to true.
*/
def joinLeftSemi[U](
- other: TypedDataset[U]
- )(condition: TypedColumn[T with U, Boolean]
- ): TypedDataset[T] =
+ other: TypedDataset[U]
+ )(condition: TypedColumn[T with U, Boolean]): TypedDataset[T] =
new TypedDataset(
self.dataset
.join(other.dataset, condition.untyped, "leftsemi")
@@ -858,9 +834,8 @@ class TypedDataset[T] protected[frameless] (
* returning a `Tuple2` for each pair where condition evaluates to true.
*/
def joinLeftAnti[U](
- other: TypedDataset[U]
- )(condition: TypedColumn[T with U, Boolean]
- ): TypedDataset[T] =
+ other: TypedDataset[U]
+ )(condition: TypedColumn[T with U, Boolean]): TypedDataset[T] =
new TypedDataset(
self.dataset
.join(other.dataset, condition.untyped, "leftanti")
@@ -872,11 +847,10 @@ class TypedDataset[T] protected[frameless] (
* returning a `Tuple2` for each pair where condition evaluates to true.
*/
def joinRight[U](
- other: TypedDataset[U]
- )(condition: TypedColumn[T with U, Boolean]
- )(implicit
- e: TypedEncoder[(Option[T], U)]
- ): TypedDataset[(Option[T], U)] =
+ other: TypedDataset[U]
+ )(condition: TypedColumn[T with U, Boolean])(implicit
+ e: TypedEncoder[(Option[T], U)]
+ ): TypedDataset[(Option[T], U)] =
new TypedDataset(
self.dataset
.joinWith(other.dataset, condition.untyped, "right_outer")
@@ -907,16 +881,16 @@ class TypedDataset[T] protected[frameless] (
* Takes a function from A => R and converts it to a UDF for TypedColumn[T, A] => TypedColumn[T, R].
*/
def makeUDF[A: TypedEncoder, R: TypedEncoder](
- f: A => R
- ): TypedColumn[T, A] => TypedColumn[T, R] = functions.udf(f)
+ f: A => R
+ ): TypedColumn[T, A] => TypedColumn[T, R] = functions.udf(f)
/**
* Takes a function from (A1, A2) => R and converts it to a UDF for
* (TypedColumn[T, A1], TypedColumn[T, A2]) => TypedColumn[T, R].
*/
def makeUDF[A1: TypedEncoder, A2: TypedEncoder, R: TypedEncoder](
- f: (A1, A2) => R
- ): (TypedColumn[T, A1], TypedColumn[T, A2]) => TypedColumn[T, R] =
+ f: (A1, A2) => R
+ ): (TypedColumn[T, A1], TypedColumn[T, A2]) => TypedColumn[T, R] =
functions.udf(f)
/**
@@ -924,12 +898,11 @@ class TypedDataset[T] protected[frameless] (
* (TypedColumn[T, A1], TypedColumn[T, A2], TypedColumn[T, A3]) => TypedColumn[T, R].
*/
def makeUDF[
- A1: TypedEncoder,
- A2: TypedEncoder,
- A3: TypedEncoder,
- R: TypedEncoder
- ](f: (A1, A2, A3) => R
- ): (TypedColumn[T, A1], TypedColumn[T, A2], TypedColumn[T, A3]) => TypedColumn[T, R] =
+ A1: TypedEncoder,
+ A2: TypedEncoder,
+ A3: TypedEncoder,
+ R: TypedEncoder
+ ](f: (A1, A2, A3) => R): (TypedColumn[T, A1], TypedColumn[T, A2], TypedColumn[T, A3]) => TypedColumn[T, R] =
functions.udf(f)
/**
@@ -937,38 +910,36 @@ class TypedDataset[T] protected[frameless] (
* (TypedColumn[T, A1], TypedColumn[T, A2], TypedColumn[T, A3], TypedColumn[T, A4]) => TypedColumn[T, R].
*/
def makeUDF[
- A1: TypedEncoder,
- A2: TypedEncoder,
- A3: TypedEncoder,
- A4: TypedEncoder,
- R: TypedEncoder
- ](f: (A1, A2, A3, A4) => R
- ): (
- TypedColumn[T, A1],
- TypedColumn[T, A2],
- TypedColumn[T, A3],
- TypedColumn[T, A4]
- ) => TypedColumn[T, R] = functions.udf(f)
+ A1: TypedEncoder,
+ A2: TypedEncoder,
+ A3: TypedEncoder,
+ A4: TypedEncoder,
+ R: TypedEncoder
+ ](f: (A1, A2, A3, A4) => R): (
+ TypedColumn[T, A1],
+ TypedColumn[T, A2],
+ TypedColumn[T, A3],
+ TypedColumn[T, A4]
+ ) => TypedColumn[T, R] = functions.udf(f)
/**
* Takes a function from (A1, A2, A3, A4, A5) => R and converts it to a UDF for
* (TypedColumn[T, A1], TypedColumn[T, A2], TypedColumn[T, A3], TypedColumn[T, A4], TypedColumn[T, A5]) => TypedColumn[T, R].
*/
def makeUDF[
- A1: TypedEncoder,
- A2: TypedEncoder,
- A3: TypedEncoder,
- A4: TypedEncoder,
- A5: TypedEncoder,
- R: TypedEncoder
- ](f: (A1, A2, A3, A4, A5) => R
- ): (
- TypedColumn[T, A1],
- TypedColumn[T, A2],
- TypedColumn[T, A3],
- TypedColumn[T, A4],
- TypedColumn[T, A5]
- ) => TypedColumn[T, R] = functions.udf(f)
+ A1: TypedEncoder,
+ A2: TypedEncoder,
+ A3: TypedEncoder,
+ A4: TypedEncoder,
+ A5: TypedEncoder,
+ R: TypedEncoder
+ ](f: (A1, A2, A3, A4, A5) => R): (
+ TypedColumn[T, A1],
+ TypedColumn[T, A2],
+ TypedColumn[T, A3],
+ TypedColumn[T, A4],
+ TypedColumn[T, A5]
+ ) => TypedColumn[T, R] = functions.udf(f)
/**
* Type-safe projection from type T to Tuple1[A]
@@ -977,8 +948,8 @@ class TypedDataset[T] protected[frameless] (
* }}}
*/
def select[A](
- ca: TypedColumn[T, A]
- ): TypedDataset[A] = {
+ ca: TypedColumn[T, A]
+ ): TypedDataset[A] = {
implicit val ea = ca.uencoder
val tuple1: TypedDataset[Tuple1[A]] = selectMany(ca)
@@ -1005,9 +976,9 @@ class TypedDataset[T] protected[frameless] (
* }}}
*/
def select[A, B](
- ca: TypedColumn[T, A],
- cb: TypedColumn[T, B]
- ): TypedDataset[(A, B)] = {
+ ca: TypedColumn[T, A],
+ cb: TypedColumn[T, B]
+ ): TypedDataset[(A, B)] = {
implicit val (ea, eb) = (ca.uencoder, cb.uencoder)
selectMany(ca, cb)
}
@@ -1019,10 +990,10 @@ class TypedDataset[T] protected[frameless] (
* }}}
*/
def select[A, B, C](
- ca: TypedColumn[T, A],
- cb: TypedColumn[T, B],
- cc: TypedColumn[T, C]
- ): TypedDataset[(A, B, C)] = {
+ ca: TypedColumn[T, A],
+ cb: TypedColumn[T, B],
+ cc: TypedColumn[T, C]
+ ): TypedDataset[(A, B, C)] = {
implicit val (ea, eb, ec) = (ca.uencoder, cb.uencoder, cc.uencoder)
selectMany(ca, cb, cc)
}
@@ -1034,11 +1005,11 @@ class TypedDataset[T] protected[frameless] (
* }}}
*/
def select[A, B, C, D](
- ca: TypedColumn[T, A],
- cb: TypedColumn[T, B],
- cc: TypedColumn[T, C],
- cd: TypedColumn[T, D]
- ): TypedDataset[(A, B, C, D)] = {
+ ca: TypedColumn[T, A],
+ cb: TypedColumn[T, B],
+ cc: TypedColumn[T, C],
+ cd: TypedColumn[T, D]
+ ): TypedDataset[(A, B, C, D)] = {
implicit val (ea, eb, ec, ed) =
(ca.uencoder, cb.uencoder, cc.uencoder, cd.uencoder)
selectMany(ca, cb, cc, cd)
@@ -1051,12 +1022,12 @@ class TypedDataset[T] protected[frameless] (
* }}}
*/
def select[A, B, C, D, E](
- ca: TypedColumn[T, A],
- cb: TypedColumn[T, B],
- cc: TypedColumn[T, C],
- cd: TypedColumn[T, D],
- ce: TypedColumn[T, E]
- ): TypedDataset[(A, B, C, D, E)] = {
+ ca: TypedColumn[T, A],
+ cb: TypedColumn[T, B],
+ cc: TypedColumn[T, C],
+ cd: TypedColumn[T, D],
+ ce: TypedColumn[T, E]
+ ): TypedDataset[(A, B, C, D, E)] = {
implicit val (ea, eb, ec, ed, ee) =
(ca.uencoder, cb.uencoder, cc.uencoder, cd.uencoder, ce.uencoder)
@@ -1070,13 +1041,13 @@ class TypedDataset[T] protected[frameless] (
* }}}
*/
def select[A, B, C, D, E, F](
- ca: TypedColumn[T, A],
- cb: TypedColumn[T, B],
- cc: TypedColumn[T, C],
- cd: TypedColumn[T, D],
- ce: TypedColumn[T, E],
- cf: TypedColumn[T, F]
- ): TypedDataset[(A, B, C, D, E, F)] = {
+ ca: TypedColumn[T, A],
+ cb: TypedColumn[T, B],
+ cc: TypedColumn[T, C],
+ cd: TypedColumn[T, D],
+ ce: TypedColumn[T, E],
+ cf: TypedColumn[T, F]
+ ): TypedDataset[(A, B, C, D, E, F)] = {
implicit val (ea, eb, ec, ed, ee, ef) =
(
ca.uencoder,
@@ -1097,14 +1068,14 @@ class TypedDataset[T] protected[frameless] (
* }}}
*/
def select[A, B, C, D, E, F, G](
- ca: TypedColumn[T, A],
- cb: TypedColumn[T, B],
- cc: TypedColumn[T, C],
- cd: TypedColumn[T, D],
- ce: TypedColumn[T, E],
- cf: TypedColumn[T, F],
- cg: TypedColumn[T, G]
- ): TypedDataset[(A, B, C, D, E, F, G)] = {
+ ca: TypedColumn[T, A],
+ cb: TypedColumn[T, B],
+ cc: TypedColumn[T, C],
+ cd: TypedColumn[T, D],
+ ce: TypedColumn[T, E],
+ cf: TypedColumn[T, F],
+ cg: TypedColumn[T, G]
+ ): TypedDataset[(A, B, C, D, E, F, G)] = {
implicit val (ea, eb, ec, ed, ee, ef, eg) =
(
ca.uencoder,
@@ -1126,15 +1097,15 @@ class TypedDataset[T] protected[frameless] (
* }}}
*/
def select[A, B, C, D, E, F, G, H](
- ca: TypedColumn[T, A],
- cb: TypedColumn[T, B],
- cc: TypedColumn[T, C],
- cd: TypedColumn[T, D],
- ce: TypedColumn[T, E],
- cf: TypedColumn[T, F],
- cg: TypedColumn[T, G],
- ch: TypedColumn[T, H]
- ): TypedDataset[(A, B, C, D, E, F, G, H)] = {
+ ca: TypedColumn[T, A],
+ cb: TypedColumn[T, B],
+ cc: TypedColumn[T, C],
+ cd: TypedColumn[T, D],
+ ce: TypedColumn[T, E],
+ cf: TypedColumn[T, F],
+ cg: TypedColumn[T, G],
+ ch: TypedColumn[T, H]
+ ): TypedDataset[(A, B, C, D, E, F, G, H)] = {
implicit val (ea, eb, ec, ed, ee, ef, eg, eh) =
(
ca.uencoder,
@@ -1157,16 +1128,16 @@ class TypedDataset[T] protected[frameless] (
* }}}
*/
def select[A, B, C, D, E, F, G, H, I](
- ca: TypedColumn[T, A],
- cb: TypedColumn[T, B],
- cc: TypedColumn[T, C],
- cd: TypedColumn[T, D],
- ce: TypedColumn[T, E],
- cf: TypedColumn[T, F],
- cg: TypedColumn[T, G],
- ch: TypedColumn[T, H],
- ci: TypedColumn[T, I]
- ): TypedDataset[(A, B, C, D, E, F, G, H, I)] = {
+ ca: TypedColumn[T, A],
+ cb: TypedColumn[T, B],
+ cc: TypedColumn[T, C],
+ cd: TypedColumn[T, D],
+ ce: TypedColumn[T, E],
+ cf: TypedColumn[T, F],
+ cg: TypedColumn[T, G],
+ ch: TypedColumn[T, H],
+ ci: TypedColumn[T, I]
+ ): TypedDataset[(A, B, C, D, E, F, G, H, I)] = {
implicit val (ea, eb, ec, ed, ee, ef, eg, eh, ei) =
(
ca.uencoder,
@@ -1190,17 +1161,17 @@ class TypedDataset[T] protected[frameless] (
* }}}
*/
def select[A, B, C, D, E, F, G, H, I, J](
- ca: TypedColumn[T, A],
- cb: TypedColumn[T, B],
- cc: TypedColumn[T, C],
- cd: TypedColumn[T, D],
- ce: TypedColumn[T, E],
- cf: TypedColumn[T, F],
- cg: TypedColumn[T, G],
- ch: TypedColumn[T, H],
- ci: TypedColumn[T, I],
- cj: TypedColumn[T, J]
- ): TypedDataset[(A, B, C, D, E, F, G, H, I, J)] = {
+ ca: TypedColumn[T, A],
+ cb: TypedColumn[T, B],
+ cc: TypedColumn[T, C],
+ cd: TypedColumn[T, D],
+ ce: TypedColumn[T, E],
+ cf: TypedColumn[T, F],
+ cg: TypedColumn[T, G],
+ ch: TypedColumn[T, H],
+ ci: TypedColumn[T, I],
+ cj: TypedColumn[T, J]
+ ): TypedDataset[(A, B, C, D, E, F, G, H, I, J)] = {
implicit val (ea, eb, ec, ed, ee, ef, eg, eh, ei, ej) =
(
ca.uencoder,
@@ -1220,13 +1191,13 @@ class TypedDataset[T] protected[frameless] (
object selectMany extends ProductArgs {
def applyProduct[U <: HList, Out0 <: HList, Out](
- columns: U
- )(implicit
- i0: ColumnTypes.Aux[T, U, Out0],
- i1: ToTraversable.Aux[U, List, UntypedExpression[T]],
- i2: Tupler.Aux[Out0, Out],
- i3: TypedEncoder[Out]
- ): TypedDataset[Out] = {
+ columns: U
+ )(implicit
+ i0: ColumnTypes.Aux[T, U, Out0],
+ i1: ToTraversable.Aux[U, List, UntypedExpression[T]],
+ i2: Tupler.Aux[Out0, Out],
+ i3: TypedEncoder[Out]
+ ): TypedDataset[Out] = {
val base = dataset
.toDF()
.select(
@@ -1242,25 +1213,22 @@ class TypedDataset[T] protected[frameless] (
/** Sort each partition in the dataset using the columns selected. */
def sortWithinPartitions[A: CatalystOrdered](
- ca: SortedTypedColumn[T, A]
- ): TypedDataset[T] =
+ ca: SortedTypedColumn[T, A]
+ ): TypedDataset[T] =
sortWithinPartitionsMany(ca)
/** Sort each partition in the dataset using the columns selected. */
def sortWithinPartitions[A: CatalystOrdered, B: CatalystOrdered](
- ca: SortedTypedColumn[T, A],
- cb: SortedTypedColumn[T, B]
- ): TypedDataset[T] = sortWithinPartitionsMany(ca, cb)
+ ca: SortedTypedColumn[T, A],
+ cb: SortedTypedColumn[T, B]
+ ): TypedDataset[T] = sortWithinPartitionsMany(ca, cb)
/** Sort each partition in the dataset using the columns selected. */
def sortWithinPartitions[
- A: CatalystOrdered,
- B: CatalystOrdered,
- C: CatalystOrdered
- ](ca: SortedTypedColumn[T, A],
- cb: SortedTypedColumn[T, B],
- cc: SortedTypedColumn[T, C]
- ): TypedDataset[T] = sortWithinPartitionsMany(ca, cb, cc)
+ A: CatalystOrdered,
+ B: CatalystOrdered,
+ C: CatalystOrdered
+ ](ca: SortedTypedColumn[T, A], cb: SortedTypedColumn[T, B], cc: SortedTypedColumn[T, C]): TypedDataset[T] = sortWithinPartitionsMany(ca, cb, cc)
/**
* Sort each partition in the dataset by the given column expressions
@@ -1272,11 +1240,11 @@ class TypedDataset[T] protected[frameless] (
object sortWithinPartitionsMany extends ProductArgs {
def applyProduct[U <: HList, O <: HList](
- columns: U
- )(implicit
- i0: Mapper.Aux[SortedTypedColumn.defaultAscendingPoly.type, U, O],
- i1: ToTraversable.Aux[O, List, SortedTypedColumn[T, _]]
- ): TypedDataset[T] = {
+ columns: U
+ )(implicit
+ i0: Mapper.Aux[SortedTypedColumn.defaultAscendingPoly.type, U, O],
+ i1: ToTraversable.Aux[O, List, SortedTypedColumn[T, _]]
+ ): TypedDataset[T] = {
val sorted = dataset
.toDF()
.sortWithinPartitions(
@@ -1290,22 +1258,22 @@ class TypedDataset[T] protected[frameless] (
/** Orders the TypedDataset using the column selected. */
def orderBy[A: CatalystOrdered](
- ca: SortedTypedColumn[T, A]
- ): TypedDataset[T] =
+ ca: SortedTypedColumn[T, A]
+ ): TypedDataset[T] =
orderByMany(ca)
/** Orders the TypedDataset using the columns selected. */
def orderBy[A: CatalystOrdered, B: CatalystOrdered](
- ca: SortedTypedColumn[T, A],
- cb: SortedTypedColumn[T, B]
- ): TypedDataset[T] = orderByMany(ca, cb)
+ ca: SortedTypedColumn[T, A],
+ cb: SortedTypedColumn[T, B]
+ ): TypedDataset[T] = orderByMany(ca, cb)
/** Orders the TypedDataset using the columns selected. */
def orderBy[A: CatalystOrdered, B: CatalystOrdered, C: CatalystOrdered](
- ca: SortedTypedColumn[T, A],
- cb: SortedTypedColumn[T, B],
- cc: SortedTypedColumn[T, C]
- ): TypedDataset[T] = orderByMany(ca, cb, cc)
+ ca: SortedTypedColumn[T, A],
+ cb: SortedTypedColumn[T, B],
+ cc: SortedTypedColumn[T, C]
+ ): TypedDataset[T] = orderByMany(ca, cb, cc)
/**
* Sort the dataset by any number of column expressions.
@@ -1317,11 +1285,11 @@ class TypedDataset[T] protected[frameless] (
object orderByMany extends ProductArgs {
def applyProduct[U <: HList, O <: HList](
- columns: U
- )(implicit
- i0: Mapper.Aux[SortedTypedColumn.defaultAscendingPoly.type, U, O],
- i1: ToTraversable.Aux[O, List, SortedTypedColumn[T, _]]
- ): TypedDataset[T] = {
+ columns: U
+ )(implicit
+ i0: Mapper.Aux[SortedTypedColumn.defaultAscendingPoly.type, U, O],
+ i1: ToTraversable.Aux[O, List, SortedTypedColumn[T, _]]
+ ): TypedDataset[T] = {
val sorted = dataset
.toDF()
.orderBy(i0(columns).toList[SortedTypedColumn[T, _]].map(_.untyped): _*)
@@ -1354,19 +1322,18 @@ class TypedDataset[T] protected[frameless] (
* @return
*/
def dropTupled[
- Out,
- TRep <: HList,
- Removed <: HList,
- ValuesFromRemoved <: HList,
- V
- ](column: Witness.Lt[Symbol]
- )(implicit
- i0: LabelledGeneric.Aux[T, TRep],
- i1: Remover.Aux[TRep, column.T, (V, Removed)],
- i2: Values.Aux[Removed, ValuesFromRemoved],
- i3: Tupler.Aux[ValuesFromRemoved, Out],
- i4: TypedEncoder[Out]
- ): TypedDataset[Out] = {
+ Out,
+ TRep <: HList,
+ Removed <: HList,
+ ValuesFromRemoved <: HList,
+ V
+ ](column: Witness.Lt[Symbol])(implicit
+ i0: LabelledGeneric.Aux[T, TRep],
+ i1: Remover.Aux[TRep, column.T, (V, Removed)],
+ i2: Values.Aux[Removed, ValuesFromRemoved],
+ i3: Tupler.Aux[ValuesFromRemoved, Out],
+ i4: TypedEncoder[Out]
+ ): TypedDataset[Out] = {
val dropped = dataset
.toDF()
.drop(column.value.name)
@@ -1391,9 +1358,8 @@ class TypedDataset[T] protected[frameless] (
* @see [[frameless.TypedDataset#project]]
*/
def drop[U](
- implicit
- projector: SmartProject[T, U]
- ): TypedDataset[U] = project[U]
+ implicit projector: SmartProject[T, U]
+ ): TypedDataset[U] = project[U]
/**
* Prepends a new column to the Dataset.
@@ -1405,13 +1371,13 @@ class TypedDataset[T] protected[frameless] (
* }}}
*/
def withColumnTupled[A: TypedEncoder, H <: HList, FH <: HList, Out](
- ca: TypedColumn[T, A]
- )(implicit
- i0: Generic.Aux[T, H],
- i1: Prepend.Aux[H, A :: HNil, FH],
- i2: Tupler.Aux[FH, Out],
- i3: TypedEncoder[Out]
- ): TypedDataset[Out] = {
+ ca: TypedColumn[T, A]
+ )(implicit
+ i0: Generic.Aux[T, H],
+ i1: Prepend.Aux[H, A :: HNil, FH],
+ i2: Tupler.Aux[FH, Out],
+ i3: TypedEncoder[Out]
+ ): TypedDataset[Out] = {
// Giving a random name to the new column (the proper name will be given by the Tuple-based encoder)
val selected = dataset
.toDF()
@@ -1433,11 +1399,11 @@ class TypedDataset[T] protected[frameless] (
* @param i0 Evidence that a column with the correct type and name exists
*/
def withColumnReplaced[A](
- column: Witness.Lt[Symbol],
- replacement: TypedColumn[T, A]
- )(implicit
- i0: TypedColumn.Exists[T, column.T, A]
- ): TypedDataset[T] = {
+ column: Witness.Lt[Symbol],
+ replacement: TypedColumn[T, A]
+ )(implicit
+ i0: TypedColumn.Exists[T, column.T, A]
+ ): TypedDataset[T] = {
val updated = dataset
.toDF()
.withColumn(column.value.name, replacement.untyped)
@@ -1485,27 +1451,26 @@ class TypedDataset[T] protected[frameless] (
class WithColumnApply[U] {
def apply[
- A,
- TRep <: HList,
- URep <: HList,
- UKeys <: HList,
- NewFields <: HList,
- NewKeys <: HList,
- NewKey <: Symbol
- ](ca: TypedColumn[T, A]
- )(implicit
- i0: TypedEncoder[U],
- i1: TypedEncoder[A],
- i2: LabelledGeneric.Aux[T, TRep],
- i3: LabelledGeneric.Aux[U, URep],
- i4: Diff.Aux[TRep, URep, HNil],
- i5: Diff.Aux[URep, TRep, NewFields],
- i6: Keys.Aux[NewFields, NewKeys],
- i7: IsHCons.Aux[NewKeys, NewKey, HNil],
- i8: IsHCons.Aux[NewFields, FieldType[NewKey, A], HNil],
- i9: Keys.Aux[URep, UKeys],
- iA: ToTraversable.Aux[UKeys, Seq, Symbol]
- ): TypedDataset[U] = {
+ A,
+ TRep <: HList,
+ URep <: HList,
+ UKeys <: HList,
+ NewFields <: HList,
+ NewKeys <: HList,
+ NewKey <: Symbol
+ ](ca: TypedColumn[T, A])(implicit
+ i0: TypedEncoder[U],
+ i1: TypedEncoder[A],
+ i2: LabelledGeneric.Aux[T, TRep],
+ i3: LabelledGeneric.Aux[U, URep],
+ i4: Diff.Aux[TRep, URep, HNil],
+ i5: Diff.Aux[URep, TRep, NewFields],
+ i6: Keys.Aux[NewFields, NewKeys],
+ i7: IsHCons.Aux[NewKeys, NewKey, HNil],
+ i8: IsHCons.Aux[NewFields, FieldType[NewKey, A], HNil],
+ i9: Keys.Aux[URep, UKeys],
+ iA: ToTraversable.Aux[UKeys, Seq, Symbol]
+ ): TypedDataset[U] = {
val newColumnName =
i7.head(i6()).name
@@ -1535,24 +1500,23 @@ class TypedDataset[T] protected[frameless] (
* @param column the column we wish to explode
*/
def explode[
- A,
- TRep <: HList,
- V[_],
- OutMod <: HList,
- OutModValues <: HList,
- Out
- ](column: Witness.Lt[Symbol]
- )(implicit
- i0: TypedColumn.Exists[T, column.T, V[A]],
- i1: TypedEncoder[A],
- i2: CatalystExplodableCollection[V],
- i3: LabelledGeneric.Aux[T, TRep],
- i4: Modifier.Aux[TRep, column.T, V[A], A, OutMod],
- i5: Values.Aux[OutMod, OutModValues],
- i6: Tupler.Aux[OutModValues, Out],
- i7: TypedEncoder[Out]
- ): TypedDataset[Out] = {
- import org.apache.spark.sql.functions.{ explode => sparkExplode }
+ A,
+ TRep <: HList,
+ V[_],
+ OutMod <: HList,
+ OutModValues <: HList,
+ Out
+ ](column: Witness.Lt[Symbol])(implicit
+ i0: TypedColumn.Exists[T, column.T, V[A]],
+ i1: TypedEncoder[A],
+ i2: CatalystExplodableCollection[V],
+ i3: LabelledGeneric.Aux[T, TRep],
+ i4: Modifier.Aux[TRep, column.T, V[A], A, OutMod],
+ i5: Values.Aux[OutMod, OutModValues],
+ i6: Tupler.Aux[OutModValues, Out],
+ i7: TypedEncoder[Out]
+ ): TypedDataset[Out] = {
+ import org.apache.spark.sql.functions.{explode => sparkExplode}
val df = dataset.toDF()
val trans =
@@ -1576,29 +1540,24 @@ class TypedDataset[T] protected[frameless] (
* @param column the column we wish to explode
*/
def explodeMap[
- A,
- B,
- V[_, _],
- TRep <: HList,
- OutMod <: HList,
- OutModValues <: HList,
- Out
- ](column: Witness.Lt[Symbol]
- )(implicit
- i0: TypedColumn.Exists[T, column.T, V[A, B]],
- i1: TypedEncoder[A],
- i2: TypedEncoder[B],
- i3: LabelledGeneric.Aux[T, TRep],
- i4: Modifier.Aux[TRep, column.T, V[A, B], (A, B), OutMod],
- i5: Values.Aux[OutMod, OutModValues],
- i6: Tupler.Aux[OutModValues, Out],
- i7: TypedEncoder[Out]
- ): TypedDataset[Out] = {
- import org.apache.spark.sql.functions.{
- explode => sparkExplode,
- struct => sparkStruct,
- col => sparkCol
- }
+ A,
+ B,
+ V[_, _],
+ TRep <: HList,
+ OutMod <: HList,
+ OutModValues <: HList,
+ Out
+ ](column: Witness.Lt[Symbol])(implicit
+ i0: TypedColumn.Exists[T, column.T, V[A, B]],
+ i1: TypedEncoder[A],
+ i2: TypedEncoder[B],
+ i3: LabelledGeneric.Aux[T, TRep],
+ i4: Modifier.Aux[TRep, column.T, V[A, B], (A, B), OutMod],
+ i5: Values.Aux[OutMod, OutModValues],
+ i6: Tupler.Aux[OutModValues, Out],
+ i7: TypedEncoder[Out]
+ ): TypedDataset[Out] = {
+ import org.apache.spark.sql.functions.{explode => sparkExplode, struct => sparkStruct, col => sparkCol}
val df = dataset.toDF()
// select all columns, all original columns and [key, value] columns appeared after the map explode
@@ -1648,23 +1607,22 @@ class TypedDataset[T] protected[frameless] (
* @param column the column we wish to flatten
*/
def flattenOption[
- A,
- TRep <: HList,
- V[_],
- OutMod <: HList,
- OutModValues <: HList,
- Out
- ](column: Witness.Lt[Symbol]
- )(implicit
- i0: TypedColumn.Exists[T, column.T, V[A]],
- i1: TypedEncoder[A],
- i2: V[A] =:= Option[A],
- i3: LabelledGeneric.Aux[T, TRep],
- i4: Modifier.Aux[TRep, column.T, V[A], A, OutMod],
- i5: Values.Aux[OutMod, OutModValues],
- i6: Tupler.Aux[OutModValues, Out],
- i7: TypedEncoder[Out]
- ): TypedDataset[Out] = {
+ A,
+ TRep <: HList,
+ V[_],
+ OutMod <: HList,
+ OutModValues <: HList,
+ Out
+ ](column: Witness.Lt[Symbol])(implicit
+ i0: TypedColumn.Exists[T, column.T, V[A]],
+ i1: TypedEncoder[A],
+ i2: V[A] =:= Option[A],
+ i3: LabelledGeneric.Aux[T, TRep],
+ i4: Modifier.Aux[TRep, column.T, V[A], A, OutMod],
+ i5: Values.Aux[OutMod, OutModValues],
+ i6: Tupler.Aux[OutModValues, Out],
+ i7: TypedEncoder[Out]
+ ): TypedDataset[Out] = {
val df = dataset.toDF()
val trans = df
.filter(df(column.value.name).isNotNull)
@@ -1677,22 +1635,22 @@ class TypedDataset[T] protected[frameless] (
object TypedDataset {
def create[A](
- data: Seq[A]
- )(implicit
- encoder: TypedEncoder[A],
- sqlContext: SparkSession
- ): TypedDataset[A] = {
+ data: Seq[A]
+ )(implicit
+ encoder: TypedEncoder[A],
+ sqlContext: SparkSession
+ ): TypedDataset[A] = {
val dataset = sqlContext.createDataset(data)(TypedExpressionEncoder[A])
TypedDataset.create[A](dataset)
}
def create[A](
- data: RDD[A]
- )(implicit
- encoder: TypedEncoder[A],
- sqlContext: SparkSession
- ): TypedDataset[A] = {
+ data: RDD[A]
+ )(implicit
+ encoder: TypedEncoder[A],
+ sqlContext: SparkSession
+ ): TypedDataset[A] = {
val dataset = sqlContext.createDataset(data)(TypedExpressionEncoder[A])
TypedDataset.create[A](dataset)
diff --git a/dataset/src/main/scala/frameless/TypedDatasetForwarded.scala b/dataset/src/main/scala/frameless/TypedDatasetForwarded.scala
index 0856732f2..658e5b4e2 100644
--- a/dataset/src/main/scala/frameless/TypedDatasetForwarded.scala
+++ b/dataset/src/main/scala/frameless/TypedDatasetForwarded.scala
@@ -6,13 +6,7 @@ import org.apache.spark.rdd.RDD
import org.apache.spark.sql.execution.QueryExecution
import org.apache.spark.sql.streaming.DataStreamWriter
import org.apache.spark.sql.types.StructType
-import org.apache.spark.sql.{
- DataFrame,
- DataFrameWriter,
- FramelessInternals,
- SQLContext,
- SparkSession
-}
+import org.apache.spark.sql.{DataFrame, DataFrameWriter, FramelessInternals, SQLContext, SparkSession}
import org.apache.spark.storage.StorageLevel
import scala.util.Random
@@ -177,10 +171,10 @@ trait TypedDatasetForwarded[T] { self: TypedDataset[T] =>
* apache/spark
*/
def sample(
- withReplacement: Boolean,
- fraction: Double,
- seed: Long = Random.nextLong()
- ): TypedDataset[T] =
+ withReplacement: Boolean,
+ fraction: Double,
+ seed: Long = Random.nextLong()
+ ): TypedDataset[T] =
TypedDataset.create(dataset.sample(withReplacement, fraction, seed))
/**
@@ -266,9 +260,9 @@ trait TypedDatasetForwarded[T] { self: TypedDataset[T] =>
* apache/spark
*/
def randomSplitAsList(
- weights: Array[Double],
- seed: Long
- ): util.List[TypedDataset[T]] = {
+ weights: Array[Double],
+ seed: Long
+ ): util.List[TypedDataset[T]] = {
val values = randomSplit(weights, seed)
java.util.Arrays.asList(values: _*)
}
@@ -301,8 +295,8 @@ trait TypedDatasetForwarded[T] { self: TypedDataset[T] =>
* apache/spark
*/
def persist(
- newLevel: StorageLevel = StorageLevel.MEMORY_AND_DISK
- ): TypedDataset[T] =
+ newLevel: StorageLevel = StorageLevel.MEMORY_AND_DISK
+ ): TypedDataset[T] =
TypedDataset.create(dataset.persist(newLevel))
/**
@@ -327,8 +321,8 @@ trait TypedDatasetForwarded[T] { self: TypedDataset[T] =>
"0.4.0"
)
def mapPartitions[U: TypedEncoder](
- func: Iterator[T] => Iterator[U]
- ): TypedDataset[U] =
+ func: Iterator[T] => Iterator[U]
+ ): TypedDataset[U] =
deserialized.mapPartitions(func)
@deprecated(
@@ -393,8 +387,8 @@ trait TypedDatasetForwarded[T] { self: TypedDataset[T] =>
* apache/spark
*/
def mapPartitions[U: TypedEncoder](
- func: Iterator[T] => Iterator[U]
- ): TypedDataset[U] =
+ func: Iterator[T] => Iterator[U]
+ ): TypedDataset[U] =
TypedDataset.create(
self.dataset.mapPartitions(func)(TypedExpressionEncoder[U])
)
@@ -406,8 +400,8 @@ trait TypedDatasetForwarded[T] { self: TypedDataset[T] =>
* apache/spark
*/
def flatMap[U: TypedEncoder](
- func: T => TraversableOnce[U]
- ): TypedDataset[U] =
+ func: T => TraversableOnce[U]
+ ): TypedDataset[U] =
TypedDataset.create(self.dataset.flatMap(func)(TypedExpressionEncoder[U]))
/**
@@ -425,10 +419,10 @@ trait TypedDatasetForwarded[T] { self: TypedDataset[T] =>
* Differs from `Dataset#reduce` by wrapping its result into an `Option` and an effect-suspending `F`.
*/
def reduceOption[F[_]](
- func: (T, T) => T
- )(implicit
- F: SparkDelay[F]
- ): F[Option[T]] =
+ func: (T, T) => T
+ )(implicit
+ F: SparkDelay[F]
+ ): F[Option[T]] =
F.delay {
try {
Option(self.dataset.reduce(func))
diff --git a/dataset/src/main/scala/frameless/TypedEncoder.scala b/dataset/src/main/scala/frameless/TypedEncoder.scala
index b42b026ee..8525edeed 100644
--- a/dataset/src/main/scala/frameless/TypedEncoder.scala
+++ b/dataset/src/main/scala/frameless/TypedEncoder.scala
@@ -4,7 +4,7 @@ import java.math.BigInteger
import java.util.Date
-import java.time.{ Duration, Instant, Period, LocalDate }
+import java.time.{Duration, Instant, LocalDate, Period}
import java.sql.Timestamp
@@ -12,14 +12,10 @@ import scala.reflect.ClassTag
import org.apache.spark.sql.FramelessInternals
import org.apache.spark.sql.FramelessInternals.UserDefinedType
-import org.apache.spark.sql.{ reflection => ScalaReflection }
+import org.apache.spark.sql.{reflection => ScalaReflection}
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.objects._
-import org.apache.spark.sql.catalyst.util.{
- ArrayBasedMapData,
- DateTimeUtils,
- GenericArrayData
-}
+import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, DateTimeUtils, GenericArrayData}
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.UTF8String
@@ -27,9 +23,8 @@ import shapeless._
import shapeless.ops.hlist.IsHCons
abstract class TypedEncoder[T](
- implicit
- val classTag: ClassTag[T])
- extends Serializable {
+ implicit val classTag: ClassTag[T]
+) extends Serializable {
def nullable: Boolean
def jvmRepr: DataType
@@ -436,9 +431,8 @@ object TypedEncoder {
TypedEncoder.usingInjection
implicit def arrayEncoder[T: ClassTag](
- implicit
- i0: Lazy[RecordFieldEncoder[T]]
- ): TypedEncoder[Array[T]] =
+ implicit i0: Lazy[RecordFieldEncoder[T]]
+ ): TypedEncoder[Array[T]] =
new TypedEncoder[Array[T]] {
private lazy val encodeT = i0.value.encoder
@@ -502,10 +496,10 @@ object TypedEncoder {
}
implicit def collectionEncoder[C[X] <: Seq[X], T](
- implicit
- i0: Lazy[RecordFieldEncoder[T]],
- i1: ClassTag[C[T]]
- ): TypedEncoder[C[T]] = new TypedEncoder[C[T]] {
+ implicit
+ i0: Lazy[RecordFieldEncoder[T]],
+ i1: ClassTag[C[T]]
+ ): TypedEncoder[C[T]] = new TypedEncoder[C[T]] {
private lazy val encodeT = i0.value.encoder
def nullable: Boolean = false
@@ -544,10 +538,10 @@ object TypedEncoder {
* @return a `TypedEncoder` instance for `Set[T]`.
*/
implicit def setEncoder[T](
- implicit
- i1: shapeless.Lazy[RecordFieldEncoder[T]],
- i2: ClassTag[Set[T]]
- ): TypedEncoder[Set[T]] = {
+ implicit
+ i1: shapeless.Lazy[RecordFieldEncoder[T]],
+ i2: ClassTag[Set[T]]
+ ): TypedEncoder[Set[T]] = {
implicit val inj: Injection[Set[T], Seq[T]] = Injection(_.toSeq, _.toSet)
TypedEncoder.usingInjection
@@ -560,10 +554,10 @@ object TypedEncoder {
* @param i1 the values encoder
*/
implicit def mapEncoder[A: NotCatalystNullable, B](
- implicit
- i0: Lazy[RecordFieldEncoder[A]],
- i1: Lazy[RecordFieldEncoder[B]]
- ): TypedEncoder[Map[A, B]] = new TypedEncoder[Map[A, B]] {
+ implicit
+ i0: Lazy[RecordFieldEncoder[A]],
+ i1: Lazy[RecordFieldEncoder[B]]
+ ): TypedEncoder[Map[A, B]] = new TypedEncoder[Map[A, B]] {
def nullable: Boolean = false
def jvmRepr: DataType = FramelessInternals.objectTypeFor[Map[A, B]]
@@ -626,9 +620,8 @@ object TypedEncoder {
}
implicit def optionEncoder[A](
- implicit
- underlying: TypedEncoder[A]
- ): TypedEncoder[Option[A]] =
+ implicit underlying: TypedEncoder[A]
+ ): TypedEncoder[Option[A]] =
new TypedEncoder[Option[A]] {
def nullable: Boolean = true
@@ -706,10 +699,10 @@ object TypedEncoder {
/** Encodes things using injection if there is one defined */
implicit def usingInjection[A: ClassTag, B](
- implicit
- inj: Injection[A, B],
- trb: TypedEncoder[B]
- ): TypedEncoder[A] =
+ implicit
+ inj: Injection[A, B],
+ trb: TypedEncoder[B]
+ ): TypedEncoder[A] =
new TypedEncoder[A] {
def nullable: Boolean = trb.nullable
def jvmRepr: DataType = FramelessInternals.objectTypeFor[A](classTag)
@@ -728,19 +721,19 @@ object TypedEncoder {
/** Encodes things as records if there is no Injection defined */
implicit def usingDerivation[F, G <: HList, H <: HList](
- implicit
- i0: LabelledGeneric.Aux[F, G],
- i1: DropUnitValues.Aux[G, H],
- i2: IsHCons[H],
- i3: Lazy[RecordEncoderFields[H]],
- i4: Lazy[NewInstanceExprs[G]],
- i5: ClassTag[F]
- ): TypedEncoder[F] = new RecordEncoder[F, G, H]
+ implicit
+ i0: LabelledGeneric.Aux[F, G],
+ i1: DropUnitValues.Aux[G, H],
+ i2: IsHCons[H],
+ i3: Lazy[RecordEncoderFields[H]],
+ i4: Lazy[NewInstanceExprs[G]],
+ i5: ClassTag[F]
+ ): TypedEncoder[F] = new RecordEncoder[F, G, H]
/** Encodes things using a Spark SQL's User Defined Type (UDT) if there is one defined in implicit */
implicit def usingUserDefinedType[
- A >: Null: UserDefinedType: ClassTag
- ]: TypedEncoder[A] = {
+ A >: Null: UserDefinedType: ClassTag
+ ]: TypedEncoder[A] = {
val udt = implicitly[UserDefinedType[A]]
val udtInstance =
NewInstance(udt.getClass, Nil, dataType = ObjectType(udt.getClass))
diff --git a/dataset/src/main/scala/frameless/TypedExpressionEncoder.scala b/dataset/src/main/scala/frameless/TypedExpressionEncoder.scala
index 71fa286a5..c2de827d8 100644
--- a/dataset/src/main/scala/frameless/TypedExpressionEncoder.scala
+++ b/dataset/src/main/scala/frameless/TypedExpressionEncoder.scala
@@ -3,11 +3,7 @@ package frameless
import org.apache.spark.sql.Encoder
import org.apache.spark.sql.FramelessInternals
import org.apache.spark.sql.catalyst.analysis.GetColumnByOrdinal
-import org.apache.spark.sql.catalyst.expressions.{
- BoundReference,
- CreateNamedStruct,
- If
-}
+import org.apache.spark.sql.catalyst.expressions.{BoundReference, CreateNamedStruct, If}
import org.apache.spark.sql.types.StructType
object TypedExpressionEncoder {
@@ -28,9 +24,8 @@ object TypedExpressionEncoder {
}
def apply[T](
- implicit
- encoder: TypedEncoder[T]
- ): Encoder[T] = {
+ implicit encoder: TypedEncoder[T]
+ ): Encoder[T] = {
val in = BoundReference(0, encoder.jvmRepr, encoder.nullable)
val (out, serializer) = encoder.toCatalyst(in) match {
diff --git a/dataset/src/main/scala/frameless/With.scala b/dataset/src/main/scala/frameless/With.scala
index 11ceaa35b..571921bb7 100644
--- a/dataset/src/main/scala/frameless/With.scala
+++ b/dataset/src/main/scala/frameless/With.scala
@@ -15,7 +15,7 @@ object With extends LowPrioWith {
implicit def combine[A, B]: Aux[A, B, A with B] = of[A, B, A with B]
}
-private[frameless] sealed trait LowPrioWith {
+sealed private[frameless] trait LowPrioWith {
type Aux[A, B, W] = With[A, B] { type Out = W }
protected[this] val theInstance = new With[Any, Any] {}
diff --git a/dataset/src/main/scala/frameless/functions/AggregateFunctions.scala b/dataset/src/main/scala/frameless/functions/AggregateFunctions.scala
index e371ea048..bf9e36331 100644
--- a/dataset/src/main/scala/frameless/functions/AggregateFunctions.scala
+++ b/dataset/src/main/scala/frameless/functions/AggregateFunctions.scala
@@ -9,6 +9,7 @@ import frameless.syntax._
import scala.annotation.nowarn
trait AggregateFunctions {
+
/** Aggregate function: returns the number of items in a group.
*
* apache/spark
@@ -147,7 +148,7 @@ trait AggregateFunctions {
*
* apache/spark
*/
- def stddevSamp[A, T](column: TypedColumn[T, A])(implicit ev: CatalystCast[A, Double] ): TypedAggregate[T, Option[Double]] = {
+ def stddevSamp[A, T](column: TypedColumn[T, A])(implicit ev: CatalystCast[A, Double]): TypedAggregate[T, Option[Double]] = {
new TypedAggregate[T, Option[Double]](
sparkFunctions.stddev_samp(column.cast[Double].untyped)
)
@@ -203,15 +204,14 @@ trait AggregateFunctions {
*
* apache/spark
*/
- def corr[A, B, T](column1: TypedColumn[T, A], column2: TypedColumn[T, B])
- (implicit
- i0: CatalystCast[A, Double],
- i1: CatalystCast[B, Double]
- ): TypedAggregate[T, Option[Double]] = {
- new TypedAggregate[T, Option[Double]](
- sparkFunctions.corr(column1.cast[Double].untyped, column2.cast[Double].untyped)
- )
- }
+ def corr[A, B, T](column1: TypedColumn[T, A], column2: TypedColumn[T, B])(implicit
+ i0: CatalystCast[A, Double],
+ i1: CatalystCast[B, Double]
+ ): TypedAggregate[T, Option[Double]] = {
+ new TypedAggregate[T, Option[Double]](
+ sparkFunctions.corr(column1.cast[Double].untyped, column2.cast[Double].untyped)
+ )
+ }
/**
* Aggregate function: returns the covariance of two collumns.
@@ -221,15 +221,14 @@ trait AggregateFunctions {
*
* apache/spark
*/
- def covarPop[A, B, T](column1: TypedColumn[T, A], column2: TypedColumn[T, B])
- (implicit
- i0: CatalystCast[A, Double],
- i1: CatalystCast[B, Double]
- ): TypedAggregate[T, Option[Double]] = {
- new TypedAggregate[T, Option[Double]](
- sparkFunctions.covar_pop(column1.cast[Double].untyped, column2.cast[Double].untyped)
- )
- }
+ def covarPop[A, B, T](column1: TypedColumn[T, A], column2: TypedColumn[T, B])(implicit
+ i0: CatalystCast[A, Double],
+ i1: CatalystCast[B, Double]
+ ): TypedAggregate[T, Option[Double]] = {
+ new TypedAggregate[T, Option[Double]](
+ sparkFunctions.covar_pop(column1.cast[Double].untyped, column2.cast[Double].untyped)
+ )
+ }
/**
* Aggregate function: returns the covariance of two columns.
@@ -239,16 +238,14 @@ trait AggregateFunctions {
*
* apache/spark
*/
- def covarSamp[A, B, T](column1: TypedColumn[T, A], column2: TypedColumn[T, B])
- (implicit
- i0: CatalystCast[A, Double],
- i1: CatalystCast[B, Double]
- ): TypedAggregate[T, Option[Double]] = {
- new TypedAggregate[T, Option[Double]](
- sparkFunctions.covar_samp(column1.cast[Double].untyped, column2.cast[Double].untyped)
- )
- }
-
+ def covarSamp[A, B, T](column1: TypedColumn[T, A], column2: TypedColumn[T, B])(implicit
+ i0: CatalystCast[A, Double],
+ i1: CatalystCast[B, Double]
+ ): TypedAggregate[T, Option[Double]] = {
+ new TypedAggregate[T, Option[Double]](
+ sparkFunctions.covar_samp(column1.cast[Double].untyped, column2.cast[Double].untyped)
+ )
+ }
/**
* Aggregate function: returns the kurtosis of a column.
diff --git a/dataset/src/main/scala/frameless/functions/Lit.scala b/dataset/src/main/scala/frameless/functions/Lit.scala
index d01467b13..69d6f38a3 100644
--- a/dataset/src/main/scala/frameless/functions/Lit.scala
+++ b/dataset/src/main/scala/frameless/functions/Lit.scala
@@ -6,10 +6,10 @@ import org.apache.spark.sql.catalyst.expressions.{Expression, NonSQLExpression}
import org.apache.spark.sql.types.DataType
private[frameless] case class Lit[T <: AnyVal](
- dataType: DataType,
- nullable: Boolean,
- show: () => String,
- catalystExpr: Expression // must be a generated Expression from a literal TypedEncoder's toCatalyst function
+ dataType: DataType,
+ nullable: Boolean,
+ show: () => String,
+ catalystExpr: Expression // must be a generated Expression from a literal TypedEncoder's toCatalyst function
) extends Expression with NonSQLExpression {
override def toString: String = s"FramelessLit(${show()})"
@@ -52,7 +52,7 @@ private[frameless] case class Lit[T <: AnyVal](
}
def eval(input: InternalRow): Any = codegen(input)
-
+
def children: Seq[Expression] = Nil
protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = catalystExpr.genCode(ctx)
diff --git a/dataset/src/main/scala/frameless/functions/NonAggregateFunctions.scala b/dataset/src/main/scala/frameless/functions/NonAggregateFunctions.scala
index 939bf5b8d..935f369f3 100644
--- a/dataset/src/main/scala/frameless/functions/NonAggregateFunctions.scala
+++ b/dataset/src/main/scala/frameless/functions/NonAggregateFunctions.scala
@@ -1,12 +1,13 @@
package frameless
package functions
-import org.apache.spark.sql.{Column, functions => sparkFunctions}
+import org.apache.spark.sql.{functions => sparkFunctions, Column}
import scala.annotation.nowarn
import scala.util.matching.Regex
trait NonAggregateFunctions {
+
/** Non-Aggregate function: calculates the SHA-2 digest of a binary column and returns the value as a 40 character hex string
*
* apache/spark
@@ -27,15 +28,17 @@ trait NonAggregateFunctions {
*/
def crc32[T](column: AbstractTypedColumn[T, Array[Byte]]): column.ThisType[T, Long] =
column.typed(sparkFunctions.crc32(column.untyped))
+
/**
* Non-Aggregate function: returns the negated value of column.
*
* apache/spark
*/
- def negate[A, B, T](column: AbstractTypedColumn[T,A])(
- implicit i0: CatalystNumericWithJavaBigDecimal[A, B],
+ def negate[A, B, T](column: AbstractTypedColumn[T, A])(
+ implicit
+ i0: CatalystNumericWithJavaBigDecimal[A, B],
i1: TypedEncoder[B]
- ): column.ThisType[T,B] =
+ ): column.ThisType[T, B] =
column.typed(sparkFunctions.negate(column.untyped))
/**
@@ -43,7 +46,7 @@ trait NonAggregateFunctions {
*
* apache/spark
*/
- def not[T](column: AbstractTypedColumn[T,Boolean]): column.ThisType[T,Boolean] =
+ def not[T](column: AbstractTypedColumn[T, Boolean]): column.ThisType[T, Boolean] =
column.typed(sparkFunctions.not(column.untyped))
/**
@@ -51,36 +54,34 @@ trait NonAggregateFunctions {
*
* apache/spark
*/
- def conv[T](column: AbstractTypedColumn[T,String], fromBase: Int, toBase: Int): column.ThisType[T,String] =
- column.typed(sparkFunctions.conv(column.untyped,fromBase,toBase))
+ def conv[T](column: AbstractTypedColumn[T, String], fromBase: Int, toBase: Int): column.ThisType[T, String] =
+ column.typed(sparkFunctions.conv(column.untyped, fromBase, toBase))
/** Non-Aggregate function: Converts an angle measured in radians to an approximately equivalent angle measured in degrees.
*
* apache/spark
*/
- def degrees[A,T](column: AbstractTypedColumn[T,A]): column.ThisType[T,Double] =
+ def degrees[A, T](column: AbstractTypedColumn[T, A]): column.ThisType[T, Double] =
column.typed(sparkFunctions.degrees(column.untyped))
/** Non-Aggregate function: returns the ceiling of a numeric column
*
* apache/spark
*/
- def ceil[A, B, T](column: AbstractTypedColumn[T, A])
- (implicit
- i0: CatalystRound[A, B],
- i1: TypedEncoder[B]
- ): column.ThisType[T, B] =
- column.typed(sparkFunctions.ceil(column.untyped))(i1)
+ def ceil[A, B, T](column: AbstractTypedColumn[T, A])(implicit
+ i0: CatalystRound[A, B],
+ i1: TypedEncoder[B]
+ ): column.ThisType[T, B] =
+ column.typed(sparkFunctions.ceil(column.untyped))(i1)
/** Non-Aggregate function: returns the floor of a numeric column
*
* apache/spark
*/
- def floor[A, B, T](column: AbstractTypedColumn[T, A])
- (implicit
+ def floor[A, B, T](column: AbstractTypedColumn[T, A])(implicit
i0: CatalystRound[A, B],
i1: TypedEncoder[B]
- ): column.ThisType[T, B] =
+ ): column.ThisType[T, B] =
column.typed(sparkFunctions.floor(column.untyped))(i1)
/** Non-Aggregate function: unsigned shift the the given value numBits right. If given long, will return long else it will return an integer.
@@ -88,47 +89,43 @@ trait NonAggregateFunctions {
* apache/spark
*/
@nowarn // supress sparkFunctions.shiftRightUnsigned call which is used to maintain Spark 3.1.x backwards compat
- def shiftRightUnsigned[A, B, T](column: AbstractTypedColumn[T, A], numBits: Int)
- (implicit
- i0: CatalystBitShift[A, B],
- i1: TypedEncoder[B]
- ): column.ThisType[T, B] =
- column.typed(sparkFunctions.shiftRightUnsigned(column.untyped, numBits))
+ def shiftRightUnsigned[A, B, T](column: AbstractTypedColumn[T, A], numBits: Int)(implicit
+ i0: CatalystBitShift[A, B],
+ i1: TypedEncoder[B]
+ ): column.ThisType[T, B] =
+ column.typed(sparkFunctions.shiftRightUnsigned(column.untyped, numBits))
/** Non-Aggregate function: shift the the given value numBits right. If given long, will return long else it will return an integer.
*
* apache/spark
*/
@nowarn // supress sparkFunctions.shiftReft call which is used to maintain Spark 3.1.x backwards compat
- def shiftRight[A, B, T](column: AbstractTypedColumn[T, A], numBits: Int)
- (implicit
- i0: CatalystBitShift[A, B],
- i1: TypedEncoder[B]
- ): column.ThisType[T, B] =
- column.typed(sparkFunctions.shiftRight(column.untyped, numBits))
+ def shiftRight[A, B, T](column: AbstractTypedColumn[T, A], numBits: Int)(implicit
+ i0: CatalystBitShift[A, B],
+ i1: TypedEncoder[B]
+ ): column.ThisType[T, B] =
+ column.typed(sparkFunctions.shiftRight(column.untyped, numBits))
/** Non-Aggregate function: shift the the given value numBits left. If given long, will return long else it will return an integer.
*
* apache/spark
*/
@nowarn // supress sparkFunctions.shiftLeft call which is used to maintain Spark 3.1.x backwards compat
- def shiftLeft[A, B, T](column: AbstractTypedColumn[T, A], numBits: Int)
- (implicit
- i0: CatalystBitShift[A, B],
- i1: TypedEncoder[B]
- ): column.ThisType[T, B] =
+ def shiftLeft[A, B, T](column: AbstractTypedColumn[T, A], numBits: Int)(implicit
+ i0: CatalystBitShift[A, B],
+ i1: TypedEncoder[B]
+ ): column.ThisType[T, B] =
column.typed(sparkFunctions.shiftLeft(column.untyped, numBits))
-
+
/** Non-Aggregate function: returns the absolute value of a numeric column
*
* apache/spark
*/
- def abs[A, B, T](column: AbstractTypedColumn[T, A])
- (implicit
- i0: CatalystNumericWithJavaBigDecimal[A, B],
- i1: TypedEncoder[B]
- ): column.ThisType[T, B] =
- column.typed(sparkFunctions.abs(column.untyped))(i1)
+ def abs[A, B, T](column: AbstractTypedColumn[T, A])(implicit
+ i0: CatalystNumericWithJavaBigDecimal[A, B],
+ i1: TypedEncoder[B]
+ ): column.ThisType[T, B] =
+ column.typed(sparkFunctions.abs(column.untyped))(i1)
/** Non-Aggregate function: Computes the cosine of the given value.
*
@@ -136,9 +133,8 @@ trait NonAggregateFunctions {
* [[https://github.com/apache/spark/blob/4a3c09601ba69f7d49d1946bb6f20f5cfe453031/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala#L67]]
* apache/spark
*/
- def cos[A, T](column: AbstractTypedColumn[T, A])
- (implicit i0: CatalystCast[A, Double]): column.ThisType[T, Double] =
- column.typed(sparkFunctions.cos(column.cast[Double].untyped))
+ def cos[A, T](column: AbstractTypedColumn[T, A])(implicit i0: CatalystCast[A, Double]): column.ThisType[T, Double] =
+ column.typed(sparkFunctions.cos(column.cast[Double].untyped))
/** Non-Aggregate function: Computes the hyperbolic cosine of the given value.
*
@@ -146,9 +142,8 @@ trait NonAggregateFunctions {
* [[https://github.com/apache/spark/blob/4a3c09601ba69f7d49d1946bb6f20f5cfe453031/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala#L67]]
* apache/spark
*/
- def cosh[A, T](column: AbstractTypedColumn[T, A])
- (implicit i0: CatalystCast[A, Double]): column.ThisType[T, Double] =
- column.typed(sparkFunctions.cosh(column.cast[Double].untyped))
+ def cosh[A, T](column: AbstractTypedColumn[T, A])(implicit i0: CatalystCast[A, Double]): column.ThisType[T, Double] =
+ column.typed(sparkFunctions.cosh(column.cast[Double].untyped))
/** Non-Aggregate function: Computes the signum of the given value.
*
@@ -156,8 +151,7 @@ trait NonAggregateFunctions {
* [[https://github.com/apache/spark/blob/4a3c09601ba69f7d49d1946bb6f20f5cfe453031/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala#L67]]
* apache/spark
*/
- def signum[A, T](column: AbstractTypedColumn[T, A])
- (implicit i0: CatalystCast[A, Double]): column.ThisType[T, Double] =
+ def signum[A, T](column: AbstractTypedColumn[T, A])(implicit i0: CatalystCast[A, Double]): column.ThisType[T, Double] =
column.typed(sparkFunctions.signum(column.cast[Double].untyped))
/** Non-Aggregate function: Computes the sine of the given value.
@@ -166,9 +160,8 @@ trait NonAggregateFunctions {
* [[https://github.com/apache/spark/blob/4a3c09601ba69f7d49d1946bb6f20f5cfe453031/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala#L67]]
* apache/spark
*/
- def sin[A, T](column: AbstractTypedColumn[T, A])
- (implicit i0: CatalystCast[A, Double]): column.ThisType[T, Double] =
- column.typed(sparkFunctions.sin(column.cast[Double].untyped))
+ def sin[A, T](column: AbstractTypedColumn[T, A])(implicit i0: CatalystCast[A, Double]): column.ThisType[T, Double] =
+ column.typed(sparkFunctions.sin(column.cast[Double].untyped))
/** Non-Aggregate function: Computes the hyperbolic sine of the given value.
*
@@ -176,9 +169,8 @@ trait NonAggregateFunctions {
* [[https://github.com/apache/spark/blob/4a3c09601ba69f7d49d1946bb6f20f5cfe453031/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala#L67]]
* apache/spark
*/
- def sinh[A, T](column: AbstractTypedColumn[T, A])
- (implicit i0: CatalystCast[A, Double]): column.ThisType[T, Double] =
- column.typed(sparkFunctions.sinh(column.cast[Double].untyped))
+ def sinh[A, T](column: AbstractTypedColumn[T, A])(implicit i0: CatalystCast[A, Double]): column.ThisType[T, Double] =
+ column.typed(sparkFunctions.sinh(column.cast[Double].untyped))
/** Non-Aggregate function: Computes the tangent of the given column.
*
@@ -186,9 +178,8 @@ trait NonAggregateFunctions {
* [[https://github.com/apache/spark/blob/4a3c09601ba69f7d49d1946bb6f20f5cfe453031/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala#L67]]
* apache/spark
*/
- def tan[A, T](column: AbstractTypedColumn[T, A])
- (implicit i0: CatalystCast[A, Double]): column.ThisType[T, Double] =
- column.typed(sparkFunctions.tan(column.cast[Double].untyped))
+ def tan[A, T](column: AbstractTypedColumn[T, A])(implicit i0: CatalystCast[A, Double]): column.ThisType[T, Double] =
+ column.typed(sparkFunctions.tan(column.cast[Double].untyped))
/** Non-Aggregate function: Computes the hyperbolic tangent of the given value.
*
@@ -196,9 +187,8 @@ trait NonAggregateFunctions {
* [[https://github.com/apache/spark/blob/4a3c09601ba69f7d49d1946bb6f20f5cfe453031/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala#L67]]
* apache/spark
*/
- def tanh[A, T](column: AbstractTypedColumn[T, A])
- (implicit i0: CatalystCast[A, Double]): column.ThisType[T, Double] =
- column.typed(sparkFunctions.tanh(column.cast[Double].untyped))
+ def tanh[A, T](column: AbstractTypedColumn[T, A])(implicit i0: CatalystCast[A, Double]): column.ThisType[T, Double] =
+ column.typed(sparkFunctions.tanh(column.cast[Double].untyped))
/** Non-Aggregate function: returns the acos of a numeric column
*
@@ -206,9 +196,8 @@ trait NonAggregateFunctions {
* [[https://github.com/apache/spark/blob/4a3c09601ba69f7d49d1946bb6f20f5cfe453031/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala#L67]]
* apache/spark
*/
- def acos[A, T](column: AbstractTypedColumn[T, A])
- (implicit i0: CatalystCast[A, Double]): column.ThisType[T, Double] =
- column.typed(sparkFunctions.acos(column.cast[Double].untyped))
+ def acos[A, T](column: AbstractTypedColumn[T, A])(implicit i0: CatalystCast[A, Double]): column.ThisType[T, Double] =
+ column.typed(sparkFunctions.acos(column.cast[Double].untyped))
/** Non-Aggregate function: returns true if value is contained with in the array in the specified column
*
@@ -223,9 +212,8 @@ trait NonAggregateFunctions {
* [[https://github.com/apache/spark/blob/4a3c09601ba69f7d49d1946bb6f20f5cfe453031/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala#L67]]
* apache/spark
*/
- def atan[A, T](column: AbstractTypedColumn[T,A])
- (implicit i0: CatalystCast[A, Double]): column.ThisType[T, Double] =
- column.typed(sparkFunctions.atan(column.cast[Double].untyped))
+ def atan[A, T](column: AbstractTypedColumn[T, A])(implicit i0: CatalystCast[A, Double]): column.ThisType[T, Double] =
+ column.typed(sparkFunctions.atan(column.cast[Double].untyped))
/** Non-Aggregate function: returns the asin of a numeric column
*
@@ -233,9 +221,8 @@ trait NonAggregateFunctions {
* [[https://github.com/apache/spark/blob/4a3c09601ba69f7d49d1946bb6f20f5cfe453031/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala#L67]]
* apache/spark
*/
- def asin[A, T](column: AbstractTypedColumn[T, A])
- (implicit i0: CatalystCast[A, Double]): column.ThisType[T, Double] =
- column.typed(sparkFunctions.asin(column.cast[Double].untyped))
+ def asin[A, T](column: AbstractTypedColumn[T, A])(implicit i0: CatalystCast[A, Double]): column.ThisType[T, Double] =
+ column.typed(sparkFunctions.asin(column.cast[Double].untyped))
/** Non-Aggregate function: returns the angle theta from the conversion of rectangular coordinates (x, y) to
* polar coordinates (r, theta).
@@ -244,12 +231,11 @@ trait NonAggregateFunctions {
* [[https://github.com/apache/spark/blob/4a3c09601ba69f7d49d1946bb6f20f5cfe453031/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala#L67]]
* apache/spark
*/
- def atan2[A, B, T](l: TypedColumn[T, A], r: TypedColumn[T, B])
- (implicit
- i0: CatalystCast[A, Double],
- i1: CatalystCast[B, Double]
- ): TypedColumn[T, Double] =
- r.typed(sparkFunctions.atan2(l.cast[Double].untyped, r.cast[Double].untyped))
+ def atan2[A, B, T](l: TypedColumn[T, A], r: TypedColumn[T, B])(implicit
+ i0: CatalystCast[A, Double],
+ i1: CatalystCast[B, Double]
+ ): TypedColumn[T, Double] =
+ r.typed(sparkFunctions.atan2(l.cast[Double].untyped, r.cast[Double].untyped))
/** Non-Aggregate function: returns the angle theta from the conversion of rectangular coordinates (x, y) to
* polar coordinates (r, theta).
@@ -258,51 +244,43 @@ trait NonAggregateFunctions {
* [[https://github.com/apache/spark/blob/4a3c09601ba69f7d49d1946bb6f20f5cfe453031/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala#L67]]
* apache/spark
*/
- def atan2[A, B, T](l: TypedAggregate[T, A], r: TypedAggregate[T, B])
- (implicit
- i0: CatalystCast[A, Double],
- i1: CatalystCast[B, Double]
- ): TypedAggregate[T, Double] =
- r.typed(sparkFunctions.atan2(l.cast[Double].untyped, r.cast[Double].untyped))
+ def atan2[A, B, T](l: TypedAggregate[T, A], r: TypedAggregate[T, B])(implicit
+ i0: CatalystCast[A, Double],
+ i1: CatalystCast[B, Double]
+ ): TypedAggregate[T, Double] =
+ r.typed(sparkFunctions.atan2(l.cast[Double].untyped, r.cast[Double].untyped))
- def atan2[B, T](l: Double, r: TypedColumn[T, B])
- (implicit i0: CatalystCast[B, Double]): TypedColumn[T, Double] =
- atan2(r.lit(l), r)
+ def atan2[B, T](l: Double, r: TypedColumn[T, B])(implicit i0: CatalystCast[B, Double]): TypedColumn[T, Double] =
+ atan2(r.lit(l), r)
- def atan2[A, T](l: TypedColumn[T, A], r: Double)
- (implicit i0: CatalystCast[A, Double]): TypedColumn[T, Double] =
- atan2(l, l.lit(r))
+ def atan2[A, T](l: TypedColumn[T, A], r: Double)(implicit i0: CatalystCast[A, Double]): TypedColumn[T, Double] =
+ atan2(l, l.lit(r))
- def atan2[B, T](l: Double, r: TypedAggregate[T, B])
- (implicit i0: CatalystCast[B, Double]): TypedAggregate[T, Double] =
- atan2(r.lit(l), r)
+ def atan2[B, T](l: Double, r: TypedAggregate[T, B])(implicit i0: CatalystCast[B, Double]): TypedAggregate[T, Double] =
+ atan2(r.lit(l), r)
- def atan2[A, T](l: TypedAggregate[T, A], r: Double)
- (implicit i0: CatalystCast[A, Double]): TypedAggregate[T, Double] =
- atan2(l, l.lit(r))
+ def atan2[A, T](l: TypedAggregate[T, A], r: Double)(implicit i0: CatalystCast[A, Double]): TypedAggregate[T, Double] =
+ atan2(l, l.lit(r))
/** Non-Aggregate function: returns the square root value of a numeric column.
*
* apache/spark
*/
- def sqrt[A, T](column: AbstractTypedColumn[T, A])
- (implicit i0: CatalystCast[A, Double]): column.ThisType[T, Double] =
+ def sqrt[A, T](column: AbstractTypedColumn[T, A])(implicit i0: CatalystCast[A, Double]): column.ThisType[T, Double] =
column.typed(sparkFunctions.sqrt(column.cast[Double].untyped))
/** Non-Aggregate function: returns the cubic root value of a numeric column.
*
* apache/spark
*/
- def cbrt[A, T](column: AbstractTypedColumn[T, A])
- (implicit i0: CatalystCast[A, Double]): column.ThisType[T, Double] =
+ def cbrt[A, T](column: AbstractTypedColumn[T, A])(implicit i0: CatalystCast[A, Double]): column.ThisType[T, Double] =
column.typed(sparkFunctions.cbrt(column.cast[Double].untyped))
/** Non-Aggregate function: returns the exponential value of a numeric column.
*
* apache/spark
*/
- def exp[A, T](column: AbstractTypedColumn[T, A])
- (implicit i0: CatalystCast[A, Double]): column.ThisType[T, Double] =
+ def exp[A, T](column: AbstractTypedColumn[T, A])(implicit i0: CatalystCast[A, Double]): column.ThisType[T, Double] =
column.typed(sparkFunctions.exp(column.cast[Double].untyped))
/** Non-Aggregate function: Returns the value of the column `e` rounded to 0 decimal places with HALF_UP round mode.
@@ -310,7 +288,9 @@ trait NonAggregateFunctions {
* apache/spark
*/
def round[A, B, T](column: AbstractTypedColumn[T, A])(
- implicit i0: CatalystNumericWithJavaBigDecimal[A, B], i1: TypedEncoder[B]
+ implicit
+ i0: CatalystNumericWithJavaBigDecimal[A, B],
+ i1: TypedEncoder[B]
): column.ThisType[T, B] =
column.typed(sparkFunctions.round(column.untyped))(i1)
@@ -320,7 +300,9 @@ trait NonAggregateFunctions {
* apache/spark
*/
def round[A, B, T](column: AbstractTypedColumn[T, A], scale: Int)(
- implicit i0: CatalystNumericWithJavaBigDecimal[A, B], i1: TypedEncoder[B]
+ implicit
+ i0: CatalystNumericWithJavaBigDecimal[A, B],
+ i1: TypedEncoder[B]
): column.ThisType[T, B] =
column.typed(sparkFunctions.round(column.untyped, scale))(i1)
@@ -330,7 +312,9 @@ trait NonAggregateFunctions {
* apache/spark
*/
def bround[A, B, T](column: AbstractTypedColumn[T, A])(
- implicit i0: CatalystNumericWithJavaBigDecimal[A, B], i1: TypedEncoder[B]
+ implicit
+ i0: CatalystNumericWithJavaBigDecimal[A, B],
+ i1: TypedEncoder[B]
): column.ThisType[T, B] =
column.typed(sparkFunctions.bround(column.untyped))(i1)
@@ -340,7 +324,9 @@ trait NonAggregateFunctions {
* apache/spark
*/
def bround[A, B, T](column: AbstractTypedColumn[T, A], scale: Int)(
- implicit i0: CatalystNumericWithJavaBigDecimal[A, B], i1: TypedEncoder[B]
+ implicit
+ i0: CatalystNumericWithJavaBigDecimal[A, B],
+ i1: TypedEncoder[B]
): column.ThisType[T, B] =
column.typed(sparkFunctions.bround(column.untyped, scale))(i1)
@@ -394,7 +380,6 @@ trait NonAggregateFunctions {
): column.ThisType[T, Double] =
column.typed(sparkFunctions.log10(column.untyped))
-
/**
* Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow.
*
@@ -465,7 +450,6 @@ trait NonAggregateFunctions {
): column.ThisType[T, A] =
column.typed(sparkFunctions.pmod(column.untyped, column2.untyped))
-
/** Non-Aggregate function: Returns the string representation of the binary value of the given long
* column. For example, bin("12") returns "1100".
*
@@ -480,7 +464,7 @@ trait NonAggregateFunctions {
*
* apache/spark
*/
- def md5[T, A](column: AbstractTypedColumn[T, A])(implicit i0: TypedEncoder[A]): column.ThisType[T, String] =
+ def md5[T, A](column: AbstractTypedColumn[T, A])(implicit i0: TypedEncoder[A]): column.ThisType[T, String] =
column.typed(sparkFunctions.md5(column.untyped))
/**
@@ -488,7 +472,7 @@ trait NonAggregateFunctions {
*
* apache/spark
*/
- def factorial[T](column: AbstractTypedColumn[T, Long])(implicit i0: TypedEncoder[Long]): column.ThisType[T, Long] =
+ def factorial[T](column: AbstractTypedColumn[T, Long])(implicit i0: TypedEncoder[Long]): column.ThisType[T, Long] =
column.typed(sparkFunctions.factorial(column.untyped))
/** Non-Aggregate function: Computes bitwise NOT.
@@ -542,7 +526,6 @@ trait NonAggregateFunctions {
// String functions
//////////////////////////////////////////////////////////////////////////////////////////////
-
/** Non-Aggregate function: takes the first letter of a string column and returns the ascii int value in a new column
*
* apache/spark
@@ -615,7 +598,7 @@ trait NonAggregateFunctions {
*
* apache/spark
*/
- //TODO: Also for binary
+ // TODO: Also for binary
def length[T](str: AbstractTypedColumn[T, String]): str.ThisType[T, Int] =
str.typed(sparkFunctions.length(str.untyped))
@@ -645,9 +628,7 @@ trait NonAggregateFunctions {
*
* apache/spark
*/
- def lpad[T](str: AbstractTypedColumn[T, String],
- len: Int,
- pad: String): str.ThisType[T, String] =
+ def lpad[T](str: AbstractTypedColumn[T, String], len: Int, pad: String): str.ThisType[T, String] =
str.typed(sparkFunctions.lpad(str.untyped, len, pad))
/** Non-Aggregate function: Trim the spaces from left end for the specified string value.
@@ -661,12 +642,9 @@ trait NonAggregateFunctions {
*
* apache/spark
*/
- def regexpReplace[T](str: AbstractTypedColumn[T, String],
- pattern: Regex,
- replacement: String): str.ThisType[T, String] =
+ def regexpReplace[T](str: AbstractTypedColumn[T, String], pattern: Regex, replacement: String): str.ThisType[T, String] =
str.typed(sparkFunctions.regexp_replace(str.untyped, pattern.regex, replacement))
-
/** Non-Aggregate function: Reverses the string column and returns it as a new string column.
*
* apache/spark
@@ -693,7 +671,7 @@ trait NonAggregateFunctions {
*
* apache/spark
*/
- //TODO: Also for byte array
+ // TODO: Also for byte array
def substring[T](str: AbstractTypedColumn[T, String], pos: Int, len: Int): str.ThisType[T, String] =
str.typed(sparkFunctions.substring(str.untyped, pos, len))
diff --git a/dataset/src/main/scala/frameless/functions/Udf.scala b/dataset/src/main/scala/frameless/functions/Udf.scala
index 93ba7f118..ca6ce8271 100644
--- a/dataset/src/main/scala/frameless/functions/Udf.scala
+++ b/dataset/src/main/scala/frameless/functions/Udf.scala
@@ -19,8 +19,7 @@ trait Udf {
*
* apache/spark
*/
- def udf[T, A, R: TypedEncoder](f: A => R):
- TypedColumn[T, A] => TypedColumn[T, R] = {
+ def udf[T, A, R: TypedEncoder](f: A => R): TypedColumn[T, A] => TypedColumn[T, R] = {
u =>
val scalaUdf = FramelessUdf(f, List(u), TypedEncoder[R])
new TypedColumn[T, R](scalaUdf)
@@ -31,48 +30,46 @@ trait Udf {
*
* apache/spark
*/
- def udf[T, A1, A2, R: TypedEncoder](f: (A1,A2) => R):
- (TypedColumn[T, A1], TypedColumn[T, A2]) => TypedColumn[T, R] = {
+ def udf[T, A1, A2, R: TypedEncoder](f: (A1, A2) => R): (TypedColumn[T, A1], TypedColumn[T, A2]) => TypedColumn[T, R] = {
case us =>
val scalaUdf = FramelessUdf(f, us.toList[UntypedExpression[T]], TypedEncoder[R])
new TypedColumn[T, R](scalaUdf)
- }
+ }
/** Defines a user-defined function of 3 arguments as user-defined function (UDF).
* The data types are automatically inferred based on the function's signature.
*
* apache/spark
*/
- def udf[T, A1, A2, A3, R: TypedEncoder](f: (A1,A2,A3) => R):
- (TypedColumn[T, A1], TypedColumn[T, A2], TypedColumn[T, A3]) => TypedColumn[T, R] = {
+ def udf[T, A1, A2, A3, R: TypedEncoder](f: (A1, A2, A3) => R): (TypedColumn[T, A1], TypedColumn[T, A2], TypedColumn[T, A3]) => TypedColumn[T, R] = {
case us =>
val scalaUdf = FramelessUdf(f, us.toList[UntypedExpression[T]], TypedEncoder[R])
new TypedColumn[T, R](scalaUdf)
- }
+ }
/** Defines a user-defined function of 4 arguments as user-defined function (UDF).
* The data types are automatically inferred based on the function's signature.
*
* apache/spark
*/
- def udf[T, A1, A2, A3, A4, R: TypedEncoder](f: (A1,A2,A3,A4) => R):
- (TypedColumn[T, A1], TypedColumn[T, A2], TypedColumn[T, A3], TypedColumn[T, A4]) => TypedColumn[T, R] = {
+ def udf[T, A1, A2, A3, A4, R: TypedEncoder](f: (A1, A2, A3, A4) => R)
+ : (TypedColumn[T, A1], TypedColumn[T, A2], TypedColumn[T, A3], TypedColumn[T, A4]) => TypedColumn[T, R] = {
case us =>
val scalaUdf = FramelessUdf(f, us.toList[UntypedExpression[T]], TypedEncoder[R])
new TypedColumn[T, R](scalaUdf)
- }
+ }
/** Defines a user-defined function of 5 arguments as user-defined function (UDF).
* The data types are automatically inferred based on the function's signature.
*
* apache/spark
*/
- def udf[T, A1, A2, A3, A4, A5, R: TypedEncoder](f: (A1,A2,A3,A4,A5) => R):
- (TypedColumn[T, A1], TypedColumn[T, A2], TypedColumn[T, A3], TypedColumn[T, A4], TypedColumn[T, A5]) => TypedColumn[T, R] = {
+ def udf[T, A1, A2, A3, A4, A5, R: TypedEncoder](f: (A1, A2, A3, A4, A5) => R)
+ : (TypedColumn[T, A1], TypedColumn[T, A2], TypedColumn[T, A3], TypedColumn[T, A4], TypedColumn[T, A5]) => TypedColumn[T, R] = {
case us =>
val scalaUdf = FramelessUdf(f, us.toList[UntypedExpression[T]], TypedEncoder[R])
new TypedColumn[T, R](scalaUdf)
- }
+ }
}
/**
@@ -118,7 +115,8 @@ case class FramelessUdf[T, R](
"""
val code = CodeFormatter.stripOverlappingComments(
- new CodeAndComment(codeBody, ctx.getPlaceHolderToComments()))
+ new CodeAndComment(codeBody, ctx.getPlaceHolderToComments())
+ )
val (clazz, _) = CodeGenerator.compile(code)
val codegen = clazz.generate(ctx.references.toArray).asInstanceOf[InternalRow => AnyRef]
@@ -139,9 +137,13 @@ case class FramelessUdf[T, R](
val framelessUdfClassName = classOf[FramelessUdf[_, _]].getName
val funcClassName = s"scala.Function${children.size}"
val funcExpressionIdx = ctx.references.size - 1
- val funcTerm = ctx.addMutableState(funcClassName, ctx.freshName("udf"),
- v => s"$v = ($funcClassName)((($framelessUdfClassName)references" +
- s"[$funcExpressionIdx]).function());")
+ val funcTerm = ctx.addMutableState(
+ funcClassName,
+ ctx.freshName("udf"),
+ v =>
+ s"$v = ($funcClassName)((($framelessUdfClassName)references" +
+ s"[$funcExpressionIdx]).function());"
+ )
val (argsCode, funcArguments) = encoders.zip(children).map {
case (encoder, child) =>
@@ -161,7 +163,8 @@ case class FramelessUdf[T, R](
val resultEval = rencoder.toCatalyst(internalExpr).genCode(ctx)
- ev.copy(code = code"""
+ ev.copy(
+ code = code"""
${argsCode.mkString("\n")}
$internalTerm =
@@ -179,17 +182,17 @@ case class FramelessUdf[T, R](
}
case class Spark2_4_LambdaVariable(
- value: String,
- isNull: String,
- dataType: DataType,
- nullable: Boolean = true) extends LeafExpression with NonSQLExpression {
+ value: String,
+ isNull: String,
+ dataType: DataType,
+ nullable: Boolean = true
+) extends LeafExpression with NonSQLExpression {
private val accessor: (InternalRow, Int) => Any = InternalRow.getAccessor(dataType)
// Interpreted execution of `LambdaVariable` always get the 0-index element from input row.
override def eval(input: InternalRow): Any = {
- assert(input.numFields == 1,
- "The input row of interpreted LambdaVariable should have only 1 field.")
+ assert(input.numFields == 1, "The input row of interpreted LambdaVariable should have only 1 field.")
if (nullable && input.isNullAt(0)) {
null
} else {
diff --git a/dataset/src/main/scala/frameless/functions/UnaryFunctions.scala b/dataset/src/main/scala/frameless/functions/UnaryFunctions.scala
index 64bdf0ed1..f76b1604e 100644
--- a/dataset/src/main/scala/frameless/functions/UnaryFunctions.scala
+++ b/dataset/src/main/scala/frameless/functions/UnaryFunctions.scala
@@ -1,16 +1,17 @@
package frameless
package functions
-import org.apache.spark.sql.{Column, functions => sparkFunctions}
+import org.apache.spark.sql.{functions => sparkFunctions, Column}
import scala.math.Ordering
trait UnaryFunctions {
+
/** Returns length of array
*
* apache/spark
*/
- def size[T, A, V[_] : CatalystSizableCollection](column: TypedColumn[T, V[A]]): TypedColumn[T, Int] =
+ def size[T, A, V[_]: CatalystSizableCollection](column: TypedColumn[T, V[A]]): TypedColumn[T, Int] =
new TypedColumn[T, Int](implicitly[CatalystSizableCollection[V]].sizeOp(column.untyped))
/** Returns length of Map
@@ -25,7 +26,7 @@ trait UnaryFunctions {
*
* apache/spark
*/
- def sortAscending[T, A: Ordering, V[_] : CatalystSortableCollection](column: TypedColumn[T, V[A]]): TypedColumn[T, V[A]] =
+ def sortAscending[T, A: Ordering, V[_]: CatalystSortableCollection](column: TypedColumn[T, V[A]]): TypedColumn[T, V[A]] =
new TypedColumn[T, V[A]](implicitly[CatalystSortableCollection[V]].sortOp(column.untyped, sortAscending = true))(column.uencoder)
/** Sorts the input array for the given column in descending order, according to
@@ -33,18 +34,20 @@ trait UnaryFunctions {
*
* apache/spark
*/
- def sortDescending[T, A: Ordering, V[_] : CatalystSortableCollection](column: TypedColumn[T, V[A]]): TypedColumn[T, V[A]] =
+ def sortDescending[T, A: Ordering, V[_]: CatalystSortableCollection](column: TypedColumn[T, V[A]]): TypedColumn[T, V[A]] =
new TypedColumn[T, V[A]](implicitly[CatalystSortableCollection[V]].sortOp(column.untyped, sortAscending = false))(column.uencoder)
-
/** Creates a new row for each element in the given collection. The column types
* eligible for this operation are constrained by CatalystExplodableCollection.
*
* apache/spark
*/
- @deprecated("Use explode() from the TypedDataset instead. This method will result in " +
- "runtime error if applied to two columns in the same select statement.", "0.6.2")
- def explode[T, A: TypedEncoder, V[_] : CatalystExplodableCollection](column: TypedColumn[T, V[A]]): TypedColumn[T, A] =
+ @deprecated(
+ "Use explode() from the TypedDataset instead. This method will result in " +
+ "runtime error if applied to two columns in the same select statement.",
+ "0.6.2"
+ )
+ def explode[T, A: TypedEncoder, V[_]: CatalystExplodableCollection](column: TypedColumn[T, V[A]]): TypedColumn[T, A] =
new TypedColumn[T, A](sparkFunctions.explode(column.untyped))
}
diff --git a/dataset/src/main/scala/frameless/functions/package.scala b/dataset/src/main/scala/frameless/functions/package.scala
index 1a57101e0..291cb4857 100644
--- a/dataset/src/main/scala/frameless/functions/package.scala
+++ b/dataset/src/main/scala/frameless/functions/package.scala
@@ -5,9 +5,9 @@ import scala.reflect.ClassTag
import shapeless._
import shapeless.labelled.FieldType
import shapeless.ops.hlist.IsHCons
-import shapeless.ops.record.{ Keys, Values }
+import shapeless.ops.record.{Keys, Values}
-import org.apache.spark.sql.{ reflection => ScalaReflection }
+import org.apache.spark.sql.{reflection => ScalaReflection}
import org.apache.spark.sql.catalyst.expressions.Literal
package object functions extends Udf with UnaryFunctions {
@@ -22,11 +22,11 @@ package object functions extends Udf with UnaryFunctions {
* apache/spark
*/
def litAggr[A, T](
- value: A
- )(implicit
- i0: TypedEncoder[A],
- i1: Refute[IsValueClass[A]]
- ): TypedAggregate[T, A] =
+ value: A
+ )(implicit
+ i0: TypedEncoder[A],
+ i1: Refute[IsValueClass[A]]
+ ): TypedAggregate[T, A] =
new TypedAggregate[T, A](lit(value).expr)
/**
@@ -39,10 +39,10 @@ package object functions extends Udf with UnaryFunctions {
* @tparam T the row type
*/
def lit[A, T](
- value: A
- )(implicit
- encoder: TypedEncoder[A]
- ): TypedColumn[T, A] = {
+ value: A
+ )(implicit
+ encoder: TypedEncoder[A]
+ ): TypedColumn[T, A] = {
if (
ScalaReflection.isNativeType(
@@ -74,26 +74,25 @@ package object functions extends Udf with UnaryFunctions {
* @tparam T the row type
*/
def litValue[
- A: IsValueClass,
- T,
- G <: ::[_, HNil],
- H <: ::[_ <: FieldType[_ <: Symbol, _], HNil],
- K <: Symbol,
- V,
- KS <: ::[_ <: Symbol, HNil],
- VS <: HList
- ](value: A
- )(implicit
- i0: LabelledGeneric.Aux[A, G],
- i1: DropUnitValues.Aux[G, H],
- i2: IsHCons.Aux[H, _ <: FieldType[K, V], HNil],
- i3: Keys.Aux[H, KS],
- i4: Values.Aux[H, VS],
- i5: IsHCons.Aux[KS, K, HNil],
- i6: IsHCons.Aux[VS, V, HNil],
- i7: TypedEncoder[V],
- i8: ClassTag[A]
- ): TypedColumn[T, A] = {
+ A: IsValueClass,
+ T,
+ G <: ::[_, HNil],
+ H <: ::[_ <: FieldType[_ <: Symbol, _], HNil],
+ K <: Symbol,
+ V,
+ KS <: ::[_ <: Symbol, HNil],
+ VS <: HList
+ ](value: A)(implicit
+ i0: LabelledGeneric.Aux[A, G],
+ i1: DropUnitValues.Aux[G, H],
+ i2: IsHCons.Aux[H, _ <: FieldType[K, V], HNil],
+ i3: Keys.Aux[H, KS],
+ i4: Values.Aux[H, VS],
+ i5: IsHCons.Aux[KS, K, HNil],
+ i6: IsHCons.Aux[VS, V, HNil],
+ i7: TypedEncoder[V],
+ i8: ClassTag[A]
+ ): TypedColumn[T, A] = {
val expr = {
val field: H = i1(i0.to(value))
val v: V = i6.head(i4(field))
@@ -122,26 +121,25 @@ package object functions extends Udf with UnaryFunctions {
* @tparam T the row type
*/
def litValue[
- A: IsValueClass,
- T,
- G <: ::[_, HNil],
- H <: ::[_ <: FieldType[_ <: Symbol, _], HNil],
- K <: Symbol,
- V,
- KS <: ::[_ <: Symbol, HNil],
- VS <: HList
- ](value: Option[A]
- )(implicit
- i0: LabelledGeneric.Aux[A, G],
- i1: DropUnitValues.Aux[G, H],
- i2: IsHCons.Aux[H, _ <: FieldType[K, V], HNil],
- i3: Keys.Aux[H, KS],
- i4: Values.Aux[H, VS],
- i5: IsHCons.Aux[KS, K, HNil],
- i6: IsHCons.Aux[VS, V, HNil],
- i7: TypedEncoder[V],
- i8: ClassTag[A]
- ): TypedColumn[T, Option[A]] = {
+ A: IsValueClass,
+ T,
+ G <: ::[_, HNil],
+ H <: ::[_ <: FieldType[_ <: Symbol, _], HNil],
+ K <: Symbol,
+ V,
+ KS <: ::[_ <: Symbol, HNil],
+ VS <: HList
+ ](value: Option[A])(implicit
+ i0: LabelledGeneric.Aux[A, G],
+ i1: DropUnitValues.Aux[G, H],
+ i2: IsHCons.Aux[H, _ <: FieldType[K, V], HNil],
+ i3: Keys.Aux[H, KS],
+ i4: Values.Aux[H, VS],
+ i5: IsHCons.Aux[KS, K, HNil],
+ i6: IsHCons.Aux[VS, V, HNil],
+ i7: TypedEncoder[V],
+ i8: ClassTag[A]
+ ): TypedColumn[T, Option[A]] = {
val expr = value match {
case Some(some) => {
val field: H = i1(i0.to(some))
diff --git a/dataset/src/main/scala/frameless/ops/AggregateTypes.scala b/dataset/src/main/scala/frameless/ops/AggregateTypes.scala
index 403c25301..225e88647 100644
--- a/dataset/src/main/scala/frameless/ops/AggregateTypes.scala
+++ b/dataset/src/main/scala/frameless/ops/AggregateTypes.scala
@@ -17,12 +17,12 @@ trait AggregateTypes[V, U <: HList] {
}
object AggregateTypes {
- type Aux[V, U <: HList, Out0 <: HList] = AggregateTypes[V, U] {type Out = Out0}
+ type Aux[V, U <: HList, Out0 <: HList] = AggregateTypes[V, U] { type Out = Out0 }
implicit def deriveHNil[T]: AggregateTypes.Aux[T, HNil, HNil] = new AggregateTypes[T, HNil] { type Out = HNil }
implicit def deriveCons1[T, H, TT <: HList, V <: HList](
implicit tail: AggregateTypes.Aux[T, TT, V]
): AggregateTypes.Aux[T, TypedAggregate[T, H] :: TT, H :: V] =
- new AggregateTypes[T, TypedAggregate[T, H] :: TT] {type Out = H :: V}
+ new AggregateTypes[T, TypedAggregate[T, H] :: TT] { type Out = H :: V }
}
diff --git a/dataset/src/main/scala/frameless/ops/As.scala b/dataset/src/main/scala/frameless/ops/As.scala
index 06b691028..04a49efc4 100644
--- a/dataset/src/main/scala/frameless/ops/As.scala
+++ b/dataset/src/main/scala/frameless/ops/As.scala
@@ -12,11 +12,10 @@ object As extends LowPriorityAs {
implicit def equivIdentity[A] = new Equiv[A, A]
- implicit def deriveAs[A, B]
- (implicit
- i0: TypedEncoder[B],
- i1: Equiv[A, B]
- ): As[A, B] = new As[A, B]
+ implicit def deriveAs[A, B](implicit
+ i0: TypedEncoder[B],
+ i1: Equiv[A, B]
+ ): As[A, B] = new As[A, B]
}
@@ -24,17 +23,15 @@ trait LowPriorityAs {
import As.Equiv
- implicit def equivHList[AH, AT <: HList, BH, BT <: HList]
- (implicit
- i0: Lazy[Equiv[AH, BH]],
- i1: Equiv[AT, BT]
- ): Equiv[AH :: AT, BH :: BT] = new Equiv[AH :: AT, BH :: BT]
-
- implicit def equivGeneric[A, B, R, S]
- (implicit
- i0: Generic.Aux[A, R],
- i1: Generic.Aux[B, S],
- i2: Lazy[Equiv[R, S]]
- ): Equiv[A, B] = new Equiv[A, B]
+ implicit def equivHList[AH, AT <: HList, BH, BT <: HList](implicit
+ i0: Lazy[Equiv[AH, BH]],
+ i1: Equiv[AT, BT]
+ ): Equiv[AH :: AT, BH :: BT] = new Equiv[AH :: AT, BH :: BT]
+
+ implicit def equivGeneric[A, B, R, S](implicit
+ i0: Generic.Aux[A, R],
+ i1: Generic.Aux[B, S],
+ i2: Lazy[Equiv[R, S]]
+ ): Equiv[A, B] = new Equiv[A, B]
}
diff --git a/dataset/src/main/scala/frameless/ops/ColumnTypes.scala b/dataset/src/main/scala/frameless/ops/ColumnTypes.scala
index e5ae6aea2..4411500da 100644
--- a/dataset/src/main/scala/frameless/ops/ColumnTypes.scala
+++ b/dataset/src/main/scala/frameless/ops/ColumnTypes.scala
@@ -17,12 +17,12 @@ trait ColumnTypes[T, U <: HList] {
}
object ColumnTypes {
- type Aux[T, U <: HList, Out0 <: HList] = ColumnTypes[T, U] {type Out = Out0}
+ type Aux[T, U <: HList, Out0 <: HList] = ColumnTypes[T, U] { type Out = Out0 }
implicit def deriveHNil[T]: ColumnTypes.Aux[T, HNil, HNil] = new ColumnTypes[T, HNil] { type Out = HNil }
implicit def deriveCons[T, H, TT <: HList, V <: HList](
implicit tail: ColumnTypes.Aux[T, TT, V]
): ColumnTypes.Aux[T, TypedColumn[T, H] :: TT, H :: V] =
- new ColumnTypes[T, TypedColumn[T, H] :: TT] {type Out = H :: V}
+ new ColumnTypes[T, TypedColumn[T, H] :: TT] { type Out = H :: V }
}
diff --git a/dataset/src/main/scala/frameless/ops/GroupByOps.scala b/dataset/src/main/scala/frameless/ops/GroupByOps.scala
index e6f51a407..fff67dca0 100644
--- a/dataset/src/main/scala/frameless/ops/GroupByOps.scala
+++ b/dataset/src/main/scala/frameless/ops/GroupByOps.scala
@@ -3,29 +3,14 @@ package ops
import org.apache.spark.sql.catalyst.analysis.UnresolvedAlias
import org.apache.spark.sql.catalyst.plans.logical.Project
-import org.apache.spark.sql.{
- Column,
- Dataset,
- FramelessInternals,
- RelationalGroupedDataset
-}
+import org.apache.spark.sql.{Column, Dataset, FramelessInternals, RelationalGroupedDataset}
import shapeless._
-import shapeless.ops.hlist.{
- Length,
- Mapped,
- Prepend,
- ToList,
- ToTraversable,
- Tupler
-}
+import shapeless.ops.hlist.{Length, Mapped, Prepend, ToList, ToTraversable, Tupler}
class GroupedByManyOps[T, TK <: HList, K <: HList, KT](
- self: TypedDataset[T],
- groupedBy: TK
- )(implicit
- i0: ColumnTypes.Aux[T, TK, K],
- i1: ToTraversable.Aux[TK, List, UntypedExpression[T]],
- i3: Tupler.Aux[K, KT])
+ self: TypedDataset[T],
+ groupedBy: TK
+)(implicit i0: ColumnTypes.Aux[T, TK, K], i1: ToTraversable.Aux[TK, List, UntypedExpression[T]], i3: Tupler.Aux[K, KT])
extends AggregatingOps[T, TK, K, KT](
self,
groupedBy,
@@ -35,24 +20,25 @@ class GroupedByManyOps[T, TK <: HList, K <: HList, KT](
object agg extends ProductArgs {
def applyProduct[TC <: HList, C <: HList, Out0 <: HList, Out1](
- columns: TC
- )(implicit
- i3: AggregateTypes.Aux[T, TC, C],
- i4: Prepend.Aux[K, C, Out0],
- i5: Tupler.Aux[Out0, Out1],
- i6: TypedEncoder[Out1],
- i7: ToTraversable.Aux[TC, List, UntypedExpression[T]]
- ): TypedDataset[Out1] = {
+ columns: TC
+ )(implicit
+ i3: AggregateTypes.Aux[T, TC, C],
+ i4: Prepend.Aux[K, C, Out0],
+ i5: Tupler.Aux[Out0, Out1],
+ i6: TypedEncoder[Out1],
+ i7: ToTraversable.Aux[TC, List, UntypedExpression[T]]
+ ): TypedDataset[Out1] = {
aggregate[TC, Out1](columns)
}
}
}
class GroupedBy1Ops[K1, V](
- self: TypedDataset[V],
- g1: TypedColumn[V, K1]) {
+ self: TypedDataset[V],
+ g1: TypedColumn[V, K1]
+) {
private def underlying = new GroupedByManyOps(self, g1 :: HNil)
- private implicit def eg1 = g1.uencoder
+ implicit private def eg1 = g1.uencoder
def agg[U1](c1: TypedAggregate[V, U1]): TypedDataset[(K1, U1)] = {
implicit val e1 = c1.uencoder
@@ -60,41 +46,41 @@ class GroupedBy1Ops[K1, V](
}
def agg[U1, U2](
- c1: TypedAggregate[V, U1],
- c2: TypedAggregate[V, U2]
- ): TypedDataset[(K1, U1, U2)] = {
+ c1: TypedAggregate[V, U1],
+ c2: TypedAggregate[V, U2]
+ ): TypedDataset[(K1, U1, U2)] = {
implicit val e1 = c1.uencoder; implicit val e2 = c2.uencoder
underlying.agg(c1, c2)
}
def agg[U1, U2, U3](
- c1: TypedAggregate[V, U1],
- c2: TypedAggregate[V, U2],
- c3: TypedAggregate[V, U3]
- ): TypedDataset[(K1, U1, U2, U3)] = {
+ c1: TypedAggregate[V, U1],
+ c2: TypedAggregate[V, U2],
+ c3: TypedAggregate[V, U3]
+ ): TypedDataset[(K1, U1, U2, U3)] = {
implicit val e1 = c1.uencoder; implicit val e2 = c2.uencoder;
implicit val e3 = c3.uencoder
underlying.agg(c1, c2, c3)
}
def agg[U1, U2, U3, U4](
- c1: TypedAggregate[V, U1],
- c2: TypedAggregate[V, U2],
- c3: TypedAggregate[V, U3],
- c4: TypedAggregate[V, U4]
- ): TypedDataset[(K1, U1, U2, U3, U4)] = {
+ c1: TypedAggregate[V, U1],
+ c2: TypedAggregate[V, U2],
+ c3: TypedAggregate[V, U3],
+ c4: TypedAggregate[V, U4]
+ ): TypedDataset[(K1, U1, U2, U3, U4)] = {
implicit val e1 = c1.uencoder; implicit val e2 = c2.uencoder;
implicit val e3 = c3.uencoder; implicit val e4 = c4.uencoder
underlying.agg(c1, c2, c3, c4)
}
def agg[U1, U2, U3, U4, U5](
- c1: TypedAggregate[V, U1],
- c2: TypedAggregate[V, U2],
- c3: TypedAggregate[V, U3],
- c4: TypedAggregate[V, U4],
- c5: TypedAggregate[V, U5]
- ): TypedDataset[(K1, U1, U2, U3, U4, U5)] = {
+ c1: TypedAggregate[V, U1],
+ c2: TypedAggregate[V, U2],
+ c3: TypedAggregate[V, U3],
+ c4: TypedAggregate[V, U4],
+ c5: TypedAggregate[V, U5]
+ ): TypedDataset[(K1, U1, U2, U3, U4, U5)] = {
implicit val e1 = c1.uencoder; implicit val e2 = c2.uencoder;
implicit val e3 = c3.uencoder; implicit val e4 = c4.uencoder;
implicit val e5 = c5.uencoder
@@ -108,31 +94,32 @@ class GroupedBy1Ops[K1, V](
object deserialized {
def mapGroups[U: TypedEncoder](
- f: (K1, Iterator[V]) => U
- ): TypedDataset[U] = {
+ f: (K1, Iterator[V]) => U
+ ): TypedDataset[U] = {
underlying.deserialized.mapGroups(AggregatingOps.tuple1(f))
}
def flatMapGroups[U: TypedEncoder](
- f: (K1, Iterator[V]) => TraversableOnce[U]
- ): TypedDataset[U] = {
+ f: (K1, Iterator[V]) => TraversableOnce[U]
+ ): TypedDataset[U] = {
underlying.deserialized.flatMapGroups(AggregatingOps.tuple1(f))
}
}
def pivot[P: CatalystPivotable](
- pivotColumn: TypedColumn[V, P]
- ): PivotNotValues[V, TypedColumn[V, K1] :: HNil, P] =
+ pivotColumn: TypedColumn[V, P]
+ ): PivotNotValues[V, TypedColumn[V, K1] :: HNil, P] =
PivotNotValues(self, g1 :: HNil, pivotColumn)
}
class GroupedBy2Ops[K1, K2, V](
- self: TypedDataset[V],
- g1: TypedColumn[V, K1],
- g2: TypedColumn[V, K2]) {
+ self: TypedDataset[V],
+ g1: TypedColumn[V, K1],
+ g2: TypedColumn[V, K2]
+) {
private def underlying = new GroupedByManyOps(self, g1 :: g2 :: HNil)
- private implicit def eg1 = g1.uencoder
- private implicit def eg2 = g2.uencoder
+ implicit private def eg1 = g1.uencoder
+ implicit private def eg2 = g2.uencoder
def agg[U1](c1: TypedAggregate[V, U1]): TypedDataset[(K1, K2, U1)] = {
implicit val e1 = c1.uencoder
@@ -140,41 +127,41 @@ class GroupedBy2Ops[K1, K2, V](
}
def agg[U1, U2](
- c1: TypedAggregate[V, U1],
- c2: TypedAggregate[V, U2]
- ): TypedDataset[(K1, K2, U1, U2)] = {
+ c1: TypedAggregate[V, U1],
+ c2: TypedAggregate[V, U2]
+ ): TypedDataset[(K1, K2, U1, U2)] = {
implicit val e1 = c1.uencoder; implicit val e2 = c2.uencoder
underlying.agg(c1, c2)
}
def agg[U1, U2, U3](
- c1: TypedAggregate[V, U1],
- c2: TypedAggregate[V, U2],
- c3: TypedAggregate[V, U3]
- ): TypedDataset[(K1, K2, U1, U2, U3)] = {
+ c1: TypedAggregate[V, U1],
+ c2: TypedAggregate[V, U2],
+ c3: TypedAggregate[V, U3]
+ ): TypedDataset[(K1, K2, U1, U2, U3)] = {
implicit val e1 = c1.uencoder; implicit val e2 = c2.uencoder;
implicit val e3 = c3.uencoder
underlying.agg(c1, c2, c3)
}
def agg[U1, U2, U3, U4](
- c1: TypedAggregate[V, U1],
- c2: TypedAggregate[V, U2],
- c3: TypedAggregate[V, U3],
- c4: TypedAggregate[V, U4]
- ): TypedDataset[(K1, K2, U1, U2, U3, U4)] = {
+ c1: TypedAggregate[V, U1],
+ c2: TypedAggregate[V, U2],
+ c3: TypedAggregate[V, U3],
+ c4: TypedAggregate[V, U4]
+ ): TypedDataset[(K1, K2, U1, U2, U3, U4)] = {
implicit val e1 = c1.uencoder; implicit val e2 = c2.uencoder;
implicit val e3 = c3.uencoder; implicit val e4 = c4.uencoder
underlying.agg(c1, c2, c3, c4)
}
def agg[U1, U2, U3, U4, U5](
- c1: TypedAggregate[V, U1],
- c2: TypedAggregate[V, U2],
- c3: TypedAggregate[V, U3],
- c4: TypedAggregate[V, U4],
- c5: TypedAggregate[V, U5]
- ): TypedDataset[(K1, K2, U1, U2, U3, U4, U5)] = {
+ c1: TypedAggregate[V, U1],
+ c2: TypedAggregate[V, U2],
+ c3: TypedAggregate[V, U3],
+ c4: TypedAggregate[V, U4],
+ c5: TypedAggregate[V, U5]
+ ): TypedDataset[(K1, K2, U1, U2, U3, U4, U5)] = {
implicit val e1 = c1.uencoder; implicit val e2 = c2.uencoder;
implicit val e3 = c3.uencoder; implicit val e4 = c4.uencoder;
implicit val e5 = c5.uencoder
@@ -188,39 +175,36 @@ class GroupedBy2Ops[K1, K2, V](
object deserialized {
def mapGroups[U: TypedEncoder](
- f: ((K1, K2), Iterator[V]) => U
- ): TypedDataset[U] = {
+ f: ((K1, K2), Iterator[V]) => U
+ ): TypedDataset[U] = {
underlying.deserialized.mapGroups(f)
}
def flatMapGroups[U: TypedEncoder](
- f: ((K1, K2), Iterator[V]) => TraversableOnce[U]
- ): TypedDataset[U] = {
+ f: ((K1, K2), Iterator[V]) => TraversableOnce[U]
+ ): TypedDataset[U] = {
underlying.deserialized.flatMapGroups(f)
}
}
def pivot[P: CatalystPivotable](
- pivotColumn: TypedColumn[V, P]
- ): PivotNotValues[V, TypedColumn[V, K1] :: TypedColumn[V, K2] :: HNil, P] =
+ pivotColumn: TypedColumn[V, P]
+ ): PivotNotValues[V, TypedColumn[V, K1] :: TypedColumn[V, K2] :: HNil, P] =
PivotNotValues(self, g1 :: g2 :: HNil, pivotColumn)
}
-private[ops] abstract class AggregatingOps[T, TK <: HList, K <: HList, KT](
- self: TypedDataset[T],
- groupedBy: TK,
- groupingFunc: (Dataset[T], Seq[Column]) => RelationalGroupedDataset
- )(implicit
- i0: ColumnTypes.Aux[T, TK, K],
- i1: ToTraversable.Aux[TK, List, UntypedExpression[T]],
- i2: Tupler.Aux[K, KT]) {
+abstract private[ops] class AggregatingOps[T, TK <: HList, K <: HList, KT](
+ self: TypedDataset[T],
+ groupedBy: TK,
+ groupingFunc: (Dataset[T], Seq[Column]) => RelationalGroupedDataset
+)(implicit i0: ColumnTypes.Aux[T, TK, K], i1: ToTraversable.Aux[TK, List, UntypedExpression[T]], i2: Tupler.Aux[K, KT]) {
def aggregate[TC <: HList, Out1](
- columns: TC
- )(implicit
- i7: TypedEncoder[Out1],
- i8: ToTraversable.Aux[TC, List, UntypedExpression[T]]
- ): TypedDataset[Out1] = {
+ columns: TC
+ )(implicit
+ i7: TypedEncoder[Out1],
+ i8: ToTraversable.Aux[TC, List, UntypedExpression[T]]
+ ): TypedDataset[Out1] = {
def expr(c: UntypedExpression[T]): Column =
FramelessInternals.column(c.expr)
@@ -244,19 +228,19 @@ private[ops] abstract class AggregatingOps[T, TK <: HList, K <: HList, KT](
object deserialized {
def mapGroups[U: TypedEncoder](
- f: (KT, Iterator[T]) => U
- )(implicit
- e: TypedEncoder[KT]
- ): TypedDataset[U] = {
+ f: (KT, Iterator[T]) => U
+ )(implicit
+ e: TypedEncoder[KT]
+ ): TypedDataset[U] = {
val func = (key: KT, it: Iterator[T]) => Iterator(f(key, it))
flatMapGroups(func)
}
def flatMapGroups[U: TypedEncoder](
- f: (KT, Iterator[T]) => TraversableOnce[U]
- )(implicit
- e: TypedEncoder[KT]
- ): TypedDataset[U] = {
+ f: (KT, Iterator[T]) => TraversableOnce[U]
+ )(implicit
+ e: TypedEncoder[KT]
+ ): TypedDataset[U] = {
implicit val tendcoder = self.encoder
val cols = groupedBy.toList[UntypedExpression[T]]
@@ -296,8 +280,8 @@ private[ops] abstract class AggregatingOps[T, TK <: HList, K <: HList, KT](
}
def pivot[P: CatalystPivotable](
- pivotColumn: TypedColumn[T, P]
- ): PivotNotValues[T, TK, P] =
+ pivotColumn: TypedColumn[T, P]
+ ): PivotNotValues[T, TK, P] =
PivotNotValues(self, groupedBy, pivotColumn)
}
@@ -305,8 +289,8 @@ private[ops] object AggregatingOps {
/** Utility function to help Spark with serialization of closures */
def tuple1[K1, V, U](
- f: (K1, Iterator[V]) => U
- ): (Tuple1[K1], Iterator[V]) => U = { (x: Tuple1[K1], it: Iterator[V]) =>
+ f: (K1, Iterator[V]) => U
+ ): (Tuple1[K1], Iterator[V]) => U = { (x: Tuple1[K1], it: Iterator[V]) =>
f(x._1, it)
}
}
@@ -315,37 +299,37 @@ private[ops] object AggregatingOps {
* Represents a typed Pivot operation.
*/
final case class Pivot[T, GroupedColumns <: HList, PivotType, Values <: HList](
- ds: TypedDataset[T],
- groupedBy: GroupedColumns,
- pivotedBy: TypedColumn[T, PivotType],
- values: Values) {
+ ds: TypedDataset[T],
+ groupedBy: GroupedColumns,
+ pivotedBy: TypedColumn[T, PivotType],
+ values: Values
+) {
object agg extends ProductArgs {
def applyProduct[
- AggrColumns <: HList,
- AggrColumnTypes <: HList,
- GroupedColumnTypes <: HList,
- NumValues <: Nat,
- TypesForPivotedValues <: HList,
- TypesForPivotedValuesOpt <: HList,
- OutAsHList <: HList,
- Out
- ](aggrColumns: AggrColumns
- )(implicit
- i0: AggregateTypes.Aux[T, AggrColumns, AggrColumnTypes],
- i1: ColumnTypes.Aux[T, GroupedColumns, GroupedColumnTypes],
- i2: Length.Aux[Values, NumValues],
- i3: Repeat.Aux[AggrColumnTypes, NumValues, TypesForPivotedValues],
- i4: Mapped.Aux[TypesForPivotedValues, Option, TypesForPivotedValuesOpt],
- i5: Prepend.Aux[
- GroupedColumnTypes,
- TypesForPivotedValuesOpt,
- OutAsHList
- ],
- i6: Tupler.Aux[OutAsHList, Out],
- i7: TypedEncoder[Out]
- ): TypedDataset[Out] = {
+ AggrColumns <: HList,
+ AggrColumnTypes <: HList,
+ GroupedColumnTypes <: HList,
+ NumValues <: Nat,
+ TypesForPivotedValues <: HList,
+ TypesForPivotedValuesOpt <: HList,
+ OutAsHList <: HList,
+ Out
+ ](aggrColumns: AggrColumns)(implicit
+ i0: AggregateTypes.Aux[T, AggrColumns, AggrColumnTypes],
+ i1: ColumnTypes.Aux[T, GroupedColumns, GroupedColumnTypes],
+ i2: Length.Aux[Values, NumValues],
+ i3: Repeat.Aux[AggrColumnTypes, NumValues, TypesForPivotedValues],
+ i4: Mapped.Aux[TypesForPivotedValues, Option, TypesForPivotedValuesOpt],
+ i5: Prepend.Aux[
+ GroupedColumnTypes,
+ TypesForPivotedValuesOpt,
+ OutAsHList
+ ],
+ i6: Tupler.Aux[OutAsHList, Out],
+ i7: TypedEncoder[Out]
+ ): TypedDataset[Out] = {
def mapAny[X](h: HList)(f: Any => X): List[X] =
h match {
case HNil => Nil
@@ -369,18 +353,18 @@ final case class Pivot[T, GroupedColumns <: HList, PivotType, Values <: HList](
}
final case class PivotNotValues[T, GroupedColumns <: HList, PivotType](
- ds: TypedDataset[T],
- groupedBy: GroupedColumns,
- pivotedBy: TypedColumn[T, PivotType])
- extends ProductArgs {
+ ds: TypedDataset[T],
+ groupedBy: GroupedColumns,
+ pivotedBy: TypedColumn[T, PivotType]
+) extends ProductArgs {
def onProduct[Values <: HList](
- values: Values
- )(implicit
- validValues: ToList[
- Values,
- PivotType
- ] // validValues: FilterNot.Aux[Values, PivotType, HNil] // did not work
- ): Pivot[T, GroupedColumns, PivotType, Values] =
+ values: Values
+ )(implicit
+ validValues: ToList[
+ Values,
+ PivotType
+ ] // validValues: FilterNot.Aux[Values, PivotType, HNil] // did not work
+ ): Pivot[T, GroupedColumns, PivotType, Values] =
Pivot(ds, groupedBy, pivotedBy, values)
}
diff --git a/dataset/src/main/scala/frameless/ops/RelationalGroupsOps.scala b/dataset/src/main/scala/frameless/ops/RelationalGroupsOps.scala
index 569407762..b8f683000 100644
--- a/dataset/src/main/scala/frameless/ops/RelationalGroupsOps.scala
+++ b/dataset/src/main/scala/frameless/ops/RelationalGroupsOps.scala
@@ -12,15 +12,18 @@ import shapeless.{::, HList, HNil, ProductArgs}
* @tparam K individual columns' types as HList
* @tparam KT individual columns' types as Tuple
*/
-private[ops] abstract class RelationalGroupsOps[T, TK <: HList, K <: HList, KT]
- (self: TypedDataset[T], groupedBy: TK, groupingFunc: (Dataset[T], Seq[Column]) => RelationalGroupedDataset)
- (implicit
- i0: ColumnTypes.Aux[T, TK, K],
- i1: ToTraversable.Aux[TK, List, UntypedExpression[T]],
- i2: Tupler.Aux[K, KT]
- ) extends AggregatingOps(self, groupedBy, groupingFunc){
+abstract private[ops] class RelationalGroupsOps[T, TK <: HList, K <: HList, KT](
+ self: TypedDataset[T],
+ groupedBy: TK,
+ groupingFunc: (Dataset[T], Seq[Column]) => RelationalGroupedDataset
+)(implicit
+ i0: ColumnTypes.Aux[T, TK, K],
+ i1: ToTraversable.Aux[TK, List, UntypedExpression[T]],
+ i2: Tupler.Aux[K, KT]
+) extends AggregatingOps(self, groupedBy, groupingFunc) {
object agg extends ProductArgs {
+
/**
* @tparam TC resulting columns after aggregation function
* @tparam C individual columns' types as HList
@@ -28,9 +31,7 @@ private[ops] abstract class RelationalGroupsOps[T, TK <: HList, K <: HList, KT]
* @tparam Out0 OptK columns appended to C
* @tparam Out1 output type
*/
- def applyProduct[TC <: HList, C <: HList, OptK <: HList, Out0 <: HList, Out1]
- (columns: TC)
- (implicit
+ def applyProduct[TC <: HList, C <: HList, OptK <: HList, Out0 <: HList, Out1](columns: TC)(implicit
i3: AggregateTypes.Aux[T, TC, C], // shares individual columns' types after agg function as HList
i4: Mapped.Aux[K, Option, OptK], // maps all original columns' types to Option
i5: Prepend.Aux[OptK, C, Out0], // concatenates Option columns with those resulting from applying agg function
@@ -43,9 +44,9 @@ private[ops] abstract class RelationalGroupsOps[T, TK <: HList, K <: HList, KT]
}
}
-private[ops] abstract class RelationalGroups1Ops[K1, V](self: TypedDataset[V], g1: TypedColumn[V, K1]) {
+abstract private[ops] class RelationalGroups1Ops[K1, V](self: TypedDataset[V], g1: TypedColumn[V, K1]) {
protected def underlying: RelationalGroupsOps[V, ::[TypedColumn[V, K1], HNil], ::[K1, HNil], Tuple1[K1]]
- private implicit def eg1 = g1.uencoder
+ implicit private def eg1 = g1.uencoder
def agg[U1](c1: TypedAggregate[V, U1]): TypedDataset[(Option[K1], U1)] = {
implicit val e1 = c1.uencoder
@@ -62,13 +63,25 @@ private[ops] abstract class RelationalGroups1Ops[K1, V](self: TypedDataset[V], g
underlying.agg(c1, c2, c3)
}
- def agg[U1, U2, U3, U4](c1: TypedAggregate[V, U1], c2: TypedAggregate[V, U2], c3: TypedAggregate[V, U3], c4: TypedAggregate[V, U4]): TypedDataset[(Option[K1], U1, U2, U3, U4)] = {
+ def agg[U1, U2, U3, U4](
+ c1: TypedAggregate[V, U1],
+ c2: TypedAggregate[V, U2],
+ c3: TypedAggregate[V, U3],
+ c4: TypedAggregate[V, U4]
+ ): TypedDataset[(Option[K1], U1, U2, U3, U4)] = {
implicit val e1 = c1.uencoder; implicit val e2 = c2.uencoder; implicit val e3 = c3.uencoder; implicit val e4 = c4.uencoder
underlying.agg(c1, c2, c3, c4)
}
- def agg[U1, U2, U3, U4, U5](c1: TypedAggregate[V, U1], c2: TypedAggregate[V, U2], c3: TypedAggregate[V, U3], c4: TypedAggregate[V, U4], c5: TypedAggregate[V, U5]): TypedDataset[(Option[K1], U1, U2, U3, U4, U5)] = {
- implicit val e1 = c1.uencoder; implicit val e2 = c2.uencoder; implicit val e3 = c3.uencoder; implicit val e4 = c4.uencoder; implicit val e5 = c5.uencoder
+ def agg[U1, U2, U3, U4, U5](
+ c1: TypedAggregate[V, U1],
+ c2: TypedAggregate[V, U2],
+ c3: TypedAggregate[V, U3],
+ c4: TypedAggregate[V, U4],
+ c5: TypedAggregate[V, U5]
+ ): TypedDataset[(Option[K1], U1, U2, U3, U4, U5)] = {
+ implicit val e1 = c1.uencoder; implicit val e2 = c2.uencoder; implicit val e3 = c3.uencoder; implicit val e4 = c4.uencoder;
+ implicit val e5 = c5.uencoder
underlying.agg(c1, c2, c3, c4, c5)
}
@@ -85,14 +98,14 @@ private[ops] abstract class RelationalGroups1Ops[K1, V](self: TypedDataset[V], g
}
}
- def pivot[P: CatalystPivotable](pivotColumn: TypedColumn[V, P]): PivotNotValues[V, TypedColumn[V,K1] :: HNil, P] =
+ def pivot[P: CatalystPivotable](pivotColumn: TypedColumn[V, P]): PivotNotValues[V, TypedColumn[V, K1] :: HNil, P] =
PivotNotValues(self, g1 :: HNil, pivotColumn)
}
-private[ops] abstract class RelationalGroups2Ops[K1, K2, V](self: TypedDataset[V], g1: TypedColumn[V, K1], g2: TypedColumn[V, K2]) {
+abstract private[ops] class RelationalGroups2Ops[K1, K2, V](self: TypedDataset[V], g1: TypedColumn[V, K1], g2: TypedColumn[V, K2]) {
protected def underlying: RelationalGroupsOps[V, ::[TypedColumn[V, K1], ::[TypedColumn[V, K2], HNil]], ::[K1, ::[K2, HNil]], (K1, K2)]
- private implicit def eg1 = g1.uencoder
- private implicit def eg2 = g2.uencoder
+ implicit private def eg1 = g1.uencoder
+ implicit private def eg2 = g2.uencoder
def agg[U1](c1: TypedAggregate[V, U1]): TypedDataset[(Option[K1], Option[K2], U1)] = {
implicit val e1 = c1.uencoder
@@ -104,18 +117,34 @@ private[ops] abstract class RelationalGroups2Ops[K1, K2, V](self: TypedDataset[V
underlying.agg(c1, c2)
}
- def agg[U1, U2, U3](c1: TypedAggregate[V, U1], c2: TypedAggregate[V, U2], c3: TypedAggregate[V, U3]): TypedDataset[(Option[K1], Option[K2], U1, U2, U3)] = {
+ def agg[U1, U2, U3](
+ c1: TypedAggregate[V, U1],
+ c2: TypedAggregate[V, U2],
+ c3: TypedAggregate[V, U3]
+ ): TypedDataset[(Option[K1], Option[K2], U1, U2, U3)] = {
implicit val e1 = c1.uencoder; implicit val e2 = c2.uencoder; implicit val e3 = c3.uencoder
underlying.agg(c1, c2, c3)
}
- def agg[U1, U2, U3, U4](c1: TypedAggregate[V, U1], c2: TypedAggregate[V, U2], c3: TypedAggregate[V, U3], c4: TypedAggregate[V, U4]): TypedDataset[(Option[K1], Option[K2], U1, U2, U3, U4)] = {
+ def agg[U1, U2, U3, U4](
+ c1: TypedAggregate[V, U1],
+ c2: TypedAggregate[V, U2],
+ c3: TypedAggregate[V, U3],
+ c4: TypedAggregate[V, U4]
+ ): TypedDataset[(Option[K1], Option[K2], U1, U2, U3, U4)] = {
implicit val e1 = c1.uencoder; implicit val e2 = c2.uencoder; implicit val e3 = c3.uencoder; implicit val e4 = c4.uencoder
- underlying.agg(c1 , c2 , c3 , c4)
+ underlying.agg(c1, c2, c3, c4)
}
- def agg[U1, U2, U3, U4, U5](c1: TypedAggregate[V, U1], c2: TypedAggregate[V, U2], c3: TypedAggregate[V, U3], c4: TypedAggregate[V, U4], c5: TypedAggregate[V, U5]): TypedDataset[(Option[K1], Option[K2], U1, U2, U3, U4, U5)] = {
- implicit val e1 = c1.uencoder; implicit val e2 = c2.uencoder; implicit val e3 = c3.uencoder; implicit val e4 = c4.uencoder; implicit val e5 = c5.uencoder
+ def agg[U1, U2, U3, U4, U5](
+ c1: TypedAggregate[V, U1],
+ c2: TypedAggregate[V, U2],
+ c3: TypedAggregate[V, U3],
+ c4: TypedAggregate[V, U4],
+ c5: TypedAggregate[V, U5]
+ ): TypedDataset[(Option[K1], Option[K2], U1, U2, U3, U4, U5)] = {
+ implicit val e1 = c1.uencoder; implicit val e2 = c2.uencoder; implicit val e3 = c3.uencoder; implicit val e4 = c4.uencoder;
+ implicit val e5 = c5.uencoder
underlying.agg(c1, c2, c3, c4, c5)
}
@@ -132,17 +161,15 @@ private[ops] abstract class RelationalGroups2Ops[K1, K2, V](self: TypedDataset[V
}
}
- def pivot[P: CatalystPivotable](pivotColumn: TypedColumn[V, P]):
- PivotNotValues[V, TypedColumn[V,K1] :: TypedColumn[V, K2] :: HNil, P] =
+ def pivot[P: CatalystPivotable](pivotColumn: TypedColumn[V, P]): PivotNotValues[V, TypedColumn[V, K1] :: TypedColumn[V, K2] :: HNil, P] =
PivotNotValues(self, g1 :: g2 :: HNil, pivotColumn)
}
-class RollupManyOps[T, TK <: HList, K <: HList, KT](self: TypedDataset[T], groupedBy: TK)
- (implicit
- i0: ColumnTypes.Aux[T, TK, K],
- i1: ToTraversable.Aux[TK, List, UntypedExpression[T]],
- i2: Tupler.Aux[K, KT]
- ) extends RelationalGroupsOps[T, TK, K, KT](self, groupedBy, (dataset, cols) => dataset.rollup(cols: _*))
+class RollupManyOps[T, TK <: HList, K <: HList, KT](self: TypedDataset[T], groupedBy: TK)(implicit
+ i0: ColumnTypes.Aux[T, TK, K],
+ i1: ToTraversable.Aux[TK, List, UntypedExpression[T]],
+ i2: Tupler.Aux[K, KT]
+) extends RelationalGroupsOps[T, TK, K, KT](self, groupedBy, (dataset, cols) => dataset.rollup(cols: _*))
class Rollup1Ops[K1, V](self: TypedDataset[V], g1: TypedColumn[V, K1]) extends RelationalGroups1Ops(self, g1) {
override protected def underlying = new RollupManyOps(self, g1 :: HNil)
@@ -152,12 +179,11 @@ class Rollup2Ops[K1, K2, V](self: TypedDataset[V], g1: TypedColumn[V, K1], g2: T
override protected def underlying = new RollupManyOps(self, g1 :: g2 :: HNil)
}
-class CubeManyOps[T, TK <: HList, K <: HList, KT](self: TypedDataset[T], groupedBy: TK)
- (implicit
- i0: ColumnTypes.Aux[T, TK, K],
- i1: ToTraversable.Aux[TK, List, UntypedExpression[T]],
- i2: Tupler.Aux[K, KT]
- ) extends RelationalGroupsOps[T, TK, K, KT](self, groupedBy, (dataset, cols) => dataset.cube(cols: _*))
+class CubeManyOps[T, TK <: HList, K <: HList, KT](self: TypedDataset[T], groupedBy: TK)(implicit
+ i0: ColumnTypes.Aux[T, TK, K],
+ i1: ToTraversable.Aux[TK, List, UntypedExpression[T]],
+ i2: Tupler.Aux[K, KT]
+) extends RelationalGroupsOps[T, TK, K, KT](self, groupedBy, (dataset, cols) => dataset.cube(cols: _*))
class Cube1Ops[K1, V](self: TypedDataset[V], g1: TypedColumn[V, K1]) extends RelationalGroups1Ops(self, g1) {
override protected def underlying = new CubeManyOps(self, g1 :: HNil)
diff --git a/dataset/src/main/scala/frameless/ops/Repeat.scala b/dataset/src/main/scala/frameless/ops/Repeat.scala
index bde855500..5caad2958 100644
--- a/dataset/src/main/scala/frameless/ops/Repeat.scala
+++ b/dataset/src/main/scala/frameless/ops/Repeat.scala
@@ -23,11 +23,10 @@ object Repeat {
type Out = L
}
- implicit def succ[L <: HList, Prev <: Nat, PrevOut <: HList, P <: HList]
- (implicit
- i0: Aux[L, Prev, PrevOut],
- i1: Prepend.Aux[L, PrevOut, P]
- ): Aux[L, Succ[Prev], P] = new Repeat[L, Succ[Prev]] {
- type Out = P
- }
+ implicit def succ[L <: HList, Prev <: Nat, PrevOut <: HList, P <: HList](implicit
+ i0: Aux[L, Prev, PrevOut],
+ i1: Prepend.Aux[L, PrevOut, P]
+ ): Aux[L, Succ[Prev], P] = new Repeat[L, Succ[Prev]] {
+ type Out = P
+ }
}
diff --git a/dataset/src/main/scala/frameless/ops/SmartProject.scala b/dataset/src/main/scala/frameless/ops/SmartProject.scala
index ec3628efd..d86f01aaa 100644
--- a/dataset/src/main/scala/frameless/ops/SmartProject.scala
+++ b/dataset/src/main/scala/frameless/ops/SmartProject.scala
@@ -11,6 +11,7 @@ import scala.annotation.implicitNotFound
case class SmartProject[T: TypedEncoder, U: TypedEncoder](apply: TypedDataset[T] => TypedDataset[U])
object SmartProject {
+
/**
* Proofs that there is a type-safe projection from a type T to another type U. It requires that:
* (a) both T and U are Products for which a LabelledGeneric can be derived (e.g., case classes),
@@ -32,17 +33,16 @@ object SmartProject {
* @tparam UKeys the keys of U as an HList
* @return a projection if it exists
*/
- implicit def deriveProduct[T: TypedEncoder, U: TypedEncoder, TRec <: HList, TProj <: HList, URec <: HList, UVals <: HList, UKeys <: HList]
- (implicit
- i0: LabelledGeneric.Aux[T, TRec],
- i1: LabelledGeneric.Aux[U, URec],
- i2: Keys.Aux[URec, UKeys],
- i3: SelectAll.Aux[TRec, UKeys, TProj],
- i4: Values.Aux[URec, UVals],
- i5: UVals =:= TProj,
- i6: ToTraversable.Aux[UKeys, Seq, Symbol]
- ): SmartProject[T,U] = SmartProject[T, U]({ from =>
- val names = implicitly[Keys.Aux[URec, UKeys]].apply().to[Seq].map(_.name).map(from.dataset.col)
- TypedDataset.create(from.dataset.toDF().select(names: _*).as[U](TypedExpressionEncoder[U]))
- })
+ implicit def deriveProduct[T: TypedEncoder, U: TypedEncoder, TRec <: HList, TProj <: HList, URec <: HList, UVals <: HList, UKeys <: HList](implicit
+ i0: LabelledGeneric.Aux[T, TRec],
+ i1: LabelledGeneric.Aux[U, URec],
+ i2: Keys.Aux[URec, UKeys],
+ i3: SelectAll.Aux[TRec, UKeys, TProj],
+ i4: Values.Aux[URec, UVals],
+ i5: UVals =:= TProj,
+ i6: ToTraversable.Aux[UKeys, Seq, Symbol]
+ ): SmartProject[T, U] = SmartProject[T, U] { from =>
+ val names = implicitly[Keys.Aux[URec, UKeys]].apply().to[Seq].map(_.name).map(from.dataset.col)
+ TypedDataset.create(from.dataset.toDF().select(names: _*).as[U](TypedExpressionEncoder[U]))
+ }
}
diff --git a/dataset/src/main/scala/org/apache/spark/sql/reflection/package.scala b/dataset/src/main/scala/org/apache/spark/sql/reflection/package.scala
index 07090a8db..8bc651c65 100644
--- a/dataset/src/main/scala/org/apache/spark/sql/reflection/package.scala
+++ b/dataset/src/main/scala/org/apache/spark/sql/reflection/package.scala
@@ -1,10 +1,6 @@
package org.apache.spark.sql
-import org.apache.spark.sql.catalyst.ScalaReflection.{
- cleanUpReflectionObjects,
- getClassFromType,
- localTypeOf
-}
+import org.apache.spark.sql.catalyst.ScalaReflection.{cleanUpReflectionObjects, getClassFromType, localTypeOf}
import org.apache.spark.sql.types.{
BinaryType,
BooleanType,
@@ -70,15 +66,15 @@ package object reflection {
private def dataTypeFor(tpe: `Type`): DataType = cleanUpReflectionObjects {
tpe.dealias match {
- case t if isSubtype(t, definitions.NullTpe) => NullType
- case t if isSubtype(t, definitions.IntTpe) => IntegerType
- case t if isSubtype(t, definitions.LongTpe) => LongType
- case t if isSubtype(t, definitions.DoubleTpe) => DoubleType
- case t if isSubtype(t, definitions.FloatTpe) => FloatType
- case t if isSubtype(t, definitions.ShortTpe) => ShortType
- case t if isSubtype(t, definitions.ByteTpe) => ByteType
- case t if isSubtype(t, definitions.BooleanTpe) => BooleanType
- case t if isSubtype(t, localTypeOf[Array[Byte]]) => BinaryType
+ case t if isSubtype(t, definitions.NullTpe) => NullType
+ case t if isSubtype(t, definitions.IntTpe) => IntegerType
+ case t if isSubtype(t, definitions.LongTpe) => LongType
+ case t if isSubtype(t, definitions.DoubleTpe) => DoubleType
+ case t if isSubtype(t, definitions.FloatTpe) => FloatType
+ case t if isSubtype(t, definitions.ShortTpe) => ShortType
+ case t if isSubtype(t, definitions.ByteTpe) => ByteType
+ case t if isSubtype(t, definitions.BooleanTpe) => BooleanType
+ case t if isSubtype(t, localTypeOf[Array[Byte]]) => BinaryType
case t if isSubtype(t, localTypeOf[CalendarInterval]) =>
CalendarIntervalType
case t if isSubtype(t, localTypeOf[Decimal]) => DecimalType.SYSTEM_DEFAULT
diff --git a/dataset/src/main/spark-3.4+/org/apache/spark/sql/FramelessInternals.scala b/dataset/src/main/spark-3.4+/org/apache/spark/sql/FramelessInternals.scala
index 3022bf23c..79172360d 100644
--- a/dataset/src/main/spark-3.4+/org/apache/spark/sql/FramelessInternals.scala
+++ b/dataset/src/main/spark-3.4+/org/apache/spark/sql/FramelessInternals.scala
@@ -2,12 +2,12 @@ package org.apache.spark.sql
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.codegen._
-import org.apache.spark.sql.catalyst.expressions.{ Alias, CreateStruct }
-import org.apache.spark.sql.catalyst.expressions.{ Expression, NamedExpression }
+import org.apache.spark.sql.catalyst.expressions.{Alias, CreateStruct}
+import org.apache.spark.sql.catalyst.expressions.{Expression, NamedExpression}
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.plans.logical.{ LogicalPlan, Project }
+import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project}
import org.apache.spark.sql.execution.QueryExecution
import org.apache.spark.sql.types._
import org.apache.spark.sql.types.ObjectType
@@ -16,9 +16,8 @@ import scala.reflect.ClassTag
object FramelessInternals {
def objectTypeFor[A](
- implicit
- classTag: ClassTag[A]
- ): ObjectType = ObjectType(classTag.runtimeClass)
+ implicit classTag: ClassTag[A]
+ ): ObjectType = ObjectType(classTag.runtimeClass)
def resolveExpr(ds: Dataset[_], colNames: Seq[String]): NamedExpression = {
ds.toDF()
@@ -49,11 +48,11 @@ object FramelessInternals {
ds.sqlContext.getConf(key, default)
def joinPlan(
- ds: Dataset[_],
- plan: LogicalPlan,
- leftPlan: LogicalPlan,
- rightPlan: LogicalPlan
- ): LogicalPlan = {
+ ds: Dataset[_],
+ plan: LogicalPlan,
+ leftPlan: LogicalPlan,
+ rightPlan: LogicalPlan
+ ): LogicalPlan = {
val joined = executePlan(ds, plan)
val leftOutput = joined.analyzed.output.take(leftPlan.output.length)
val rightOutput = joined.analyzed.output.takeRight(rightPlan.output.length)
@@ -68,10 +67,10 @@ object FramelessInternals {
}
def mkDataset[T](
- source: Dataset[_],
- plan: LogicalPlan,
- encoder: Encoder[T]
- ): Dataset[T] =
+ source: Dataset[_],
+ plan: LogicalPlan,
+ encoder: Encoder[T]
+ ): Dataset[T] =
new Dataset(source.sparkSession, plan, encoder)
def ofRows(sparkSession: SparkSession, logicalPlan: LogicalPlan): DataFrame =
@@ -79,10 +78,10 @@ object FramelessInternals {
/** Builds an `ExpressionEncoder` from frameless' own serializer/deserializer expressions. */
def expressionEncoder[T](
- objSerializer: Expression,
- objDeserializer: Expression,
- classTag: ClassTag[T]
- ): ExpressionEncoder[T] =
+ objSerializer: Expression,
+ objDeserializer: Expression,
+ classTag: ClassTag[T]
+ ): ExpressionEncoder[T] =
new ExpressionEncoder[T](objSerializer, objDeserializer, classTag)
// because org.apache.spark.sql.types.UserDefinedType is private[spark]
@@ -104,8 +103,8 @@ object FramelessInternals {
tagged.genCode(ctx)
protected def withNewChildrenInternal(
- newChildren: IndexedSeq[Expression]
- ): Expression = copy(newChildren.head)
+ newChildren: IndexedSeq[Expression]
+ ): Expression = copy(newChildren.head)
}
/** Expression to tag columns from the right hand side of join expression. */
@@ -121,7 +120,7 @@ object FramelessInternals {
tagged.genCode(ctx)
protected def withNewChildrenInternal(
- newChildren: IndexedSeq[Expression]
- ): Expression = copy(newChildren.head)
+ newChildren: IndexedSeq[Expression]
+ ): Expression = copy(newChildren.head)
}
}
diff --git a/dataset/src/main/spark-4/frameless/MapGroups.scala b/dataset/src/main/spark-4/frameless/MapGroups.scala
index 25411420b..6a6751bd3 100644
--- a/dataset/src/main/spark-4/frameless/MapGroups.scala
+++ b/dataset/src/main/spark-4/frameless/MapGroups.scala
@@ -2,19 +2,16 @@ package frameless
import org.apache.spark.sql.Encoder
import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.catalyst.plans.logical.{
- LogicalPlan,
- MapGroups => SMapGroups
-}
+import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, MapGroups => SMapGroups}
object MapGroups {
def apply[K: Encoder, T: Encoder, U: Encoder](
- func: (K, Iterator[T]) => TraversableOnce[U],
- groupingAttributes: Seq[Attribute],
- dataAttributes: Seq[Attribute],
- child: LogicalPlan
- ): LogicalPlan =
+ func: (K, Iterator[T]) => TraversableOnce[U],
+ groupingAttributes: Seq[Attribute],
+ dataAttributes: Seq[Attribute],
+ child: LogicalPlan
+ ): LogicalPlan =
SMapGroups(
func,
groupingAttributes,
diff --git a/dataset/src/main/spark-4/org/apache/spark/sql/FramelessInternals.scala b/dataset/src/main/spark-4/org/apache/spark/sql/FramelessInternals.scala
index 6daf2b4e4..850aac9e1 100644
--- a/dataset/src/main/spark-4/org/apache/spark/sql/FramelessInternals.scala
+++ b/dataset/src/main/spark-4/org/apache/spark/sql/FramelessInternals.scala
@@ -2,19 +2,14 @@ package org.apache.spark.sql
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.codegen._
-import org.apache.spark.sql.catalyst.expressions.{ Alias, CreateStruct }
-import org.apache.spark.sql.catalyst.expressions.{ Expression, NamedExpression }
+import org.apache.spark.sql.catalyst.expressions.{Alias, CreateStruct}
+import org.apache.spark.sql.catalyst.expressions.{Expression, NamedExpression}
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.JavaBeanEncoder
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.plans.logical.{ LogicalPlan, Project }
-import org.apache.spark.sql.classic.{
- Dataset => ClassicDataset,
- SparkSession => ClassicSparkSession,
- ExpressionUtils,
- ColumnNodeToExpressionConverter
-}
+import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project}
+import org.apache.spark.sql.classic.{ColumnNodeToExpressionConverter, Dataset => ClassicDataset, ExpressionUtils, SparkSession => ClassicSparkSession}
import org.apache.spark.sql.execution.QueryExecution
import org.apache.spark.sql.types._
import org.apache.spark.sql.types.ObjectType
@@ -32,9 +27,8 @@ import scala.reflect.ClassTag
object FramelessInternals {
def objectTypeFor[A](
- implicit
- classTag: ClassTag[A]
- ): ObjectType = ObjectType(classTag.runtimeClass)
+ implicit classTag: ClassTag[A]
+ ): ObjectType = ObjectType(classTag.runtimeClass)
private def classic(ds: Dataset[_]): ClassicDataset[_] =
ds.asInstanceOf[ClassicDataset[_]]
@@ -76,11 +70,11 @@ object FramelessInternals {
classic(ds).sparkSession.conf.get(key, default)
def joinPlan(
- ds: Dataset[_],
- plan: LogicalPlan,
- leftPlan: LogicalPlan,
- rightPlan: LogicalPlan
- ): LogicalPlan = {
+ ds: Dataset[_],
+ plan: LogicalPlan,
+ leftPlan: LogicalPlan,
+ rightPlan: LogicalPlan
+ ): LogicalPlan = {
val joined = executePlan(ds, plan)
val leftOutput = joined.analyzed.output.take(leftPlan.output.length)
val rightOutput = joined.analyzed.output.takeRight(rightPlan.output.length)
@@ -95,10 +89,10 @@ object FramelessInternals {
}
def mkDataset[T](
- source: Dataset[_],
- plan: LogicalPlan,
- encoder: Encoder[T]
- ): Dataset[T] =
+ source: Dataset[_],
+ plan: LogicalPlan,
+ encoder: Encoder[T]
+ ): Dataset[T] =
new ClassicDataset[T](classic(source).sparkSession, plan, encoder)
def ofRows(sparkSession: SparkSession, logicalPlan: LogicalPlan): DataFrame =
@@ -116,10 +110,10 @@ object FramelessInternals {
* carrying the right `ClassTag` is therefore a correct, metadata-only stand-in.
*/
def expressionEncoder[T](
- objSerializer: Expression,
- objDeserializer: Expression,
- classTag: ClassTag[T]
- ): ExpressionEncoder[T] =
+ objSerializer: Expression,
+ objDeserializer: Expression,
+ classTag: ClassTag[T]
+ ): ExpressionEncoder[T] =
new ExpressionEncoder[T](
JavaBeanEncoder(classTag, Nil),
objSerializer,
@@ -145,8 +139,8 @@ object FramelessInternals {
tagged.genCode(ctx)
protected def withNewChildrenInternal(
- newChildren: IndexedSeq[Expression]
- ): Expression = copy(newChildren.head)
+ newChildren: IndexedSeq[Expression]
+ ): Expression = copy(newChildren.head)
}
/** Expression to tag columns from the right hand side of join expression. */
@@ -162,7 +156,7 @@ object FramelessInternals {
tagged.genCode(ctx)
protected def withNewChildrenInternal(
- newChildren: IndexedSeq[Expression]
- ): Expression = copy(newChildren.head)
+ newChildren: IndexedSeq[Expression]
+ ): Expression = copy(newChildren.head)
}
}
diff --git a/dataset/src/test/scala/frameless/AsTests.scala b/dataset/src/test/scala/frameless/AsTests.scala
index c1091f9ca..08620dcd3 100644
--- a/dataset/src/test/scala/frameless/AsTests.scala
+++ b/dataset/src/test/scala/frameless/AsTests.scala
@@ -12,7 +12,7 @@ class AsTests extends TypedDatasetSuite {
): Prop = {
val dataset = TypedDataset.create(data)
- val dataset2 = dataset.as[X2[A,B]]().collect().run().toVector
+ val dataset2 = dataset.as[X2[A, B]]().collect().run().toVector
val data2 = data.map { case (a, b) => X2(a, b) }
dataset2 ?= data2
@@ -37,7 +37,7 @@ class AsTests extends TypedDatasetSuite {
}
val dataset = TypedDataset.create(data2)
- val dataset2 = dataset.as[X2[X2[A,B], C]]().collect().run().toVector
+ val dataset2 = dataset.as[X2[X2[A, B], C]]().collect().run().toVector
val data3 = data2.map { case ((a, b), c) => X2(X2(a, b), c) }
dataset2 ?= data3
diff --git a/dataset/src/test/scala/frameless/BitwiseTests.scala b/dataset/src/test/scala/frameless/BitwiseTests.scala
index f58c906a2..bb32b3309 100644
--- a/dataset/src/test/scala/frameless/BitwiseTests.scala
+++ b/dataset/src/test/scala/frameless/BitwiseTests.scala
@@ -12,7 +12,7 @@ class BitwiseTests extends TypedDatasetSuite with Matchers {
* for Numeric it is easy to test since scala comes with Numeric typeclass but there seems
* to be no equivalent typeclass for bitwise ops for Byte Short Int and Long types supported in Catalyst
*/
- trait CatalystBitwise4Tests[A]{
+ trait CatalystBitwise4Tests[A] {
def bitwiseAnd(a1: A, a2: A): A
def bitwiseOr(a1: A, a2: A): A
def bitwiseXor(a1: A, a2: A): A
@@ -22,23 +22,23 @@ class BitwiseTests extends TypedDatasetSuite with Matchers {
}
object CatalystBitwise4Tests {
- implicit val framelessbyteBitwise : CatalystBitwise4Tests[Byte] = new CatalystBitwise4Tests[Byte] {
- def bitwiseOr(a1: Byte, a2: Byte) : Byte = (a1 | a2).toByte
+ implicit val framelessbyteBitwise: CatalystBitwise4Tests[Byte] = new CatalystBitwise4Tests[Byte] {
+ def bitwiseOr(a1: Byte, a2: Byte): Byte = (a1 | a2).toByte
def bitwiseAnd(a1: Byte, a2: Byte): Byte = (a1 & a2).toByte
def bitwiseXor(a1: Byte, a2: Byte): Byte = (a1 ^ a2).toByte
}
- implicit val framelessshortBitwise : CatalystBitwise4Tests[Short] = new CatalystBitwise4Tests[Short] {
- def bitwiseOr(a1: Short, a2: Short) : Short = (a1 | a2).toShort
+ implicit val framelessshortBitwise: CatalystBitwise4Tests[Short] = new CatalystBitwise4Tests[Short] {
+ def bitwiseOr(a1: Short, a2: Short): Short = (a1 | a2).toShort
def bitwiseAnd(a1: Short, a2: Short): Short = (a1 & a2).toShort
def bitwiseXor(a1: Short, a2: Short): Short = (a1 ^ a2).toShort
}
- implicit val framelessintBitwise : CatalystBitwise4Tests[Int] = new CatalystBitwise4Tests[Int] {
- def bitwiseOr(a1: Int, a2: Int) : Int = a1 | a2
+ implicit val framelessintBitwise: CatalystBitwise4Tests[Int] = new CatalystBitwise4Tests[Int] {
+ def bitwiseOr(a1: Int, a2: Int): Int = a1 | a2
def bitwiseAnd(a1: Int, a2: Int): Int = a1 & a2
def bitwiseXor(a1: Int, a2: Int): Int = a1 ^ a2
}
- implicit val framelesslongBitwise : CatalystBitwise4Tests[Long] = new CatalystBitwise4Tests[Long] {
- def bitwiseOr(a1: Long, a2: Long) : Long = a1 | a2
+ implicit val framelesslongBitwise: CatalystBitwise4Tests[Long] = new CatalystBitwise4Tests[Long] {
+ def bitwiseOr(a1: Long, a2: Long): Long = a1 | a2
def bitwiseAnd(a1: Long, a2: Long): Long = a1 & a2
def bitwiseXor(a1: Long, a2: Long): Long = a1 ^ a2
}
diff --git a/dataset/src/test/scala/frameless/CastTests.scala b/dataset/src/test/scala/frameless/CastTests.scala
index 5f79f8fa6..cefcce533 100644
--- a/dataset/src/test/scala/frameless/CastTests.scala
+++ b/dataset/src/test/scala/frameless/CastTests.scala
@@ -6,8 +6,7 @@ import org.scalacheck.Prop._
class CastTests extends TypedDatasetSuite {
def prop[A: TypedEncoder, B: TypedEncoder](f: A => B)(a: A)(
- implicit
- cast: CatalystCast[A, B]
+ implicit cast: CatalystCast[A, B]
): Prop = {
val df = TypedDataset.create(X1(a) :: Nil)
val got = df.select(df.col('a).cast[B]).collect().run()
@@ -102,7 +101,7 @@ class CastTests extends TypedDatasetSuite {
// booleanToNumeric
check(prop[Boolean, BigDecimal](x => if (x) BigDecimal(1) else BigDecimal(0)) _)
check(prop[Boolean, Byte](x => if (x) 1 else 0) _)
- check(prop[Boolean, Double](x => if (x) 1.0f else 0.0f) _)
+ check(prop[Boolean, Double](x => if (x) 1.0F else 0.0F) _)
check(prop[Boolean, Int](x => if (x) 1 else 0) _)
check(prop[Boolean, Long](x => if (x) 1L else 0L) _)
check(prop[Boolean, Short](x => if (x) 1 else 0) _)
diff --git a/dataset/src/test/scala/frameless/CollectTests.scala b/dataset/src/test/scala/frameless/CollectTests.scala
index 0ff1e6956..7b4269665 100644
--- a/dataset/src/test/scala/frameless/CollectTests.scala
+++ b/dataset/src/test/scala/frameless/CollectTests.scala
@@ -1,6 +1,6 @@
package frameless
-import frameless.CollectTests.{ prop, propArray }
+import frameless.CollectTests.{prop, propArray}
import org.apache.spark.sql.SparkSession
import org.scalacheck.Prop
import org.scalacheck.Prop._
@@ -85,10 +85,10 @@ class CollectTests extends TypedDatasetSuite {
object CollectTests {
import frameless.syntax._
- def prop[A: TypedEncoder : ClassTag](data: Vector[A])(implicit c: SparkSession): Prop =
+ def prop[A: TypedEncoder: ClassTag](data: Vector[A])(implicit c: SparkSession): Prop =
TypedDataset.create(data).collect().run().toVector ?= data
- def propArray[A: TypedEncoder : ClassTag](data: Vector[X1[Array[A]]])(implicit c: SparkSession): Prop =
+ def propArray[A: TypedEncoder: ClassTag](data: Vector[X1[Array[A]]])(implicit c: SparkSession): Prop =
Prop(TypedDataset.create(data).collect().run().toVector.zip(data).forall {
case (X1(l), X1(r)) => l.sameElements(r)
})
diff --git a/dataset/src/test/scala/frameless/ColumnTests.scala b/dataset/src/test/scala/frameless/ColumnTests.scala
index c56cf499c..baee93718 100644
--- a/dataset/src/test/scala/frameless/ColumnTests.scala
+++ b/dataset/src/test/scala/frameless/ColumnTests.scala
@@ -3,15 +3,15 @@ package frameless
import java.util.Date
import java.math.BigInteger
-import java.time.{ Instant, LocalDate, Period, Duration }
+import java.time.{Duration, Instant, LocalDate, Period}
import java.time.temporal.ChronoUnit
-import java.sql.{ Date => SqlDate, Timestamp }
+import java.sql.{Date => SqlDate, Timestamp}
import scala.math.Ordering.Implicits._
import scala.util.Try
-import org.scalacheck.{ Arbitrary, Gen, Prop }, Arbitrary.arbitrary, Prop._
+import org.scalacheck.{Arbitrary, Gen, Prop}, Arbitrary.arbitrary, Prop._
import org.scalatest.matchers.should.Matchers
@@ -29,7 +29,7 @@ final class ColumnTests extends TypedDatasetSuite with Matchers {
OrderingImplicits.arbInstant.arbitrary.map(Date from _)
}
- private implicit object OrderingImplicits {
+ implicit private object OrderingImplicits {
implicit val sqlDateOrdering: Ordering[SQLDate] = Ordering.by(_.days)
implicit val sqlTimestmapOrdering: Ordering[SQLTimestamp] =
@@ -104,10 +104,10 @@ final class ColumnTests extends TypedDatasetSuite with Matchers {
test("between") {
import OrderingImplicits._
def prop[A: TypedEncoder: CatalystOrdered: Ordering](
- a: A,
- b: A,
- c: A
- ): Prop = {
+ a: A,
+ b: A,
+ c: A
+ ): Prop = {
val dataset = TypedDataset.create(X3(a, b, c) :: Nil)
val A = dataset.col('a)
val B = dataset.col('b)
@@ -537,7 +537,7 @@ final class ColumnTests extends TypedDatasetSuite with Matchers {
}
test("reference Value class so can join on") {
- import RecordEncoderTests.{ Name, Person }
+ import RecordEncoderTests.{Name, Person}
val bar = new Name("bar")
diff --git a/dataset/src/test/scala/frameless/ColumnViaLambdaTests.scala b/dataset/src/test/scala/frameless/ColumnViaLambdaTests.scala
index 0a9c532a6..f9b77ad9a 100644
--- a/dataset/src/test/scala/frameless/ColumnViaLambdaTests.scala
+++ b/dataset/src/test/scala/frameless/ColumnViaLambdaTests.scala
@@ -13,7 +13,8 @@ final class ColumnViaLambdaTests extends TypedDatasetSuite with Matchers {
def ds = {
TypedDataset.create(Seq(
MyClass1(1, "2", MyClass2(3L, MyClass3(7.0D)), Some(MyClass4(true))),
- MyClass1(4, "5", MyClass2(6L, MyClass3(8.0D)), None)))
+ MyClass1(4, "5", MyClass2(6L, MyClass3(8.0D)), None)
+ ))
}
test("col(_.a)") {
diff --git a/dataset/src/test/scala/frameless/CreateTests.scala b/dataset/src/test/scala/frameless/CreateTests.scala
index 4d9b5547d..183828988 100644
--- a/dataset/src/test/scala/frameless/CreateTests.scala
+++ b/dataset/src/test/scala/frameless/CreateTests.scala
@@ -13,10 +13,11 @@ class CreateTests extends TypedDatasetSuite with Matchers {
test("creation using X4 derived DataFrames") {
def prop[
- A: TypedEncoder,
- B: TypedEncoder,
- C: TypedEncoder,
- D: TypedEncoder](data: Vector[X4[A, B, C, D]]): Prop = {
+ A: TypedEncoder,
+ B: TypedEncoder,
+ C: TypedEncoder,
+ D: TypedEncoder
+ ](data: Vector[X4[A, B, C, D]]): Prop = {
val ds = TypedDataset.create(data)
TypedDataset.createUnsafe[X4[A, B, C, D]](ds.toDF()).collect().run() ?= data
}
@@ -29,13 +30,13 @@ class CreateTests extends TypedDatasetSuite with Matchers {
Option[Vector[Food]],
Vector[Vector[X2[Vector[(Person, X1[Char])], Country]]],
X3[Food, Country, String],
- Vector[(Food, Country)]] _))
+ Vector[(Food, Country)]
+ ] _))
}
test("array fields") {
def prop[T: Arbitrary: TypedEncoder: ClassTag] = forAll {
- (d1: Array[T], d2: Array[Option[T]], d3: Array[X1[T]], d4: Array[X1[Option[T]]],
- d5: X1[Array[T]]) =>
+ (d1: Array[T], d2: Array[Option[T]], d3: Array[X1[T]], d4: Array[X1[Option[T]]], d5: X1[Array[T]]) =>
TypedDataset.create(Seq(d1)).collect().run().head.sameElements(d1) &&
TypedDataset.create(Seq(d2)).collect().run().head.sameElements(d2) &&
TypedDataset.create(Seq(d3)).collect().run().head.sameElements(d3) &&
@@ -55,13 +56,12 @@ class CreateTests extends TypedDatasetSuite with Matchers {
test("vector fields") {
def prop[T: Arbitrary: TypedEncoder] = forAll {
- (d1: Vector[T], d2: Vector[Option[T]], d3: Vector[X1[T]], d4: Vector[X1[Option[T]]],
- d5: X1[Vector[T]]) =>
- (TypedDataset.create(Seq(d1)).collect().run().head ?= d1) &&
- (TypedDataset.create(Seq(d2)).collect().run().head ?= d2) &&
- (TypedDataset.create(Seq(d3)).collect().run().head ?= d3) &&
- (TypedDataset.create(Seq(d4)).collect().run().head ?= d4) &&
- (TypedDataset.create(Seq(d5)).collect().run().head ?= d5)
+ (d1: Vector[T], d2: Vector[Option[T]], d3: Vector[X1[T]], d4: Vector[X1[Option[T]]], d5: X1[Vector[T]]) =>
+ (TypedDataset.create(Seq(d1)).collect().run().head ?= d1) &&
+ (TypedDataset.create(Seq(d2)).collect().run().head ?= d2) &&
+ (TypedDataset.create(Seq(d3)).collect().run().head ?= d3) &&
+ (TypedDataset.create(Seq(d4)).collect().run().head ?= d4) &&
+ (TypedDataset.create(Seq(d5)).collect().run().head ?= d5)
}
check(prop[Boolean])
@@ -77,9 +77,8 @@ class CreateTests extends TypedDatasetSuite with Matchers {
test("list fields") {
def prop[T: Arbitrary: TypedEncoder] = forAll {
- (d1: List[T], d2: List[Option[T]], d3: List[X1[T]], d4: List[X1[Option[T]]],
- d5: X1[List[T]]) =>
- (TypedDataset.create(Seq(d1)).collect().run().head ?= d1) &&
+ (d1: List[T], d2: List[Option[T]], d3: List[X1[T]], d4: List[X1[Option[T]]], d5: X1[List[T]]) =>
+ (TypedDataset.create(Seq(d1)).collect().run().head ?= d1) &&
(TypedDataset.create(Seq(d2)).collect().run().head ?= d2) &&
(TypedDataset.create(Seq(d3)).collect().run().head ?= d3) &&
(TypedDataset.create(Seq(d4)).collect().run().head ?= d4) &&
@@ -99,15 +98,13 @@ class CreateTests extends TypedDatasetSuite with Matchers {
test("Map fields (scala.Predef.Map / scala.collection.immutable.Map)") {
def prop[A: Arbitrary: NotCatalystNullable: TypedEncoder, B: Arbitrary: NotCatalystNullable: TypedEncoder] = forAll {
- (d1: Map[A, B], d2: Map[B, A], d3: Map[A, Option[B]],
- d4: Map[A, X1[B]], d5: Map[X1[A], B], d6: Map[X1[A], X1[B]]) =>
-
- (TypedDataset.create(Seq(d1)).collect().run().head ?= d1) &&
- (TypedDataset.create(Seq(d2)).collect().run().head ?= d2) &&
- (TypedDataset.create(Seq(d3)).collect().run().head ?= d3) &&
- (TypedDataset.create(Seq(d4)).collect().run().head ?= d4) &&
- (TypedDataset.create(Seq(d5)).collect().run().head ?= d5) &&
- (TypedDataset.create(Seq(d6)).collect().run().head ?= d6)
+ (d1: Map[A, B], d2: Map[B, A], d3: Map[A, Option[B]], d4: Map[A, X1[B]], d5: Map[X1[A], B], d6: Map[X1[A], X1[B]]) =>
+ (TypedDataset.create(Seq(d1)).collect().run().head ?= d1) &&
+ (TypedDataset.create(Seq(d2)).collect().run().head ?= d2) &&
+ (TypedDataset.create(Seq(d3)).collect().run().head ?= d3) &&
+ (TypedDataset.create(Seq(d4)).collect().run().head ?= d4) &&
+ (TypedDataset.create(Seq(d5)).collect().run().head ?= d5) &&
+ (TypedDataset.create(Seq(d6)).collect().run().head ?= d6)
}
check(prop[String, String])
@@ -127,10 +124,10 @@ class CreateTests extends TypedDatasetSuite with Matchers {
}
test("not aligned columns should throw an exception") {
- val v = Vector(X2(1,2))
+ val v = Vector(X2(1, 2))
val df = TypedDataset.create(v).dataset.toDF()
- a [IllegalStateException] should be thrownBy {
+ a[IllegalStateException] should be thrownBy {
TypedDataset.createUnsafe[X1[Int]](df).show().run()
}
}
@@ -139,13 +136,14 @@ class CreateTests extends TypedDatasetSuite with Matchers {
// e.g. when loading data from partitioned dataset
// the partition columns get appended to the end of the underlying relation
def prop[A: Arbitrary: TypedEncoder, B: Arbitrary: TypedEncoder] = forAll {
- (a1: A, b1: B) => {
- val ds = TypedDataset.create(
- Vector((b1, a1))
- ).dataset.toDF("b", "a").as[X2[A, B]](TypedExpressionEncoder[X2[A, B]])
- TypedDataset.create(ds).collect().run().head ?= X2(a1, b1)
-
- }
+ (a1: A, b1: B) =>
+ {
+ val ds = TypedDataset.create(
+ Vector((b1, a1))
+ ).dataset.toDF("b", "a").as[X2[A, B]](TypedExpressionEncoder[X2[A, B]])
+ TypedDataset.create(ds).collect().run().head ?= X2(a1, b1)
+
+ }
}
check(prop[X1[Double], X1[X1[SQLDate]]])
check(prop[String, Int])
diff --git a/dataset/src/test/scala/frameless/DropTupledTest.scala b/dataset/src/test/scala/frameless/DropTupledTest.scala
index ff0158b91..d23b8a640 100644
--- a/dataset/src/test/scala/frameless/DropTupledTest.scala
+++ b/dataset/src/test/scala/frameless/DropTupledTest.scala
@@ -7,9 +7,9 @@ class DropTupledTest extends TypedDatasetSuite {
test("drop five columns") {
def prop[A: TypedEncoder](value: A): Prop = {
val d5 = TypedDataset.create(X5(value, value, value, value, value) :: Nil)
- val d4 = d5.dropTupled('a) //drops first column
- val d3 = d4.dropTupled('_4) //drops last column
- val d2 = d3.dropTupled('_2) //drops middle column
+ val d4 = d5.dropTupled('a) // drops first column
+ val d3 = d4.dropTupled('_4) // drops last column
+ val d2 = d3.dropTupled('_2) // drops middle column
val d1 = d2.dropTupled('_2)
Tuple1(value) ?= d1.collect().run().head
diff --git a/dataset/src/test/scala/frameless/ExplodeTests.scala b/dataset/src/test/scala/frameless/ExplodeTests.scala
index 3078ceb12..b4c420929 100644
--- a/dataset/src/test/scala/frameless/ExplodeTests.scala
+++ b/dataset/src/test/scala/frameless/ExplodeTests.scala
@@ -9,12 +9,15 @@ import scala.reflect.ClassTag
class ExplodeTests extends TypedDatasetSuite {
test("simple explode test") {
- val ds = TypedDataset.create(Seq((1,Array(1,2))))
- ds.explode('_2): TypedDataset[(Int,Int)]
+ val ds = TypedDataset.create(Seq((1, Array(1, 2))))
+ ds.explode('_2): TypedDataset[(Int, Int)]
}
test("explode on vectors/list/seq") {
- def prop[F[X] <: Traversable[X] : CatalystExplodableCollection, A: TypedEncoder](xs: List[X1[F[A]]])(implicit arb: Arbitrary[F[A]], enc: TypedEncoder[F[A]]): Prop = {
+ def prop[F[X] <: Traversable[X]: CatalystExplodableCollection, A: TypedEncoder](xs: List[X1[F[A]]])(implicit
+ arb: Arbitrary[F[A]],
+ enc: TypedEncoder[F[A]]
+ ): Prop = {
val tds = TypedDataset.create(xs)
val framelessResults = tds.explode('a).collect().run().toVector
@@ -79,7 +82,12 @@ class ExplodeTests extends TypedDatasetSuite {
}
test("explode on maps making sure no key / value naming collision happens") {
- def prop[K: TypedEncoder: ClassTag, V: TypedEncoder: ClassTag, A: TypedEncoder: ClassTag, B: TypedEncoder: ClassTag](xs: List[X3KV[K, V, Map[A, B]]]): Prop = {
+ def prop[
+ K: TypedEncoder: ClassTag,
+ V: TypedEncoder: ClassTag,
+ A: TypedEncoder: ClassTag,
+ B: TypedEncoder: ClassTag
+ ](xs: List[X3KV[K, V, Map[A, B]]]): Prop = {
val tds = TypedDataset.create(xs)
val framelessResults = tds.explodeMap('c).collect().run().toVector
diff --git a/dataset/src/test/scala/frameless/FilterTests.scala b/dataset/src/test/scala/frameless/FilterTests.scala
index 56d5d2ec5..6cd4c2e60 100644
--- a/dataset/src/test/scala/frameless/FilterTests.scala
+++ b/dataset/src/test/scala/frameless/FilterTests.scala
@@ -61,13 +61,13 @@ final class FilterTests extends TypedDatasetSuite with Matchers {
}
test("filter('a =!= 'b") {
- def prop[A: TypedEncoder](elem: A, data: Vector[X2[A,A]]): Prop = {
+ def prop[A: TypedEncoder](elem: A, data: Vector[X2[A, A]]): Prop = {
val dataset = TypedDataset.create(data)
val cA = dataset.col('a)
val cB = dataset.col('b)
val dataset2 = dataset.filter(cA =!= cB).collect().run().toVector
- val data2 = data.filter(x => x.a != x.b )
+ val data2 = data.filter(x => x.a != x.b)
(dataset2 ?= data2).&&(dataset.filter(cA =!= cA).count().run() ?= 0)
}
@@ -104,7 +104,7 @@ final class FilterTests extends TypedDatasetSuite with Matchers {
}
test("Option equality/inequality for columns") {
- def prop[A <: Option[_] : TypedEncoder](a: A, b: A): Prop = {
+ def prop[A <: Option[_]: TypedEncoder](a: A, b: A): Prop = {
val data = X2(a, b) :: X2(a, a) :: Nil
val dataset = TypedDataset.create(data)
val A = dataset.col('a)
@@ -126,7 +126,7 @@ final class FilterTests extends TypedDatasetSuite with Matchers {
}
test("Option equality/inequality for lit") {
- def prop[A <: Option[_] : TypedEncoder](a: A, b: A, cLit: A): Prop = {
+ def prop[A <: Option[_]: TypedEncoder](a: A, b: A, cLit: A): Prop = {
val data = X2(a, b) :: X2(a, cLit) :: Nil
val dataset = TypedDataset.create(data)
val colA = dataset.col('a)
@@ -162,13 +162,15 @@ final class FilterTests extends TypedDatasetSuite with Matchers {
ds.filter(exists).collect().run() shouldEqual Seq(Option(0L) -> Option(1L))
ds.filter(forall).collect().run() shouldEqual Seq(
- Option(0L) -> Option(1L), (None -> None))
+ Option(0L) -> Option(1L),
+ None -> None
+ )
}
test("filter with isin values") {
- def prop[A: TypedEncoder](data: Vector[X1[A]], values: Vector[A])(implicit a : CatalystIsin[A]): Prop = {
+ def prop[A: TypedEncoder](data: Vector[X1[A]], values: Vector[A])(implicit a: CatalystIsin[A]): Prop = {
val ds = TypedDataset.create(data)
- val res = ds.filter(ds('a).isin(values:_*)).collect().run().toVector
+ val res = ds.filter(ds('a).isin(values: _*)).collect().run().toVector
res ?= data.filter(d => values.contains(d.a))
}
diff --git a/dataset/src/test/scala/frameless/FlattenTests.scala b/dataset/src/test/scala/frameless/FlattenTests.scala
index a65e51b8f..a1c49b37e 100644
--- a/dataset/src/test/scala/frameless/FlattenTests.scala
+++ b/dataset/src/test/scala/frameless/FlattenTests.scala
@@ -4,11 +4,10 @@ import org.scalacheck.Prop
import org.scalacheck.Prop.forAll
import org.scalacheck.Prop._
-
class FlattenTests extends TypedDatasetSuite {
test("simple flatten test") {
- val ds: TypedDataset[(Int,Option[Int])] = TypedDataset.create(Seq((1,Option(1))))
- ds.flattenOption('_2): TypedDataset[(Int,Int)]
+ val ds: TypedDataset[(Int, Option[Int])] = TypedDataset.create(Seq((1, Option(1))))
+ ds.flattenOption('_2): TypedDataset[(Int, Int)]
}
test("different Optional types") {
diff --git a/dataset/src/test/scala/frameless/GroupByTests.scala b/dataset/src/test/scala/frameless/GroupByTests.scala
index 7178def30..e117c72be 100644
--- a/dataset/src/test/scala/frameless/GroupByTests.scala
+++ b/dataset/src/test/scala/frameless/GroupByTests.scala
@@ -7,9 +7,9 @@ import org.scalacheck.Prop._
class GroupByTests extends TypedDatasetSuite {
test("groupByMany('a).agg(sum('b))") {
def prop[
- A: TypedEncoder : Ordering,
+ A: TypedEncoder: Ordering,
B: TypedEncoder,
- Out: TypedEncoder : Numeric
+ Out: TypedEncoder: Numeric
](data: List[X2[A, B]])(
implicit
summable: CatalystSummable[B, Out],
@@ -29,9 +29,8 @@ class GroupByTests extends TypedDatasetSuite {
}
test("agg(sum('a))") {
- def prop[A: TypedEncoder : Numeric](data: List[X1[A]])(
- implicit
- summable: CatalystSummable[A, A]
+ def prop[A: TypedEncoder: Numeric](data: List[X1[A]])(
+ implicit summable: CatalystSummable[A, A]
): Prop = {
val dataset = TypedDataset.create(data)
val A = dataset.col[A]('a)
@@ -47,8 +46,8 @@ class GroupByTests extends TypedDatasetSuite {
test("agg(sum('a), sum('b))") {
def prop[
- A: TypedEncoder : Numeric,
- B: TypedEncoder : Numeric
+ A: TypedEncoder: Numeric,
+ B: TypedEncoder: Numeric
](data: List[X2[A, B]])(
implicit
as: CatalystSummable[A, A],
@@ -70,9 +69,9 @@ class GroupByTests extends TypedDatasetSuite {
test("agg(sum('a), sum('b), sum('c))") {
def prop[
- A: TypedEncoder : Numeric,
- B: TypedEncoder : Numeric,
- C: TypedEncoder : Numeric
+ A: TypedEncoder: Numeric,
+ B: TypedEncoder: Numeric,
+ C: TypedEncoder: Numeric
](data: List[X3[A, B, C]])(
implicit
as: CatalystSummable[A, A],
@@ -97,10 +96,10 @@ class GroupByTests extends TypedDatasetSuite {
test("agg(sum('a), sum('b), min('c), max('d))") {
def prop[
- A: TypedEncoder : Numeric,
- B: TypedEncoder : Numeric,
- C: TypedEncoder : Numeric,
- D: TypedEncoder : Numeric
+ A: TypedEncoder: Numeric,
+ B: TypedEncoder: Numeric,
+ C: TypedEncoder: Numeric,
+ D: TypedEncoder: Numeric
](data: List[X4[A, B, C, D]])(
implicit
as: CatalystSummable[A, A],
@@ -117,8 +116,8 @@ class GroupByTests extends TypedDatasetSuite {
val datasetSum = dataset.agg(sum(A), sum(B), min(C), max(D)).collect().run().toVector
val listSumA = data.map(_.a).sum
val listSumB = data.map(_.b).sum
- val listMinC = if(data.isEmpty) implicitly[Numeric[C]].fromInt(0) else data.map(_.c).min
- val listMaxD = if(data.isEmpty) implicitly[Numeric[D]].fromInt(0) else data.map(_.d).max
+ val listMinC = if (data.isEmpty) implicitly[Numeric[C]].fromInt(0) else data.map(_.c).min
+ val listMaxD = if (data.isEmpty) implicitly[Numeric[D]].fromInt(0) else data.map(_.d).max
datasetSum ?= Vector(if (data.isEmpty) null else (listSumA, listSumB, listMinC, listMaxD))
}
@@ -130,9 +129,9 @@ class GroupByTests extends TypedDatasetSuite {
test("groupBy('a).agg(sum('b))") {
def prop[
- A: TypedEncoder : Ordering,
+ A: TypedEncoder: Ordering,
B: TypedEncoder,
- Out: TypedEncoder : Numeric
+ Out: TypedEncoder: Numeric
](data: List[X2[A, B]])(
implicit
summable: CatalystSummable[B, Out],
@@ -153,8 +152,8 @@ class GroupByTests extends TypedDatasetSuite {
test("groupBy('a).mapGroups('a, sum('b))") {
def prop[
- A: TypedEncoder : Ordering,
- B: TypedEncoder : Numeric
+ A: TypedEncoder: Ordering,
+ B: TypedEncoder: Numeric
](data: List[X2[A, B]]): Prop = {
val dataset = TypedDataset.create(data)
val A = dataset.col[A]('a)
@@ -173,11 +172,11 @@ class GroupByTests extends TypedDatasetSuite {
test("groupBy('a).agg(sum('b), sum('c)) to groupBy('a).agg(sum('a), sum('b), sum('a), sum('b), sum('a))") {
def prop[
- A: TypedEncoder : Ordering,
- B: TypedEncoder,
- C: TypedEncoder,
- OutB: TypedEncoder : Numeric,
- OutC: TypedEncoder : Numeric
+ A: TypedEncoder: Ordering,
+ B: TypedEncoder,
+ C: TypedEncoder,
+ OutB: TypedEncoder: Numeric,
+ OutC: TypedEncoder: Numeric
](data: List[X3[A, B, C]])(
implicit
summableB: CatalystSummable[B, OutB],
@@ -229,7 +228,13 @@ class GroupByTests extends TypedDatasetSuite {
.collect().run.toVector.sortBy(_._1)
val scalaSumBCBCB = data.groupBy(_.a).mapValues { xs =>
- (xs.map(_.b).map(widenb).sum, xs.map(_.c).map(widenc).sum, xs.map(_.b).map(widenb).sum, xs.map(_.c).map(widenc).sum, xs.map(_.b).map(widenb).sum)
+ (
+ xs.map(_.b).map(widenb).sum,
+ xs.map(_.c).map(widenc).sum,
+ xs.map(_.b).map(widenb).sum,
+ xs.map(_.c).map(widenc).sum,
+ xs.map(_.b).map(widenb).sum
+ )
}.toVector.map {
case (a, (b1, c1, b2, c2, b3)) => (a, b1, c1, b2, c2, b3)
}.sortBy(_._1)
@@ -245,10 +250,10 @@ class GroupByTests extends TypedDatasetSuite {
test("groupBy('a, 'b).agg(sum('c)) to groupBy('a, 'b).agg(sum('c),sum('c),sum('c),sum('c),sum('c))") {
def prop[
- A: TypedEncoder : Ordering,
- B: TypedEncoder : Ordering,
- C: TypedEncoder,
- OutC: TypedEncoder: Numeric
+ A: TypedEncoder: Ordering,
+ B: TypedEncoder: Ordering,
+ C: TypedEncoder,
+ OutC: TypedEncoder: Numeric
](data: List[X3[A, B, C]])(
implicit
summableC: CatalystSummable[C, OutC],
@@ -260,55 +265,55 @@ class GroupByTests extends TypedDatasetSuite {
val C = dataset.col[C]('c)
val framelessSumC = dataset
- .groupBy(A,B)
+ .groupBy(A, B)
.agg(sum(C))
- .collect().run.toVector.sortBy(x => (x._1,x._2))
+ .collect().run.toVector.sortBy(x => (x._1, x._2))
- val scalaSumC = data.groupBy(x => (x.a,x.b)).mapValues { xs =>
+ val scalaSumC = data.groupBy(x => (x.a, x.b)).mapValues { xs =>
xs.map(_.c).map(widenc).sum
- }.toVector.map { case ((a, b), c) => (a, b, c) }.sortBy(x => (x._1,x._2))
+ }.toVector.map { case ((a, b), c) => (a, b, c) }.sortBy(x => (x._1, x._2))
val framelessSumCC = dataset
- .groupBy(A,B)
+ .groupBy(A, B)
.agg(sum(C), sum(C))
- .collect().run.toVector.sortBy(x => (x._1,x._2))
+ .collect().run.toVector.sortBy(x => (x._1, x._2))
- val scalaSumCC = data.groupBy(x => (x.a,x.b)).mapValues { xs =>
- val s = xs.map(_.c).map(widenc).sum; (s,s)
- }.toVector.map { case ((a, b), (c1, c2)) => (a, b, c1, c2) }.sortBy(x => (x._1,x._2))
+ val scalaSumCC = data.groupBy(x => (x.a, x.b)).mapValues { xs =>
+ val s = xs.map(_.c).map(widenc).sum; (s, s)
+ }.toVector.map { case ((a, b), (c1, c2)) => (a, b, c1, c2) }.sortBy(x => (x._1, x._2))
val framelessSumCCC = dataset
- .groupBy(A,B)
+ .groupBy(A, B)
.agg(sum(C), sum(C), sum(C))
- .collect().run.toVector.sortBy(x => (x._1,x._2))
+ .collect().run.toVector.sortBy(x => (x._1, x._2))
- val scalaSumCCC = data.groupBy(x => (x.a,x.b)).mapValues { xs =>
- val s = xs.map(_.c).map(widenc).sum; (s,s,s)
- }.toVector.map { case ((a, b), (c1, c2, c3)) => (a, b, c1, c2, c3) }.sortBy(x => (x._1,x._2))
+ val scalaSumCCC = data.groupBy(x => (x.a, x.b)).mapValues { xs =>
+ val s = xs.map(_.c).map(widenc).sum; (s, s, s)
+ }.toVector.map { case ((a, b), (c1, c2, c3)) => (a, b, c1, c2, c3) }.sortBy(x => (x._1, x._2))
val framelessSumCCCC = dataset
- .groupBy(A,B)
+ .groupBy(A, B)
.agg(sum(C), sum(C), sum(C), sum(C))
- .collect().run.toVector.sortBy(x => (x._1,x._2))
+ .collect().run.toVector.sortBy(x => (x._1, x._2))
- val scalaSumCCCC = data.groupBy(x => (x.a,x.b)).mapValues { xs =>
- val s = xs.map(_.c).map(widenc).sum; (s,s,s,s)
- }.toVector.map { case ((a, b), (c1, c2, c3, c4)) => (a, b, c1, c2, c3, c4) }.sortBy(x => (x._1,x._2))
+ val scalaSumCCCC = data.groupBy(x => (x.a, x.b)).mapValues { xs =>
+ val s = xs.map(_.c).map(widenc).sum; (s, s, s, s)
+ }.toVector.map { case ((a, b), (c1, c2, c3, c4)) => (a, b, c1, c2, c3, c4) }.sortBy(x => (x._1, x._2))
val framelessSumCCCCC = dataset
- .groupBy(A,B)
+ .groupBy(A, B)
.agg(sum(C), sum(C), sum(C), sum(C), sum(C))
- .collect().run.toVector.sortBy(x => (x._1,x._2))
+ .collect().run.toVector.sortBy(x => (x._1, x._2))
- val scalaSumCCCCC = data.groupBy(x => (x.a,x.b)).mapValues { xs =>
- val s = xs.map(_.c).map(widenc).sum; (s,s,s,s,s)
- }.toVector.map { case ((a, b), (c1, c2, c3, c4, c5)) => (a, b, c1, c2, c3, c4, c5) }.sortBy(x => (x._1,x._2))
+ val scalaSumCCCCC = data.groupBy(x => (x.a, x.b)).mapValues { xs =>
+ val s = xs.map(_.c).map(widenc).sum; (s, s, s, s, s)
+ }.toVector.map { case ((a, b), (c1, c2, c3, c4, c5)) => (a, b, c1, c2, c3, c4, c5) }.sortBy(x => (x._1, x._2))
(framelessSumC ?= scalaSumC) &&
- (framelessSumCC ?= scalaSumCC) &&
- (framelessSumCCC ?= scalaSumCCC) &&
- (framelessSumCCCC ?= scalaSumCCCC) &&
- (framelessSumCCCCC ?= scalaSumCCCCC)
+ (framelessSumCC ?= scalaSumCC) &&
+ (framelessSumCCC ?= scalaSumCCC) &&
+ (framelessSumCCCC ?= scalaSumCCCC) &&
+ (framelessSumCCCCC ?= scalaSumCCCCC)
}
check(forAll(prop[String, Long, BigDecimal, BigDecimal] _))
@@ -316,12 +321,12 @@ class GroupByTests extends TypedDatasetSuite {
test("groupBy('a, 'b).agg(sum('c), sum('d))") {
def prop[
- A: TypedEncoder : Ordering,
- B: TypedEncoder : Ordering,
+ A: TypedEncoder: Ordering,
+ B: TypedEncoder: Ordering,
C: TypedEncoder,
D: TypedEncoder,
- OutC: TypedEncoder : Numeric,
- OutD: TypedEncoder : Numeric
+ OutC: TypedEncoder: Numeric,
+ OutD: TypedEncoder: Numeric
](data: List[X4[A, B, C, D]])(
implicit
summableC: CatalystSummable[C, OutC],
@@ -354,9 +359,9 @@ class GroupByTests extends TypedDatasetSuite {
test("groupBy('a, 'b).mapGroups('a, 'b, sum('c))") {
def prop[
- A: TypedEncoder : Ordering,
- B: TypedEncoder : Ordering,
- C: TypedEncoder : Numeric
+ A: TypedEncoder: Ordering,
+ B: TypedEncoder: Ordering,
+ C: TypedEncoder: Numeric
](data: List[X3[A, B, C]]): Prop = {
val dataset = TypedDataset.create(data)
val A = dataset.col[A]('a)
@@ -403,8 +408,8 @@ class GroupByTests extends TypedDatasetSuite {
test("groupBy('a).flatMapGroups(('a, toVector(('a, 'b))") {
def prop[
- A: TypedEncoder : Ordering,
- B: TypedEncoder : Ordering
+ A: TypedEncoder: Ordering,
+ B: TypedEncoder: Ordering
](data: Vector[X2[A, B]]): Prop = {
val dataset = TypedDataset.create(data)
val A = dataset.col[A]('a)
@@ -430,9 +435,9 @@ class GroupByTests extends TypedDatasetSuite {
test("groupBy('a, 'b).flatMapGroups((('a,'b) toVector((('a,'b), 'c))") {
def prop[
- A: TypedEncoder : Ordering,
- B: TypedEncoder : Ordering,
- C: TypedEncoder : Ordering
+ A: TypedEncoder: Ordering,
+ B: TypedEncoder: Ordering,
+ C: TypedEncoder: Ordering
](data: Vector[X3[A, B, C]]): Prop = {
val dataset = TypedDataset.create(data)
val cA = dataset.col[A]('a)
@@ -445,7 +450,7 @@ class GroupByTests extends TypedDatasetSuite {
.sorted
val dataGrouped = data
- .groupBy(t => (t.a,t.b)).toSeq
+ .groupBy(t => (t.a, t.b)).toSeq
.flatMap { case (a, xs) => xs.map(x => (a, x)) }
.sorted
diff --git a/dataset/src/test/scala/frameless/InjectionTests.scala b/dataset/src/test/scala/frameless/InjectionTests.scala
index c17a52bd7..9ae136409 100644
--- a/dataset/src/test/scala/frameless/InjectionTests.scala
+++ b/dataset/src/test/scala/frameless/InjectionTests.scala
@@ -30,8 +30,8 @@ object Food {
Injection(
{
case Burger => 0
- case Pasta => 1
- case Rice => 2
+ case Pasta => 1
+ case Rice => 2
},
{
case 0 => Burger
diff --git a/dataset/src/test/scala/frameless/JobTests.scala b/dataset/src/test/scala/frameless/JobTests.scala
index 9650a020f..a1d37a8db 100644
--- a/dataset/src/test/scala/frameless/JobTests.scala
+++ b/dataset/src/test/scala/frameless/JobTests.scala
@@ -6,7 +6,6 @@ import org.scalatestplus.scalacheck.ScalaCheckDrivenPropertyChecks
import org.scalatest.freespec.AnyFreeSpec
import org.scalatest.matchers.should.Matchers
-
class JobTests extends AnyFreeSpec with BeforeAndAfterAll with SparkTesting with ScalaCheckDrivenPropertyChecks with Matchers {
"map" - {
@@ -45,10 +44,10 @@ class JobTests extends AnyFreeSpec with BeforeAndAfterAll with SparkTesting with
"properties" - {
"read back" in forAll {
- (k:String, v: String) =>
+ (k: String, v: String) =>
val scopedKey = "frameless.tests." + k
- Job(1).withLocalProperty(scopedKey,v).run()
+ Job(1).withLocalProperty(scopedKey, v).run()
sc.getLocalProperty(scopedKey) shouldBe v
}
}
-}
\ No newline at end of file
+}
diff --git a/dataset/src/test/scala/frameless/JoinTests.scala b/dataset/src/test/scala/frameless/JoinTests.scala
index b34911c4f..f6a75f3da 100644
--- a/dataset/src/test/scala/frameless/JoinTests.scala
+++ b/dataset/src/test/scala/frameless/JoinTests.scala
@@ -7,9 +7,9 @@ import org.scalacheck.Prop._
class JoinTests extends TypedDatasetSuite {
test("ab.joinCross(ac)") {
def prop[
- A : TypedEncoder : Ordering,
- B : TypedEncoder : Ordering,
- C : TypedEncoder : Ordering
+ A: TypedEncoder: Ordering,
+ B: TypedEncoder: Ordering,
+ C: TypedEncoder: Ordering
](left: List[X2[A, B]], right: List[X2[A, C]]): Prop = {
val leftDs = TypedDataset.create(left)
val rightDs = TypedDataset.create(right)
@@ -27,7 +27,8 @@ class JoinTests extends TypedDatasetSuite {
val equalSchemas = joinedDs.schema ?= StructType(Seq(
StructField("_1", leftDs.schema, nullable = false),
- StructField("_2", rightDs.schema, nullable = false)))
+ StructField("_2", rightDs.schema, nullable = false)
+ ))
(joined.sorted ?= joinedData) && equalSchemas
}
@@ -37,9 +38,9 @@ class JoinTests extends TypedDatasetSuite {
test("ab.joinFull(ac)(ab.a == ac.a)") {
def prop[
- A : TypedEncoder : Ordering,
- B : TypedEncoder : Ordering,
- C : TypedEncoder : Ordering
+ A: TypedEncoder: Ordering,
+ B: TypedEncoder: Ordering,
+ C: TypedEncoder: Ordering
](left: List[X2[A, B]], right: List[X2[A, C]]): Prop = {
val leftDs = TypedDataset.create(left)
val rightDs = TypedDataset.create(right)
@@ -49,25 +50,28 @@ class JoinTests extends TypedDatasetSuite {
val joinedData = joinedDs.collect().run().toVector.sorted
val rightKeys = right.map(_.a).toSet
- val leftKeys = left.map(_.a).toSet
+ val leftKeys = left.map(_.a).toSet
val joined = {
for {
ab <- left
ac <- right if ac.a == ab.a
} yield (Some(ab), Some(ac))
- }.toVector ++ {
- for {
- ab <- left if !rightKeys.contains(ab.a)
- } yield (Some(ab), None)
- }.toVector ++ {
- for {
- ac <- right if !leftKeys.contains(ac.a)
- } yield (None, Some(ac))
- }.toVector
+ }.toVector ++
+ {
+ for {
+ ab <- left if !rightKeys.contains(ab.a)
+ } yield (Some(ab), None)
+ }.toVector ++
+ {
+ for {
+ ac <- right if !leftKeys.contains(ac.a)
+ } yield (None, Some(ac))
+ }.toVector
val equalSchemas = joinedDs.schema ?= StructType(Seq(
StructField("_1", leftDs.schema, nullable = true),
- StructField("_2", rightDs.schema, nullable = true)))
+ StructField("_2", rightDs.schema, nullable = true)
+ ))
(joined.sorted ?= joinedData) && equalSchemas
}
@@ -77,9 +81,9 @@ class JoinTests extends TypedDatasetSuite {
test("ab.joinInner(ac)(ab.a == ac.a)") {
def prop[
- A : TypedEncoder : Ordering,
- B : TypedEncoder : Ordering,
- C : TypedEncoder : Ordering
+ A: TypedEncoder: Ordering,
+ B: TypedEncoder: Ordering,
+ C: TypedEncoder: Ordering
](left: List[X2[A, B]], right: List[X2[A, C]]): Prop = {
val leftDs = TypedDataset.create(left)
val rightDs = TypedDataset.create(right)
@@ -97,7 +101,8 @@ class JoinTests extends TypedDatasetSuite {
val equalSchemas = joinedDs.schema ?= StructType(Seq(
StructField("_1", leftDs.schema, nullable = false),
- StructField("_2", rightDs.schema, nullable = false)))
+ StructField("_2", rightDs.schema, nullable = false)
+ ))
(joined.sorted ?= joinedData) && equalSchemas
}
@@ -107,9 +112,9 @@ class JoinTests extends TypedDatasetSuite {
test("ab.joinLeft(ac)(ab.a == ac.a)") {
def prop[
- A : TypedEncoder : Ordering,
- B : TypedEncoder : Ordering,
- C : TypedEncoder : Ordering
+ A: TypedEncoder: Ordering,
+ B: TypedEncoder: Ordering,
+ C: TypedEncoder: Ordering
](left: List[X2[A, B]], right: List[X2[A, C]]): Prop = {
val leftDs = TypedDataset.create(left)
val rightDs = TypedDataset.create(right)
@@ -124,15 +129,17 @@ class JoinTests extends TypedDatasetSuite {
ab <- left
ac <- right if ac.a == ab.a
} yield (ab, Some(ac))
- }.toVector ++ {
- for {
- ab <- left if !rightKeys.contains(ab.a)
- } yield (ab, None)
- }.toVector
+ }.toVector ++
+ {
+ for {
+ ab <- left if !rightKeys.contains(ab.a)
+ } yield (ab, None)
+ }.toVector
val equalSchemas = joinedDs.schema ?= StructType(Seq(
StructField("_1", leftDs.schema, nullable = false),
- StructField("_2", rightDs.schema, nullable = true)))
+ StructField("_2", rightDs.schema, nullable = true)
+ ))
(joined.sorted ?= joinedData) && (joinedData.map(_._1).toSet ?= left.toSet) && equalSchemas
}
@@ -142,9 +149,9 @@ class JoinTests extends TypedDatasetSuite {
test("ab.joinLeftAnti(ac)(ab.a == ac.a)") {
def prop[
- A : TypedEncoder : Ordering,
- B : TypedEncoder : Ordering,
- C : TypedEncoder : Ordering
+ A: TypedEncoder: Ordering,
+ B: TypedEncoder: Ordering,
+ C: TypedEncoder: Ordering
](left: List[X2[A, B]], right: List[X2[A, C]]): Prop = {
val leftDs = TypedDataset.create(left)
val rightDs = TypedDataset.create(right)
@@ -170,9 +177,9 @@ class JoinTests extends TypedDatasetSuite {
test("ab.joinLeftSemi(ac)(ab.a == ac.a)") {
def prop[
- A : TypedEncoder : Ordering,
- B : TypedEncoder : Ordering,
- C : TypedEncoder : Ordering
+ A: TypedEncoder: Ordering,
+ B: TypedEncoder: Ordering,
+ C: TypedEncoder: Ordering
](left: List[X2[A, B]], right: List[X2[A, C]]): Prop = {
val leftDs = TypedDataset.create(left)
val rightDs = TypedDataset.create(right)
@@ -198,9 +205,9 @@ class JoinTests extends TypedDatasetSuite {
test("ab.joinRight(ac)(ab.a == ac.a)") {
def prop[
- A : TypedEncoder : Ordering,
- B : TypedEncoder : Ordering,
- C : TypedEncoder : Ordering
+ A: TypedEncoder: Ordering,
+ B: TypedEncoder: Ordering,
+ C: TypedEncoder: Ordering
](left: List[X2[A, B]], right: List[X2[A, C]]): Prop = {
val leftDs = TypedDataset.create(left)
val rightDs = TypedDataset.create(right)
@@ -215,15 +222,17 @@ class JoinTests extends TypedDatasetSuite {
ab <- left
ac <- right if ac.a == ab.a
} yield (Some(ab), ac)
- }.toVector ++ {
- for {
- ac <- right if !leftKeys.contains(ac.a)
- } yield (None, ac)
- }.toVector
+ }.toVector ++
+ {
+ for {
+ ac <- right if !leftKeys.contains(ac.a)
+ } yield (None, ac)
+ }.toVector
val equalSchemas = joinedDs.schema ?= StructType(Seq(
StructField("_1", leftDs.schema, nullable = true),
- StructField("_2", rightDs.schema, nullable = false)))
+ StructField("_2", rightDs.schema, nullable = false)
+ ))
(joined.sorted ?= joinedData) && (joinedData.map(_._2).toSet ?= right.toSet) && equalSchemas
}
diff --git a/dataset/src/test/scala/frameless/LitTests.scala b/dataset/src/test/scala/frameless/LitTests.scala
index 50df45220..1ab3c167c 100644
--- a/dataset/src/test/scala/frameless/LitTests.scala
+++ b/dataset/src/test/scala/frameless/LitTests.scala
@@ -15,7 +15,7 @@ class LitTests extends TypedDatasetSuite with Matchers {
val l: TypedColumn[Int, A] = lit(value)
// filter forces whole codegen
- val elems = df.deserialized.filter((_:Int) => true).select(l)
+ val elems = df.deserialized.filter((_: Int) => true).select(l)
.collect()
.run()
.toVector
@@ -58,7 +58,8 @@ class LitTests extends TypedDatasetSuite with Matchers {
test("support value class") {
val initial = Seq(
Q(name = new Name("Foo"), id = 1),
- Q(name = new Name("Bar"), id = 2))
+ Q(name = new Name("Bar"), id = 2)
+ )
val ds = TypedDataset.create(initial)
ds.collect.run() shouldBe initial
@@ -72,7 +73,8 @@ class LitTests extends TypedDatasetSuite with Matchers {
test("support optional value class") {
val initial = Seq(
R(name = "Foo", id = 1, alias = None),
- R(name = "Bar", id = 2, alias = Some(new Name("Lorem"))))
+ R(name = "Bar", id = 2, alias = Some(new Name("Lorem")))
+ )
val ds = TypedDataset.create(initial)
ds.collect.run() shouldBe initial
@@ -82,7 +84,7 @@ class LitTests extends TypedDatasetSuite with Matchers {
val lit = functions.litValue(someIpsum)
val tds = ds.withColumnReplaced('alias, functions.litValue(someIpsum))
- tds.queryExecution.toString() should include (lit.toString)
+ tds.queryExecution.toString() should include(lit.toString)
tds.
collect.run() shouldBe initial.map(_.copy(alias = someIpsum))
diff --git a/dataset/src/test/scala/frameless/NumericTests.scala b/dataset/src/test/scala/frameless/NumericTests.scala
index 0c13ae5a3..60e921ee2 100644
--- a/dataset/src/test/scala/frameless/NumericTests.scala
+++ b/dataset/src/test/scala/frameless/NumericTests.scala
@@ -43,7 +43,7 @@ class NumericTests extends TypedDatasetSuite with Matchers {
}
test("multiply") {
- def prop[A: TypedEncoder : CatalystNumeric : Numeric : ClassTag](a: A, b: A): Prop = {
+ def prop[A: TypedEncoder: CatalystNumeric: Numeric: ClassTag](a: A, b: A): Prop = {
val df = TypedDataset.create(X2(a, b) :: Nil)
val result = implicitly[Numeric[A]].times(a, b)
val got = df.select(df.col('a) * df.col('b)).collect().run()
@@ -61,7 +61,8 @@ class NumericTests extends TypedDatasetSuite with Matchers {
test("divide") {
def prop[A: TypedEncoder: CatalystNumeric: Numeric](a: A, b: A)(implicit cd: CatalystDivisible[A, Double]): Prop = {
val df = TypedDataset.create(X2(a, b) :: Nil)
- if (b == 0) proved else {
+ if (b == 0) proved
+ else {
val div: Double = implicitly[Numeric[A]].toDouble(a) / implicitly[Numeric[A]].toDouble(b)
val got: Seq[Double] = df.select(df.col('a) / df.col('b)).collect().run()
@@ -69,17 +70,18 @@ class NumericTests extends TypedDatasetSuite with Matchers {
}
}
- check(prop[Byte ] _)
+ check(prop[Byte] _)
check(prop[Double] _)
- check(prop[Int ] _)
- check(prop[Long ] _)
- check(prop[Short ] _)
+ check(prop[Int] _)
+ check(prop[Long] _)
+ check(prop[Short] _)
}
test("divide BigDecimals") {
def prop(a: BigDecimal, b: BigDecimal): Prop = {
val df = TypedDataset.create(X2(a, b) :: Nil)
- if (b.doubleValue == 0) proved else {
+ if (b.doubleValue == 0) proved
+ else {
// Spark performs something in between Double division and BigDecimal division,
// we approximate it using double vision and `approximatelyEqual`:
val div = BigDecimal(a.doubleValue / b.doubleValue)
@@ -133,9 +135,10 @@ class NumericTests extends TypedDatasetSuite with Matchers {
test("mod") {
import NumericMod._
- def prop[A: TypedEncoder : CatalystNumeric : NumericMod](a: A, b: A): Prop = {
+ def prop[A: TypedEncoder: CatalystNumeric: NumericMod](a: A, b: A): Prop = {
val df = TypedDataset.create(X2(a, b) :: Nil)
- if (b == 0) proved else {
+ if (b == 0) proved
+ else {
val mod: A = implicitly[NumericMod[A]].mod(a, b)
val got: Seq[A] = df.select(df.col('a) % df.col('b)).collect().run()
@@ -145,19 +148,20 @@ class NumericTests extends TypedDatasetSuite with Matchers {
check(prop[Byte] _)
check(prop[Double] _)
- check(prop[Int ] _)
- check(prop[Long ] _)
- check(prop[Short ] _)
+ check(prop[Int] _)
+ check(prop[Long] _)
+ check(prop[Short] _)
check(prop[BigDecimal] _)
}
- test("a mod lit(b)"){
+ test("a mod lit(b)") {
import NumericMod._
- def prop[A: TypedEncoder : CatalystNumeric : NumericMod](elem: A, data: X1[A]): Prop = {
+ def prop[A: TypedEncoder: CatalystNumeric: NumericMod](elem: A, data: X1[A]): Prop = {
val dataset = TypedDataset.create(Seq(data))
val a = dataset.col('a)
- if (elem == 0) proved else {
+ if (elem == 0) proved
+ else {
val mod: A = implicitly[NumericMod[A]].mod(data.a, elem)
val got: Seq[A] = dataset.select(a % elem).collect().run()
@@ -167,9 +171,9 @@ class NumericTests extends TypedDatasetSuite with Matchers {
check(prop[Byte] _)
check(prop[Double] _)
- check(prop[Int ] _)
- check(prop[Long ] _)
- check(prop[Short ] _)
+ check(prop[Int] _)
+ check(prop[Long] _)
+ check(prop[Short] _)
check(prop[BigDecimal] _)
}
@@ -180,9 +184,9 @@ class NumericTests extends TypedDatasetSuite with Matchers {
implicit val doubleWithNaN = Arbitrary {
implicitly[Arbitrary[Double]].arbitrary.flatMap(Gen.oneOf(_, Double.NaN))
}
- implicit val x1 = Arbitrary{ doubleWithNaN.arbitrary.map(X1(_)) }
+ implicit val x1 = Arbitrary { doubleWithNaN.arbitrary.map(X1(_)) }
- def prop[A : TypedEncoder : Encoder : CatalystNaN](data: List[X1[A]]): Prop = {
+ def prop[A: TypedEncoder: Encoder: CatalystNaN](data: List[X1[A]]): Prop = {
val ds = TypedDataset.create(data)
val expected = ds.toDF().filter(!$"a".isNaN).map(_.getAs[A](0)).collect().toSeq
diff --git a/dataset/src/test/scala/frameless/OrderByTests.scala b/dataset/src/test/scala/frameless/OrderByTests.scala
index 98bd7442d..20137819a 100644
--- a/dataset/src/test/scala/frameless/OrderByTests.scala
+++ b/dataset/src/test/scala/frameless/OrderByTests.scala
@@ -7,19 +7,20 @@ import org.apache.spark.sql.Column
import org.scalatest.matchers.should.Matchers
class OrderByTests extends TypedDatasetSuite with Matchers {
- def sortings[A : CatalystOrdered, T]: Seq[(TypedColumn[T, A] => SortedTypedColumn[T, A], Column => Column)] = Seq(
+ def sortings[A: CatalystOrdered, T]: Seq[(TypedColumn[T, A] => SortedTypedColumn[T, A], Column => Column)] = Seq(
(_.desc, _.desc),
(_.asc, _.asc),
- (t => t, t => t) //default ascending
+ (t => t, t => t) // default ascending
)
test("single column non nullable orderBy") {
- def prop[A: TypedEncoder : CatalystOrdered](data: Vector[X1[A]]): Prop = {
+ def prop[A: TypedEncoder: CatalystOrdered](data: Vector[X1[A]]): Prop = {
val ds = TypedDataset.create(data)
sortings[A, X1[A]].map { case (typ, untyp) =>
ds.dataset.orderBy(untyp(ds.dataset.col("a"))).collect().toVector.?=(
- ds.orderBy(typ(ds('a))).collect().run().toVector)
+ ds.orderBy(typ(ds('a))).collect().run().toVector
+ )
}.reduce(_ && _)
}
@@ -36,12 +37,13 @@ class OrderByTests extends TypedDatasetSuite with Matchers {
}
test("single column non nullable partition sorting") {
- def prop[A: TypedEncoder : CatalystOrdered](data: Vector[X1[A]]): Prop = {
+ def prop[A: TypedEncoder: CatalystOrdered](data: Vector[X1[A]]): Prop = {
val ds = TypedDataset.create(data)
sortings[A, X1[A]].map { case (typ, untyp) =>
ds.dataset.sortWithinPartitions(untyp(ds.dataset.col("a"))).collect().toVector.?=(
- ds.sortWithinPartitions(typ(ds('a))).collect().run().toVector)
+ ds.sortWithinPartitions(typ(ds('a))).collect().run().toVector
+ )
}.reduce(_ && _)
}
@@ -58,7 +60,7 @@ class OrderByTests extends TypedDatasetSuite with Matchers {
}
test("two columns non nullable orderBy") {
- def prop[A: TypedEncoder : CatalystOrdered, B: TypedEncoder : CatalystOrdered](data: Vector[X2[A,B]]): Prop = {
+ def prop[A: TypedEncoder: CatalystOrdered, B: TypedEncoder: CatalystOrdered](data: Vector[X2[A, B]]): Prop = {
val ds = TypedDataset.create(data)
sortings[A, X2[A, B]].reverse.zip(sortings[B, X2[A, B]]).map { case ((typA, untypA), (typB, untypB)) =>
@@ -75,7 +77,7 @@ class OrderByTests extends TypedDatasetSuite with Matchers {
}
test("two columns non nullable partition sorting") {
- def prop[A: TypedEncoder : CatalystOrdered, B: TypedEncoder : CatalystOrdered](data: Vector[X2[A,B]]): Prop = {
+ def prop[A: TypedEncoder: CatalystOrdered, B: TypedEncoder: CatalystOrdered](data: Vector[X2[A, B]]): Prop = {
val ds = TypedDataset.create(data)
sortings[A, X2[A, B]].reverse.zip(sortings[B, X2[A, B]]).map { case ((typA, untypA), (typB, untypB)) =>
@@ -92,7 +94,7 @@ class OrderByTests extends TypedDatasetSuite with Matchers {
}
test("three columns non nullable orderBy") {
- def prop[A: TypedEncoder : CatalystOrdered, B: TypedEncoder : CatalystOrdered](data: Vector[X3[A,B,A]]): Prop = {
+ def prop[A: TypedEncoder: CatalystOrdered, B: TypedEncoder: CatalystOrdered](data: Vector[X3[A, B, A]]): Prop = {
val ds = TypedDataset.create(data)
sortings[A, X3[A, B, A]].reverse
@@ -115,7 +117,7 @@ class OrderByTests extends TypedDatasetSuite with Matchers {
}
test("three columns non nullable partition sorting") {
- def prop[A: TypedEncoder : CatalystOrdered, B: TypedEncoder : CatalystOrdered](data: Vector[X3[A,B,A]]): Prop = {
+ def prop[A: TypedEncoder: CatalystOrdered, B: TypedEncoder: CatalystOrdered](data: Vector[X3[A, B, A]]): Prop = {
val ds = TypedDataset.create(data)
sortings[A, X3[A, B, A]].reverse
@@ -138,13 +140,15 @@ class OrderByTests extends TypedDatasetSuite with Matchers {
}
test("sort support for mixed default and explicit ordering") {
- def prop[A: TypedEncoder : CatalystOrdered, B: TypedEncoder : CatalystOrdered](data: Vector[X2[A, B]]): Prop = {
+ def prop[A: TypedEncoder: CatalystOrdered, B: TypedEncoder: CatalystOrdered](data: Vector[X2[A, B]]): Prop = {
val ds = TypedDataset.create(data)
ds.dataset.orderBy(ds.dataset.col("a"), ds.dataset.col("b").desc).collect().toVector.?=(
- ds.orderByMany(ds('a), ds('b).desc).collect().run().toVector) &&
+ ds.orderByMany(ds('a), ds('b).desc).collect().run().toVector
+ ) &&
ds.dataset.sortWithinPartitions(ds.dataset.col("a"), ds.dataset.col("b").desc).collect().toVector.?=(
- ds.sortWithinPartitionsMany(ds('a), ds('b).desc).collect().run().toVector)
+ ds.sortWithinPartitionsMany(ds('a), ds('b).desc).collect().run().toVector
+ )
}
check(forAll(prop[SQLDate, Long] _))
@@ -162,13 +166,13 @@ class OrderByTests extends TypedDatasetSuite with Matchers {
test("derives a CatalystOrdered for case classes when all fields are comparable") {
type T[A, B] = X3[Int, Boolean, X2[A, B]]
def prop[
- A: TypedEncoder : CatalystOrdered,
- B: TypedEncoder : CatalystOrdered
+ A: TypedEncoder: CatalystOrdered,
+ B: TypedEncoder: CatalystOrdered
](data: Vector[T[A, B]]): Prop = {
val ds = TypedDataset.create(data)
sortings[X2[A, B], T[A, B]].map { case (typX2, untypX2) =>
- val vanilla = ds.dataset.orderBy(untypX2(ds.dataset.col("c"))).collect().toVector
+ val vanilla = ds.dataset.orderBy(untypX2(ds.dataset.col("c"))).collect().toVector
val frameless = ds.orderBy(typX2(ds('c))).collect().run.toVector
vanilla ?= frameless
}.reduce(_ && _)
@@ -183,13 +187,13 @@ class OrderByTests extends TypedDatasetSuite with Matchers {
test("derives a CatalystOrdered for tuples when all fields are comparable") {
type T[A, B] = X2[Int, (A, B)]
def prop[
- A: TypedEncoder : CatalystOrdered,
- B: TypedEncoder : CatalystOrdered
+ A: TypedEncoder: CatalystOrdered,
+ B: TypedEncoder: CatalystOrdered
](data: Vector[T[A, B]]): Prop = {
val ds = TypedDataset.create(data)
sortings[(A, B), T[A, B]].map { case (typX2, untypX2) =>
- val vanilla = ds.dataset.orderBy(untypX2(ds.dataset.col("b"))).collect().toVector
+ val vanilla = ds.dataset.orderBy(untypX2(ds.dataset.col("b"))).collect().toVector
val frameless = ds.orderBy(typX2(ds('b))).collect().run.toVector
vanilla ?= frameless
}.reduce(_ && _)
diff --git a/dataset/src/test/scala/frameless/RecordEncoderTests.scala b/dataset/src/test/scala/frameless/RecordEncoderTests.scala
index 98274cf01..206c2d6e8 100644
--- a/dataset/src/test/scala/frameless/RecordEncoderTests.scala
+++ b/dataset/src/test/scala/frameless/RecordEncoderTests.scala
@@ -1,6 +1,6 @@
package frameless
-import org.apache.spark.sql.{Row, functions => F}
+import org.apache.spark.sql.{functions => F, Row}
import org.apache.spark.sql.types.{
ArrayType,
BinaryType,
@@ -95,7 +95,8 @@ final class RecordEncoderTests extends TypedDatasetSuite with Matchers {
encoder.jvmRepr shouldBe ObjectType(classOf[Name])
encoder.catalystRepr shouldBe StructType(
- Seq(StructField("value", StringType, false)))
+ Seq(StructField("value", StringType, false))
+ )
val sqlContext = session.sqlContext
import sqlContext.implicits._
@@ -111,7 +112,8 @@ final class RecordEncoderTests extends TypedDatasetSuite with Matchers {
illTyped(
// As `Person` is not a Value class
- "val _: RecordFieldEncoder[Person] = RecordFieldEncoder.valueClass")
+ "val _: RecordFieldEncoder[Person] = RecordFieldEncoder.valueClass"
+ )
val fieldEncoder: RecordFieldEncoder[Name] = RecordFieldEncoder.valueClass
@@ -125,7 +127,8 @@ final class RecordEncoderTests extends TypedDatasetSuite with Matchers {
val expectedPersonStructType = StructType(Seq(
StructField("name", StringType, false),
- StructField("age", IntegerType, false)))
+ StructField("age", IntegerType, false)
+ ))
encoder.catalystRepr shouldBe expectedPersonStructType
@@ -140,7 +143,9 @@ final class RecordEncoderTests extends TypedDatasetSuite with Matchers {
}
val expected = Seq(
- Person(new Name("Foo"), 2), Person(new Name("Bar"), 3))
+ Person(new Name("Foo"), 2),
+ Person(new Name("Bar"), 3)
+ )
unsafeDs.collect.run() shouldBe expected
@@ -160,7 +165,8 @@ final class RecordEncoderTests extends TypedDatasetSuite with Matchers {
illTyped( // As `Person` is not a Value class
"""val _: RecordFieldEncoder[Option[Person]] =
- RecordFieldEncoder.optionValueClass""")
+ RecordFieldEncoder.optionValueClass"""
+ )
val fieldEncoder: RecordFieldEncoder[Option[Name]] =
RecordFieldEncoder.optionValueClass
@@ -177,7 +183,8 @@ final class RecordEncoderTests extends TypedDatasetSuite with Matchers {
val expectedPersonStructType = StructType(Seq(
StructField("id", LongType, false),
- StructField("name", StringType, true)))
+ StructField("name", StringType, true)
+ ))
encoder.catalystRepr shouldBe expectedPersonStructType
@@ -194,7 +201,8 @@ final class RecordEncoderTests extends TypedDatasetSuite with Matchers {
ds1.collect.run() shouldBe Seq(
User(1L, None),
- User(2L, Some(new Name("Foo"))))
+ User(2L, Some(new Name("Foo")))
+ )
val ds2: TypedDataset[User] = {
val sqlContext = session.sqlContext
@@ -208,7 +216,8 @@ final class RecordEncoderTests extends TypedDatasetSuite with Matchers {
val df2 = df1.withColumn(
"jsonValue",
- F.from_json(df1.col("value"), expectedPersonStructType)).
+ F.from_json(df1.col("value"), expectedPersonStructType)
+ ).
select("jsonValue.id", "jsonValue.name")
TypedDataset.createUnsafe[User](df2)
@@ -217,7 +226,8 @@ final class RecordEncoderTests extends TypedDatasetSuite with Matchers {
val expected = Seq(
User(3L, None),
User(4L, Some(new Name("Lorem"))),
- User(5L, None))
+ User(5L, None)
+ )
ds2.collect.run() shouldBe expected
@@ -233,10 +243,16 @@ final class RecordEncoderTests extends TypedDatasetSuite with Matchers {
encoder.jvmRepr shouldBe ObjectType(classOf[D])
val expectedStructType = StructType(Seq(
- StructField("m", MapType(
- keyType = StringType,
- valueType = IntegerType,
- valueContainsNull = false), false)))
+ StructField(
+ "m",
+ MapType(
+ keyType = StringType,
+ valueType = IntegerType,
+ valueContainsNull = false
+ ),
+ false
+ )
+ ))
encoder.catalystRepr shouldBe expectedStructType
@@ -246,18 +262,20 @@ final class RecordEncoderTests extends TypedDatasetSuite with Matchers {
val ds1 = TypedDataset.createUnsafe[D] {
val df = Seq(
"""{"m":{"pizza":1,"sushi":2}}""",
- """{"m":{"red":3,"blue":4}}""",
+ """{"m":{"red":3,"blue":4}}"""
).toDF
df.withColumn(
"jsonValue",
- F.from_json(df.col("value"), expectedStructType)).
+ F.from_json(df.col("value"), expectedStructType)
+ ).
select("jsonValue.*")
}
val expected = Seq(
D(m = Map("pizza" -> 1, "sushi" -> 2)),
- D(m = Map("red" -> 3, "blue" -> 4)))
+ D(m = Map("red" -> 3, "blue" -> 4))
+ )
ds1.collect.run() shouldBe expected
@@ -277,10 +295,16 @@ final class RecordEncoderTests extends TypedDatasetSuite with Matchers {
val expectedStudentStructType = StructType(Seq(
StructField("name", StringType, false),
- StructField("grades", MapType(
- keyType = StringType,
- valueType = DecimalType.SYSTEM_DEFAULT,
- valueContainsNull = false), false)))
+ StructField(
+ "grades",
+ MapType(
+ keyType = StringType,
+ valueType = DecimalType.SYSTEM_DEFAULT,
+ valueContainsNull = false
+ ),
+ false
+ )
+ ))
encoder.catalystRepr shouldBe expectedStudentStructType
@@ -290,43 +314,58 @@ final class RecordEncoderTests extends TypedDatasetSuite with Matchers {
val ds1 = TypedDataset.createUnsafe[Student] {
val df = Seq(
"""{"name":"Foo","grades":{"math":1,"physics":"23.4"}}""",
- """{"name":"Bar","grades":{"biology":18.5,"geography":4}}""",
+ """{"name":"Bar","grades":{"biology":18.5,"geography":4}}"""
).toDF
df.withColumn(
"jsonValue",
- F.from_json(df.col("value"), expectedStudentStructType)).
+ F.from_json(df.col("value"), expectedStudentStructType)
+ ).
select("jsonValue.*")
}
val expected = Seq(
- Student(name = "Foo", grades = Map(
- new Subject("math") -> new Grade(BigDecimal(1)),
- new Subject("physics") -> new Grade(BigDecimal(23.4D)))),
- Student(name = "Bar", grades = Map(
- new Subject("biology") -> new Grade(BigDecimal(18.5)),
- new Subject("geography") -> new Grade(BigDecimal(4L)))))
+ Student(
+ name = "Foo",
+ grades = Map(
+ new Subject("math") -> new Grade(BigDecimal(1)),
+ new Subject("physics") -> new Grade(BigDecimal(23.4D))
+ )
+ ),
+ Student(
+ name = "Bar",
+ grades = Map(
+ new Subject("biology") -> new Grade(BigDecimal(18.5)),
+ new Subject("geography") -> new Grade(BigDecimal(4L))
+ )
+ )
+ )
ds1.collect.run() shouldBe expected
val grades = Map[Subject, Grade](
- new Subject("any") -> new Grade(BigDecimal(Long.MaxValue) + 1L))
+ new Subject("any") -> new Grade(BigDecimal(Long.MaxValue) + 1L)
+ )
val ds2 = ds1.withColumnReplaced('grades, functions.lit(grades))
ds2.collect.run() shouldBe Seq(
- Student("Foo", grades), Student("Bar", grades))
+ Student("Foo", grades),
+ Student("Bar", grades)
+ )
}
test("Encode binary array") {
val encoder = TypedEncoder[Tuple2[String, Array[Byte]]]
encoder.jvmRepr shouldBe ObjectType(
- classOf[Tuple2[String, Array[Byte]]])
+ classOf[Tuple2[String, Array[Byte]]]
+ )
val expectedStructType = StructType(Seq(
StructField("_1", StringType, false),
- StructField("_2", BinaryType, false)))
+ StructField("_2", BinaryType, false)
+ ))
encoder.catalystRepr shouldBe expectedStructType
@@ -359,11 +398,13 @@ final class RecordEncoderTests extends TypedDatasetSuite with Matchers {
val encoder = TypedEncoder[Tuple2[String, Array[Int]]]
encoder.jvmRepr shouldBe ObjectType(
- classOf[Tuple2[String, Array[Int]]])
+ classOf[Tuple2[String, Array[Int]]]
+ )
val expectedStructType = StructType(Seq(
StructField("_1", StringType, false),
- StructField("_2", ArrayType(IntegerType, false), false)))
+ StructField("_2", ArrayType(IntegerType, false), false)
+ ))
encoder.catalystRepr shouldBe expectedStructType
@@ -373,12 +414,13 @@ final class RecordEncoderTests extends TypedDatasetSuite with Matchers {
val ds1 = TypedDataset.createUnsafe[(String, Array[Int])] {
val df = Seq(
"""{"_1":"Foo", "_2":[3, 4]}""",
- """{"_1":"Bar", "_2":[5]}""",
+ """{"_1":"Bar", "_2":[5]}"""
).toDF
df.withColumn(
"jsonValue",
- F.from_json(df.col("value"), expectedStructType)).
+ F.from_json(df.col("value"), expectedStructType)
+ ).
select("jsonValue.*")
}
@@ -403,11 +445,13 @@ final class RecordEncoderTests extends TypedDatasetSuite with Matchers {
val encoder = TypedEncoder[Tuple2[String, Array[Subject]]]
encoder.jvmRepr shouldBe ObjectType(
- classOf[Tuple2[String, Array[Subject]]])
+ classOf[Tuple2[String, Array[Subject]]]
+ )
val expectedStructType = StructType(Seq(
StructField("_1", StringType, false),
- StructField("_2", ArrayType(StringType, false), false)))
+ StructField("_2", ArrayType(StringType, false), false)
+ ))
encoder.catalystRepr shouldBe expectedStructType
@@ -417,18 +461,20 @@ final class RecordEncoderTests extends TypedDatasetSuite with Matchers {
val ds1 = TypedDataset.createUnsafe[(String, Array[Subject])] {
val df = Seq(
"""{"_1":"Foo", "_2":["math","physics"]}""",
- """{"_1":"Bar", "_2":["biology","geography"]}""",
+ """{"_1":"Bar", "_2":["biology","geography"]}"""
).toDF
df.withColumn(
"jsonValue",
- F.from_json(df.col("value"), expectedStructType)).
+ F.from_json(df.col("value"), expectedStructType)
+ ).
select("jsonValue.*")
}
val expected = Seq(
"Foo" -> Seq(new Subject("math"), new Subject("physics")),
- "Bar" -> Seq(new Subject("biology"), new Subject("geography")))
+ "Bar" -> Seq(new Subject("biology"), new Subject("geography"))
+ )
ds1.collect.run().map {
case (_1, _2) => _1 -> _2.toSeq
@@ -451,8 +497,17 @@ final class RecordEncoderTests extends TypedDatasetSuite with Matchers {
encoder.jvmRepr shouldBe ObjectType(classOf[B])
val expectedStructType = StructType(Seq(
- StructField("a", ArrayType(StructType(Seq(
- StructField("x", IntegerType, false))), false), false)))
+ StructField(
+ "a",
+ ArrayType(
+ StructType(Seq(
+ StructField("x", IntegerType, false)
+ )),
+ false
+ ),
+ false
+ )
+ ))
encoder.catalystRepr shouldBe expectedStructType
@@ -491,7 +546,8 @@ final class RecordEncoderTests extends TypedDatasetSuite with Matchers {
val expectedStructType = StructType(Seq(
StructField("_1", IntegerType, false),
- StructField("_2", ArrayType(StringType, false), false)))
+ StructField("_2", ArrayType(StringType, false), false)
+ ))
encoder.catalystRepr shouldBe expectedStructType
@@ -501,18 +557,20 @@ final class RecordEncoderTests extends TypedDatasetSuite with Matchers {
val df = Seq(
"""{"_1":1, "_2":["foo", "bar"]}""",
- """{"_1":2, "_2":["lorem"]}""",
+ """{"_1":2, "_2":["lorem"]}"""
).toDF
df.withColumn(
"jsonValue",
- F.from_json(df.col("value"), expectedStructType)).
+ F.from_json(df.col("value"), expectedStructType)
+ ).
select("jsonValue.*")
}
val expected = Seq(
1 -> Seq(new Name("foo"), new Name("bar")),
- 2 -> Seq(new Name("lorem")))
+ 2 -> Seq(new Name("lorem"))
+ )
ds1.collect.run() shouldBe expected
}
@@ -523,7 +581,13 @@ final class RecordEncoderTests extends TypedDatasetSuite with Matchers {
case class UnitsOnly(a: Unit, b: Unit)
case class TupleWithUnits(
- u0: Unit, _1: Int, u1: Unit, u2: Unit, _2: String, u3: Unit)
+ u0: Unit,
+ _1: Int,
+ u1: Unit,
+ u2: Unit,
+ _2: String,
+ u3: Unit
+)
object TupleWithUnits {
def apply(_1: Int, _2: String): TupleWithUnits =
diff --git a/dataset/src/test/scala/frameless/SchemaTests.scala b/dataset/src/test/scala/frameless/SchemaTests.scala
index 89fed7f86..520ee195e 100644
--- a/dataset/src/test/scala/frameless/SchemaTests.scala
+++ b/dataset/src/test/scala/frameless/SchemaTests.scala
@@ -2,7 +2,7 @@ package frameless
import frameless.functions.aggregate._
import frameless.functions._
-import org.apache.spark.sql.types.{ Metadata, StructType }
+import org.apache.spark.sql.types.{Metadata, StructType}
import org.scalacheck.Prop
import org.scalacheck.Prop._
import org.scalatest.matchers.should.Matchers
@@ -20,9 +20,9 @@ class SchemaTests extends TypedDatasetSuite with Matchers {
}
def prop[A](
- dataset: TypedDataset[A],
- ignoreNullable: Boolean = false
- ): Prop = {
+ dataset: TypedDataset[A],
+ ignoreNullable: Boolean = false
+ ): Prop = {
val schema = dataset.dataset.schema
Prop.all(
diff --git a/dataset/src/test/scala/frameless/SelfJoinTests.scala b/dataset/src/test/scala/frameless/SelfJoinTests.scala
index 742429108..7c8a4f68b 100644
--- a/dataset/src/test/scala/frameless/SelfJoinTests.scala
+++ b/dataset/src/test/scala/frameless/SelfJoinTests.scala
@@ -2,7 +2,7 @@ package frameless
import org.scalacheck.Prop
import org.scalacheck.Prop._
-import org.apache.spark.sql.{ SparkSession, functions => sparkFunctions }
+import org.apache.spark.sql.{functions => sparkFunctions, SparkSession}
class SelfJoinTests extends TypedDatasetSuite {
@@ -10,10 +10,10 @@ class SelfJoinTests extends TypedDatasetSuite {
// [error] Join condition is missing or trivial.
// [error] Use the CROSS JOIN syntax to allow cartesian products between these relations.
def allowTrivialJoin[T](
- body: => T
- )(implicit
- session: SparkSession
- ): T = {
+ body: => T
+ )(implicit
+ session: SparkSession
+ ): T = {
val crossJoin = "spark.sql.crossJoin.enabled"
val oldSetting = session.conf.get(crossJoin)
session.conf.set(crossJoin, "true")
@@ -23,10 +23,10 @@ class SelfJoinTests extends TypedDatasetSuite {
}
def allowAmbiguousJoin[T](
- body: => T
- )(implicit
- session: SparkSession
- ): T = {
+ body: => T
+ )(implicit
+ session: SparkSession
+ ): T = {
val crossJoin = "spark.sql.analyzer.failAmbiguousSelfJoin"
val oldSetting = session.conf.get(crossJoin)
session.conf.set(crossJoin, "false")
@@ -37,9 +37,9 @@ class SelfJoinTests extends TypedDatasetSuite {
test("self join with colLeft/colRight disambiguation") {
def prop[A: TypedEncoder: Ordering, B: TypedEncoder: Ordering](
- dx: List[X2[A, B]],
- d: X2[A, B]
- ): Prop = allowAmbiguousJoin {
+ dx: List[X2[A, B]],
+ d: X2[A, B]
+ ): Prop = allowAmbiguousJoin {
val data = d :: dx
val ds = TypedDataset.create(data)
@@ -65,9 +65,9 @@ class SelfJoinTests extends TypedDatasetSuite {
test("self join collects correct values via colLeft/colRight") {
def prop[A: TypedEncoder: Ordering, B: TypedEncoder: Ordering](
- dx: List[X2[A, B]],
- d: X2[A, B]
- ): Prop = allowAmbiguousJoin {
+ dx: List[X2[A, B]],
+ d: X2[A, B]
+ ): Prop = allowAmbiguousJoin {
val data = d :: dx
val ds = TypedDataset.create(data)
@@ -96,9 +96,9 @@ class SelfJoinTests extends TypedDatasetSuite {
test("trivial self join") {
def prop[A: TypedEncoder: Ordering, B: TypedEncoder: Ordering](
- dx: List[X2[A, B]],
- d: X2[A, B]
- ): Prop =
+ dx: List[X2[A, B]],
+ d: X2[A, B]
+ ): Prop =
allowTrivialJoin {
allowAmbiguousJoin {
@@ -125,10 +125,9 @@ class SelfJoinTests extends TypedDatasetSuite {
test("self join with unambiguous expression") {
def prop[
- A: TypedEncoder: CatalystNumeric: Ordering,
- B: TypedEncoder: Ordering
- ](data: List[X3[A, A, B]]
- ): Prop = allowAmbiguousJoin {
+ A: TypedEncoder: CatalystNumeric: Ordering,
+ B: TypedEncoder: Ordering
+ ](data: List[X3[A, A, B]]): Prop = allowAmbiguousJoin {
val ds = TypedDataset.create(data)
val df1 = ds.dataset.alias("df1")
@@ -144,8 +143,10 @@ class SelfJoinTests extends TypedDatasetSuite {
val typed = ds
.joinInner(ds)(
- (ds.colLeft('a) + ds.colLeft('b)) === (ds.colRight('a) + ds
- .colRight('b))
+ (ds.colLeft('a) + ds.colLeft('b)) ===
+ (ds.colRight('a) +
+ ds
+ .colRight('b))
)
.count()
.run()
@@ -160,10 +161,9 @@ class SelfJoinTests extends TypedDatasetSuite {
"Do you want ambiguous self join? This is how you get ambiguous self join."
) {
def prop[
- A: TypedEncoder: CatalystNumeric: Ordering,
- B: TypedEncoder: Ordering
- ](data: List[X3[A, A, B]]
- ): Prop =
+ A: TypedEncoder: CatalystNumeric: Ordering,
+ B: TypedEncoder: Ordering
+ ](data: List[X3[A, A, B]]): Prop =
allowTrivialJoin {
allowAmbiguousJoin {
val ds = TypedDataset.create(data)
@@ -195,11 +195,11 @@ class SelfJoinTests extends TypedDatasetSuite {
test("colLeft and colRight are equivalent to col outside of joins") {
def prop[A, B, C, D](
- data: Vector[X4[A, B, C, D]]
- )(implicit
- ea: TypedEncoder[A],
- ex4: TypedEncoder[X4[A, B, C, D]]
- ): Prop = {
+ data: Vector[X4[A, B, C, D]]
+ )(implicit
+ ea: TypedEncoder[A],
+ ex4: TypedEncoder[X4[A, B, C, D]]
+ ): Prop = {
val dataset = TypedDataset.create(data)
val selectedCol =
dataset.select(dataset.col[A]('a)).collect().run().toVector
@@ -219,11 +219,11 @@ class SelfJoinTests extends TypedDatasetSuite {
test("colLeft and colRight are equivalent to col outside of joins - via files (codegen)") {
def prop[A, B, C, D](
- data: Vector[X4[A, B, C, D]]
- )(implicit
- ea: TypedEncoder[A],
- ex4: TypedEncoder[X4[A, B, C, D]]
- ): Prop = {
+ data: Vector[X4[A, B, C, D]]
+ )(implicit
+ ea: TypedEncoder[A],
+ ex4: TypedEncoder[X4[A, B, C, D]]
+ ): Prop = {
TypedDataset
.create(data)
.write
diff --git a/dataset/src/test/scala/frameless/TypedDatasetSuite.scala b/dataset/src/test/scala/frameless/TypedDatasetSuite.scala
index e31be7cbc..a602add90 100644
--- a/dataset/src/test/scala/frameless/TypedDatasetSuite.scala
+++ b/dataset/src/test/scala/frameless/TypedDatasetSuite.scala
@@ -2,23 +2,24 @@ package frameless
import com.globalmentor.apache.hadoop.fs.BareLocalFileSystem
import org.apache.hadoop.fs.local.StreamingFS
-import org.apache.spark.{ SparkConf, SparkContext }
-import org.apache.spark.sql.{ SQLContext, SparkSession }
+import org.apache.spark.{SparkConf, SparkContext}
+import org.apache.spark.sql.{SQLContext, SparkSession}
import org.scalactic.anyvals.PosZInt
import org.scalatest.BeforeAndAfterAll
import org.scalatestplus.scalacheck.Checkers
import org.scalacheck.Prop
import org.scalacheck.Prop._
-import scala.util.{ Properties, Try }
+import scala.util.{Properties, Try}
import org.scalatest.funsuite.AnyFunSuite
trait SparkTesting { self: BeforeAndAfterAll =>
- val appID: String = new java.util.Date().toString + math
- .floor(math.random * 10e4)
- .toLong
- .toString
+ val appID: String = new java.util.Date().toString +
+ math
+ .floor(math.random * 10e4)
+ .toLong
+ .toString
/**
* Allows bare naked to be used instead of winutils for testing / dev
@@ -93,11 +94,11 @@ class TypedDatasetSuite
implicit val sparkDelay: SparkDelay[Job] = Job.framelessSparkDelayForJob
def approximatelyEqual[A](
- a: A,
- b: A
- )(implicit
- numeric: Numeric[A]
- ): Prop = {
+ a: A,
+ b: A
+ )(implicit
+ numeric: Numeric[A]
+ ): Prop = {
val da = numeric.toDouble(a)
val db = numeric.toDouble(b)
val epsilon = 1e-6
diff --git a/dataset/src/test/scala/frameless/UdtEncodedClass.scala b/dataset/src/test/scala/frameless/UdtEncodedClass.scala
index 4e5c2c6d9..e154428be 100644
--- a/dataset/src/test/scala/frameless/UdtEncodedClass.scala
+++ b/dataset/src/test/scala/frameless/UdtEncodedClass.scala
@@ -9,7 +9,7 @@ import org.apache.spark.sql.FramelessInternals.UserDefinedType
class UdtEncodedClass(val a: Int, val b: Array[Double]) {
override def equals(other: Any): Boolean = other match {
case that: UdtEncodedClass => a == that.a && java.util.Arrays.equals(b, that.b)
- case _ => false
+ case _ => false
}
override def hashCode(): Int = {
diff --git a/dataset/src/test/scala/frameless/WithColumnTest.scala b/dataset/src/test/scala/frameless/WithColumnTest.scala
index c41c4e726..2950dc42d 100644
--- a/dataset/src/test/scala/frameless/WithColumnTest.scala
+++ b/dataset/src/test/scala/frameless/WithColumnTest.scala
@@ -8,28 +8,28 @@ class WithColumnTest extends TypedDatasetSuite {
import WithColumnTest._
test("fail to compile on missing value") {
- val f: TypedDataset[X] = TypedDataset.create(X(1,1) :: X(1,1) :: X(1,10) :: Nil)
+ val f: TypedDataset[X] = TypedDataset.create(X(1, 1) :: X(1, 1) :: X(1, 10) :: Nil)
illTyped {
"""val fNew: TypedDataset[XMissing] = f.withColumn[XMissing](f('j) === 10)"""
}
}
test("fail to compile on different column name") {
- val f: TypedDataset[X] = TypedDataset.create(X(1,1) :: X(1,1) :: X(1,10) :: Nil)
+ val f: TypedDataset[X] = TypedDataset.create(X(1, 1) :: X(1, 1) :: X(1, 10) :: Nil)
illTyped {
"""val fNew: TypedDataset[XDifferentColumnName] = f.withColumn[XDifferentColumnName](f('j) === 10)"""
}
}
test("fail to compile on added column name") {
- val f: TypedDataset[X] = TypedDataset.create(X(1,1) :: X(1,1) :: X(1,10) :: Nil)
+ val f: TypedDataset[X] = TypedDataset.create(X(1, 1) :: X(1, 1) :: X(1, 10) :: Nil)
illTyped {
"""val fNew: TypedDataset[XAdded] = f.withColumn[XAdded](f('j) === 10)"""
}
}
test("fail to compile on wrong typed column") {
- val f: TypedDataset[X] = TypedDataset.create(X(1,1) :: X(1,1) :: X(1,10) :: Nil)
+ val f: TypedDataset[X] = TypedDataset.create(X(1, 1) :: X(1, 1) :: X(1, 10) :: Nil)
illTyped {
"""val fNew: TypedDataset[XWrongType] = f.withColumn[XWrongType](f('j) === 10)"""
}
@@ -54,7 +54,7 @@ class WithColumnTest extends TypedDatasetSuite {
}
test("update in place") {
- def prop[A : TypedEncoder](startValue: A, replaceValue: A): Prop = {
+ def prop[A: TypedEncoder](startValue: A, replaceValue: A): Prop = {
val d = TypedDataset.create(X2(startValue, replaceValue) :: Nil)
val X2(a, b) = d.withColumnReplaced('a, d('b))
diff --git a/dataset/src/test/scala/frameless/XN.scala b/dataset/src/test/scala/frameless/XN.scala
index c23d4b45d..ffcc59a76 100644
--- a/dataset/src/test/scala/frameless/XN.scala
+++ b/dataset/src/test/scala/frameless/XN.scala
@@ -97,9 +97,16 @@ object X6 {
implicit def arbitrary[A: Arbitrary, B: Arbitrary, C: Arbitrary, D: Arbitrary, E: Arbitrary, F: Arbitrary]: Arbitrary[X6[A, B, C, D, E, F]] =
Arbitrary(Arbitrary.arbTuple6[A, B, C, D, E, F].arbitrary.map((X6.apply[A, B, C, D, E, F] _).tupled))
- implicit def cogen[A, B, C, D, E, F](implicit A: Cogen[A], B: Cogen[B], C: Cogen[C], D: Cogen[D], E: Cogen[E], F: Cogen[F]): Cogen[X6[A, B, C, D, E, F]] =
+ implicit def cogen[A, B, C, D, E, F](implicit
+ A: Cogen[A],
+ B: Cogen[B],
+ C: Cogen[C],
+ D: Cogen[D],
+ E: Cogen[E],
+ F: Cogen[F]
+ ): Cogen[X6[A, B, C, D, E, F]] =
Cogen.tuple6(A, B, C, D, E, F).contramap(x => (x.a, x.b, x.c, x.d, x.e, x.f))
implicit def ordering[A: Ordering, B: Ordering, C: Ordering, D: Ordering, E: Ordering, F: Ordering]: Ordering[X6[A, B, C, D, E, F]] =
Ordering.Tuple6[A, B, C, D, E, F].on(x => (x.a, x.b, x.c, x.d, x.e, x.f))
-}
\ No newline at end of file
+}
diff --git a/dataset/src/test/scala/frameless/forward/CheckpointTests.scala b/dataset/src/test/scala/frameless/forward/CheckpointTests.scala
index 9a1ff8b44..91ff570a9 100644
--- a/dataset/src/test/scala/frameless/forward/CheckpointTests.scala
+++ b/dataset/src/test/scala/frameless/forward/CheckpointTests.scala
@@ -3,7 +3,6 @@ package frameless
import org.scalacheck.Prop
import org.scalacheck.Prop.{forAll, _}
-
class CheckpointTests extends TypedDatasetSuite {
test("checkpoint") {
def prop[A: TypedEncoder](data: Vector[A], isEager: Boolean): Prop = {
@@ -18,4 +17,4 @@ class CheckpointTests extends TypedDatasetSuite {
check(forAll(prop[Int] _))
check(forAll(prop[String] _))
}
-}
\ No newline at end of file
+}
diff --git a/dataset/src/test/scala/frameless/forward/ColumnsTests.scala b/dataset/src/test/scala/frameless/forward/ColumnsTests.scala
index 282a72c9a..ec7c4c3f8 100644
--- a/dataset/src/test/scala/frameless/forward/ColumnsTests.scala
+++ b/dataset/src/test/scala/frameless/forward/ColumnsTests.scala
@@ -13,9 +13,14 @@ class ColumnsTests extends TypedDatasetSuite {
val x5 = X5(i, s, b, l, d) :: Nil
val x6 = X6(i, s, b, l, d, by) :: Nil
- val datasets = Seq(TypedDataset.create(x1), TypedDataset.create(x2),
- TypedDataset.create(x3), TypedDataset.create(x4),
- TypedDataset.create(x5), TypedDataset.create(x6))
+ val datasets = Seq(
+ TypedDataset.create(x1),
+ TypedDataset.create(x2),
+ TypedDataset.create(x3),
+ TypedDataset.create(x4),
+ TypedDataset.create(x5),
+ TypedDataset.create(x6)
+ )
Prop.all(datasets.flatMap { dataset =>
val columns = dataset.dataset.columns
@@ -27,4 +32,4 @@ class ColumnsTests extends TypedDatasetSuite {
check(forAll(prop _))
}
-}
\ No newline at end of file
+}
diff --git a/dataset/src/test/scala/frameless/forward/DistinctTests.scala b/dataset/src/test/scala/frameless/forward/DistinctTests.scala
index 44da5e59e..fd8ac0719 100644
--- a/dataset/src/test/scala/frameless/forward/DistinctTests.scala
+++ b/dataset/src/test/scala/frameless/forward/DistinctTests.scala
@@ -7,7 +7,7 @@ import math.Ordering
class DistinctTests extends TypedDatasetSuite {
test("distinct") {
// Comparison done with `.sorted` because order is not preserved by Spark for this operation.
- def prop[A: TypedEncoder : Ordering](data: Vector[A]): Prop =
+ def prop[A: TypedEncoder: Ordering](data: Vector[A]): Prop =
TypedDataset.create(data).distinct.collect().run().toVector.sorted ?= data.distinct.sorted
check(forAll(prop[Int] _))
diff --git a/dataset/src/test/scala/frameless/forward/HeadTests.scala b/dataset/src/test/scala/frameless/forward/HeadTests.scala
index 63f76e003..96c13a3ea 100644
--- a/dataset/src/test/scala/frameless/forward/HeadTests.scala
+++ b/dataset/src/test/scala/frameless/forward/HeadTests.scala
@@ -9,14 +9,14 @@ import scala.reflect.ClassTag
import org.scalatest.matchers.should.Matchers
class HeadTests extends TypedDatasetSuite with Matchers {
- def propArray[A: TypedEncoder : ClassTag : Ordering](data: Vector[X1[A]])(implicit c: SparkSession): Prop = {
+ def propArray[A: TypedEncoder: ClassTag: Ordering](data: Vector[X1[A]])(implicit c: SparkSession): Prop = {
import c.implicits._
- if(data.nonEmpty) {
+ if (data.nonEmpty) {
val tds = TypedDataset.
create(c.createDataset(data)(
TypedExpressionEncoder.apply[X1[A]]
).orderBy($"a".desc))
- (tds.headOption().run().get ?= data.max).
+ (tds.headOption().run().get ?= data.max).
&&(tds.head(1).run().head ?= data.max).
&&(tds.head(4).run().toVector ?=
data.sortBy(_.a)(implicitly[Ordering[A]].reverse).take(4))
diff --git a/dataset/src/test/scala/frameless/forward/InputFilesTests.scala b/dataset/src/test/scala/frameless/forward/InputFilesTests.scala
index 246867e63..306070e20 100644
--- a/dataset/src/test/scala/frameless/forward/InputFilesTests.scala
+++ b/dataset/src/test/scala/frameless/forward/InputFilesTests.scala
@@ -25,7 +25,8 @@ class InputFilesTests extends TypedDatasetSuite with Matchers {
inputDataset.dataset.write.csv(filePath)
val dataset = TypedDataset.createUnsafe(
- implicitly[SparkSession].sqlContext.read.schema(inputDataset.schema).csv(filePath))
+ implicitly[SparkSession].sqlContext.read.schema(inputDataset.schema).csv(filePath)
+ )
dataset.inputFiles sameElements dataset.dataset.inputFiles
}
@@ -36,7 +37,8 @@ class InputFilesTests extends TypedDatasetSuite with Matchers {
inputDataset.dataset.write.json(filePath)
val dataset = TypedDataset.createUnsafe(
- implicitly[SparkSession].sqlContext.read.schema(inputDataset.schema).json(filePath))
+ implicitly[SparkSession].sqlContext.read.schema(inputDataset.schema).json(filePath)
+ )
dataset.inputFiles sameElements dataset.dataset.inputFiles
}
@@ -45,4 +47,4 @@ class InputFilesTests extends TypedDatasetSuite with Matchers {
check(forAll(propCsv[String] _))
check(forAll(propJson[String] _))
}
-}
\ No newline at end of file
+}
diff --git a/dataset/src/test/scala/frameless/forward/IntersectTests.scala b/dataset/src/test/scala/frameless/forward/IntersectTests.scala
index f0edb856e..c6bc110d0 100644
--- a/dataset/src/test/scala/frameless/forward/IntersectTests.scala
+++ b/dataset/src/test/scala/frameless/forward/IntersectTests.scala
@@ -6,7 +6,7 @@ import math.Ordering
class IntersectTests extends TypedDatasetSuite {
test("intersect") {
- def prop[A: TypedEncoder : Ordering](data1: Vector[A], data2: Vector[A]): Prop = {
+ def prop[A: TypedEncoder: Ordering](data1: Vector[A], data2: Vector[A]): Prop = {
val dataset1 = TypedDataset.create(data1)
val dataset2 = TypedDataset.create(data2)
val datasetIntersect = dataset1.intersect(dataset2).collect().run().toVector
diff --git a/dataset/src/test/scala/frameless/forward/IsLocalTests.scala b/dataset/src/test/scala/frameless/forward/IsLocalTests.scala
index f61d25cd1..71fbd27ce 100644
--- a/dataset/src/test/scala/frameless/forward/IsLocalTests.scala
+++ b/dataset/src/test/scala/frameless/forward/IsLocalTests.scala
@@ -14,4 +14,4 @@ class IsLocalTests extends TypedDatasetSuite {
check(forAll(prop[Int] _))
check(forAll(prop[String] _))
}
-}
\ No newline at end of file
+}
diff --git a/dataset/src/test/scala/frameless/forward/IsStreamingTests.scala b/dataset/src/test/scala/frameless/forward/IsStreamingTests.scala
index dd1874977..b056bc409 100644
--- a/dataset/src/test/scala/frameless/forward/IsStreamingTests.scala
+++ b/dataset/src/test/scala/frameless/forward/IsStreamingTests.scala
@@ -14,4 +14,4 @@ class IsStreamingTests extends TypedDatasetSuite {
check(forAll(prop[Int] _))
check(forAll(prop[String] _))
}
-}
\ No newline at end of file
+}
diff --git a/dataset/src/test/scala/frameless/forward/QueryExecutionTests.scala b/dataset/src/test/scala/frameless/forward/QueryExecutionTests.scala
index d59e250df..6fc9a5750 100644
--- a/dataset/src/test/scala/frameless/forward/QueryExecutionTests.scala
+++ b/dataset/src/test/scala/frameless/forward/QueryExecutionTests.scala
@@ -14,4 +14,4 @@ class QueryExecutionTests extends TypedDatasetSuite {
check(forAll(prop[Int] _))
check(forAll(prop[String] _))
}
-}
\ No newline at end of file
+}
diff --git a/dataset/src/test/scala/frameless/forward/RandomSplitTests.scala b/dataset/src/test/scala/frameless/forward/RandomSplitTests.scala
index 4cc9a4fde..24c5ab42e 100644
--- a/dataset/src/test/scala/frameless/forward/RandomSplitTests.scala
+++ b/dataset/src/test/scala/frameless/forward/RandomSplitTests.scala
@@ -12,7 +12,7 @@ class RandomSplitTests extends TypedDatasetSuite with Matchers {
val nonEmptyPositiveArray: Gen[Array[Double]] = Gen.nonEmptyListOf(Gen.posNum[Double]).map(_.toArray)
test("randomSplit(weight, seed)") {
- def prop[A: TypedEncoder : Arbitrary] = forAll(vectorGen[A], nonEmptyPositiveArray, arbitrary[Long]) {
+ def prop[A: TypedEncoder: Arbitrary] = forAll(vectorGen[A], nonEmptyPositiveArray, arbitrary[Long]) {
(data: Vector[A], weights: Array[Double], seed: Long) =>
val dataset = TypedDataset.create(data)
@@ -25,7 +25,7 @@ class RandomSplitTests extends TypedDatasetSuite with Matchers {
}
test("randomSplitAsList(weight, seed)") {
- def prop[A: TypedEncoder : Arbitrary] = forAll(vectorGen[A], nonEmptyPositiveArray, arbitrary[Long]) {
+ def prop[A: TypedEncoder: Arbitrary] = forAll(vectorGen[A], nonEmptyPositiveArray, arbitrary[Long]) {
(data: Vector[A], weights: Array[Double], seed: Long) =>
val dataset = TypedDataset.create(data)
diff --git a/dataset/src/test/scala/frameless/forward/SQLContextTests.scala b/dataset/src/test/scala/frameless/forward/SQLContextTests.scala
index 06c14f651..6a9456d56 100644
--- a/dataset/src/test/scala/frameless/forward/SQLContextTests.scala
+++ b/dataset/src/test/scala/frameless/forward/SQLContextTests.scala
@@ -1,15 +1,16 @@
package frameless
import org.scalacheck.Prop
-import org.scalacheck.Prop.{ forAll, _ }
+import org.scalacheck.Prop.{forAll, _}
class SQLContextTests extends TypedDatasetSuite {
test("sqlContext") {
def prop[A: TypedEncoder](data: Vector[A]): Prop = {
val dataset = TypedDataset.create[A](data)
- dataset.sqlContext =? org.apache.spark.sql.FramelessInternals
- .sqlContext(dataset.dataset)
+ dataset.sqlContext =?
+ org.apache.spark.sql.FramelessInternals
+ .sqlContext(dataset.dataset)
}
check(forAll(prop[Int] _))
diff --git a/dataset/src/test/scala/frameless/forward/SparkSessionTests.scala b/dataset/src/test/scala/frameless/forward/SparkSessionTests.scala
index c5d0da338..ce3130d3b 100644
--- a/dataset/src/test/scala/frameless/forward/SparkSessionTests.scala
+++ b/dataset/src/test/scala/frameless/forward/SparkSessionTests.scala
@@ -14,4 +14,4 @@ class SparkSessionTests extends TypedDatasetSuite {
check(forAll(prop[Int] _))
check(forAll(prop[String] _))
}
-}
\ No newline at end of file
+}
diff --git a/dataset/src/test/scala/frameless/forward/StorageLevelTests.scala b/dataset/src/test/scala/frameless/forward/StorageLevelTests.scala
index 3ac93773e..63bd9be5e 100644
--- a/dataset/src/test/scala/frameless/forward/StorageLevelTests.scala
+++ b/dataset/src/test/scala/frameless/forward/StorageLevelTests.scala
@@ -7,12 +7,23 @@ import org.scalacheck.{Arbitrary, Gen}
class StorageLevelTests extends TypedDatasetSuite {
- val storageLevelGen: Gen[StorageLevel] = Gen.oneOf(Seq(NONE, DISK_ONLY, DISK_ONLY_2, MEMORY_ONLY,
- MEMORY_ONLY_2, MEMORY_ONLY_SER, MEMORY_ONLY_SER_2, MEMORY_AND_DISK,
- MEMORY_AND_DISK_2, MEMORY_AND_DISK_SER, MEMORY_AND_DISK_SER_2, OFF_HEAP))
+ val storageLevelGen: Gen[StorageLevel] = Gen.oneOf(Seq(
+ NONE,
+ DISK_ONLY,
+ DISK_ONLY_2,
+ MEMORY_ONLY,
+ MEMORY_ONLY_2,
+ MEMORY_ONLY_SER,
+ MEMORY_ONLY_SER_2,
+ MEMORY_AND_DISK,
+ MEMORY_AND_DISK_2,
+ MEMORY_AND_DISK_SER,
+ MEMORY_AND_DISK_SER_2,
+ OFF_HEAP
+ ))
test("storageLevel") {
- def prop[A: TypedEncoder : Arbitrary] = forAll(vectorGen[A], storageLevelGen) {
+ def prop[A: TypedEncoder: Arbitrary] = forAll(vectorGen[A], storageLevelGen) {
(data: Vector[A], storageLevel: StorageLevel) =>
val dataset = TypedDataset.create(data)
if (storageLevel != StorageLevel.NONE)
@@ -26,4 +37,4 @@ class StorageLevelTests extends TypedDatasetSuite {
check(prop[Int])
check(prop[String])
}
-}
\ No newline at end of file
+}
diff --git a/dataset/src/test/scala/frameless/forward/TakeTests.scala b/dataset/src/test/scala/frameless/forward/TakeTests.scala
index eec77bc80..98a877f87 100644
--- a/dataset/src/test/scala/frameless/forward/TakeTests.scala
+++ b/dataset/src/test/scala/frameless/forward/TakeTests.scala
@@ -6,17 +6,15 @@ import scala.reflect.ClassTag
class TakeTests extends TypedDatasetSuite {
test("take") {
- def prop[A: TypedEncoder](n: Int, data: Vector[A]): Prop =
- (n >= 0) ==> (TypedDataset.create(data).take(n).run().toVector =? data.take(n))
+ def prop[A: TypedEncoder](n: Int, data: Vector[A]): Prop = (n >= 0) ==> (TypedDataset.create(data).take(n).run().toVector =? data.take(n))
- def propArray[A: TypedEncoder: ClassTag](n: Int, data: Vector[X1[Array[A]]]): Prop =
- (n >= 0) ==> {
- Prop {
- TypedDataset.create(data).take(n).run().toVector.zip(data.take(n)).forall {
- case (X1(l), X1(r)) => l sameElements r
- }
+ def propArray[A: TypedEncoder: ClassTag](n: Int, data: Vector[X1[Array[A]]]): Prop = (n >= 0) ==> {
+ Prop {
+ TypedDataset.create(data).take(n).run().toVector.zip(data.take(n)).forall {
+ case (X1(l), X1(r)) => l sameElements r
}
}
+ }
check(forAll(prop[Int] _))
check(forAll(prop[String] _))
diff --git a/dataset/src/test/scala/frameless/forward/ToJSONTests.scala b/dataset/src/test/scala/frameless/forward/ToJSONTests.scala
index 5ed79a9c9..5e78ea6d0 100644
--- a/dataset/src/test/scala/frameless/forward/ToJSONTests.scala
+++ b/dataset/src/test/scala/frameless/forward/ToJSONTests.scala
@@ -14,4 +14,4 @@ class ToJSONTests extends TypedDatasetSuite {
check(forAll(prop[Int] _))
check(forAll(prop[String] _))
}
-}
\ No newline at end of file
+}
diff --git a/dataset/src/test/scala/frameless/forward/UnionTests.scala b/dataset/src/test/scala/frameless/forward/UnionTests.scala
index 6cd8f4005..45860046e 100644
--- a/dataset/src/test/scala/frameless/forward/UnionTests.scala
+++ b/dataset/src/test/scala/frameless/forward/UnionTests.scala
@@ -63,4 +63,4 @@ class UnionTests extends TypedDatasetSuite {
final case class Foo[A, B](x: A, y: B)
final case class Bar[A, B](y: B, x: A)
final case class Baz[A, B, C](z: C, y: B, x: A)
-final case class Wrong[A, B, C](a: A, b: B, c: C)
\ No newline at end of file
+final case class Wrong[A, B, C](a: A, b: B, c: C)
diff --git a/dataset/src/test/scala/frameless/forward/WriteStreamTests.scala b/dataset/src/test/scala/frameless/forward/WriteStreamTests.scala
index 368147c93..c3b2d6dee 100644
--- a/dataset/src/test/scala/frameless/forward/WriteStreamTests.scala
+++ b/dataset/src/test/scala/frameless/forward/WriteStreamTests.scala
@@ -49,7 +49,7 @@ class WriteStreamTests extends TypedDatasetSuite {
.start()
tester.processAllAvailable()
val output = spark.table(s"testCsv_$uidNoHyphens").as[A]
- TypedDataset.create(data).collect().run().groupBy(identity) ?= output.collect().groupBy(identity).map { case (k, arr) => (k, arr.toSeq) }
+ TypedDataset.create(data).collect().run().groupBy(identity) ?= output.collect().groupBy(identity).map { case (k, arr) => (k, arr.toSeq) }
}
check(forAll(Gen.nonEmptyListOf(Gen.alphaNumStr.suchThat(_.nonEmpty)))(prop[String]))
@@ -79,7 +79,7 @@ class WriteStreamTests extends TypedDatasetSuite {
.start()
tester.processAllAvailable()
val output = spark.table(s"testParquet_$uidNoHyphens").as[A]
- TypedDataset.create(data).collect().run().groupBy(identity) ?= output.collect().groupBy(identity).map { case (k, arr) => (k, arr.toSeq) }
+ TypedDataset.create(data).collect().run().groupBy(identity) ?= output.collect().groupBy(identity).map { case (k, arr) => (k, arr.toSeq) }
}
check(forAll(Gen.nonEmptyListOf(genWriteExample))(prop[WriteExample]))
diff --git a/dataset/src/test/scala/frameless/functions/AggregateFunctionsTests.scala b/dataset/src/test/scala/frameless/functions/AggregateFunctionsTests.scala
index 201d93c63..d469608ba 100644
--- a/dataset/src/test/scala/frameless/functions/AggregateFunctionsTests.scala
+++ b/dataset/src/test/scala/frameless/functions/AggregateFunctionsTests.scala
@@ -21,7 +21,7 @@ class AggregateFunctionsTests extends TypedDatasetSuite {
test("sum") {
case class Sum4Tests[A, B](sum: Seq[A] => B)
- def prop[A: TypedEncoder, Out: TypedEncoder : Numeric](xs: List[A])(
+ def prop[A: TypedEncoder, Out: TypedEncoder: Numeric](xs: List[A])(
implicit
summable: CatalystSummable[A, Out],
summer: Sum4Tests[A, Out]
@@ -33,7 +33,7 @@ class AggregateFunctionsTests extends TypedDatasetSuite {
datasetSum match {
case x :: Nil => approximatelyEqual(summer.sum(xs), x)
- case other => falsified
+ case other => falsified
}
}
@@ -61,7 +61,7 @@ class AggregateFunctionsTests extends TypedDatasetSuite {
test("sumDistinct") {
case class Sum4Tests[A, B](sum: Seq[A] => B)
- def prop[A: TypedEncoder, Out: TypedEncoder : Numeric](xs: List[A])(
+ def prop[A: TypedEncoder, Out: TypedEncoder: Numeric](xs: List[A])(
implicit
summable: CatalystSummable[A, Out],
summer: Sum4Tests[A, Out]
@@ -73,15 +73,15 @@ class AggregateFunctionsTests extends TypedDatasetSuite {
datasetSum match {
case x :: Nil => approximatelyEqual(summer.sum(xs), x)
- case other => falsified
+ case other => falsified
}
}
// Replicate Spark's behaviour : Ints and Shorts are cast to Long
// https://github.com/apache/spark/blob/7eb2ca8/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala#L37
implicit def summerLong = Sum4Tests[Long, Long](_.toSet.sum)
- implicit def summerInt = Sum4Tests[Int, Long]( x => x.toSet.map((_:Int).toLong).sum)
- implicit def summerShort = Sum4Tests[Short, Long](x => x.toSet.map((_:Short).toLong).sum)
+ implicit def summerInt = Sum4Tests[Int, Long](x => x.toSet.map((_: Int).toLong).sum)
+ implicit def summerShort = Sum4Tests[Short, Long](x => x.toSet.map((_: Short).toLong).sum)
check(forAll(prop[Long, Long] _))
check(forAll(prop[Int, Long] _))
@@ -95,7 +95,7 @@ class AggregateFunctionsTests extends TypedDatasetSuite {
test("avg") {
case class Averager4Tests[A, B](avg: Seq[A] => B)
- def prop[A: TypedEncoder, Out: TypedEncoder : Numeric](xs: List[A])(
+ def prop[A: TypedEncoder, Out: TypedEncoder: Numeric](xs: List[A])(
implicit
averageable: CatalystAverageable[A, Out],
averager: Averager4Tests[A, Out]
@@ -107,21 +107,21 @@ class AggregateFunctionsTests extends TypedDatasetSuite {
if (datasetAvg.size > 2) falsified
else xs match {
- case Nil => datasetAvg ?= Vector()
+ case Nil => datasetAvg ?= Vector()
case _ :: _ => datasetAvg.headOption match {
- case Some(x) => approximatelyEqual(averager.avg(xs), x)
- case None => falsified
- }
+ case Some(x) => approximatelyEqual(averager.avg(xs), x)
+ case None => falsified
+ }
}
}
// Replicate Spark's behaviour : If the datatype isn't BigDecimal cast type to Double
// https://github.com/apache/spark/blob/7eb2ca8/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala#L50
- implicit def averageDecimal = Averager4Tests[BigDecimal, BigDecimal](as => as.sum/as.size)
- implicit def averageDouble = Averager4Tests[Double, Double](as => as.sum/as.size)
- implicit def averageLong = Averager4Tests[Long, Double](as => as.map(_.toDouble).sum/as.size)
- implicit def averageInt = Averager4Tests[Int, Double](as => as.map(_.toDouble).sum/as.size)
- implicit def averageShort = Averager4Tests[Short, Double](as => as.map(_.toDouble).sum/as.size)
+ implicit def averageDecimal = Averager4Tests[BigDecimal, BigDecimal](as => as.sum / as.size)
+ implicit def averageDouble = Averager4Tests[Double, Double](as => as.sum / as.size)
+ implicit def averageLong = Averager4Tests[Long, Double](as => as.map(_.toDouble).sum / as.size)
+ implicit def averageInt = Averager4Tests[Int, Double](as => as.map(_.toDouble).sum / as.size)
+ implicit def averageShort = Averager4Tests[Short, Double](as => as.map(_.toDouble).sum / as.size)
/* under 3.4 an oddity was detected:
Falsified after 2 successful property evaluations.
@@ -141,7 +141,7 @@ class AggregateFunctionsTests extends TypedDatasetSuite {
}
test("stddev and variance") {
- def prop[A: TypedEncoder : CatalystVariance : Numeric](xs: List[A]): Prop = {
+ def prop[A: TypedEncoder: CatalystVariance: Numeric](xs: List[A]): Prop = {
val numeric = implicitly[Numeric[A]]
val dataset = TypedDataset.create(xs.map(X1(_)))
val A = dataset.col[A]('a)
@@ -225,7 +225,7 @@ class AggregateFunctionsTests extends TypedDatasetSuite {
val A = dataset.col[Long]('a)
val datasetMax = dataset.agg(max(A) * 2).collect().run().headOption
- datasetMax ?= (if(xs.isEmpty) None else Some(xs.max * 2))
+ datasetMax ?= (if (xs.isEmpty) None else Some(xs.max * 2))
}
check(forAll(prop _))
@@ -336,7 +336,7 @@ class AggregateFunctionsTests extends TypedDatasetSuite {
}
test("collectList") {
- def prop[A: TypedEncoder : Ordering](xs: List[X2[A, A]]): Prop = {
+ def prop[A: TypedEncoder: Ordering](xs: List[X2[A, A]]): Prop = {
val tds = TypedDataset.create(xs)
val tdsRes: Seq[(A, Vector[A])] = tds.groupBy(tds('a)).agg(collectList(tds('b))).collect().run()
@@ -350,7 +350,7 @@ class AggregateFunctionsTests extends TypedDatasetSuite {
}
test("collectSet") {
- def prop[A: TypedEncoder : Ordering](xs: List[X2[A, A]]): Prop = {
+ def prop[A: TypedEncoder: Ordering](xs: List[X2[A, A]]): Prop = {
val tds = TypedDataset.create(xs)
val tdsRes: Seq[(A, Vector[A])] = tds.groupBy(tds('a)).agg(collectSet(tds('b))).collect().run()
@@ -379,19 +379,15 @@ class AggregateFunctionsTests extends TypedDatasetSuite {
check(forAll(prop[BigDecimal] _))
}
-
- def bivariatePropTemplate[A: TypedEncoder, B: TypedEncoder]
- (
+ def bivariatePropTemplate[A: TypedEncoder, B: TypedEncoder](
xs: List[X3[Int, A, B]]
- )
- (
+ )(
framelessFun: (TypedColumn[X3[Int, A, B], A], TypedColumn[X3[Int, A, B], B]) => TypedAggregate[X3[Int, A, B], Option[Double]],
sparkFun: (Column, Column) => Column
- )
- (
+ )(
implicit
encEv: Encoder[(Int, A, B)],
- encEv2: Encoder[(Int,Option[Double])],
+ encEv2: Encoder[(Int, Option[Double])],
evCanBeDoubleA: CatalystCast[A, Double],
evCanBeDoubleB: CatalystCast[B, Double]
): Prop = {
@@ -407,34 +403,29 @@ class AggregateFunctionsTests extends TypedDatasetSuite {
val compBivar = cDF
.groupBy(cDF("_1"))
.agg(sparkFun(cDF("_2"), cDF("_3")))
- .map(
- row => {
- val grp = row.getInt(0)
- (grp, DoubleBehaviourUtils.nanNullHandler(row.get(1)))
- }
- )
+ .map(row => {
+ val grp = row.getInt(0)
+ (grp, DoubleBehaviourUtils.nanNullHandler(row.get(1)))
+ })
// Should be the same
tdBivar.toMap ?= compBivar.collect().toMap
}
- def univariatePropTemplate[A: TypedEncoder]
- (
+ def univariatePropTemplate[A: TypedEncoder](
xs: List[X2[Int, A]]
- )
- (
+ )(
framelessFun: (TypedColumn[X2[Int, A], A]) => TypedAggregate[X2[Int, A], Option[Double]],
sparkFun: (Column) => Column
- )
- (
+ )(
implicit
encEv: Encoder[(Int, A)],
- encEv2: Encoder[(Int,Option[Double])],
+ encEv2: Encoder[(Int, Option[Double])],
evCanBeDoubleA: CatalystCast[A, Double]
): Prop = {
val tds = TypedDataset.create(xs)
- //typed implementation of univariate stats function
+ // typed implementation of univariate stats function
val tdUnivar = tds.groupBy(tds('a)).agg(framelessFun(tds('b))).deserialized.map(kv =>
(kv._1, kv._2.flatMap(DoubleBehaviourUtils.nanNullHandler))
).collect().run()
@@ -444,12 +435,10 @@ class AggregateFunctionsTests extends TypedDatasetSuite {
val compUnivar = cDF
.groupBy(cDF("_1"))
.agg(sparkFun(cDF("_2")))
- .map(
- row => {
- val grp = row.getInt(0)
- (grp, DoubleBehaviourUtils.nanNullHandler(row.get(1)))
- }
- )
+ .map(row => {
+ val grp = row.getInt(0)
+ (grp, DoubleBehaviourUtils.nanNullHandler(row.get(1)))
+ })
// Should be the same
tdUnivar.toMap ?= compUnivar.collect().toMap
@@ -464,7 +453,7 @@ class AggregateFunctionsTests extends TypedDatasetSuite {
encEv: Encoder[(Int, A, B)],
evCanBeDoubleA: CatalystCast[A, Double],
evCanBeDoubleB: CatalystCast[B, Double]
- ): Prop = bivariatePropTemplate(xs)(corr[A,B,X3[Int, A, B]],org.apache.spark.sql.functions.corr)
+ ): Prop = bivariatePropTemplate(xs)(corr[A, B, X3[Int, A, B]], org.apache.spark.sql.functions.corr)
check(forAll(prop[Double, Double] _))
check(forAll(prop[Double, Int] _))
diff --git a/dataset/src/test/scala/frameless/functions/DateTimeStringBehaviourUtils.scala b/dataset/src/test/scala/frameless/functions/DateTimeStringBehaviourUtils.scala
index e22fe4337..3f90e4d2d 100644
--- a/dataset/src/test/scala/frameless/functions/DateTimeStringBehaviourUtils.scala
+++ b/dataset/src/test/scala/frameless/functions/DateTimeStringBehaviourUtils.scala
@@ -5,6 +5,6 @@ import org.apache.spark.sql.Row
object DateTimeStringBehaviourUtils {
val nullHandler: Row => Option[Int] = _.get(0) match {
case i: Int => Some(i)
- case _ => None
+ case _ => None
}
}
diff --git a/dataset/src/test/scala/frameless/functions/DoubleBehaviourUtils.scala b/dataset/src/test/scala/frameless/functions/DoubleBehaviourUtils.scala
index f3a8be581..591edcf77 100644
--- a/dataset/src/test/scala/frameless/functions/DoubleBehaviourUtils.scala
+++ b/dataset/src/test/scala/frameless/functions/DoubleBehaviourUtils.scala
@@ -13,8 +13,8 @@ object DoubleBehaviourUtils {
private val nanHandler: Double => Option[Double] = value => if (!value.equals(Double.NaN)) Option(value) else None
// Making sure that null => None and does not result in 0.0d because of row.getAs[Double]'s use of .asInstanceOf
val nanNullHandler: Any => Option[Double] = {
- case null => None
+ case null => None
case d: Double => nanHandler(d)
- case _ => ???
+ case _ => ???
}
}
diff --git a/dataset/src/test/scala/frameless/functions/NonAggregateFunctionsTests.scala b/dataset/src/test/scala/frameless/functions/NonAggregateFunctionsTests.scala
index 470d58e5f..1ddc8483e 100644
--- a/dataset/src/test/scala/frameless/functions/NonAggregateFunctionsTests.scala
+++ b/dataset/src/test/scala/frameless/functions/NonAggregateFunctionsTests.scala
@@ -7,7 +7,7 @@ import java.nio.charset.StandardCharsets
import frameless.functions.nonAggregate._
import org.apache.commons.io.FileUtils
-import org.apache.spark.sql.{Column, Encoder, SaveMode, functions => sparkFunctions}
+import org.apache.spark.sql.{functions => sparkFunctions, Column, Encoder, SaveMode}
import org.scalacheck.Prop._
import org.scalacheck.{Arbitrary, Gen, Prop}
@@ -18,24 +18,24 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
object NonNegativeGenerators {
val doubleGen = for {
- s <- Gen.chooseNum(1, Int.MaxValue)
- e <- Gen.chooseNum(1, Int.MaxValue)
+ s <- Gen.chooseNum(1, Int.MaxValue)
+ e <- Gen.chooseNum(1, Int.MaxValue)
res: Double = s.toDouble / e.toDouble
} yield res
- val intGen: Gen[Int] = Gen.chooseNum(1, Int.MaxValue)
+ val intGen: Gen[Int] = Gen.chooseNum(1, Int.MaxValue)
val shortGen: Gen[Short] = Gen.chooseNum(1, Short.MaxValue)
- val longGen: Gen[Long] = Gen.chooseNum(1, Long.MaxValue)
- val byteGen: Gen[Byte] = Gen.chooseNum(1, Byte.MaxValue)
+ val longGen: Gen[Long] = Gen.chooseNum(1, Long.MaxValue)
+ val byteGen: Gen[Byte] = Gen.chooseNum(1, Byte.MaxValue)
}
object NonNegativeArbitraryNumericValues {
import NonNegativeGenerators._
- implicit val arbInt: Arbitrary[Int] = Arbitrary(intGen)
- implicit val arbDouble: Arbitrary[Double] = Arbitrary(doubleGen)
- implicit val arbLong: Arbitrary[Long] = Arbitrary(longGen)
- implicit val arbShort: Arbitrary[Short] = Arbitrary(shortGen)
- implicit val arbByte: Arbitrary[Byte] = Arbitrary(byteGen)
+ implicit val arbInt: Arbitrary[Int] = Arbitrary(intGen)
+ implicit val arbDouble: Arbitrary[Double] = Arbitrary(doubleGen)
+ implicit val arbLong: Arbitrary[Long] = Arbitrary(longGen)
+ implicit val arbShort: Arbitrary[Short] = Arbitrary(shortGen)
+ implicit val arbByte: Arbitrary[Byte] = Arbitrary(byteGen)
}
private val base64Encoder = Base64.getEncoder
@@ -53,9 +53,11 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
val spark = session
import spark.implicits._
- def prop[A: TypedEncoder : Encoder, B: TypedEncoder : Encoder](values: List[X1[A]])(
- implicit encX1:Encoder[X1[A]],
- catalystAbsolute: CatalystNumericWithJavaBigDecimal[A, B]) = {
+ def prop[A: TypedEncoder: Encoder, B: TypedEncoder: Encoder](values: List[X1[A]])(
+ implicit
+ encX1: Encoder[X1[A]],
+ catalystAbsolute: CatalystNumericWithJavaBigDecimal[A, B]
+ ) = {
val cDS = session.createDataset(values)
val resCompare = cDS
.select(sparkFunctions.negate(cDS("a")))
@@ -77,7 +79,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
check(forAll(prop[Byte, Byte] _))
check(forAll(prop[Short, Short] _))
check(forAll(prop[Int, Int] _))
- check(forAll(prop[Long, Long] _))
+ check(forAll(prop[Long, Long] _))
check(forAll(prop[BigDecimal, java.math.BigDecimal] _))
}
@@ -85,7 +87,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
val spark = session
import spark.implicits._
- def prop(values: List[X1[Boolean]], fromBase: Int, toBase: Int)(implicit encX1:Encoder[X1[Boolean]]) = {
+ def prop(values: List[X1[Boolean]], fromBase: Int, toBase: Int)(implicit encX1: Encoder[X1[Boolean]]) = {
val cDS = session.createDataset(values)
val resCompare = cDS
@@ -112,7 +114,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
val spark = session
import spark.implicits._
- def prop(values: List[X1[String]], fromBase: Int, toBase: Int)(implicit encX1:Encoder[X1[String]]) = {
+ def prop(values: List[X1[String]], fromBase: Int, toBase: Int)(implicit encX1: Encoder[X1[String]]) = {
val cDS = session.createDataset(values)
val resCompare = cDS
@@ -139,7 +141,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
val spark = session
import spark.implicits._
- def prop[A: TypedEncoder : Encoder](values: List[X1[A]])(implicit encX1:Encoder[X1[A]]) = {
+ def prop[A: TypedEncoder: Encoder](values: List[X1[A]])(implicit encX1: Encoder[X1[A]]) = {
val cDS = session.createDataset(values)
val resCompare = cDS
.select(sparkFunctions.degrees(cDS("a")))
@@ -161,12 +163,15 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
check(forAll(prop[Byte] _))
check(forAll(prop[Short] _))
check(forAll(prop[Int] _))
- check(forAll(prop[Long] _))
+ check(forAll(prop[Long] _))
check(forAll(prop[BigDecimal] _))
}
- def propBitShift[A: TypedEncoder : Encoder, B: TypedEncoder : Encoder](typedDS: TypedDataset[X1[A]])
- (typedCol: TypedColumn[X1[A], B], sparkFunc: (Column,Int) => Column, numBits: Int): Prop = {
+ def propBitShift[A: TypedEncoder: Encoder, B: TypedEncoder: Encoder](typedDS: TypedDataset[X1[A]])(
+ typedCol: TypedColumn[X1[A], B],
+ sparkFunc: (Column, Int) => Column,
+ numBits: Int
+ ): Prop = {
val spark = session
import spark.implicits._
@@ -190,9 +195,10 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
import spark.implicits._
@nowarn // supress sparkFunctions.shiftRightUnsigned call which is used to maintain Spark 3.1.x backwards compat
- def prop[A: TypedEncoder : Encoder, B: TypedEncoder : Encoder]
- (values: List[X1[A]], numBits: Int)
- (implicit catalystBitShift: CatalystBitShift[A, B], encX1: Encoder[X1[A]]) = {
+ def prop[A: TypedEncoder: Encoder, B: TypedEncoder: Encoder](values: List[X1[A]], numBits: Int)(implicit
+ catalystBitShift: CatalystBitShift[A, B],
+ encX1: Encoder[X1[A]]
+ ) = {
val typedDS = TypedDataset.create(values)
propBitShift(typedDS)(shiftRightUnsigned(typedDS('a), numBits), sparkFunctions.shiftRightUnsigned, numBits)
}
@@ -209,9 +215,10 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
import spark.implicits._
@nowarn // supress sparkFunctions.shiftRight call which is used to maintain Spark 3.1.x backwards compat
- def prop[A: TypedEncoder : Encoder, B: TypedEncoder : Encoder]
- (values: List[X1[A]], numBits: Int)
- (implicit catalystBitShift: CatalystBitShift[A, B], encX1: Encoder[X1[A]]) = {
+ def prop[A: TypedEncoder: Encoder, B: TypedEncoder: Encoder](values: List[X1[A]], numBits: Int)(implicit
+ catalystBitShift: CatalystBitShift[A, B],
+ encX1: Encoder[X1[A]]
+ ) = {
val typedDS = TypedDataset.create(values)
propBitShift(typedDS)(shiftRight(typedDS('a), numBits), sparkFunctions.shiftRight, numBits)
}
@@ -228,9 +235,10 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
import spark.implicits._
@nowarn // supress sparkFunctions.shiftLeft call which is used to maintain Spark 3.1.x backwards compat
- def prop[A: TypedEncoder : Encoder, B: TypedEncoder : Encoder]
- (values: List[X1[A]], numBits: Int)
- (implicit catalystBitShift: CatalystBitShift[A, B], encX1: Encoder[X1[A]]) = {
+ def prop[A: TypedEncoder: Encoder, B: TypedEncoder: Encoder](values: List[X1[A]], numBits: Int)(implicit
+ catalystBitShift: CatalystBitShift[A, B],
+ encX1: Encoder[X1[A]]
+ ) = {
val typedDS = TypedDataset.create(values)
propBitShift(typedDS)(shiftLeft(typedDS('a), numBits), sparkFunctions.shiftLeft, numBits)
}
@@ -246,21 +254,21 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
val spark = session
import spark.implicits._
- def prop[A: TypedEncoder : Encoder, B: TypedEncoder : Encoder]
- (values: List[X1[A]])(
- implicit catalystAbsolute: CatalystRound[A, B], encX1: Encoder[X1[A]]
+ def prop[A: TypedEncoder: Encoder, B: TypedEncoder: Encoder](values: List[X1[A]])(
+ implicit
+ catalystAbsolute: CatalystRound[A, B],
+ encX1: Encoder[X1[A]]
) = {
val cDS = session.createDataset(values)
val resCompare = cDS
.select(sparkFunctions.ceil(cDS("a")))
.map(_.getAs[B](0))
.collect()
- .toList.map{
- case bigDecimal : java.math.BigDecimal => bigDecimal.setScale(0)
- case other => other
+ .toList.map {
+ case bigDecimal: java.math.BigDecimal => bigDecimal.setScale(0)
+ case other => other
}.asInstanceOf[List[B]]
-
val typedDS = TypedDataset.create(values)
val res = typedDS
.select(ceil(typedDS('a)))
@@ -356,20 +364,20 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
val spark = session
import spark.implicits._
- def prop[A: TypedEncoder : Encoder, B: TypedEncoder : Encoder]
- (values: List[X1[A]])(
- implicit catalystAbsolute: CatalystRound[A, B], encX1: Encoder[X1[A]]
+ def prop[A: TypedEncoder: Encoder, B: TypedEncoder: Encoder](values: List[X1[A]])(
+ implicit
+ catalystAbsolute: CatalystRound[A, B],
+ encX1: Encoder[X1[A]]
) = {
val cDS = session.createDataset(values)
val resCompare = cDS
.select(sparkFunctions.floor(cDS("a")))
.map(_.getAs[B](0))
.collect()
- .toList.map{
- case bigDecimal : java.math.BigDecimal => bigDecimal.setScale(0)
- case other => other
- }.asInstanceOf[List[B]]
-
+ .toList.map {
+ case bigDecimal: java.math.BigDecimal => bigDecimal.setScale(0)
+ case other => other
+ }.asInstanceOf[List[B]]
val typedDS = TypedDataset.create(values)
val res = typedDS
@@ -387,35 +395,33 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
check(forAll(prop[BigDecimal, java.math.BigDecimal] _))
}
-
test("abs big decimal") {
val spark = session
import spark.implicits._
- def prop[A: TypedEncoder: Encoder, B: TypedEncoder: Encoder]
- (values: List[X1[A]])
- (
- implicit catalystAbsolute: CatalystNumericWithJavaBigDecimal[A, B],
- encX1:Encoder[X1[A]]
- )= {
- val cDS = session.createDataset(values)
- val resCompare = cDS
- .select(sparkFunctions.abs(cDS("a")))
- .map(_.getAs[B](0))
- .collect().toList
+ def prop[A: TypedEncoder: Encoder, B: TypedEncoder: Encoder](values: List[X1[A]])(
+ implicit
+ catalystAbsolute: CatalystNumericWithJavaBigDecimal[A, B],
+ encX1: Encoder[X1[A]]
+ ) = {
+ val cDS = session.createDataset(values)
+ val resCompare = cDS
+ .select(sparkFunctions.abs(cDS("a")))
+ .map(_.getAs[B](0))
+ .collect().toList
- val typedDS = TypedDataset.create(values)
- val col = typedDS('a)
- val res = typedDS
- .select(
- abs(col)
- )
- .collect()
- .run()
- .toList
+ val typedDS = TypedDataset.create(values)
+ val col = typedDS('a)
+ val res = typedDS
+ .select(
+ abs(col)
+ )
+ .collect()
+ .run()
+ .toList
- res ?= resCompare
- }
+ res ?= resCompare
+ }
check(forAll(prop[BigDecimal, java.math.BigDecimal] _))
}
@@ -424,10 +430,9 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
val spark = session
import spark.implicits._
- def prop[A: TypedEncoder : Encoder]
- (values: List[X1[A]])
- (
- implicit catalystAbsolute: CatalystNumericWithJavaBigDecimal[A, A],
+ def prop[A: TypedEncoder: Encoder](values: List[X1[A]])(
+ implicit
+ catalystAbsolute: CatalystNumericWithJavaBigDecimal[A, A],
encX1: Encoder[X1[A]]
) = {
val cDS = session.createDataset(values)
@@ -436,7 +441,6 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
.map(_.getAs[A](0))
.collect().toList
-
val typedDS = TypedDataset.create(values)
val res = typedDS
.select(abs(typedDS('a)))
@@ -453,36 +457,37 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
check(forAll(prop[Double] _))
}
- def propTrigonometric[A: CatalystNumeric: TypedEncoder : Encoder](typedDS: TypedDataset[X1[A]])
- (typedCol: TypedColumn[X1[A], Double], sparkFunc: Column => Column): Prop = {
- val spark = session
- import spark.implicits._
+ def propTrigonometric[A: CatalystNumeric: TypedEncoder: Encoder](typedDS: TypedDataset[X1[A]])(
+ typedCol: TypedColumn[X1[A], Double],
+ sparkFunc: Column => Column
+ ): Prop = {
+ val spark = session
+ import spark.implicits._
- val resCompare = typedDS.dataset
- .select(sparkFunc($"a"))
- .map(_.getAs[Double](0))
- .map(DoubleBehaviourUtils.nanNullHandler)
- .collect().toList
+ val resCompare = typedDS.dataset
+ .select(sparkFunc($"a"))
+ .map(_.getAs[Double](0))
+ .map(DoubleBehaviourUtils.nanNullHandler)
+ .collect().toList
- val res = typedDS
- .select(typedCol)
- .deserialized
- .map(DoubleBehaviourUtils.nanNullHandler)
- .collect()
- .run()
- .toList
+ val res = typedDS
+ .select(typedCol)
+ .deserialized
+ .map(DoubleBehaviourUtils.nanNullHandler)
+ .collect()
+ .run()
+ .toList
- res ?= resCompare
+ res ?= resCompare
}
test("cos") {
val spark = session
import spark.implicits._
- def prop[A: CatalystNumeric : TypedEncoder : Encoder](values: List[X1[A]])
- (implicit encX1:Encoder[X1[A]]) = {
- val typedDS = TypedDataset.create(values)
- propTrigonometric(typedDS)(cos(typedDS('a)), sparkFunctions.cos)
+ def prop[A: CatalystNumeric: TypedEncoder: Encoder](values: List[X1[A]])(implicit encX1: Encoder[X1[A]]) = {
+ val typedDS = TypedDataset.create(values)
+ propTrigonometric(typedDS)(cos(typedDS('a)), sparkFunctions.cos)
}
check(forAll(prop[Int] _))
@@ -497,10 +502,9 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
val spark = session
import spark.implicits._
- def prop[A: CatalystNumeric : TypedEncoder : Encoder](values: List[X1[A]])
- (implicit encX1:Encoder[X1[A]]) = {
- val typedDS = TypedDataset.create(values)
- propTrigonometric(typedDS)(cosh(typedDS('a)), sparkFunctions.cosh)
+ def prop[A: CatalystNumeric: TypedEncoder: Encoder](values: List[X1[A]])(implicit encX1: Encoder[X1[A]]) = {
+ val typedDS = TypedDataset.create(values)
+ propTrigonometric(typedDS)(cosh(typedDS('a)), sparkFunctions.cosh)
}
check(forAll(prop[Int] _))
@@ -515,10 +519,9 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
val spark = session
import spark.implicits._
- def prop[A: CatalystNumeric : TypedEncoder : Encoder](values: List[X1[A]])
- (implicit encX1:Encoder[X1[A]]) = {
- val typedDS = TypedDataset.create(values)
- propTrigonometric(typedDS)(acos(typedDS('a)), sparkFunctions.acos)
+ def prop[A: CatalystNumeric: TypedEncoder: Encoder](values: List[X1[A]])(implicit encX1: Encoder[X1[A]]) = {
+ val typedDS = TypedDataset.create(values)
+ propTrigonometric(typedDS)(acos(typedDS('a)), sparkFunctions.acos)
}
check(forAll(prop[Int] _))
@@ -529,16 +532,13 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
check(forAll(prop[Double] _))
}
-
-
test("signum") {
val spark = session
import spark.implicits._
- def prop[A: CatalystNumeric : TypedEncoder : Encoder](values: List[X1[A]])
- (implicit encX1:Encoder[X1[A]]) = {
- val typedDS = TypedDataset.create(values)
- propTrigonometric(typedDS)(signum(typedDS('a)), sparkFunctions.signum)
+ def prop[A: CatalystNumeric: TypedEncoder: Encoder](values: List[X1[A]])(implicit encX1: Encoder[X1[A]]) = {
+ val typedDS = TypedDataset.create(values)
+ propTrigonometric(typedDS)(signum(typedDS('a)), sparkFunctions.signum)
}
check(forAll(prop[Int] _))
@@ -553,10 +553,9 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
val spark = session
import spark.implicits._
- def prop[A: CatalystNumeric : TypedEncoder : Encoder](values: List[X1[A]])
- (implicit encX1:Encoder[X1[A]]) = {
- val typedDS = TypedDataset.create(values)
- propTrigonometric(typedDS)(sin(typedDS('a)), sparkFunctions.sin)
+ def prop[A: CatalystNumeric: TypedEncoder: Encoder](values: List[X1[A]])(implicit encX1: Encoder[X1[A]]) = {
+ val typedDS = TypedDataset.create(values)
+ propTrigonometric(typedDS)(sin(typedDS('a)), sparkFunctions.sin)
}
check(forAll(prop[Int] _))
@@ -571,10 +570,9 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
val spark = session
import spark.implicits._
- def prop[A: CatalystNumeric : TypedEncoder : Encoder](values: List[X1[A]])
- (implicit encX1:Encoder[X1[A]]) = {
- val typedDS = TypedDataset.create(values)
- propTrigonometric(typedDS)(sinh(typedDS('a)), sparkFunctions.sinh)
+ def prop[A: CatalystNumeric: TypedEncoder: Encoder](values: List[X1[A]])(implicit encX1: Encoder[X1[A]]) = {
+ val typedDS = TypedDataset.create(values)
+ propTrigonometric(typedDS)(sinh(typedDS('a)), sparkFunctions.sinh)
}
check(forAll(prop[Int] _))
@@ -589,10 +587,9 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
val spark = session
import spark.implicits._
- def prop[A: CatalystNumeric : TypedEncoder : Encoder](values: List[X1[A]])
- (implicit encX1:Encoder[X1[A]]) = {
- val typedDS = TypedDataset.create(values)
- propTrigonometric(typedDS)(asin(typedDS('a)), sparkFunctions.asin)
+ def prop[A: CatalystNumeric: TypedEncoder: Encoder](values: List[X1[A]])(implicit encX1: Encoder[X1[A]]) = {
+ val typedDS = TypedDataset.create(values)
+ propTrigonometric(typedDS)(asin(typedDS('a)), sparkFunctions.asin)
}
check(forAll(prop[Int] _))
@@ -607,10 +604,9 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
val spark = session
import spark.implicits._
- def prop[A: CatalystNumeric : TypedEncoder : Encoder](values: List[X1[A]])
- (implicit encX1:Encoder[X1[A]]) = {
- val typedDS = TypedDataset.create(values)
- propTrigonometric(typedDS)(tan(typedDS('a)), sparkFunctions.tan)
+ def prop[A: CatalystNumeric: TypedEncoder: Encoder](values: List[X1[A]])(implicit encX1: Encoder[X1[A]]) = {
+ val typedDS = TypedDataset.create(values)
+ propTrigonometric(typedDS)(tan(typedDS('a)), sparkFunctions.tan)
}
check(forAll(prop[Int] _))
@@ -625,10 +621,9 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
val spark = session
import spark.implicits._
- def prop[A: CatalystNumeric : TypedEncoder : Encoder](values: List[X1[A]])
- (implicit encX1:Encoder[X1[A]]) = {
- val typedDS = TypedDataset.create(values)
- propTrigonometric(typedDS)(tanh(typedDS('a)), sparkFunctions.tanh)
+ def prop[A: CatalystNumeric: TypedEncoder: Encoder](values: List[X1[A]])(implicit encX1: Encoder[X1[A]]) = {
+ val typedDS = TypedDataset.create(values)
+ propTrigonometric(typedDS)(tanh(typedDS('a)), sparkFunctions.tanh)
}
check(forAll(prop[Int] _))
@@ -639,51 +634,50 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
check(forAll(prop[Double] _))
}
- /*
- * Currently not all Collection types play nice with the Encoders.
- * This test needs to be readressed and Set readded to the Collection Typeclass once these issues are resolved.
- *
- * [[https://issues.apache.org/jira/browse/SPARK-18891]]
- * [[https://issues.apache.org/jira/browse/SPARK-21204]]
- */
- test("arrayContains"){
+ /*
+ * Currently not all Collection types play nice with the Encoders.
+ * This test needs to be readdressed and Set re-added to the Collection Typeclass once these issues are resolved.
+ *
+ * [[https://issues.apache.org/jira/browse/SPARK-18891]]
+ * [[https://issues.apache.org/jira/browse/SPARK-21204]]
+ */
+ test("arrayContains") {
val spark = session
import spark.implicits._
val listLength = 10
val idxs = Stream.continually(Range(0, listLength)).flatten.toIterator
- abstract class Nth[A, C[A]:CatalystCollection] {
+ abstract class Nth[A, C[A]: CatalystCollection] {
- def nth(c:C[A], idx:Int):A
+ def nth(c: C[A], idx: Int): A
}
- implicit def deriveListNth[A] : Nth[A, List] = new Nth[A, List] {
+ implicit def deriveListNth[A]: Nth[A, List] = new Nth[A, List] {
override def nth(c: List[A], idx: Int): A = c(idx)
}
- implicit def deriveSeqNth[A] : Nth[A, Seq] = new Nth[A, Seq] {
+ implicit def deriveSeqNth[A]: Nth[A, Seq] = new Nth[A, Seq] {
override def nth(c: Seq[A], idx: Int): A = c(idx)
}
- implicit def deriveVectorNth[A] : Nth[A, Vector] = new Nth[A, Vector] {
+ implicit def deriveVectorNth[A]: Nth[A, Vector] = new Nth[A, Vector] {
override def nth(c: Vector[A], idx: Int): A = c(idx)
}
- implicit def deriveArrayNth[A] : Nth[A, Array] = new Nth[A, Array] {
+ implicit def deriveArrayNth[A]: Nth[A, Array] = new Nth[A, Array] {
override def nth(c: Array[A], idx: Int): A = c(idx)
}
-
- def prop[C[_] : CatalystCollection]
- (
- values: C[Int],
- shouldBeIn:Boolean)
- (
- implicit nth:Nth[Int, C],
- encEv: Encoder[C[Int]],
- tEncEv: TypedEncoder[C[Int]]
- ) = {
+ def prop[C[_]: CatalystCollection](
+ values: C[Int],
+ shouldBeIn: Boolean
+ )(
+ implicit
+ nth: Nth[Int, C],
+ encEv: Encoder[C[Int]],
+ tEncEv: TypedEncoder[C[Int]]
+ ) = {
val contained = if (shouldBeIn) nth.nth(values, idxs.next) else -1
@@ -705,10 +699,9 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
check(
forAll(
- Gen.listOfN(listLength, Gen.choose(0,100)),
- Gen.oneOf(true,false)
- )
- (prop[List])
+ Gen.listOfN(listLength, Gen.choose(0, 100)),
+ Gen.oneOf(true, false)
+ )(prop[List])
)
/*check( Looks like there is no Typed Encoder for Seq type yet
@@ -721,18 +714,16 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
check(
forAll(
- Gen.listOfN(listLength, Gen.choose(0,100)).map(_.toVector),
- Gen.oneOf(true,false)
- )
- (prop[Vector])
+ Gen.listOfN(listLength, Gen.choose(0, 100)).map(_.toVector),
+ Gen.oneOf(true, false)
+ )(prop[Vector])
)
check(
forAll(
- Gen.listOfN(listLength, Gen.choose(0,100)).map(_.toArray),
- Gen.oneOf(true,false)
- )
- (prop[Array])
+ Gen.listOfN(listLength, Gen.choose(0, 100)).map(_.toArray),
+ Gen.oneOf(true, false)
+ )(prop[Array])
)
}
@@ -740,8 +731,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
val spark = session
import spark.implicits._
- def prop[A: CatalystNumeric : TypedEncoder : Encoder]
- (na: A, values: List[X1[A]])(implicit encX1: Encoder[X1[A]]) = {
+ def prop[A: CatalystNumeric: TypedEncoder: Encoder](na: A, values: List[X1[A]])(implicit encX1: Encoder[X1[A]]) = {
val cDS = session.createDataset(X1(na) :: values)
val resCompare = cDS
.select(sparkFunctions.atan(cDS("a")))
@@ -759,8 +749,8 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
.toList
val aggrTyped = typedDS.agg(atan(
- frameless.functions.aggregate.first(typedDS('a)))
- ).firstOption().run().get
+ frameless.functions.aggregate.first(typedDS('a))
+ )).firstOption().run().get
val aggrSpark = cDS.select(
sparkFunctions.atan(sparkFunctions.first("a")).as[Double]
@@ -781,9 +771,9 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
val spark = session
import spark.implicits._
- def prop[A: CatalystNumeric : TypedEncoder : Encoder,
- B: CatalystNumeric : TypedEncoder : Encoder](na: X2[A, B], values: List[X2[A, B]])
- (implicit encEv: Encoder[X2[A,B]]) = {
+ def prop[A: CatalystNumeric: TypedEncoder: Encoder, B: CatalystNumeric: TypedEncoder: Encoder](na: X2[A, B], values: List[X2[A, B]])(implicit
+ encEv: Encoder[X2[A, B]]
+ ) = {
val cDS = session.createDataset(na +: values)
val resCompare = cDS
.select(sparkFunctions.atan2(cDS("a"), cDS("b")))
@@ -791,7 +781,6 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
.map(DoubleBehaviourUtils.nanNullHandler)
.collect().toList
-
val typedDS = TypedDataset.create(cDS)
val res = typedDS
.select(atan2(typedDS('a), typedDS('b)))
@@ -803,17 +792,16 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
val aggrTyped = typedDS.agg(atan2(
frameless.functions.aggregate.first(typedDS('a)),
- frameless.functions.aggregate.first(typedDS('b)))
- ).firstOption().run().get
+ frameless.functions.aggregate.first(typedDS('b))
+ )).firstOption().run().get
val aggrSpark = cDS.select(
- sparkFunctions.atan2(sparkFunctions.first("a"),sparkFunctions.first("b")).as[Double]
+ sparkFunctions.atan2(sparkFunctions.first("a"), sparkFunctions.first("b")).as[Double]
).first()
(res ?= resCompare).&&(aggrTyped ?= aggrSpark)
}
-
check(forAll(prop[Int, Long] _))
check(forAll(prop[Long, Int] _))
check(forAll(prop[Short, Byte] _))
@@ -826,8 +814,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
val spark = session
import spark.implicits._
- def prop[A: CatalystNumeric : TypedEncoder : Encoder]
- (na: X1[A], value: List[X1[A]], lit:Double)(implicit encX1:Encoder[X1[A]]) = {
+ def prop[A: CatalystNumeric: TypedEncoder: Encoder](na: X1[A], value: List[X1[A]], lit: Double)(implicit encX1: Encoder[X1[A]]) = {
val cDS = session.createDataset(na +: value)
val resCompare = cDS
.select(sparkFunctions.atan2(lit, cDS("a")))
@@ -835,7 +822,6 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
.map(DoubleBehaviourUtils.nanNullHandler)
.collect().toList
-
val typedDS = TypedDataset.create(cDS)
val res = typedDS
.select(atan2(lit, typedDS('a)))
@@ -847,8 +833,8 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
val aggrTyped = typedDS.agg(atan2(
lit,
- frameless.functions.aggregate.first(typedDS('a)))
- ).firstOption().run().get
+ frameless.functions.aggregate.first(typedDS('a))
+ )).firstOption().run().get
val aggrSpark = cDS.select(
sparkFunctions.atan2(lit, sparkFunctions.first("a")).as[Double]
@@ -869,8 +855,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
val spark = session
import spark.implicits._
- def prop[A: CatalystNumeric : TypedEncoder : Encoder]
- (na: X1[A], value: List[X1[A]], lit:Double)(implicit encX1:Encoder[X1[A]]) = {
+ def prop[A: CatalystNumeric: TypedEncoder: Encoder](na: X1[A], value: List[X1[A]], lit: Double)(implicit encX1: Encoder[X1[A]]) = {
val cDS = session.createDataset(na +: value)
val resCompare = cDS
.select(sparkFunctions.atan2(cDS("a"), lit))
@@ -878,7 +863,6 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
.map(DoubleBehaviourUtils.nanNullHandler)
.collect().toList
-
val typedDS = TypedDataset.create(cDS)
val res = typedDS
.select(atan2(typedDS('a), lit))
@@ -890,8 +874,8 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
val aggrTyped = typedDS.agg(atan2(
frameless.functions.aggregate.first(typedDS('a)),
- lit)
- ).firstOption().run().get
+ lit
+ )).firstOption().run().get
val aggrSpark = cDS.select(
sparkFunctions.atan2(sparkFunctions.first("a"), lit).as[Double]
@@ -900,7 +884,6 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
(res ?= resCompare).&&(aggrTyped ?= aggrSpark)
}
-
check(forAll(prop[Int] _))
check(forAll(prop[Long] _))
check(forAll(prop[Short] _))
@@ -909,8 +892,9 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
check(forAll(prop[Double] _))
}
- def mathProp[A: CatalystNumeric: TypedEncoder : Encoder](typedDS: TypedDataset[X1[A]])(
- typedCol: TypedColumn[X1[A], Double], sparkFunc: Column => Column
+ def mathProp[A: CatalystNumeric: TypedEncoder: Encoder](typedDS: TypedDataset[X1[A]])(
+ typedCol: TypedColumn[X1[A], Double],
+ sparkFunc: Column => Column
): Prop = {
val spark = session
import spark.implicits._
@@ -936,7 +920,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
val spark = session
import spark.implicits._
- def prop[A: CatalystNumeric : TypedEncoder : Encoder](values: List[X1[A]])(implicit encX1:Encoder[X1[A]]) = {
+ def prop[A: CatalystNumeric: TypedEncoder: Encoder](values: List[X1[A]])(implicit encX1: Encoder[X1[A]]) = {
val typedDS = TypedDataset.create(values)
mathProp(typedDS)(sqrt(typedDS('a)), sparkFunctions.sqrt)
}
@@ -953,7 +937,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
val spark = session
import spark.implicits._
- def prop[A: CatalystNumeric : TypedEncoder : Encoder](values: List[X1[A]])(implicit encX1:Encoder[X1[A]]) = {
+ def prop[A: CatalystNumeric: TypedEncoder: Encoder](values: List[X1[A]])(implicit encX1: Encoder[X1[A]]) = {
val typedDS = TypedDataset.create(values)
mathProp(typedDS)(cbrt(typedDS('a)), sparkFunctions.cbrt)
}
@@ -970,7 +954,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
val spark = session
import spark.implicits._
- def prop[A: CatalystNumeric : TypedEncoder : Encoder](values: List[X1[A]])(implicit encX1:Encoder[X1[A]]) = {
+ def prop[A: CatalystNumeric: TypedEncoder: Encoder](values: List[X1[A]])(implicit encX1: Encoder[X1[A]]) = {
val typedDS = TypedDataset.create(values)
mathProp(typedDS)(exp(typedDS('a)), sparkFunctions.exp)
}
@@ -987,7 +971,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
val spark = session
import spark.implicits._
- def prop[A: TypedEncoder : Encoder](values: List[X1[A]]): Prop = {
+ def prop[A: TypedEncoder: Encoder](values: List[X1[A]]): Prop = {
val spark = session
import spark.implicits._
@@ -1040,8 +1024,9 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
val spark = session
import spark.implicits._
- def prop[A: TypedEncoder : Encoder](values: List[X1[A]])(
- implicit catalystNumericWithJavaBigDecimal: CatalystNumericWithJavaBigDecimal[A, A],
+ def prop[A: TypedEncoder: Encoder](values: List[X1[A]])(
+ implicit
+ catalystNumericWithJavaBigDecimal: CatalystNumericWithJavaBigDecimal[A, A],
encX1: Encoder[X1[A]]
) = {
val cDS = session.createDataset(values)
@@ -1050,7 +1035,6 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
.map(_.getAs[A](0))
.collect().toList
-
val typedDS = TypedDataset.create(values)
val res = typedDS
.select(round(typedDS('a)))
@@ -1072,8 +1056,9 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
import spark.implicits._
def prop[A: TypedEncoder: Encoder](values: List[X1[A]])(
- implicit catalystAbsolute: CatalystNumericWithJavaBigDecimal[A, java.math.BigDecimal],
- encX1:Encoder[X1[A]]
+ implicit
+ catalystAbsolute: CatalystNumericWithJavaBigDecimal[A, java.math.BigDecimal],
+ encX1: Encoder[X1[A]]
) = {
val cDS = session.createDataset(values)
@@ -1101,8 +1086,9 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
val spark = session
import spark.implicits._
- def prop[A: TypedEncoder : Encoder](values: List[X1[A]])(
- implicit catalystNumericWithJavaBigDecimal: CatalystNumericWithJavaBigDecimal[A, A],
+ def prop[A: TypedEncoder: Encoder](values: List[X1[A]])(
+ implicit
+ catalystNumericWithJavaBigDecimal: CatalystNumericWithJavaBigDecimal[A, A],
encX1: Encoder[X1[A]]
) = {
val cDS = session.createDataset(values)
@@ -1111,7 +1097,6 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
.map(_.getAs[A](0))
.collect().toList
-
val typedDS = TypedDataset.create(values)
val res = typedDS
.select(round(typedDS('a), 1))
@@ -1133,8 +1118,9 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
import spark.implicits._
def prop[A: TypedEncoder: Encoder](values: List[X1[A]])(
- implicit catalystAbsolute: CatalystNumericWithJavaBigDecimal[A, java.math.BigDecimal],
- encX1:Encoder[X1[A]]
+ implicit
+ catalystAbsolute: CatalystNumericWithJavaBigDecimal[A, java.math.BigDecimal],
+ encX1: Encoder[X1[A]]
) = {
val cDS = session.createDataset(values)
@@ -1162,8 +1148,9 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
val spark = session
import spark.implicits._
- def prop[A: TypedEncoder : Encoder](values: List[X1[A]])(
- implicit catalystNumericWithJavaBigDecimal: CatalystNumericWithJavaBigDecimal[A, A],
+ def prop[A: TypedEncoder: Encoder](values: List[X1[A]])(
+ implicit
+ catalystNumericWithJavaBigDecimal: CatalystNumericWithJavaBigDecimal[A, A],
encX1: Encoder[X1[A]]
) = {
val cDS = session.createDataset(values)
@@ -1172,7 +1159,6 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
.map(_.getAs[A](0))
.collect().toList
-
val typedDS = TypedDataset.create(values)
val res = typedDS
.select(bround(typedDS('a)))
@@ -1187,15 +1173,16 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
check(forAll(prop[Long] _))
check(forAll(prop[Short] _))
check(forAll(prop[Double] _))
- }
+ }
test("bround big decimal") {
val spark = session
import spark.implicits._
def prop[A: TypedEncoder: Encoder](values: List[X1[A]])(
- implicit catalystAbsolute: CatalystNumericWithJavaBigDecimal[A, java.math.BigDecimal],
- encX1:Encoder[X1[A]]
+ implicit
+ catalystAbsolute: CatalystNumericWithJavaBigDecimal[A, java.math.BigDecimal],
+ encX1: Encoder[X1[A]]
) = {
val cDS = session.createDataset(values)
@@ -1219,63 +1206,64 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
check(forAll(prop[BigDecimal] _))
}
- test("bround with scale") {
- val spark = session
- import spark.implicits._
+ test("bround with scale") {
+ val spark = session
+ import spark.implicits._
- def prop[A: TypedEncoder : Encoder](values: List[X1[A]])(
- implicit catalystNumericWithJavaBigDecimal: CatalystNumericWithJavaBigDecimal[A, A],
- encX1: Encoder[X1[A]]
- ) = {
- val cDS = session.createDataset(values)
- val resCompare = cDS
- .select(sparkFunctions.bround(cDS("a"), 1))
- .map(_.getAs[A](0))
- .collect().toList
-
-
- val typedDS = TypedDataset.create(values)
- val res = typedDS
- .select(bround(typedDS('a), 1))
- .collect()
- .run()
- .toList
-
- res ?= resCompare
- }
+ def prop[A: TypedEncoder: Encoder](values: List[X1[A]])(
+ implicit
+ catalystNumericWithJavaBigDecimal: CatalystNumericWithJavaBigDecimal[A, A],
+ encX1: Encoder[X1[A]]
+ ) = {
+ val cDS = session.createDataset(values)
+ val resCompare = cDS
+ .select(sparkFunctions.bround(cDS("a"), 1))
+ .map(_.getAs[A](0))
+ .collect().toList
- check(forAll(prop[Int] _))
- check(forAll(prop[Long] _))
- check(forAll(prop[Short] _))
- check(forAll(prop[Double] _))
+ val typedDS = TypedDataset.create(values)
+ val res = typedDS
+ .select(bround(typedDS('a), 1))
+ .collect()
+ .run()
+ .toList
+
+ res ?= resCompare
}
- test("bround big decimal with scale") {
- val spark = session
- import spark.implicits._
+ check(forAll(prop[Int] _))
+ check(forAll(prop[Long] _))
+ check(forAll(prop[Short] _))
+ check(forAll(prop[Double] _))
+ }
- def prop[A: TypedEncoder: Encoder](values: List[X1[A]])(
- implicit catalystAbsolute: CatalystNumericWithJavaBigDecimal[A, java.math.BigDecimal],
- encX1:Encoder[X1[A]]
- ) = {
- val cDS = session.createDataset(values)
-
- val resCompare = cDS
- .select(sparkFunctions.bround(cDS("a"), 0))
- .map(_.getAs[java.math.BigDecimal](0))
- .collect()
- .toList.map(_.setScale(0))
-
- val typedDS = TypedDataset.create(values)
- val col = typedDS('a)
- val res = typedDS
- .select(bround(col, 0))
- .collect()
- .run()
- .toList
-
- res ?= resCompare
- }
+ test("bround big decimal with scale") {
+ val spark = session
+ import spark.implicits._
+
+ def prop[A: TypedEncoder: Encoder](values: List[X1[A]])(
+ implicit
+ catalystAbsolute: CatalystNumericWithJavaBigDecimal[A, java.math.BigDecimal],
+ encX1: Encoder[X1[A]]
+ ) = {
+ val cDS = session.createDataset(values)
+
+ val resCompare = cDS
+ .select(sparkFunctions.bround(cDS("a"), 0))
+ .map(_.getAs[java.math.BigDecimal](0))
+ .collect()
+ .toList.map(_.setScale(0))
+
+ val typedDS = TypedDataset.create(values)
+ val col = typedDS('a)
+ val res = typedDS
+ .select(bround(col, 0))
+ .collect()
+ .run()
+ .toList
+
+ res ?= resCompare
+ }
check(forAll(prop[BigDecimal] _))
}
@@ -1285,7 +1273,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
import spark.implicits._
import NonNegativeArbitraryNumericValues._
- def prop[A: CatalystNumeric: TypedEncoder : Encoder](
+ def prop[A: CatalystNumeric: TypedEncoder: Encoder](
values: List[X1[A]],
base: Double
): Prop = {
@@ -1322,7 +1310,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
import spark.implicits._
import NonNegativeArbitraryNumericValues._
- def prop[A: CatalystNumeric : TypedEncoder : Encoder](values: List[X1[A]])(implicit encX1:Encoder[X1[A]]) = {
+ def prop[A: CatalystNumeric: TypedEncoder: Encoder](values: List[X1[A]])(implicit encX1: Encoder[X1[A]]) = {
val typedDS = TypedDataset.create(values)
mathProp(typedDS)(log(typedDS('a)), sparkFunctions.log)
}
@@ -1339,7 +1327,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
import spark.implicits._
import NonNegativeArbitraryNumericValues._
- def prop[A: CatalystNumeric : TypedEncoder : Encoder](values: List[X1[A]])(implicit encX1:Encoder[X1[A]]) = {
+ def prop[A: CatalystNumeric: TypedEncoder: Encoder](values: List[X1[A]])(implicit encX1: Encoder[X1[A]]) = {
val typedDS = TypedDataset.create(values)
mathProp(typedDS)(log2(typedDS('a)), sparkFunctions.log2)
}
@@ -1356,7 +1344,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
import spark.implicits._
import NonNegativeArbitraryNumericValues._
- def prop[A: CatalystNumeric : TypedEncoder : Encoder](values: List[X1[A]])(implicit encX1:Encoder[X1[A]]) = {
+ def prop[A: CatalystNumeric: TypedEncoder: Encoder](values: List[X1[A]])(implicit encX1: Encoder[X1[A]]) = {
val typedDS = TypedDataset.create(values)
mathProp(typedDS)(log1p(typedDS('a)), sparkFunctions.log1p)
}
@@ -1373,7 +1361,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
import spark.implicits._
import NonNegativeArbitraryNumericValues._
- def prop[A: CatalystNumeric : TypedEncoder : Encoder](values: List[X1[A]])(implicit encX1:Encoder[X1[A]]) = {
+ def prop[A: CatalystNumeric: TypedEncoder: Encoder](values: List[X1[A]])(implicit encX1: Encoder[X1[A]]) = {
val typedDS = TypedDataset.create(values)
mathProp(typedDS)(log10(typedDS('a)), sparkFunctions.log10)
}
@@ -1389,7 +1377,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
val spark = session
import spark.implicits._
- def prop(values:List[X1[Array[Byte]]])(implicit encX1:Encoder[X1[Array[Byte]]]) = {
+ def prop(values: List[X1[Array[Byte]]])(implicit encX1: Encoder[X1[Array[Byte]]]) = {
val cDS = session.createDataset(values)
val resCompare = cDS
.select(sparkFunctions.base64(cDS("a")))
@@ -1419,7 +1407,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
val spark = session
import spark.implicits._
- def prop[A: CatalystNumeric: TypedEncoder : Encoder](
+ def prop[A: CatalystNumeric: TypedEncoder: Encoder](
values: List[X1[A]],
base: Double
): Prop = {
@@ -1463,7 +1451,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
val spark = session
import spark.implicits._
- def prop[A: CatalystNumeric: TypedEncoder : Encoder](
+ def prop[A: CatalystNumeric: TypedEncoder: Encoder](
values: List[X2[A, A]]
): Prop = {
val spark = session
@@ -1498,7 +1486,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
val spark = session
import spark.implicits._
- def prop[A: CatalystNumeric: TypedEncoder : Encoder](
+ def prop[A: CatalystNumeric: TypedEncoder: Encoder](
values: List[X1[A]],
base: Double
): Prop = {
@@ -1534,7 +1522,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
.run()
.toList
- (res ?= resCompare) && (res2 ?= resCompare2)
+ (res ?= resCompare) && (res2 ?= resCompare2)
}
check(forAll(prop[Int] _))
@@ -1548,7 +1536,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
val spark = session
import spark.implicits._
- def prop[A: CatalystNumeric: TypedEncoder : Encoder](
+ def prop[A: CatalystNumeric: TypedEncoder: Encoder](
values: List[X2[A, A]]
): Prop = {
val spark = session
@@ -1584,7 +1572,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
import spark.implicits._
import NonNegativeArbitraryNumericValues._
- def prop[A: CatalystNumeric: TypedEncoder : Encoder](
+ def prop[A: CatalystNumeric: TypedEncoder: Encoder](
values: List[X2[A, A]]
): Prop = {
val spark = session
@@ -1637,11 +1625,11 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
check(forAll(prop _))
}
- test("bin"){
+ test("bin") {
val spark = session
import spark.implicits._
- def prop(values:List[X1[Long]])(implicit encX1:Encoder[X1[Long]]) = {
+ def prop(values: List[X1[Long]])(implicit encX1: Encoder[X1[Long]]) = {
val cDS = session.createDataset(values)
val resCompare = cDS
.select(sparkFunctions.bin(cDS("a")))
@@ -1661,13 +1649,12 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
check(forAll(prop _))
}
- test("bitwiseNOT"){
+ test("bitwiseNOT") {
val spark = session
import spark.implicits._
@nowarn // supress sparkFunctions.bitwiseNOT call which is used to maintain Spark 3.1.x backwards compat
- def prop[A: CatalystBitwise : TypedEncoder : Encoder]
- (values:List[X1[A]])(implicit encX1:Encoder[X1[A]]) = {
+ def prop[A: CatalystBitwise: TypedEncoder: Encoder](values: List[X1[A]])(implicit encX1: Encoder[X1[A]]) = {
val cDS = session.createDataset(values)
val resCompare = cDS
.select(sparkFunctions.bitwiseNOT(cDS("a")))
@@ -1694,7 +1681,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
val spark = session
import spark.implicits._
- def prop[A : TypedEncoder](
+ def prop[A: TypedEncoder](
toFile1: List[X1[A]],
toFile2: List[X1[A]],
inMem: List[X1[A]]
@@ -1727,10 +1714,13 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
val grouped = withFileName.groupBy(_.b).mapValues(_.map(_.c).toSet)
grouped.foldLeft(passed) { (p, g) =>
- p && secure { g._1 match {
- case "" => g._2.head == "" //Empty string if didn't come from file
- case f => g._2.forall(_.contains(f))
- }}}
+ p && secure {
+ g._1 match {
+ case "" => g._2.head == "" // Empty string if didn't come from file
+ case f => g._2.forall(_.contains(f))
+ }
+ }
+ }
}
check(forAll(prop[String] _))
@@ -1740,7 +1730,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
val spark = session
import spark.implicits._
- def prop[A : TypedEncoder](xs: List[X1[A]])(implicit x2en: Encoder[X2[A, Long]]) = {
+ def prop[A: TypedEncoder](xs: List[X1[A]])(implicit x2en: Encoder[X2[A, Long]]) = {
val ds = TypedDataset.create(xs)
val result = ds.withColumn[X2[A, Long]](monotonicallyIncreasingId())
@@ -1750,7 +1740,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
val ids = result.map(_.b)
(ids.toSet.size ?= ids.length) &&
- (ids.sorted ?= ids)
+ (ids.sorted ?= ids)
}
check(forAll(prop[String] _))
@@ -1760,8 +1750,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
val spark = session
import spark.implicits._
- def prop[A : TypedEncoder : Encoder]
- (condition1: Boolean, condition2: Boolean, value1: A, value2: A, otherwise: A) = {
+ def prop[A: TypedEncoder: Encoder](condition1: Boolean, condition2: Boolean, value1: A, value2: A, otherwise: A) = {
val ds = TypedDataset.create(X5(condition1, condition2, value1, value2, otherwise) :: Nil)
val untypedWhen = ds.toDF()
@@ -1855,10 +1844,11 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
check(forAll(pairs) { values: List[X2[String, String]] =>
val ds = TypedDataset.create(values)
- val td = ds.agg(concat(first(ds('a)),first(ds('b)))).collect().run().toVector
+ val td = ds.agg(concat(first(ds('a)), first(ds('b)))).collect().run().toVector
val spark = ds.dataset.select(sparkFunctions.concat(
sparkFunctions.first($"a").as[String],
- sparkFunctions.first($"b").as[String])).as[String].collect().toVector
+ sparkFunctions.first($"b").as[String]
+ )).as[String].collect().toVector
td ?= spark
})
}
@@ -1902,11 +1892,13 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
check(forAll(pairs) { values: List[X2[String, String]] =>
val ds = TypedDataset.create(values)
- val td = ds.agg(concatWs(",",first(ds('a)),first(ds('b)), last(ds('b)))).collect().run().toVector
- val spark = ds.dataset.select(sparkFunctions.concat_ws(",",
+ val td = ds.agg(concatWs(",", first(ds('a)), first(ds('b)), last(ds('b)))).collect().run().toVector
+ val spark = ds.dataset.select(sparkFunctions.concat_ws(
+ ",",
sparkFunctions.first($"a").as[String],
sparkFunctions.first($"b").as[String],
- sparkFunctions.last($"b").as[String])).as[String].collect().toVector
+ sparkFunctions.last($"b").as[String]
+ )).as[String].collect().toVector
td ?= spark
})
}
@@ -1962,13 +1954,13 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
val ds = TypedDataset.create(na +: values)
val sparkResult = ds.toDF()
- .select(sparkFunctions.levenshtein($"a", sparkFunctions.concat($"a",sparkFunctions.lit("Hello"))))
+ .select(sparkFunctions.levenshtein($"a", sparkFunctions.concat($"a", sparkFunctions.lit("Hello"))))
.map(_.getAs[Int](0))
.collect()
.toVector
val typed = ds
- .select(levenshtein(ds('a), concat(ds('a),lit("Hello"))))
+ .select(levenshtein(ds('a), concat(ds('a), lit("Hello"))))
.collect()
.run()
.toVector
@@ -2207,7 +2199,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
}
test("Empty vararg tests") {
- def prop[A : TypedEncoder, B: TypedEncoder](data: Vector[X2[A, B]]) = {
+ def prop[A: TypedEncoder, B: TypedEncoder](data: Vector[X2[A, B]]) = {
val ds = TypedDataset.create(data)
val frameless = ds.select(ds('a), concat(), ds('b), concatWs(":")).collect().run().toVector
val framelessAggr = ds.agg(concat(), concatWs("x"), litAggr(2)).collect().run().toVector
@@ -2220,8 +2212,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
check(forAll(prop[Option[Boolean], Long] _))
}
- def dateTimeStringProp(typedDS: TypedDataset[X1[String]])
- (typedCol: TypedColumn[X1[String], Option[Int]], sparkFunc: Column => Column): Prop = {
+ def dateTimeStringProp(typedDS: TypedDataset[X1[String]])(typedCol: TypedColumn[X1[String], Option[Int]], sparkFunc: Column => Column): Prop = {
val spark = session
import spark.implicits._
@@ -2245,9 +2236,9 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite {
import spark.implicits._
def prop(data: List[X1[String]])(implicit E: Encoder[Option[Int]]): Prop = {
- val ds = TypedDataset.create(data)
- dateTimeStringProp(ds)(year(ds[String]('a)), sparkFunctions.year)
- }
+ val ds = TypedDataset.create(data)
+ dateTimeStringProp(ds)(year(ds[String]('a)), sparkFunctions.year)
+ }
check(forAll(dateTimeStringGen)(data => prop(data.map(X1.apply))))
check(forAll(prop _))
diff --git a/dataset/src/test/scala/frameless/functions/UdfTests.scala b/dataset/src/test/scala/frameless/functions/UdfTests.scala
index 10e65180f..9a6b7932f 100644
--- a/dataset/src/test/scala/frameless/functions/UdfTests.scala
+++ b/dataset/src/test/scala/frameless/functions/UdfTests.scala
@@ -14,7 +14,7 @@ class UdfTests extends TypedDatasetSuite {
val A = dataset.col[A]('a)
// filter forces whole codegen
- val codegen = dataset.deserialized.filter((_:X1[A]) => true).select(u1(A)).collect().run().toVector
+ val codegen = dataset.deserialized.filter((_: X1[A]) => true).select(u1(A)).collect().run().toVector
// otherwise it uses local relation
val local = dataset.select(u2(A)).collect().run().toVector
@@ -42,8 +42,7 @@ class UdfTests extends TypedDatasetSuite {
}
test("multiple one argument udf") {
- def prop[A: TypedEncoder, B: TypedEncoder, C: TypedEncoder]
- (data: Vector[X3[A, B, C]], f1: A => A, f2: B => B, f3: C => C): Prop = {
+ def prop[A: TypedEncoder, B: TypedEncoder, C: TypedEncoder](data: Vector[X3[A, B, C]], f1: A => A, f2: B => B, f3: C => C): Prop = {
val dataset = TypedDataset.create(data)
val u11 = udf[X3[A, B, C], A, A](f1)
val u21 = udf[X3[A, B, C], B, B](f2)
@@ -69,8 +68,7 @@ class UdfTests extends TypedDatasetSuite {
}
test("two argument udf") {
- def prop[A: TypedEncoder, B: TypedEncoder, C: TypedEncoder]
- (data: Vector[X3[A, B, C]], f1: (A, B) => C): Prop = {
+ def prop[A: TypedEncoder, B: TypedEncoder, C: TypedEncoder](data: Vector[X3[A, B, C]], f1: (A, B) => C): Prop = {
val dataset = TypedDataset.create(data)
val u1 = udf[X3[A, B, C], A, B, C](f1)
val u2 = dataset.makeUDF(f1)
@@ -89,8 +87,7 @@ class UdfTests extends TypedDatasetSuite {
}
test("multiple two argument udf") {
- def prop[A: TypedEncoder, B: TypedEncoder, C: TypedEncoder]
- (data: Vector[X3[A, B, C]], f1: (A, B) => C, f2: (B, C) => A): Prop = {
+ def prop[A: TypedEncoder, B: TypedEncoder, C: TypedEncoder](data: Vector[X3[A, B, C]], f1: (A, B) => C, f2: (B, C) => A): Prop = {
val dataset = TypedDataset.create(data)
val u11 = udf[X3[A, B, C], A, B, C](f1)
val u12 = dataset.makeUDF(f1)
@@ -113,8 +110,7 @@ class UdfTests extends TypedDatasetSuite {
}
test("three argument udf") {
- def prop[A: TypedEncoder, B: TypedEncoder, C: TypedEncoder]
- (data: Vector[X3[A, B, C]], f: (A, B, C) => C): Prop = {
+ def prop[A: TypedEncoder, B: TypedEncoder, C: TypedEncoder](data: Vector[X3[A, B, C]], f: (A, B, C) => C): Prop = {
val dataset = TypedDataset.create(data)
val u1 = udf[X3[A, B, C], A, B, C, C](f)
val u2 = dataset.makeUDF(f)
@@ -135,8 +131,7 @@ class UdfTests extends TypedDatasetSuite {
}
test("four argument udf") {
- def prop[A: TypedEncoder, B: TypedEncoder, C: TypedEncoder, D: TypedEncoder]
- (data: Vector[X4[A, B, C, D]], f: (A, B, C, D) => C): Prop = {
+ def prop[A: TypedEncoder, B: TypedEncoder, C: TypedEncoder, D: TypedEncoder](data: Vector[X4[A, B, C, D]], f: (A, B, C, D) => C): Prop = {
val dataset = TypedDataset.create(data)
val u1 = udf[X4[A, B, C, D], A, B, C, D, C](f)
val u2 = dataset.makeUDF(f)
@@ -161,8 +156,10 @@ class UdfTests extends TypedDatasetSuite {
}
test("five argument udf") {
- def prop[A: TypedEncoder, B: TypedEncoder, C: TypedEncoder, D: TypedEncoder, E: TypedEncoder]
- (data: Vector[X5[A, B, C, D, E]], f: (A, B, C, D, E) => C): Prop = {
+ def prop[A: TypedEncoder, B: TypedEncoder, C: TypedEncoder, D: TypedEncoder, E: TypedEncoder](
+ data: Vector[X5[A, B, C, D, E]],
+ f: (A, B, C, D, E) => C
+ ): Prop = {
val dataset = TypedDataset.create(data)
val u1 = udf[X5[A, B, C, D, E], A, B, C, D, E, C](f)
val u2 = dataset.makeUDF(f)
diff --git a/dataset/src/test/scala/frameless/functions/UnaryFunctionsTest.scala b/dataset/src/test/scala/frameless/functions/UnaryFunctionsTest.scala
index 009179be6..a3ebac3ad 100644
--- a/dataset/src/test/scala/frameless/functions/UnaryFunctionsTest.scala
+++ b/dataset/src/test/scala/frameless/functions/UnaryFunctionsTest.scala
@@ -1,7 +1,7 @@
package frameless
package functions
-import org.scalacheck.{ Arbitrary, Prop }
+import org.scalacheck.{Arbitrary, Prop}
import org.scalacheck.Prop._
import scala.collection.SeqLike
@@ -10,7 +10,10 @@ import scala.reflect.ClassTag
class UnaryFunctionsTest extends TypedDatasetSuite {
test("size tests") {
- def prop[F[X] <: Traversable[X] : CatalystSizableCollection, A](xs: List[X1[F[A]]])(implicit arb: Arbitrary[F[A]], enc: TypedEncoder[F[A]]): Prop = {
+ def prop[F[X] <: Traversable[X]: CatalystSizableCollection, A](xs: List[X1[F[A]]])(implicit
+ arb: Arbitrary[F[A]],
+ enc: TypedEncoder[F[A]]
+ ): Prop = {
val tds = TypedDataset.create(xs)
val framelessResults = tds.select(size(tds('a))).collect().run().toVector
@@ -58,7 +61,7 @@ class UnaryFunctionsTest extends TypedDatasetSuite {
}
test("sort in ascending order") {
- def prop[F[X] <: SeqLike[X, F[X]] : CatalystSortableCollection, A: Ordering](xs: List[X1[F[A]]])(implicit enc: TypedEncoder[F[A]]): Prop = {
+ def prop[F[X] <: SeqLike[X, F[X]]: CatalystSortableCollection, A: Ordering](xs: List[X1[F[A]]])(implicit enc: TypedEncoder[F[A]]): Prop = {
val tds = TypedDataset.create(xs)
val framelessResults = tds.select(sortAscending(tds('a))).collect().run().toVector
@@ -78,7 +81,7 @@ class UnaryFunctionsTest extends TypedDatasetSuite {
}
test("sort in descending order") {
- def prop[F[X] <: SeqLike[X, F[X]] : CatalystSortableCollection, A: Ordering](xs: List[X1[F[A]]])(implicit enc: TypedEncoder[F[A]]): Prop = {
+ def prop[F[X] <: SeqLike[X, F[X]]: CatalystSortableCollection, A: Ordering](xs: List[X1[F[A]]])(implicit enc: TypedEncoder[F[A]]): Prop = {
val tds = TypedDataset.create(xs)
val framelessResults = tds.select(sortDescending(tds('a))).collect().run().toVector
@@ -98,7 +101,7 @@ class UnaryFunctionsTest extends TypedDatasetSuite {
}
test("sort on array test: ascending order") {
- def prop[A: TypedEncoder : Ordering : ClassTag](xs: List[X1[Array[A]]]): Prop = {
+ def prop[A: TypedEncoder: Ordering: ClassTag](xs: List[X1[Array[A]]]): Prop = {
val tds = TypedDataset.create(xs)
val framelessResults = tds.select(sortAscending(tds('a))).collect().run().toVector
@@ -119,7 +122,7 @@ class UnaryFunctionsTest extends TypedDatasetSuite {
}
test("sort on array test: descending order") {
- def prop[A: TypedEncoder : Ordering : ClassTag](xs: List[X1[Array[A]]]): Prop = {
+ def prop[A: TypedEncoder: Ordering: ClassTag](xs: List[X1[Array[A]]]): Prop = {
val tds = TypedDataset.create(xs)
val framelessResults = tds.select(sortDescending(tds('a))).collect().run().toVector
diff --git a/dataset/src/test/scala/frameless/ops/ColumnTypesTest.scala b/dataset/src/test/scala/frameless/ops/ColumnTypesTest.scala
index 303eb2cbd..0bf1595bc 100644
--- a/dataset/src/test/scala/frameless/ops/ColumnTypesTest.scala
+++ b/dataset/src/test/scala/frameless/ops/ColumnTypesTest.scala
@@ -12,12 +12,12 @@ class ColumnTypesTest extends TypedDatasetSuite {
val d: TypedDataset[X4[A, B, C, D]] = TypedDataset.create(data)
val hlist = d('a) :: d('b) :: d('c) :: d('d) :: HNil
- type TC[N] = TypedColumn[X4[A,B,C,D], N]
+ type TC[N] = TypedColumn[X4[A, B, C, D], N]
type IN = TC[A] :: TC[B] :: TC[C] :: TC[D] :: HNil
type OUT = A :: B :: C :: D :: HNil
- implicitly[ColumnTypes.Aux[X4[A,B,C,D], IN, OUT]]
+ implicitly[ColumnTypes.Aux[X4[A, B, C, D], IN, OUT]]
Prop.passed // successful compilation implies test correctness
}
diff --git a/dataset/src/test/scala/frameless/ops/CubeTests.scala b/dataset/src/test/scala/frameless/ops/CubeTests.scala
index 7a06822b9..4fe448b2a 100644
--- a/dataset/src/test/scala/frameless/ops/CubeTests.scala
+++ b/dataset/src/test/scala/frameless/ops/CubeTests.scala
@@ -8,8 +8,7 @@ import org.scalacheck.Prop._
class CubeTests extends TypedDatasetSuite {
test("cube('a).agg(count())") {
- def prop[A: TypedEncoder : Ordering, Out: TypedEncoder : Numeric]
- (data: List[X1[A]])(implicit summable: CatalystSummable[A, Out]): Prop = {
+ def prop[A: TypedEncoder: Ordering, Out: TypedEncoder: Numeric](data: List[X1[A]])(implicit summable: CatalystSummable[A, Out]): Prop = {
val dataset = TypedDataset.create(data)
val A = dataset.col[A]('a)
@@ -24,8 +23,9 @@ class CubeTests extends TypedDatasetSuite {
}
test("cube('a, 'b).agg(count())") {
- def prop[A: TypedEncoder : Ordering, B: TypedEncoder, Out: TypedEncoder : Numeric]
- (data: List[X2[A, B]])(implicit summable: CatalystSummable[B, Out]): Prop = {
+ def prop[A: TypedEncoder: Ordering, B: TypedEncoder, Out: TypedEncoder: Numeric](data: List[X2[A, B]])(implicit
+ summable: CatalystSummable[B, Out]
+ ): Prop = {
val dataset = TypedDataset.create(data)
val A = dataset.col[A]('a)
val B = dataset.col[B]('b)
@@ -41,8 +41,9 @@ class CubeTests extends TypedDatasetSuite {
}
test("cube('a).agg(sum('b)") {
- def prop[A: TypedEncoder : Ordering, B: TypedEncoder, Out: TypedEncoder : Numeric]
- (data: List[X2[A, B]])(implicit summable: CatalystSummable[B, Out]): Prop = {
+ def prop[A: TypedEncoder: Ordering, B: TypedEncoder, Out: TypedEncoder: Numeric](data: List[X2[A, B]])(implicit
+ summable: CatalystSummable[B, Out]
+ ): Prop = {
val dataset = TypedDataset.create(data)
val A = dataset.col[A]('a)
val B = dataset.col[B]('b)
@@ -58,8 +59,7 @@ class CubeTests extends TypedDatasetSuite {
}
test("cube('a).mapGroups('a, sum('b))") {
- def prop[A: TypedEncoder : Ordering, B: TypedEncoder : Numeric]
- (data: List[X2[A, B]]): Prop = {
+ def prop[A: TypedEncoder: Ordering, B: TypedEncoder: Numeric](data: List[X2[A, B]]): Prop = {
val dataset = TypedDataset.create(data)
val A = dataset.col[A]('a)
@@ -76,11 +76,11 @@ class CubeTests extends TypedDatasetSuite {
test("cube('a).agg(sum('b), sum('c)) to cube('a).agg(sum('a), sum('b), sum('a), sum('b), sum('a))") {
def prop[
- A: TypedEncoder : Ordering,
- B: TypedEncoder,
- C: TypedEncoder,
- OutB: TypedEncoder : Numeric,
- OutC: TypedEncoder : Numeric
+ A: TypedEncoder: Ordering,
+ B: TypedEncoder,
+ C: TypedEncoder,
+ OutB: TypedEncoder: Numeric,
+ OutC: TypedEncoder: Numeric
](data: List[X3[A, B, C]])(
implicit
summableB: CatalystSummable[B, OutB],
@@ -138,12 +138,12 @@ class CubeTests extends TypedDatasetSuite {
test("cube('a, 'b).agg(sum('c), sum('d))") {
def prop[
- A: TypedEncoder : Ordering,
- B: TypedEncoder : Ordering,
- C: TypedEncoder,
- D: TypedEncoder,
- OutC: TypedEncoder : Numeric,
- OutD: TypedEncoder : Numeric
+ A: TypedEncoder: Ordering,
+ B: TypedEncoder: Ordering,
+ C: TypedEncoder,
+ D: TypedEncoder,
+ OutC: TypedEncoder: Numeric,
+ OutD: TypedEncoder: Numeric
](data: List[X4[A, B, C, D]])(
implicit
summableC: CatalystSummable[C, OutC],
@@ -173,10 +173,10 @@ class CubeTests extends TypedDatasetSuite {
test("cube('a, 'b).agg(sum('c)) to cube('a, 'b).agg(sum('c),sum('c),sum('c),sum('c),sum('c))") {
def prop[
- A: TypedEncoder : Ordering,
- B: TypedEncoder : Ordering,
- C: TypedEncoder,
- OutC: TypedEncoder: Numeric
+ A: TypedEncoder: Ordering,
+ B: TypedEncoder: Ordering,
+ C: TypedEncoder,
+ OutC: TypedEncoder: Numeric
](data: List[X3[A, B, C]])(implicit summableC: CatalystSummable[C, OutC]): Prop = {
val dataset = TypedDataset.create(data)
val A = dataset.col[A]('a)
@@ -224,7 +224,9 @@ class CubeTests extends TypedDatasetSuite {
val sparkSumCCCC = dataset.dataset
.cube("a", "b").sum("c", "c", "c", "c").collect().toVector
- .map(row => (Option(row.getAs[A](0)), Option(row.getAs[B](1)), row.getAs[OutC](2), row.getAs[OutC](3), row.getAs[OutC](4), row.getAs[OutC](5)))
+ .map(row =>
+ (Option(row.getAs[A](0)), Option(row.getAs[B](1)), row.getAs[OutC](2), row.getAs[OutC](3), row.getAs[OutC](4), row.getAs[OutC](5))
+ )
.sortBy(_._2)
val framelessSumCCCCC = dataset
@@ -235,14 +237,24 @@ class CubeTests extends TypedDatasetSuite {
val sparkSumCCCCC = dataset.dataset
.cube("a", "b").sum("c", "c", "c", "c", "c").collect().toVector
- .map(row => (Option(row.getAs[A](0)), Option(row.getAs[B](1)), row.getAs[OutC](2), row.getAs[OutC](3), row.getAs[OutC](4), row.getAs[OutC](5), row.getAs[OutC](6)))
+ .map(row =>
+ (
+ Option(row.getAs[A](0)),
+ Option(row.getAs[B](1)),
+ row.getAs[OutC](2),
+ row.getAs[OutC](3),
+ row.getAs[OutC](4),
+ row.getAs[OutC](5),
+ row.getAs[OutC](6)
+ )
+ )
.sortBy(_._2)
(framelessSumC ?= sparkSumC) &&
- (framelessSumCC ?= sparkSumCC) &&
- (framelessSumCCC ?= sparkSumCCC) &&
- (framelessSumCCCC ?= sparkSumCCCC) &&
- (framelessSumCCCCC ?= sparkSumCCCCC)
+ (framelessSumCC ?= sparkSumCC) &&
+ (framelessSumCCC ?= sparkSumCCC) &&
+ (framelessSumCCCC ?= sparkSumCCCC) &&
+ (framelessSumCCCCC ?= sparkSumCCCCC)
}
check(forAll(prop[String, Long, Double, Double] _))
@@ -250,9 +262,9 @@ class CubeTests extends TypedDatasetSuite {
test("cube('a, 'b).mapGroups('a, 'b, sum('c))") {
def prop[
- A: TypedEncoder : Ordering,
- B: TypedEncoder : Ordering,
- C: TypedEncoder : Numeric
+ A: TypedEncoder: Ordering,
+ B: TypedEncoder: Ordering,
+ C: TypedEncoder: Numeric
](data: List[X3[A, B, C]]): Prop = {
val dataset = TypedDataset.create(data)
val A = dataset.col[A]('a)
@@ -275,8 +287,8 @@ class CubeTests extends TypedDatasetSuite {
test("cube('a).mapGroups(('a, toVector(('a, 'b))") {
def prop[
- A: TypedEncoder: Ordering,
- B: TypedEncoder: Ordering,
+ A: TypedEncoder: Ordering,
+ B: TypedEncoder: Ordering
](data: Vector[X2[A, B]]): Prop = {
val dataset = TypedDataset.create(data)
val A = dataset.col[A]('a)
@@ -298,8 +310,8 @@ class CubeTests extends TypedDatasetSuite {
test("cube('a).flatMapGroups(('a, toVector(('a, 'b))") {
def prop[
- A: TypedEncoder : Ordering,
- B: TypedEncoder : Ordering
+ A: TypedEncoder: Ordering,
+ B: TypedEncoder: Ordering
](data: Vector[X2[A, B]]): Prop = {
val dataset = TypedDataset.create(data)
val A = dataset.col[A]('a)
@@ -325,9 +337,9 @@ class CubeTests extends TypedDatasetSuite {
test("cube('a, 'b).flatMapGroups((('a,'b) toVector((('a,'b), 'c))") {
def prop[
- A: TypedEncoder : Ordering,
- B: TypedEncoder : Ordering,
- C: TypedEncoder : Ordering
+ A: TypedEncoder: Ordering,
+ B: TypedEncoder: Ordering,
+ C: TypedEncoder: Ordering
](data: Vector[X3[A, B, C]]): Prop = {
val dataset = TypedDataset.create(data)
val cA = dataset.col[A]('a)
@@ -353,8 +365,7 @@ class CubeTests extends TypedDatasetSuite {
}
test("cubeMany('a).agg(sum('b))") {
- def prop[A: TypedEncoder : Ordering, Out: TypedEncoder : Numeric]
- (data: List[X1[A]])(implicit summable: CatalystSummable[A, Out]): Prop = {
+ def prop[A: TypedEncoder: Ordering, Out: TypedEncoder: Numeric](data: List[X1[A]])(implicit summable: CatalystSummable[A, Out]): Prop = {
val dataset = TypedDataset.create(data)
val A = dataset.col[A]('a)
@@ -367,4 +378,4 @@ class CubeTests extends TypedDatasetSuite {
check(forAll(prop[Int, Long] _))
}
-}
\ No newline at end of file
+}
diff --git a/dataset/src/test/scala/frameless/ops/PivotTest.scala b/dataset/src/test/scala/frameless/ops/PivotTest.scala
index dd9bf5e61..66b820a68 100644
--- a/dataset/src/test/scala/frameless/ops/PivotTest.scala
+++ b/dataset/src/test/scala/frameless/ops/PivotTest.scala
@@ -31,12 +31,18 @@ class PivotTest extends TypedDatasetSuite {
.agg(sparkFunctions.sum("c"), sparkFunctions.first("d")).collect().toVector
(frameless.map(_._1) ?= spark.map(x => x.getAs[String](0))).&&(
- frameless.map(_._2) ?= spark.map(x => Option(x.getAs[Long](1)))).&&(
- frameless.map(_._3) ?= spark.map(x => Option(x.getAs[Boolean](2)))).&&(
- frameless.map(_._4) ?= spark.map(x => Option(x.getAs[Long](3)))).&&(
- frameless.map(_._5) ?= spark.map(x => Option(x.getAs[Boolean](4)))).&&(
- frameless.map(_._6) ?= spark.map(x => Option(x.getAs[Long](5)))).&&(
- frameless.map(_._7) ?= spark.map(x => Option(x.getAs[Boolean](6))))
+ frameless.map(_._2) ?= spark.map(x => Option(x.getAs[Long](1)))
+ ).&&(
+ frameless.map(_._3) ?= spark.map(x => Option(x.getAs[Boolean](2)))
+ ).&&(
+ frameless.map(_._4) ?= spark.map(x => Option(x.getAs[Long](3)))
+ ).&&(
+ frameless.map(_._5) ?= spark.map(x => Option(x.getAs[Boolean](4)))
+ ).&&(
+ frameless.map(_._6) ?= spark.map(x => Option(x.getAs[Long](5)))
+ ).&&(
+ frameless.map(_._7) ?= spark.map(x => Option(x.getAs[Boolean](6)))
+ )
}
check(forAll(withCustomGenX4)(prop))
@@ -95,4 +101,4 @@ class PivotTest extends TypedDatasetSuite {
agg(count[X3[String, Boolean, Boolean]]()).
collect().run().toVector ?= Vector(("a", Some(2L), Some(1L)))
}
-}
\ No newline at end of file
+}
diff --git a/dataset/src/test/scala/frameless/ops/RepeatTest.scala b/dataset/src/test/scala/frameless/ops/RepeatTest.scala
index 78dfc6410..f92f827b3 100644
--- a/dataset/src/test/scala/frameless/ops/RepeatTest.scala
+++ b/dataset/src/test/scala/frameless/ops/RepeatTest.scala
@@ -6,13 +6,13 @@ import shapeless.{::, HNil, Nat}
class RepeatTest extends TypedDatasetSuite {
test("summoning with implicitly") {
- implicitly[Repeat.Aux[Int::Boolean::HNil, Nat._1, Int::Boolean::HNil]]
- implicitly[Repeat.Aux[Int::Boolean::HNil, Nat._2, Int::Boolean::Int::Boolean::HNil]]
- implicitly[Repeat.Aux[Int::Boolean::HNil, Nat._3, Int::Boolean::Int::Boolean::Int::Boolean::HNil]]
- implicitly[Repeat.Aux[String::HNil, Nat._5, String::String::String::String::String::HNil]]
+ implicitly[Repeat.Aux[Int :: Boolean :: HNil, Nat._1, Int :: Boolean :: HNil]]
+ implicitly[Repeat.Aux[Int :: Boolean :: HNil, Nat._2, Int :: Boolean :: Int :: Boolean :: HNil]]
+ implicitly[Repeat.Aux[Int :: Boolean :: HNil, Nat._3, Int :: Boolean :: Int :: Boolean :: Int :: Boolean :: HNil]]
+ implicitly[Repeat.Aux[String :: HNil, Nat._5, String :: String :: String :: String :: String :: HNil]]
}
test("ill typed") {
illTyped("""implicitly[Repeat.Aux[String::HNil, Nat._5, String::String::String::String::HNil]]""")
}
-}
\ No newline at end of file
+}
diff --git a/dataset/src/test/scala/frameless/ops/RollupTests.scala b/dataset/src/test/scala/frameless/ops/RollupTests.scala
index da73ef8d0..7fd5a4584 100644
--- a/dataset/src/test/scala/frameless/ops/RollupTests.scala
+++ b/dataset/src/test/scala/frameless/ops/RollupTests.scala
@@ -8,8 +8,7 @@ import org.scalacheck.Prop._
class RollupTests extends TypedDatasetSuite {
test("rollup('a).agg(count())") {
- def prop[A: TypedEncoder : Ordering, Out: TypedEncoder : Numeric]
- (data: List[X1[A]])(implicit summable: CatalystSummable[A, Out]): Prop = {
+ def prop[A: TypedEncoder: Ordering, Out: TypedEncoder: Numeric](data: List[X1[A]])(implicit summable: CatalystSummable[A, Out]): Prop = {
val dataset = TypedDataset.create(data)
val A = dataset.col[A]('a)
@@ -24,8 +23,9 @@ class RollupTests extends TypedDatasetSuite {
}
test("rollup('a, 'b).agg(count())") {
- def prop[A: TypedEncoder : Ordering, B: TypedEncoder, Out: TypedEncoder : Numeric]
- (data: List[X2[A, B]])(implicit summable: CatalystSummable[B, Out]): Prop = {
+ def prop[A: TypedEncoder: Ordering, B: TypedEncoder, Out: TypedEncoder: Numeric](data: List[X2[A, B]])(implicit
+ summable: CatalystSummable[B, Out]
+ ): Prop = {
val dataset = TypedDataset.create(data)
val A = dataset.col[A]('a)
val B = dataset.col[B]('b)
@@ -41,8 +41,9 @@ class RollupTests extends TypedDatasetSuite {
}
test("rollup('a).agg(sum('b)") {
- def prop[A: TypedEncoder : Ordering, B: TypedEncoder, Out: TypedEncoder : Numeric]
- (data: List[X2[A, B]])(implicit summable: CatalystSummable[B, Out]): Prop = {
+ def prop[A: TypedEncoder: Ordering, B: TypedEncoder, Out: TypedEncoder: Numeric](data: List[X2[A, B]])(implicit
+ summable: CatalystSummable[B, Out]
+ ): Prop = {
val dataset = TypedDataset.create(data)
val A = dataset.col[A]('a)
val B = dataset.col[B]('b)
@@ -58,8 +59,7 @@ class RollupTests extends TypedDatasetSuite {
}
test("rollup('a).mapGroups('a, sum('b))") {
- def prop[A: TypedEncoder : Ordering, B: TypedEncoder : Numeric]
- (data: List[X2[A, B]]): Prop = {
+ def prop[A: TypedEncoder: Ordering, B: TypedEncoder: Numeric](data: List[X2[A, B]]): Prop = {
val dataset = TypedDataset.create(data)
val A = dataset.col[A]('a)
@@ -76,11 +76,11 @@ class RollupTests extends TypedDatasetSuite {
test("rollup('a).agg(sum('b), sum('c)) to rollup('a).agg(sum('a), sum('b), sum('a), sum('b), sum('a))") {
def prop[
- A: TypedEncoder : Ordering,
- B: TypedEncoder,
- C: TypedEncoder,
- OutB: TypedEncoder : Numeric,
- OutC: TypedEncoder : Numeric
+ A: TypedEncoder: Ordering,
+ B: TypedEncoder,
+ C: TypedEncoder,
+ OutB: TypedEncoder: Numeric,
+ OutC: TypedEncoder: Numeric
](data: List[X3[A, B, C]])(
implicit
summableB: CatalystSummable[B, OutB],
@@ -138,12 +138,12 @@ class RollupTests extends TypedDatasetSuite {
test("rollup('a, 'b).agg(sum('c), sum('d))") {
def prop[
- A: TypedEncoder : Ordering,
- B: TypedEncoder : Ordering,
- C: TypedEncoder,
- D: TypedEncoder,
- OutC: TypedEncoder : Numeric,
- OutD: TypedEncoder : Numeric
+ A: TypedEncoder: Ordering,
+ B: TypedEncoder: Ordering,
+ C: TypedEncoder,
+ D: TypedEncoder,
+ OutC: TypedEncoder: Numeric,
+ OutD: TypedEncoder: Numeric
](data: List[X4[A, B, C, D]])(
implicit
summableC: CatalystSummable[C, OutC],
@@ -173,10 +173,10 @@ class RollupTests extends TypedDatasetSuite {
test("rollup('a, 'b).agg(sum('c)) to rollup('a, 'b).agg(sum('c),sum('c),sum('c),sum('c),sum('c))") {
def prop[
- A: TypedEncoder : Ordering,
- B: TypedEncoder : Ordering,
- C: TypedEncoder,
- OutC: TypedEncoder: Numeric
+ A: TypedEncoder: Ordering,
+ B: TypedEncoder: Ordering,
+ C: TypedEncoder,
+ OutC: TypedEncoder: Numeric
](data: List[X3[A, B, C]])(implicit summableC: CatalystSummable[C, OutC]): Prop = {
val dataset = TypedDataset.create(data)
val A = dataset.col[A]('a)
@@ -224,7 +224,9 @@ class RollupTests extends TypedDatasetSuite {
val sparkSumCCCC = dataset.dataset
.rollup("a", "b").sum("c", "c", "c", "c").collect().toVector
- .map(row => (Option(row.getAs[A](0)), Option(row.getAs[B](1)), row.getAs[OutC](2), row.getAs[OutC](3), row.getAs[OutC](4), row.getAs[OutC](5)))
+ .map(row =>
+ (Option(row.getAs[A](0)), Option(row.getAs[B](1)), row.getAs[OutC](2), row.getAs[OutC](3), row.getAs[OutC](4), row.getAs[OutC](5))
+ )
.sortBy(_._2)
val framelessSumCCCCC = dataset
@@ -235,14 +237,24 @@ class RollupTests extends TypedDatasetSuite {
val sparkSumCCCCC = dataset.dataset
.rollup("a", "b").sum("c", "c", "c", "c", "c").collect().toVector
- .map(row => (Option(row.getAs[A](0)), Option(row.getAs[B](1)), row.getAs[OutC](2), row.getAs[OutC](3), row.getAs[OutC](4), row.getAs[OutC](5), row.getAs[OutC](6)))
+ .map(row =>
+ (
+ Option(row.getAs[A](0)),
+ Option(row.getAs[B](1)),
+ row.getAs[OutC](2),
+ row.getAs[OutC](3),
+ row.getAs[OutC](4),
+ row.getAs[OutC](5),
+ row.getAs[OutC](6)
+ )
+ )
.sortBy(_._2)
(framelessSumC ?= sparkSumC) &&
- (framelessSumCC ?= sparkSumCC) &&
- (framelessSumCCC ?= sparkSumCCC) &&
- (framelessSumCCCC ?= sparkSumCCCC) &&
- (framelessSumCCCCC ?= sparkSumCCCCC)
+ (framelessSumCC ?= sparkSumCC) &&
+ (framelessSumCCC ?= sparkSumCCC) &&
+ (framelessSumCCCC ?= sparkSumCCCC) &&
+ (framelessSumCCCCC ?= sparkSumCCCCC)
}
check(forAll(prop[String, Long, Double, Double] _))
@@ -250,9 +262,9 @@ class RollupTests extends TypedDatasetSuite {
test("rollup('a, 'b).mapGroups('a, 'b, sum('c))") {
def prop[
- A: TypedEncoder : Ordering,
- B: TypedEncoder : Ordering,
- C: TypedEncoder : Numeric
+ A: TypedEncoder: Ordering,
+ B: TypedEncoder: Ordering,
+ C: TypedEncoder: Numeric
](data: List[X3[A, B, C]]): Prop = {
val dataset = TypedDataset.create(data)
val A = dataset.col[A]('a)
@@ -275,8 +287,8 @@ class RollupTests extends TypedDatasetSuite {
test("rollup('a).mapGroups(('a, toVector(('a, 'b))") {
def prop[
- A: TypedEncoder: Ordering,
- B: TypedEncoder: Ordering
+ A: TypedEncoder: Ordering,
+ B: TypedEncoder: Ordering
](data: Vector[X2[A, B]]): Prop = {
val dataset = TypedDataset.create(data)
val A = dataset.col[A]('a)
@@ -298,8 +310,8 @@ class RollupTests extends TypedDatasetSuite {
test("rollup('a).flatMapGroups(('a, toVector(('a, 'b))") {
def prop[
- A: TypedEncoder : Ordering,
- B: TypedEncoder : Ordering
+ A: TypedEncoder: Ordering,
+ B: TypedEncoder: Ordering
](data: Vector[X2[A, B]]): Prop = {
val dataset = TypedDataset.create(data)
val A = dataset.col[A]('a)
@@ -325,9 +337,9 @@ class RollupTests extends TypedDatasetSuite {
test("rollup('a, 'b).flatMapGroups((('a,'b) toVector((('a,'b), 'c))") {
def prop[
- A: TypedEncoder : Ordering,
- B: TypedEncoder : Ordering,
- C: TypedEncoder : Ordering
+ A: TypedEncoder: Ordering,
+ B: TypedEncoder: Ordering,
+ C: TypedEncoder: Ordering
](data: Vector[X3[A, B, C]]): Prop = {
val dataset = TypedDataset.create(data)
val cA = dataset.col[A]('a)
@@ -353,8 +365,7 @@ class RollupTests extends TypedDatasetSuite {
}
test("rollupMany('a).agg(sum('b))") {
- def prop[A: TypedEncoder : Ordering, Out: TypedEncoder : Numeric]
- (data: List[X1[A]])(implicit summable: CatalystSummable[A, Out]): Prop = {
+ def prop[A: TypedEncoder: Ordering, Out: TypedEncoder: Numeric](data: List[X1[A]])(implicit summable: CatalystSummable[A, Out]): Prop = {
val dataset = TypedDataset.create(data)
val A = dataset.col[A]('a)
@@ -367,4 +378,4 @@ class RollupTests extends TypedDatasetSuite {
check(forAll(prop[Int, Long] _))
}
-}
\ No newline at end of file
+}
diff --git a/dataset/src/test/scala/frameless/ops/SmartProjectTest.scala b/dataset/src/test/scala/frameless/ops/SmartProjectTest.scala
index 233a42aec..1324e45dd 100644
--- a/dataset/src/test/scala/frameless/ops/SmartProjectTest.scala
+++ b/dataset/src/test/scala/frameless/ops/SmartProjectTest.scala
@@ -5,7 +5,6 @@ import org.scalacheck.Prop
import org.scalacheck.Prop._
import shapeless.test.illTyped
-
case class Foo(i: Int, j: Int, x: String)
case class Bar(i: Int, x: String)
case class InvalidFooProjectionType(i: Int, x: Boolean)
diff --git a/dataset/src/test/scala/frameless/package.scala b/dataset/src/test/scala/frameless/package.scala
index 82ff375c9..a012a7436 100644
--- a/dataset/src/test/scala/frameless/package.scala
+++ b/dataset/src/test/scala/frameless/package.scala
@@ -4,6 +4,7 @@ import java.time.{LocalDateTime => JavaLocalDateTime}
import org.scalacheck.{Arbitrary, Gen}
package object frameless {
+
/** Fixed decimal point to avoid precision problems specific to Spark */
implicit val arbBigDecimal: Arbitrary[BigDecimal] = Arbitrary {
for {
@@ -72,11 +73,10 @@ package object frameless {
def anyCauseHas(t: Throwable, f: Throwable => Boolean): Boolean =
if (f(t))
true
+ else if (t.getCause ne null)
+ anyCauseHas(t.getCause, f)
else
- if (t.getCause ne null)
- anyCauseHas(t.getCause, f)
- else
- false
+ false
/**
* Runs up to maxRuns and outputs the number of failures (times thrown)
@@ -85,11 +85,11 @@ package object frameless {
* @tparam T
* @return the last passing thunk, or null
*/
- def runLoads[T](maxRuns: Int = 1000)(thunk: => T): T ={
+ def runLoads[T](maxRuns: Int = 1000)(thunk: => T): T = {
var i = 0
var r = null.asInstanceOf[T]
var passed = 0
- while(i < maxRuns){
+ while (i < maxRuns) {
i += 1
try {
r = thunk
@@ -107,20 +107,20 @@ package object frameless {
r
}
- /**
+ /**
* Runs a given thunk up to maxRuns times, restarting the thunk if tolerantOf the thrown Throwable is true
* @param tolerantOf
* @param maxRuns default of 20
* @param thunk
* @return either a successful run result or the last error will be thrown
*/
- def tolerantRun[T](tolerantOf: Throwable => Boolean, maxRuns: Int = 20)(thunk: => T): T ={
+ def tolerantRun[T](tolerantOf: Throwable => Boolean, maxRuns: Int = 20)(thunk: => T): T = {
var passed = false
var i = 0
var res: T = null.asInstanceOf[T]
var thrown: Throwable = null
- while((i < maxRuns) && !passed) {
+ while ((i < maxRuns) && !passed) {
try {
i += 1
res = thunk
diff --git a/dataset/src/test/scala/frameless/sql/package.scala b/dataset/src/test/scala/frameless/sql/package.scala
index fcb45b03d..35b9b4aaa 100644
--- a/dataset/src/test/scala/frameless/sql/package.scala
+++ b/dataset/src/test/scala/frameless/sql/package.scala
@@ -9,8 +9,8 @@ package object sql {
def rec(expr: Expression, acc: List[Expression]): List[Expression] = {
expr match {
case And(left, right) => rec(left, rec(right, acc))
- case Or(left, right) => rec(left, rec(right, acc))
- case e => e +: acc
+ case Or(left, right) => rec(left, rec(right, acc))
+ case e => e +: acc
}
}
diff --git a/dataset/src/test/scala/frameless/sql/rules/SQLRulesSuite.scala b/dataset/src/test/scala/frameless/sql/rules/SQLRulesSuite.scala
index 8555d1809..86a4a3798 100644
--- a/dataset/src/test/scala/frameless/sql/rules/SQLRulesSuite.scala
+++ b/dataset/src/test/scala/frameless/sql/rules/SQLRulesSuite.scala
@@ -34,7 +34,7 @@ trait SQLRulesSuite extends TypedDatasetSuite with Matchers { self =>
val optimizedPlan = ds.queryExecution.optimizedPlan.collect { case logical.Filter(condition, _) => condition }.flatMap(_.toList)
// check the optimized plan
- optimizedPlan.collectFirst(planShouldNotContain) should be (empty)
+ optimizedPlan.collectFirst(planShouldNotContain) should be(empty)
// compare filters
actualPushDownFilters shouldBe expectedPushDownFilters
@@ -53,7 +53,7 @@ trait SQLRulesSuite extends TypedDatasetSuite with Matchers { self =>
if (sparkPlan.children.isEmpty) // assume it's AQE
sparkPlan match {
case aq: AdaptiveSparkPlanExec => aq.initialPlan
- case _ => sparkPlan
+ case _ => sparkPlan
}
else
sparkPlan
diff --git a/dataset/src/test/scala/org/apache/hadoop/fs/local/StreamingFS.scala b/dataset/src/test/scala/org/apache/hadoop/fs/local/StreamingFS.scala
index a28ad0820..c45be11ad 100644
--- a/dataset/src/test/scala/org/apache/hadoop/fs/local/StreamingFS.scala
+++ b/dataset/src/test/scala/org/apache/hadoop/fs/local/StreamingFS.scala
@@ -3,5 +3,5 @@ package org.apache.hadoop.fs.local
import com.globalmentor.apache.hadoop.fs.BareLocalFileSystem
import org.apache.hadoop.fs.DelegateToFileSystem
-class StreamingFS(uri: java.net.URI, conf: org.apache.hadoop.conf.Configuration) extends
- DelegateToFileSystem(uri, new BareLocalFileSystem(), conf, "file", false) {}
+class StreamingFS(uri: java.net.URI, conf: org.apache.hadoop.conf.Configuration)
+ extends DelegateToFileSystem(uri, new BareLocalFileSystem(), conf, "file", false) {}
diff --git a/dataset/src/test/spark-3.3+/frameless/sql/rules/FramelessLitPushDownTests.scala b/dataset/src/test/spark-3.3+/frameless/sql/rules/FramelessLitPushDownTests.scala
index 1df361b9b..ab6ab2cc0 100644
--- a/dataset/src/test/spark-3.3+/frameless/sql/rules/FramelessLitPushDownTests.scala
+++ b/dataset/src/test/spark-3.3+/frameless/sql/rules/FramelessLitPushDownTests.scala
@@ -3,7 +3,7 @@ package frameless.sql.rules
import frameless._
import frameless.functions.Lit
import org.apache.spark.sql.catalyst.util.DateTimeUtils.microsToInstant
-import org.apache.spark.sql.sources.{ EqualTo, GreaterThanOrEqual, IsNotNull }
+import org.apache.spark.sql.sources.{EqualTo, GreaterThanOrEqual, IsNotNull}
import org.apache.spark.sql.catalyst.expressions
import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema
import java.time.Instant
diff --git a/ml/src/main/scala/frameless/ml/classification/TypedRandomForestClassifier.scala b/ml/src/main/scala/frameless/ml/classification/TypedRandomForestClassifier.scala
index f6efcceaf..c6565e097 100644
--- a/ml/src/main/scala/frameless/ml/classification/TypedRandomForestClassifier.scala
+++ b/ml/src/main/scala/frameless/ml/classification/TypedRandomForestClassifier.scala
@@ -13,7 +13,7 @@ import org.apache.spark.ml.linalg.Vector
* It supports both binary and multiclass labels, as well as both continuous and categorical
* features.
*/
-final class TypedRandomForestClassifier[Inputs] private[ml](
+final class TypedRandomForestClassifier[Inputs] private[ml] (
rf: RandomForestClassifier,
labelCol: String,
featuresCol: String
@@ -48,4 +48,3 @@ object TypedRandomForestClassifier {
new TypedRandomForestClassifier(new RandomForestClassifier(), inputsChecker.labelCol, inputsChecker.featuresCol)
}
}
-
diff --git a/ml/src/main/scala/frameless/ml/clustering/TypedBisectingKMeans.scala b/ml/src/main/scala/frameless/ml/clustering/TypedBisectingKMeans.scala
index 4a8c974b4..be7836d3e 100644
--- a/ml/src/main/scala/frameless/ml/clustering/TypedBisectingKMeans.scala
+++ b/ml/src/main/scala/frameless/ml/clustering/TypedBisectingKMeans.scala
@@ -22,19 +22,19 @@ import org.apache.spark.ml.clustering.{BisectingKMeans, BisectingKMeansModel}
class TypedBisectingKMeans[Inputs] private[ml] (
bkm: BisectingKMeans,
featuresCol: String
-) extends TypedEstimator[Inputs,TypedBisectingKMeans.Output, BisectingKMeansModel]{
+) extends TypedEstimator[Inputs, TypedBisectingKMeans.Output, BisectingKMeansModel] {
val estimator: BisectingKMeans =
bkm
- .setFeaturesCol(featuresCol)
- .setPredictionCol(AppendTransformer.tempColumnName)
-
+ .setFeaturesCol(featuresCol)
+ .setPredictionCol(AppendTransformer.tempColumnName)
+
def setK(value: Int): TypedBisectingKMeans[Inputs] = copy(bkm.setK(value))
-
+
def setMaxIter(value: Int): TypedBisectingKMeans[Inputs] = copy(bkm.setMaxIter(value))
def setMinDivisibleClusterSize(value: Double): TypedBisectingKMeans[Inputs] =
copy(bkm.setMinDivisibleClusterSize(value))
-
+
def setSeed(value: Long): TypedBisectingKMeans[Inputs] = copy(bkm.setSeed(value))
private def copy(newBkm: BisectingKMeans): TypedBisectingKMeans[Inputs] =
@@ -46,4 +46,4 @@ object TypedBisectingKMeans {
def apply[Inputs]()(implicit inputsChecker: VectorInputsChecker[Inputs]): TypedBisectingKMeans[Inputs] =
new TypedBisectingKMeans(new BisectingKMeans(), inputsChecker.featuresCol)
-}
\ No newline at end of file
+}
diff --git a/ml/src/main/scala/frameless/ml/clustering/TypedKMeans.scala b/ml/src/main/scala/frameless/ml/clustering/TypedKMeans.scala
index 1a32076a5..6732f0133 100644
--- a/ml/src/main/scala/frameless/ml/clustering/TypedKMeans.scala
+++ b/ml/src/main/scala/frameless/ml/clustering/TypedKMeans.scala
@@ -14,7 +14,7 @@ import org.apache.spark.ml.clustering.{KMeans, KMeansModel}
class TypedKMeans[Inputs] private[ml] (
km: KMeans,
featuresCol: String
-) extends TypedEstimator[Inputs,TypedKMeans.Output,KMeansModel] {
+) extends TypedEstimator[Inputs, TypedKMeans.Output, KMeansModel] {
val estimator: KMeans =
km
.setFeaturesCol(featuresCol)
@@ -36,7 +36,7 @@ class TypedKMeans[Inputs] private[ml] (
}
-object TypedKMeans{
+object TypedKMeans {
case class Output(prediction: Int)
def apply[Inputs](implicit inputsChecker: VectorInputsChecker[Inputs]): TypedKMeans[Inputs] = {
diff --git a/ml/src/main/scala/frameless/ml/feature/TypedIndexToString.scala b/ml/src/main/scala/frameless/ml/feature/TypedIndexToString.scala
index af2e9684a..ebb11e380 100644
--- a/ml/src/main/scala/frameless/ml/feature/TypedIndexToString.scala
+++ b/ml/src/main/scala/frameless/ml/feature/TypedIndexToString.scala
@@ -12,8 +12,8 @@ import org.apache.spark.ml.feature.IndexToString
*
* @see `TypedStringIndexer` for converting strings into indices
*/
-final class TypedIndexToString[Inputs] private[ml](indexToString: IndexToString, inputCol: String)
- extends AppendTransformer[Inputs, TypedIndexToString.Outputs, IndexToString] {
+final class TypedIndexToString[Inputs] private[ml] (indexToString: IndexToString, inputCol: String)
+ extends AppendTransformer[Inputs, TypedIndexToString.Outputs, IndexToString] {
val transformer: IndexToString =
indexToString
@@ -25,8 +25,7 @@ final class TypedIndexToString[Inputs] private[ml](indexToString: IndexToString,
object TypedIndexToString {
case class Outputs(originalOutput: String)
- def apply[Inputs](labels: Array[String])
- (implicit inputsChecker: UnaryInputsChecker[Inputs, Double]): TypedIndexToString[Inputs] = {
+ def apply[Inputs](labels: Array[String])(implicit inputsChecker: UnaryInputsChecker[Inputs, Double]): TypedIndexToString[Inputs] = {
new TypedIndexToString[Inputs](new IndexToString().setLabels(labels), inputsChecker.inputCol)
}
-}
\ No newline at end of file
+}
diff --git a/ml/src/main/scala/frameless/ml/feature/TypedStringIndexer.scala b/ml/src/main/scala/frameless/ml/feature/TypedStringIndexer.scala
index 7eba8e306..fe897f3c4 100644
--- a/ml/src/main/scala/frameless/ml/feature/TypedStringIndexer.scala
+++ b/ml/src/main/scala/frameless/ml/feature/TypedStringIndexer.scala
@@ -13,8 +13,8 @@ import org.apache.spark.ml.feature.{StringIndexer, StringIndexerModel}
*
* @see `TypedIndexToString` for the inverse transformation
*/
-final class TypedStringIndexer[Inputs] private[ml](stringIndexer: StringIndexer, inputCol: String)
- extends TypedEstimator[Inputs, TypedStringIndexer.Outputs, StringIndexerModel] {
+final class TypedStringIndexer[Inputs] private[ml] (stringIndexer: StringIndexer, inputCol: String)
+ extends TypedEstimator[Inputs, TypedStringIndexer.Outputs, StringIndexerModel] {
val estimator: StringIndexer = stringIndexer
.setInputCol(inputCol)
@@ -39,4 +39,4 @@ object TypedStringIndexer {
def apply[Inputs](implicit inputsChecker: UnaryInputsChecker[Inputs, String]): TypedStringIndexer[Inputs] = {
new TypedStringIndexer[Inputs](new StringIndexer(), inputsChecker.inputCol)
}
-}
\ No newline at end of file
+}
diff --git a/ml/src/main/scala/frameless/ml/feature/TypedVectorAssembler.scala b/ml/src/main/scala/frameless/ml/feature/TypedVectorAssembler.scala
index d599011b3..ac2f84faa 100644
--- a/ml/src/main/scala/frameless/ml/feature/TypedVectorAssembler.scala
+++ b/ml/src/main/scala/frameless/ml/feature/TypedVectorAssembler.scala
@@ -13,8 +13,8 @@ import scala.annotation.implicitNotFound
/**
* A feature transformer that merges multiple columns into a vector column.
*/
-final class TypedVectorAssembler[Inputs] private[ml](vectorAssembler: VectorAssembler, inputCols: Array[String])
- extends AppendTransformer[Inputs, TypedVectorAssembler.Output, VectorAssembler] {
+final class TypedVectorAssembler[Inputs] private[ml] (vectorAssembler: VectorAssembler, inputCols: Array[String])
+ extends AppendTransformer[Inputs, TypedVectorAssembler.Output, VectorAssembler] {
val transformer: VectorAssembler = vectorAssembler
.setInputCols(inputCols)
@@ -57,7 +57,8 @@ private[ml] object TypedVectorAssemblerInputsValueChecker {
new TypedVectorAssemblerInputsValueChecker[HNil] {}
implicit def hlistCheckInputsValueNumeric[H, T <: HList](
- implicit ch: CatalystNumeric[H],
+ implicit
+ ch: CatalystNumeric[H],
tt: TypedVectorAssemblerInputsValueChecker[T]
): TypedVectorAssemblerInputsValueChecker[H :: T] = new TypedVectorAssemblerInputsValueChecker[H :: T] {}
@@ -65,5 +66,3 @@ private[ml] object TypedVectorAssemblerInputsValueChecker {
implicit tt: TypedVectorAssemblerInputsValueChecker[T]
): TypedVectorAssemblerInputsValueChecker[Boolean :: T] = new TypedVectorAssemblerInputsValueChecker[Boolean :: T] {}
}
-
-
diff --git a/ml/src/main/scala/frameless/ml/internals/LinearInputsChecker.scala b/ml/src/main/scala/frameless/ml/internals/LinearInputsChecker.scala
index 995a3f961..5552cbae7 100644
--- a/ml/src/main/scala/frameless/ml/internals/LinearInputsChecker.scala
+++ b/ml/src/main/scala/frameless/ml/internals/LinearInputsChecker.scala
@@ -25,10 +25,11 @@ trait LinearInputsChecker[Inputs] {
object LinearInputsChecker {
implicit def checkLinearInputs[
- Inputs,
- InputsRec <: HList,
- LabelK <: Symbol,
- FeaturesK <: Symbol](
+ Inputs,
+ InputsRec <: HList,
+ LabelK <: Symbol,
+ FeaturesK <: Symbol
+ ](
implicit
i0: LabelledGeneric.Aux[Inputs, InputsRec],
i1: Length.Aux[InputsRec, Nat._2],
@@ -45,11 +46,12 @@ object LinearInputsChecker {
}
implicit def checkLinearInputs2[
- Inputs,
- InputsRec <: HList,
- LabelK <: Symbol,
- FeaturesK <: Symbol,
- WeightK <: Symbol](
+ Inputs,
+ InputsRec <: HList,
+ LabelK <: Symbol,
+ FeaturesK <: Symbol,
+ WeightK <: Symbol
+ ](
implicit
i0: LabelledGeneric.Aux[Inputs, InputsRec],
i1: Length.Aux[InputsRec, Nat._3],
diff --git a/ml/src/main/scala/frameless/ml/internals/TreesInputsChecker.scala b/ml/src/main/scala/frameless/ml/internals/TreesInputsChecker.scala
index 0fe157654..e735e44be 100644
--- a/ml/src/main/scala/frameless/ml/internals/TreesInputsChecker.scala
+++ b/ml/src/main/scala/frameless/ml/internals/TreesInputsChecker.scala
@@ -24,10 +24,11 @@ trait TreesInputsChecker[Inputs] {
object TreesInputsChecker {
implicit def checkTreesInputs[
- Inputs,
- InputsRec <: HList,
- LabelK <: Symbol,
- FeaturesK <: Symbol](
+ Inputs,
+ InputsRec <: HList,
+ LabelK <: Symbol,
+ FeaturesK <: Symbol
+ ](
implicit
i0: LabelledGeneric.Aux[Inputs, InputsRec],
i1: Length.Aux[InputsRec, Nat._2],
diff --git a/ml/src/main/scala/frameless/ml/internals/UnaryInputsChecker.scala b/ml/src/main/scala/frameless/ml/internals/UnaryInputsChecker.scala
index 56dfc9a57..4be27370d 100644
--- a/ml/src/main/scala/frameless/ml/internals/UnaryInputsChecker.scala
+++ b/ml/src/main/scala/frameless/ml/internals/UnaryInputsChecker.scala
@@ -30,4 +30,3 @@ object UnaryInputsChecker {
}
}
-
diff --git a/ml/src/main/scala/frameless/ml/internals/VectorInputsChecker.scala b/ml/src/main/scala/frameless/ml/internals/VectorInputsChecker.scala
index e993d9a55..033a7d9e2 100644
--- a/ml/src/main/scala/frameless/ml/internals/VectorInputsChecker.scala
+++ b/ml/src/main/scala/frameless/ml/internals/VectorInputsChecker.scala
@@ -20,13 +20,13 @@ trait VectorInputsChecker[Inputs] {
object VectorInputsChecker {
implicit def checkVectorInput[Inputs, InputsRec <: HList, FeaturesK <: Symbol](
implicit
- i0: LabelledGeneric.Aux[Inputs, InputsRec],
- i1: Length.Aux[InputsRec, Nat._1],
- i2: SelectorByValue.Aux[InputsRec, Vector, FeaturesK],
- i3: Witness.Aux[FeaturesK]
- ): VectorInputsChecker[Inputs] = {
- new VectorInputsChecker[Inputs] {
- val featuresCol: String = i3.value.name
- }
+ i0: LabelledGeneric.Aux[Inputs, InputsRec],
+ i1: Length.Aux[InputsRec, Nat._1],
+ i2: SelectorByValue.Aux[InputsRec, Vector, FeaturesK],
+ i3: Witness.Aux[FeaturesK]
+ ): VectorInputsChecker[Inputs] = {
+ new VectorInputsChecker[Inputs] {
+ val featuresCol: String = i3.value.name
}
+ }
}
diff --git a/ml/src/main/scala/frameless/ml/params/kmeans/KMeansInitMode.scala b/ml/src/main/scala/frameless/ml/params/kmeans/KMeansInitMode.scala
index b3c023735..95a265ca8 100644
--- a/ml/src/main/scala/frameless/ml/params/kmeans/KMeansInitMode.scala
+++ b/ml/src/main/scala/frameless/ml/params/kmeans/KMeansInitMode.scala
@@ -11,7 +11,7 @@ package kmeans
* Default: k-means||.
*/
-sealed abstract class KMeansInitMode private[ml](val sparkValue: String)
+sealed abstract class KMeansInitMode private[ml] (val sparkValue: String)
object KMeansInitMode {
case object Random extends KMeansInitMode("random")
diff --git a/ml/src/main/scala/frameless/ml/params/linears/LossStrategy.scala b/ml/src/main/scala/frameless/ml/params/linears/LossStrategy.scala
index 4b9ca6d4e..00420b24e 100644
--- a/ml/src/main/scala/frameless/ml/params/linears/LossStrategy.scala
+++ b/ml/src/main/scala/frameless/ml/params/linears/LossStrategy.scala
@@ -2,6 +2,7 @@ package frameless
package ml
package params
package linears
+
/**
* SquaredError measures the average of the squares of the errors—that is,
* the average squared difference between the estimated values and what is estimated.
@@ -9,8 +10,8 @@ package linears
* Huber Loss loss function less sensitive to outliers in data than the
* squared error loss
*/
-sealed abstract class LossStrategy private[ml](val sparkValue: String)
+sealed abstract class LossStrategy private[ml] (val sparkValue: String)
object LossStrategy {
case object SquaredError extends LossStrategy("squaredError")
- case object Huber extends LossStrategy("huber")
+ case object Huber extends LossStrategy("huber")
}
diff --git a/ml/src/main/scala/frameless/ml/params/linears/Solver.scala b/ml/src/main/scala/frameless/ml/params/linears/Solver.scala
index 277e06e7a..555da751f 100644
--- a/ml/src/main/scala/frameless/ml/params/linears/Solver.scala
+++ b/ml/src/main/scala/frameless/ml/params/linears/Solver.scala
@@ -16,10 +16,9 @@ package linears
* spark
*/
-sealed abstract class Solver private[ml](val sparkValue: String)
+sealed abstract class Solver private[ml] (val sparkValue: String)
object Solver {
- case object LBFGS extends Solver("l-bfgs")
- case object Auto extends Solver("auto")
- case object Normal extends Solver("normal")
+ case object LBFGS extends Solver("l-bfgs")
+ case object Auto extends Solver("auto")
+ case object Normal extends Solver("normal")
}
-
diff --git a/ml/src/main/scala/frameless/ml/params/trees/FeatureSubsetStrategy.scala b/ml/src/main/scala/frameless/ml/params/trees/FeatureSubsetStrategy.scala
index f2167f983..c5a9a94c0 100644
--- a/ml/src/main/scala/frameless/ml/params/trees/FeatureSubsetStrategy.scala
+++ b/ml/src/main/scala/frameless/ml/params/trees/FeatureSubsetStrategy.scala
@@ -2,6 +2,7 @@ package frameless
package ml
package params
package trees
+
/**
* The number of features to consider for splits at each tree node.
* Supported options:
@@ -27,7 +28,7 @@ package trees
* @see
* Breiman manual for random forests
*/
-sealed abstract class FeatureSubsetStrategy private[ml](val sparkValue: String)
+sealed abstract class FeatureSubsetStrategy private[ml] (val sparkValue: String)
object FeatureSubsetStrategy {
case object Auto extends FeatureSubsetStrategy("auto")
case object All extends FeatureSubsetStrategy("all")
@@ -36,4 +37,4 @@ object FeatureSubsetStrategy {
case object Log2 extends FeatureSubsetStrategy("log2")
case class Ratio(value: Double) extends FeatureSubsetStrategy(value.toString)
case class NumberOfFeatures(value: Int) extends FeatureSubsetStrategy(value.toString)
-}
\ No newline at end of file
+}
diff --git a/ml/src/main/scala/frameless/ml/regression/TypedLinearRegression.scala b/ml/src/main/scala/frameless/ml/regression/TypedLinearRegression.scala
index 3b3208623..f578b986f 100644
--- a/ml/src/main/scala/frameless/ml/regression/TypedLinearRegression.scala
+++ b/ml/src/main/scala/frameless/ml/regression/TypedLinearRegression.scala
@@ -11,30 +11,30 @@ import org.apache.spark.ml.regression.{LinearRegression, LinearRegressionModel}
* Linear Regression linear approach to modelling the relationship
* between a scalar response (or dependent variable) and one or more explanatory variables
*/
-final class TypedLinearRegression [Inputs] private[ml](
+final class TypedLinearRegression[Inputs] private[ml] (
lr: LinearRegression,
labelCol: String,
featuresCol: String,
weightCol: Option[String]
) extends TypedEstimator[Inputs, TypedLinearRegression.Outputs, LinearRegressionModel] {
- val estimatorWithoutWeight : LinearRegression = lr
+ val estimatorWithoutWeight: LinearRegression = lr
.setLabelCol(labelCol)
.setFeaturesCol(featuresCol)
.setPredictionCol(AppendTransformer.tempColumnName)
val estimator = if (weightCol.isDefined) estimatorWithoutWeight.setWeightCol(weightCol.get) else estimatorWithoutWeight
- def setRegParam(value: Double): TypedLinearRegression[Inputs] = copy(lr.setRegParam(value))
- def setFitIntercept(value: Boolean): TypedLinearRegression[Inputs] = copy(lr.setFitIntercept(value))
- def setStandardization(value: Boolean): TypedLinearRegression[Inputs] = copy(lr.setStandardization(value))
- def setElasticNetParam(value: Double): TypedLinearRegression[Inputs] = copy(lr.setElasticNetParam(value))
- def setMaxIter(value: Int): TypedLinearRegression[Inputs] = copy(lr.setMaxIter(value))
- def setTol(value: Double): TypedLinearRegression[Inputs] = copy(lr.setTol(value))
- def setSolver(value: Solver): TypedLinearRegression[Inputs] = copy(lr.setSolver(value.sparkValue))
- def setAggregationDepth(value: Int): TypedLinearRegression[Inputs] = copy(lr.setAggregationDepth(value))
- def setLoss(value: LossStrategy): TypedLinearRegression[Inputs] = copy(lr.setLoss(value.sparkValue))
- def setEpsilon(value: Double): TypedLinearRegression[Inputs] = copy(lr.setEpsilon(value))
+ def setRegParam(value: Double): TypedLinearRegression[Inputs] = copy(lr.setRegParam(value))
+ def setFitIntercept(value: Boolean): TypedLinearRegression[Inputs] = copy(lr.setFitIntercept(value))
+ def setStandardization(value: Boolean): TypedLinearRegression[Inputs] = copy(lr.setStandardization(value))
+ def setElasticNetParam(value: Double): TypedLinearRegression[Inputs] = copy(lr.setElasticNetParam(value))
+ def setMaxIter(value: Int): TypedLinearRegression[Inputs] = copy(lr.setMaxIter(value))
+ def setTol(value: Double): TypedLinearRegression[Inputs] = copy(lr.setTol(value))
+ def setSolver(value: Solver): TypedLinearRegression[Inputs] = copy(lr.setSolver(value.sparkValue))
+ def setAggregationDepth(value: Int): TypedLinearRegression[Inputs] = copy(lr.setAggregationDepth(value))
+ def setLoss(value: LossStrategy): TypedLinearRegression[Inputs] = copy(lr.setLoss(value.sparkValue))
+ def setEpsilon(value: Double): TypedLinearRegression[Inputs] = copy(lr.setEpsilon(value))
private def copy(newLr: LinearRegression): TypedLinearRegression[Inputs] =
new TypedLinearRegression[Inputs](newLr, labelCol, featuresCol, weightCol)
@@ -45,8 +45,7 @@ object TypedLinearRegression {
case class Outputs(prediction: Double)
case class Weight(weight: Double)
-
def apply[Inputs](implicit inputsChecker: LinearInputsChecker[Inputs]): TypedLinearRegression[Inputs] = {
new TypedLinearRegression(new LinearRegression(), inputsChecker.labelCol, inputsChecker.featuresCol, inputsChecker.weightCol)
}
-}
\ No newline at end of file
+}
diff --git a/ml/src/main/scala/frameless/ml/regression/TypedRandomForestRegressor.scala b/ml/src/main/scala/frameless/ml/regression/TypedRandomForestRegressor.scala
index 69c1ad68c..84f8fccfb 100644
--- a/ml/src/main/scala/frameless/ml/regression/TypedRandomForestRegressor.scala
+++ b/ml/src/main/scala/frameless/ml/regression/TypedRandomForestRegressor.scala
@@ -11,7 +11,7 @@ import org.apache.spark.ml.regression.{RandomForestRegressionModel, RandomForest
* learning algorithm for regression.
* It supports both continuous and categorical features.
*/
-final class TypedRandomForestRegressor[Inputs] private[ml](
+final class TypedRandomForestRegressor[Inputs] private[ml] (
rf: RandomForestRegressor,
labelCol: String,
featuresCol: String
@@ -40,8 +40,7 @@ final class TypedRandomForestRegressor[Inputs] private[ml](
object TypedRandomForestRegressor {
case class Outputs(prediction: Double)
- def apply[Inputs](implicit inputsChecker: TreesInputsChecker[Inputs])
- : TypedRandomForestRegressor[Inputs] = {
+ def apply[Inputs](implicit inputsChecker: TreesInputsChecker[Inputs]): TypedRandomForestRegressor[Inputs] = {
new TypedRandomForestRegressor(new RandomForestRegressor(), inputsChecker.labelCol, inputsChecker.featuresCol)
}
-}
\ No newline at end of file
+}
diff --git a/ml/src/test/scala/frameless/ml/Generators.scala b/ml/src/test/scala/frameless/ml/Generators.scala
index f7dde986c..51f5413da 100644
--- a/ml/src/test/scala/frameless/ml/Generators.scala
+++ b/ml/src/test/scala/frameless/ml/Generators.scala
@@ -32,7 +32,8 @@ object Generators {
val genRatio = Gen.choose(0D, 1D).suchThat(_ > 0D).map(FeatureSubsetStrategy.Ratio)
val genNumberOfFeatures = Gen.choose(1, Int.MaxValue).map(FeatureSubsetStrategy.NumberOfFeatures)
- Gen.oneOf(Gen.const(FeatureSubsetStrategy.All),
- Gen.const(FeatureSubsetStrategy.All),
+ Gen.oneOf(
+ Gen.const(FeatureSubsetStrategy.Auto),
+ Gen.const(FeatureSubsetStrategy.All),
Gen.const(FeatureSubsetStrategy.Log2),
Gen.const(FeatureSubsetStrategy.OneThird),
@@ -43,7 +44,7 @@ object Generators {
}
implicit val arbLossStrategy: Arbitrary[LossStrategy] = Arbitrary {
- Gen.const(LossStrategy.SquaredError)
+ Gen.const(LossStrategy.SquaredError)
}
implicit val arbSolver: Arbitrary[Solver] = Arbitrary {
diff --git a/ml/src/test/scala/frameless/ml/classification/TypedRandomForestClassifierTests.scala b/ml/src/test/scala/frameless/ml/classification/TypedRandomForestClassifierTests.scala
index ab03f1aad..e922bac87 100644
--- a/ml/src/test/scala/frameless/ml/classification/TypedRandomForestClassifierTests.scala
+++ b/ml/src/test/scala/frameless/ml/classification/TypedRandomForestClassifierTests.scala
@@ -66,13 +66,13 @@ class TypedRandomForestClassifierTests extends FramelessMlSuite with Matchers {
val model = rf.fit(ds).run()
model.transformer.getNumTrees == 10 &&
- model.transformer.getMaxBins == 100 &&
- model.transformer.getFeatureSubsetStrategy == featureSubsetStrategy.sparkValue &&
- model.transformer.getMaxDepth == 10 &&
- model.transformer.getMaxMemoryInMB == 100 &&
- model.transformer.getMinInfoGain == 0.1D &&
- model.transformer.getMinInstancesPerNode == 2 &&
- model.transformer.getSubsamplingRate == 0.9D
+ model.transformer.getMaxBins == 100 &&
+ model.transformer.getFeatureSubsetStrategy == featureSubsetStrategy.sparkValue &&
+ model.transformer.getMaxDepth == 10 &&
+ model.transformer.getMaxMemoryInMB == 100 &&
+ model.transformer.getMinInfoGain == 0.1D &&
+ model.transformer.getMinInstancesPerNode == 2 &&
+ model.transformer.getSubsamplingRate == 0.9D
}
check(prop)
@@ -86,4 +86,4 @@ class TypedRandomForestClassifierTests extends FramelessMlSuite with Matchers {
illTyped("TypedRandomForestClassifier.create[X2[Vector, String]]()")
}
-}
\ No newline at end of file
+}
diff --git a/ml/src/test/scala/frameless/ml/clustering/BisectingKMeansTests.scala b/ml/src/test/scala/frameless/ml/clustering/BisectingKMeansTests.scala
index 976df39b2..a1ef44dd3 100644
--- a/ml/src/test/scala/frameless/ml/clustering/BisectingKMeansTests.scala
+++ b/ml/src/test/scala/frameless/ml/clustering/BisectingKMeansTests.scala
@@ -24,7 +24,7 @@ class BisectingKMeansTests extends FramelessMlSuite with Matchers {
pDs.select(pDs.col('a)).collect().run().toList == Seq(x1.a)
}
- def prop3[A: TypedEncoder : Arbitrary] = forAll { x2: X2[Vector, A] =>
+ def prop3[A: TypedEncoder: Arbitrary] = forAll { x2: X2[Vector, A] =>
val km = TypedBisectingKMeans[X1[Vector]]()
val ds = TypedDataset.create(Seq(x2))
val model = km.fit(ds).run()
@@ -44,12 +44,12 @@ class BisectingKMeansTests extends FramelessMlSuite with Matchers {
.setMinDivisibleClusterSize(1)
.setSeed(123332)
- val ds = TypedDataset.create(Seq(X2(Vectors.dense(Array(0D)),0)))
+ val ds = TypedDataset.create(Seq(X2(Vectors.dense(Array(0D)), 0)))
val model = rf.fit(ds).run()
- model.transformer.getK == 10 &&
- model.transformer.getMaxIter == 10 &&
- model.transformer.getMinDivisibleClusterSize == 1 &&
- model.transformer.getSeed == 123332
+ model.transformer.getK == 10 &&
+ model.transformer.getMaxIter == 10 &&
+ model.transformer.getMinDivisibleClusterSize == 1 &&
+ model.transformer.getSeed == 123332
}
}
diff --git a/ml/src/test/scala/frameless/ml/clustering/ClusteringIntegrationTests.scala b/ml/src/test/scala/frameless/ml/clustering/ClusteringIntegrationTests.scala
index 398a0963d..9a963a345 100644
--- a/ml/src/test/scala/frameless/ml/clustering/ClusteringIntegrationTests.scala
+++ b/ml/src/test/scala/frameless/ml/clustering/ClusteringIntegrationTests.scala
@@ -14,11 +14,11 @@ class ClusteringIntegrationTests extends FramelessMlSuite with Matchers {
test("predict field2 from field1 using a K-means clustering") {
// Training
- val trainingDataDs = TypedDataset.create(Seq.fill(5)(X2(10D, 0)) :+ X2(100D,0))
+ val trainingDataDs = TypedDataset.create(Seq.fill(5)(X2(10D, 0)) :+ X2(100D, 0))
val vectorAssembler = TypedVectorAssembler[X1[Double]]
- val dataWithFeatures = vectorAssembler.transform(trainingDataDs).as[X3[Double,Int,Vector]]()
+ val dataWithFeatures = vectorAssembler.transform(trainingDataDs).as[X3[Double, Int, Vector]]()
case class Input(c: Vector)
val km = TypedKMeans[Input].setK(2)
@@ -32,9 +32,9 @@ class ClusteringIntegrationTests extends FramelessMlSuite with Matchers {
)
val testData = TypedDataset.create(testSeq)
- val testDataWithFeatures = vectorAssembler.transform(testData).as[X3[Double,Int,Vector]]()
+ val testDataWithFeatures = vectorAssembler.transform(testData).as[X3[Double, Int, Vector]]()
- val predictionDs = model.transform(testDataWithFeatures).as[X4[Double,Int,Vector,Int]]()
+ val predictionDs = model.transform(testDataWithFeatures).as[X4[Double, Int, Vector, Int]]()
val prediction = predictionDs.select(predictionDs.col[Int]('d)).collect().run().toList
@@ -43,7 +43,7 @@ class ClusteringIntegrationTests extends FramelessMlSuite with Matchers {
test("predict field2 from field1 using a bisecting K-means clustering") {
// Training
- val trainingDataDs = TypedDataset.create(Seq.fill(5)(X2(10D, 0)) :+ X2(100D,0))
+ val trainingDataDs = TypedDataset.create(Seq.fill(5)(X2(10D, 0)) :+ X2(100D, 0))
val vectorAssembler = TypedVectorAssembler[X1[Double]]
@@ -63,7 +63,7 @@ class ClusteringIntegrationTests extends FramelessMlSuite with Matchers {
val testData = TypedDataset.create(testSeq)
val testDataWithFeatures = vectorAssembler.transform(testData).as[X3[Double, Int, Vector]]()
- val predictionDs = model.transform(testDataWithFeatures).as[X4[Double,Int,Vector,Int]]()
+ val predictionDs = model.transform(testDataWithFeatures).as[X4[Double, Int, Vector, Int]]()
val prediction = predictionDs.select(predictionDs.col[Int]('d)).collect().run().toList
diff --git a/ml/src/test/scala/frameless/ml/clustering/KMeansTests.scala b/ml/src/test/scala/frameless/ml/clustering/KMeansTests.scala
index a41c1b703..745cf1826 100644
--- a/ml/src/test/scala/frameless/ml/clustering/KMeansTests.scala
+++ b/ml/src/test/scala/frameless/ml/clustering/KMeansTests.scala
@@ -30,7 +30,7 @@ class KMeansTests extends FramelessMlSuite with Matchers {
val dense = Vectors.dense(dubs)
vect match {
case _: SparseVector => dense.toSparse
- case _ => dense
+ case _ => dense
}
}
@@ -46,7 +46,7 @@ class KMeansTests extends FramelessMlSuite with Matchers {
pDs.select(pDs.col('a)).collect().run().toList == Seq(x1.a, x1a.a)
}
- def prop3[A: TypedEncoder : Arbitrary] = forAll { x2: X2[Vector, A] =>
+ def prop3[A: TypedEncoder: Arbitrary] = forAll { x2: X2[Vector, A] =>
val x2a = x2.copy(a = newRowWithSameDimension(x2.a))
val km = TypedKMeans[X1[Vector]]
val ds = TypedDataset.create(Seq(x2, x2a))
@@ -56,7 +56,7 @@ class KMeansTests extends FramelessMlSuite with Matchers {
pDs.select(pDs.col('a), pDs.col('b)).collect().run().toList == Seq((x2.a, x2.b), (x2a.a, x2a.b))
}
- tolerantRun( _.isInstanceOf[ArrayIndexOutOfBoundsException] ) {
+ tolerantRun(_.isInstanceOf[ArrayIndexOutOfBoundsException]) {
check(prop)
check(prop3[Double])
}
@@ -76,11 +76,11 @@ class KMeansTests extends FramelessMlSuite with Matchers {
val model = rf.fit(ds).run()
model.transformer.getInitMode == KMeansInitMode.Random.sparkValue &&
- model.transformer.getInitSteps == 2 &&
- model.transformer.getK == 10 &&
- model.transformer.getMaxIter == 15 &&
- model.transformer.getSeed == 123223L &&
- model.transformer.getTol == 12D
+ model.transformer.getInitSteps == 2 &&
+ model.transformer.getK == 10 &&
+ model.transformer.getMaxIter == 15 &&
+ model.transformer.getSeed == 123223L &&
+ model.transformer.getTol == 12D
}
check(prop)
diff --git a/ml/src/test/scala/frameless/ml/feature/TypedStringIndexerTests.scala b/ml/src/test/scala/frameless/ml/feature/TypedStringIndexerTests.scala
index 18d490758..385c61b8a 100644
--- a/ml/src/test/scala/frameless/ml/feature/TypedStringIndexerTests.scala
+++ b/ml/src/test/scala/frameless/ml/feature/TypedStringIndexerTests.scala
@@ -11,7 +11,7 @@ import org.scalatest.matchers.must.Matchers
class TypedStringIndexerTests extends FramelessMlSuite with Matchers {
test(".fit() returns a correct TypedTransformer") {
- def prop[A: TypedEncoder : Arbitrary] = forAll { x2: X2[String, A] =>
+ def prop[A: TypedEncoder: Arbitrary] = forAll { x2: X2[String, A] =>
val indexer = TypedStringIndexer[X1[String]]
val ds = TypedDataset.create(Seq(x2))
val model = indexer.fit(ds).run()
diff --git a/ml/src/test/scala/frameless/ml/regression/TypedLinearRegressionTests.scala b/ml/src/test/scala/frameless/ml/regression/TypedLinearRegressionTests.scala
index b864b1533..51d7dce62 100644
--- a/ml/src/test/scala/frameless/ml/regression/TypedLinearRegressionTests.scala
+++ b/ml/src/test/scala/frameless/ml/regression/TypedLinearRegressionTests.scala
@@ -66,12 +66,12 @@ class TypedLinearRegressionTests extends FramelessMlSuite with Matchers {
val model = lr.fit(ds).run()
model.transformer.getAggregationDepth == 10 &&
- model.transformer.getEpsilon == 4.0 &&
- model.transformer.getLoss == lossStrategy.sparkValue &&
- model.transformer.getMaxIter == 23 &&
- model.transformer.getRegParam == 1.2 &&
- model.transformer.getTol == 2.3 &&
- model.transformer.getSolver == solver.sparkValue
+ model.transformer.getEpsilon == 4.0 &&
+ model.transformer.getLoss == lossStrategy.sparkValue &&
+ model.transformer.getMaxIter == 23 &&
+ model.transformer.getRegParam == 1.2 &&
+ model.transformer.getTol == 2.3 &&
+ model.transformer.getSolver == solver.sparkValue
}
check(prop)
@@ -98,12 +98,12 @@ class TypedLinearRegressionTests extends FramelessMlSuite with Matchers {
)
val ds2 = Seq(
- X3(new DenseVector(Array(1.0)): Vector,2F, 1.0),
- X3(new DenseVector(Array(2.0)): Vector,2F, 2.0),
- X3(new DenseVector(Array(3.0)): Vector,2F, 3.0),
- X3(new DenseVector(Array(4.0)): Vector,2F, 4.0),
- X3(new DenseVector(Array(5.0)): Vector,2F, 5.0),
- X3(new DenseVector(Array(6.0)): Vector,2F, 6.0)
+ X3(new DenseVector(Array(1.0)): Vector, 2F, 1.0),
+ X3(new DenseVector(Array(2.0)): Vector, 2F, 2.0),
+ X3(new DenseVector(Array(3.0)): Vector, 2F, 3.0),
+ X3(new DenseVector(Array(4.0)): Vector, 2F, 4.0),
+ X3(new DenseVector(Array(5.0)): Vector, 2F, 5.0),
+ X3(new DenseVector(Array(6.0)): Vector, 2F, 6.0)
)
val tds = TypedDataset.create(ds)
diff --git a/ml/src/test/scala/frameless/ml/regression/TypedRandomForestRegressorTests.scala b/ml/src/test/scala/frameless/ml/regression/TypedRandomForestRegressorTests.scala
index 4a6cd37d2..989777ac6 100644
--- a/ml/src/test/scala/frameless/ml/regression/TypedRandomForestRegressorTests.scala
+++ b/ml/src/test/scala/frameless/ml/regression/TypedRandomForestRegressorTests.scala
@@ -64,13 +64,13 @@ class TypedRandomForestRegressorTests extends FramelessMlSuite with Matchers {
val model = rf.fit(ds).run()
model.transformer.getNumTrees == 10 &&
- model.transformer.getMaxBins == 100 &&
- model.transformer.getFeatureSubsetStrategy == featureSubsetStrategy.sparkValue &&
- model.transformer.getMaxDepth == 10 &&
- model.transformer.getMaxMemoryInMB == 100 &&
- model.transformer.getMinInfoGain == 0.1D &&
- model.transformer.getMinInstancesPerNode == 2 &&
- model.transformer.getSubsamplingRate == 0.9D
+ model.transformer.getMaxBins == 100 &&
+ model.transformer.getFeatureSubsetStrategy == featureSubsetStrategy.sparkValue &&
+ model.transformer.getMaxDepth == 10 &&
+ model.transformer.getMaxMemoryInMB == 100 &&
+ model.transformer.getMinInfoGain == 0.1D &&
+ model.transformer.getMinInstancesPerNode == 2 &&
+ model.transformer.getSubsamplingRate == 0.9D
}
check(prop)
diff --git a/project/Common.scala b/project/Common.scala
deleted file mode 100644
index 94bc73ff3..000000000
--- a/project/Common.scala
+++ /dev/null
@@ -1,14 +0,0 @@
-import sbt.Keys._
-import sbt._
-import sbt.plugins.JvmPlugin
-
-import org.scalafmt.sbt.ScalafmtPlugin.autoImport._
-
-object Common extends AutoPlugin {
- override def trigger = allRequirements
- override def requires = JvmPlugin
-
- override def projectSettings = Seq(
- scalafmtFilter := "diff-ref=78f708d"
- )
-}
diff --git a/refined/src/main/scala/frameless/refined/RefinedFieldEncoders.scala b/refined/src/main/scala/frameless/refined/RefinedFieldEncoders.scala
index dba59454c..9803f67ca 100644
--- a/refined/src/main/scala/frameless/refined/RefinedFieldEncoders.scala
+++ b/refined/src/main/scala/frameless/refined/RefinedFieldEncoders.scala
@@ -3,24 +3,23 @@ package frameless.refined
import scala.reflect.ClassTag
import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.catalyst.expressions.objects.{
- Invoke, NewInstance, UnwrapOption, WrapOption
-}
+import org.apache.spark.sql.catalyst.expressions.objects.{Invoke, NewInstance, UnwrapOption, WrapOption}
import org.apache.spark.sql.types._
import eu.timepit.refined.api.RefType
-import frameless.{ TypedEncoder, RecordFieldEncoder }
+import frameless.{RecordFieldEncoder, TypedEncoder}
private[refined] trait RefinedFieldEncoders {
+
/**
* @tparam T the refined type (e.g. `String`)
*/
implicit def optionRefined[F[_, _], T, R](
implicit
- i0: RefType[F],
- i1: TypedEncoder[T],
- i2: ClassTag[F[T, R]],
+ i0: RefType[F],
+ i1: TypedEncoder[T],
+ i2: ClassTag[F[T, R]]
): RecordFieldEncoder[Option[F[T, R]]] =
RecordFieldEncoder[Option[F[T, R]]](new TypedEncoder[Option[F[T, R]]] {
def nullable = true
@@ -55,9 +54,9 @@ private[refined] trait RefinedFieldEncoders {
*/
implicit def refined[F[_, _], T, R](
implicit
- i0: RefType[F],
- i1: TypedEncoder[T],
- i2: ClassTag[F[T, R]],
+ i0: RefType[F],
+ i1: TypedEncoder[T],
+ i2: ClassTag[F[T, R]]
): RecordFieldEncoder[F[T, R]] =
RecordFieldEncoder[F[T, R]](new TypedEncoder[F[T, R]] {
def nullable = i1.nullable
@@ -76,4 +75,3 @@ private[refined] trait RefinedFieldEncoders {
override def toString = s"refined[${i2.runtimeClass.getName}]"
})
}
-
diff --git a/refined/src/main/scala/frameless/refined/package.scala b/refined/src/main/scala/frameless/refined/package.scala
index 8819be2bf..96b214272 100644
--- a/refined/src/main/scala/frameless/refined/package.scala
+++ b/refined/src/main/scala/frameless/refined/package.scala
@@ -2,32 +2,36 @@ package frameless
import scala.reflect.ClassTag
-import eu.timepit.refined.api.{ RefType, Validate }
+import eu.timepit.refined.api.{RefType, Validate}
package object refined extends RefinedFieldEncoders {
implicit def refinedInjection[F[_, _], T, R](
implicit
- refType: RefType[F],
- validate: Validate[T, R]
- ): Injection[F[T, R], T] = Injection(
+ refType: RefType[F],
+ validate: Validate[T, R]
+ ): Injection[F[T, R], T] = Injection(
refType.unwrap,
{ value =>
refType.refine[R](value) match {
case Left(errMsg) =>
throw new IllegalArgumentException(
- s"Value $value does not satisfy refinement predicate: $errMsg")
+ s"Value $value does not satisfy refinement predicate: $errMsg"
+ )
case Right(res) => res
}
- })
+ }
+ )
implicit def refinedEncoder[F[_, _], T, R](
implicit
- i0: RefType[F],
- i1: Validate[T, R],
- i2: TypedEncoder[T],
- i3: ClassTag[F[T, R]]
- ): TypedEncoder[F[T, R]] = TypedEncoder.usingInjection(
- i3, refinedInjection, i2)
+ i0: RefType[F],
+ i1: Validate[T, R],
+ i2: TypedEncoder[T],
+ i3: ClassTag[F[T, R]]
+ ): TypedEncoder[F[T, R]] = TypedEncoder.usingInjection(
+ i3,
+ refinedInjection,
+ i2
+ )
}
-
diff --git a/refined/src/test/scala/frameless/RefinedFieldEncoderTests.scala b/refined/src/test/scala/frameless/RefinedFieldEncoderTests.scala
index 5476284ea..fd0e82048 100644
--- a/refined/src/test/scala/frameless/RefinedFieldEncoderTests.scala
+++ b/refined/src/test/scala/frameless/RefinedFieldEncoderTests.scala
@@ -1,9 +1,7 @@
package frameless
import org.apache.spark.sql.Row
-import org.apache.spark.sql.types.{
- IntegerType, ObjectType, StringType, StructField, StructType
-}
+import org.apache.spark.sql.types.{IntegerType, ObjectType, StringType, StructField, StructType}
import org.scalatest.matchers.should.Matchers
@@ -42,7 +40,8 @@ class RefinedFieldEncoderTests extends TypedDatasetSuite with Matchers {
// Check catalystRepr
val expectedAStructType = StructType(Seq(
StructField("a", IntegerType, false),
- StructField("s", StringType, false)))
+ StructField("s", StringType, false)
+ ))
encoderA.catalystRepr shouldBe expectedAStructType
@@ -73,7 +72,8 @@ class RefinedFieldEncoderTests extends TypedDatasetSuite with Matchers {
// Check catalystRepr
val expectedBStructType = StructType(Seq(
StructField("a", IntegerType, false),
- StructField("s", StringType, true)))
+ StructField("s", StringType, true)
+ ))
encoderB.catalystRepr shouldBe expectedBStructType
@@ -81,7 +81,7 @@ class RefinedFieldEncoderTests extends TypedDatasetSuite with Matchers {
val unsafeDs: TypedDataset[B] = {
val rdd = sc.parallelize(Seq(
Row(bs.a, bs.s.mkString),
- Row(2, null.asInstanceOf[String]),
+ Row(2, null.asInstanceOf[String])
))
val df = session.createDataFrame(rdd, expectedBStructType)