From 8d14d35f7a85c99729e0760644a7aab5802beeba Mon Sep 17 00:00:00 2001 From: Grigory Pomadchin Date: Mon, 25 May 2026 07:53:48 -0400 Subject: [PATCH] Apply scalafmt project-wide --- .scalafmt.conf | 40 +- build.sbt | 15 +- .../frameless/cats/FramelessSyntax.scala | 4 +- .../main/scala/frameless/cats/implicits.scala | 6 +- cats/src/test/scala/frameless/cats/test.scala | 12 +- .../scala/frameless/CatalystAverageable.scala | 8 +- .../scala/frameless/CatalystBitShift.scala | 10 +- .../main/scala/frameless/CatalystCast.scala | 27 +- .../scala/frameless/CatalystCollection.scala | 6 +- .../scala/frameless/CatalystDivisible.scala | 10 +- .../main/scala/frameless/CatalystIsin.scala | 14 +- .../main/scala/frameless/CatalystNaN.scala | 5 +- .../scala/frameless/CatalystNumeric.scala | 10 +- .../CatalystNumericWithJavaBigDecimal.scala | 14 +- .../scala/frameless/CatalystOrdered.scala | 44 +- .../scala/frameless/CatalystPivotable.scala | 6 +- .../main/scala/frameless/CatalystRound.scala | 10 +- .../scala/frameless/CatalystSummable.scala | 8 +- .../scala/frameless/CatalystVariance.scala | 8 +- .../scala/frameless/FramelessSyntax.scala | 2 +- .../main/scala/frameless/InjectionEnum.scala | 6 +- .../main/scala/frameless/IsValueClass.scala | 8 +- dataset/src/main/scala/frameless/Job.scala | 2 +- .../main/scala/frameless/RecordEncoder.scala | 383 ++++---- .../main/scala/frameless/TypedColumn.scala | 698 +++++++-------- .../frameless/TypedColumnMacroImpl.scala | 7 +- .../main/scala/frameless/TypedDataset.scala | 844 +++++++++--------- .../frameless/TypedDatasetForwarded.scala | 46 +- .../main/scala/frameless/TypedEncoder.scala | 77 +- .../frameless/TypedExpressionEncoder.scala | 11 +- dataset/src/main/scala/frameless/With.scala | 2 +- .../functions/AggregateFunctions.scala | 55 +- .../main/scala/frameless/functions/Lit.scala | 10 +- .../functions/NonAggregateFunctions.scala | 214 ++--- .../main/scala/frameless/functions/Udf.scala | 53 +- .../frameless/functions/UnaryFunctions.scala | 19 +- .../scala/frameless/functions/package.scala | 100 +-- .../scala/frameless/ops/AggregateTypes.scala | 4 +- dataset/src/main/scala/frameless/ops/As.scala | 31 +- .../scala/frameless/ops/ColumnTypes.scala | 4 +- .../main/scala/frameless/ops/GroupByOps.scala | 278 +++--- .../frameless/ops/RelationalGroupsOps.scala | 102 ++- .../src/main/scala/frameless/ops/Repeat.scala | 13 +- .../scala/frameless/ops/SmartProject.scala | 26 +- .../apache/spark/sql/reflection/package.scala | 24 +- .../apache/spark/sql/FramelessInternals.scala | 45 +- .../main/spark-4/frameless/MapGroups.scala | 15 +- .../apache/spark/sql/FramelessInternals.scala | 52 +- .../src/test/scala/frameless/AsTests.scala | 4 +- .../test/scala/frameless/BitwiseTests.scala | 18 +- .../src/test/scala/frameless/CastTests.scala | 5 +- .../test/scala/frameless/CollectTests.scala | 6 +- .../test/scala/frameless/ColumnTests.scala | 18 +- .../frameless/ColumnViaLambdaTests.scala | 3 +- .../test/scala/frameless/CreateTests.scala | 68 +- .../test/scala/frameless/DropTupledTest.scala | 6 +- .../test/scala/frameless/ExplodeTests.scala | 16 +- .../test/scala/frameless/FilterTests.scala | 16 +- .../test/scala/frameless/FlattenTests.scala | 5 +- .../test/scala/frameless/GroupByTests.scala | 147 +-- .../test/scala/frameless/InjectionTests.scala | 4 +- .../src/test/scala/frameless/JobTests.scala | 7 +- .../src/test/scala/frameless/JoinTests.scala | 101 ++- .../src/test/scala/frameless/LitTests.scala | 10 +- .../test/scala/frameless/NumericTests.scala | 44 +- .../test/scala/frameless/OrderByTests.scala | 42 +- .../scala/frameless/RecordEncoderTests.scala | 162 +++- .../test/scala/frameless/SchemaTests.scala | 8 +- .../test/scala/frameless/SelfJoinTests.scala | 76 +- .../scala/frameless/TypedDatasetSuite.scala | 25 +- .../scala/frameless/UdtEncodedClass.scala | 2 +- .../test/scala/frameless/WithColumnTest.scala | 10 +- dataset/src/test/scala/frameless/XN.scala | 11 +- .../frameless/forward/CheckpointTests.scala | 3 +- .../frameless/forward/ColumnsTests.scala | 13 +- .../frameless/forward/DistinctTests.scala | 2 +- .../scala/frameless/forward/HeadTests.scala | 6 +- .../frameless/forward/InputFilesTests.scala | 8 +- .../frameless/forward/IntersectTests.scala | 2 +- .../frameless/forward/IsLocalTests.scala | 2 +- .../frameless/forward/IsStreamingTests.scala | 2 +- .../forward/QueryExecutionTests.scala | 2 +- .../frameless/forward/RandomSplitTests.scala | 4 +- .../frameless/forward/SQLContextTests.scala | 7 +- .../frameless/forward/SparkSessionTests.scala | 2 +- .../frameless/forward/StorageLevelTests.scala | 21 +- .../scala/frameless/forward/TakeTests.scala | 14 +- .../scala/frameless/forward/ToJSONTests.scala | 2 +- .../scala/frameless/forward/UnionTests.scala | 2 +- .../frameless/forward/WriteStreamTests.scala | 4 +- .../functions/AggregateFunctionsTests.scala | 87 +- .../DateTimeStringBehaviourUtils.scala | 2 +- .../functions/DoubleBehaviourUtils.scala | 4 +- .../NonAggregateFunctionsTests.scala | 591 ++++++------ .../scala/frameless/functions/UdfTests.scala | 23 +- .../functions/UnaryFunctionsTest.scala | 15 +- .../scala/frameless/ops/ColumnTypesTest.scala | 4 +- .../test/scala/frameless/ops/CubeTests.scala | 95 +- .../test/scala/frameless/ops/PivotTest.scala | 20 +- .../test/scala/frameless/ops/RepeatTest.scala | 10 +- .../scala/frameless/ops/RollupTests.scala | 95 +- .../frameless/ops/SmartProjectTest.scala | 1 - .../src/test/scala/frameless/package.scala | 18 +- .../test/scala/frameless/sql/package.scala | 4 +- .../frameless/sql/rules/SQLRulesSuite.scala | 4 +- .../apache/hadoop/fs/local/StreamingFS.scala | 4 +- .../sql/rules/FramelessLitPushDownTests.scala | 2 +- .../TypedRandomForestClassifier.scala | 3 +- .../ml/clustering/TypedBisectingKMeans.scala | 14 +- .../frameless/ml/clustering/TypedKMeans.scala | 4 +- .../ml/feature/TypedIndexToString.scala | 9 +- .../ml/feature/TypedStringIndexer.scala | 6 +- .../ml/feature/TypedVectorAssembler.scala | 9 +- .../ml/internals/LinearInputsChecker.scala | 20 +- .../ml/internals/TreesInputsChecker.scala | 9 +- .../ml/internals/UnaryInputsChecker.scala | 1 - .../ml/internals/VectorInputsChecker.scala | 16 +- .../ml/params/kmeans/KMeansInitMode.scala | 2 +- .../ml/params/linears/LossStrategy.scala | 5 +- .../frameless/ml/params/linears/Solver.scala | 9 +- .../params/trees/FeatureSubsetStrategy.scala | 5 +- .../ml/regression/TypedLinearRegression.scala | 27 +- .../TypedRandomForestRegressor.scala | 10 +- .../test/scala/frameless/ml/Generators.scala | 5 +- .../TypedRandomForestClassifierTests.scala | 16 +- .../ml/clustering/BisectingKMeansTests.scala | 12 +- .../ClusteringIntegrationTests.scala | 12 +- .../frameless/ml/clustering/KMeansTests.scala | 16 +- .../ml/feature/TypedStringIndexerTests.scala | 2 +- .../TypedLinearRegressionTests.scala | 24 +- .../TypedRandomForestRegressorTests.scala | 14 +- project/Common.scala | 14 - .../refined/RefinedFieldEncoders.scala | 20 +- .../scala/frameless/refined/package.scala | 30 +- .../frameless/RefinedFieldEncoderTests.scala | 12 +- 135 files changed, 2817 insertions(+), 2786 deletions(-) delete mode 100644 project/Common.scala diff --git a/.scalafmt.conf b/.scalafmt.conf index 771bfd31a..a6f90c806 100644 --- a/.scalafmt.conf +++ b/.scalafmt.conf @@ -1,28 +1,32 @@ -version = 3.8.6 +version = 3.11.1 runner.dialect = scala213 -newlines.beforeMultilineDef = keep -newlines.topLevelStatements = [before] -newlines.beforeCurlyLambdaParams = multilineWithCaseOnly -newlines.afterCurlyLambdaParams = squash -newlines.implicitParamListModifierForce = [after] -newlines.avoidForSimpleOverflow = [tooLong] -newlines.avoidInResultType = true -newlines.sometimesBeforeColonInMethodReturnType = false -newlines.beforeTypeBounds = keep +align.openParenCallSite = true +align.openParenDefnSite = true -verticalMultiline.atDefnSite = true -verticalMultiline.arityThreshold = 10 +maxColumn = 150 +continuationIndent.defnSite = 2 -spaces.inImportCurlyBraces = true +assumeStandardLibraryStripMargin = true +danglingParentheses.preset = true + +rewrite.rules = [SortImports, RedundantParens, SortModifiers] + +newlines.source = keep +newlines.afterCurlyLambda = preserve + +spaces.inImportCurlyBraces = false includeCurlyBraceInSelectChains = false includeNoParensInSelectChains = false optIn.breakChainOnFirstMethodDot = false -docstrings.style = Asterisk -docstrings.wrap = no +literals.long = Upper +literals.float = Upper +literals.double = Upper -literals.long=Upper -literals.float=Upper -literals.double=Upper +docstrings = JavaDoc +docstrings.style = keep +docstrings.wrap = no +docstrings.oneline = keep +docstrings.blankFirstLine = keep diff --git a/build.sbt b/build.sbt index afa1fe404..9f4e2d2e8 100644 --- a/build.sbt +++ b/build.sbt @@ -204,9 +204,9 @@ lazy val docs = project .dependsOn(dataset, cats, ml) def sparkDependencies( - sparkVersion: String, - scope: Configuration = Provided - ) = Seq( + sparkVersion: String, + scope: Configuration = Provided +) = Seq( libraryDependencies ++= Seq( "org.apache.spark" %% "spark-core" % sparkVersion % scope, "org.apache.spark" %% "spark-sql" % sparkVersion % scope @@ -378,10 +378,11 @@ lazy val spark40Settings = Seq[Setting[_]]( lazy val spark34Settings = Seq[Setting[_]]( tlVersionIntroduced := Map("2.12" -> "0.14.1", "2.13" -> "0.14.1"), mimaPreviousArtifacts := Set( - organization.value %% moduleName.value - .split("-") - .dropRight(1) - .mkString("-") % "0.14.1" + organization.value %% + moduleName.value + .split("-") + .dropRight(1) + .mkString("-") % "0.14.1" ) ) diff --git a/cats/src/main/scala/frameless/cats/FramelessSyntax.scala b/cats/src/main/scala/frameless/cats/FramelessSyntax.scala index 663ae5958..7dabb4ff3 100644 --- a/cats/src/main/scala/frameless/cats/FramelessSyntax.scala +++ b/cats/src/main/scala/frameless/cats/FramelessSyntax.scala @@ -13,8 +13,8 @@ trait FramelessSyntax extends frameless.FramelessSyntax { def withLocalProperty(key: String, value: String): F[A] = for { session <- ask - _ <- delay(session.sparkContext.setLocalProperty(key, value)) - a <- fa + _ <- delay(session.sparkContext.setLocalProperty(key, value)) + a <- fa } yield a def withGroupId(groupId: String): F[A] = withLocalProperty("spark.jobGroup.id", groupId) diff --git a/cats/src/main/scala/frameless/cats/implicits.scala b/cats/src/main/scala/frameless/cats/implicits.scala index 1fa869a7f..7b084adfc 100644 --- a/cats/src/main/scala/frameless/cats/implicits.scala +++ b/cats/src/main/scala/frameless/cats/implicits.scala @@ -66,9 +66,9 @@ object outer { def combine(lhs: RDD[(K, V)], rhs: RDD[(K, V)]): RDD[(K, V)] = lhs.fullOuterJoin(rhs).mapValues { case (Some(x), Some(y)) => x |+| y - case (None, Some(y)) => y - case (Some(x), None) => x - case (None, None) => m.empty + case (None, Some(y)) => y + case (Some(x), None) => x + case (None, None) => m.empty } } } diff --git a/cats/src/test/scala/frameless/cats/test.scala b/cats/src/test/scala/frameless/cats/test.scala index d75bc3bfd..7780647e2 100644 --- a/cats/src/test/scala/frameless/cats/test.scala +++ b/cats/src/test/scala/frameless/cats/test.scala @@ -21,7 +21,7 @@ import org.scalatest.matchers.should.Matchers import org.scalatest.propspec.AnyPropSpec trait SparkTests { - val appID: String = new java.util.Date().toString + math.floor(math.random() * 10E4).toLong.toString + val appID: String = new java.util.Date().toString + math.floor(math.random() * 10e4).toLong.toString val conf: SparkConf = new SparkConf() .setMaster("local[*]") @@ -68,7 +68,7 @@ class Test extends AnyPropSpec with Matchers with ScalaCheckPropertyChecks with PropertyCheckConfiguration(minSize = PosInt(10)) property("spark is working") { - sc.parallelize(Seq(1, 2, 3)).collect() shouldBe Array(1,2,3) + sc.parallelize(Seq(1, 2, 3)).collect() shouldBe Array(1, 2, 3) } property("inner pairwise monoid") { @@ -120,10 +120,10 @@ class Test extends AnyPropSpec with Matchers with ScalaCheckPropertyChecks with property("pair rdd numeric commutative semigroup example") { import frameless.cats.implicits._ - val seq = Seq( ("a",2), ("b",3), ("d",6), ("b",2), ("d",1) ) + val seq = Seq(("a", 2), ("b", 3), ("d", 6), ("b", 2), ("d", 1)) val rdd = seq.toRdd - rdd.cminByKey.collect().toSeq should contain theSameElementsAs Seq( ("a",2), ("b",2), ("d",1) ) - rdd.cmaxByKey.collect().toSeq should contain theSameElementsAs Seq( ("a",2), ("b",3), ("d",6) ) - rdd.csumByKey.collect().toSeq should contain theSameElementsAs Seq( ("a",2), ("b",5), ("d",7) ) + rdd.cminByKey.collect().toSeq should contain theSameElementsAs Seq(("a", 2), ("b", 2), ("d", 1)) + rdd.cmaxByKey.collect().toSeq should contain theSameElementsAs Seq(("a", 2), ("b", 3), ("d", 6)) + rdd.csumByKey.collect().toSeq should contain theSameElementsAs Seq(("a", 2), ("b", 5), ("d", 7)) } } diff --git a/core/src/main/scala/frameless/CatalystAverageable.scala b/core/src/main/scala/frameless/CatalystAverageable.scala index 401ed65fc..cb0c8504f 100644 --- a/core/src/main/scala/frameless/CatalystAverageable.scala +++ b/core/src/main/scala/frameless/CatalystAverageable.scala @@ -19,8 +19,8 @@ object CatalystAverageable { private[this] def of[In, Out]: CatalystAverageable[In, Out] = theInstance.asInstanceOf[CatalystAverageable[In, Out]] implicit val framelessAverageableBigDecimal: CatalystAverageable[BigDecimal, BigDecimal] = of[BigDecimal, BigDecimal] - implicit val framelessAverageableDouble: CatalystAverageable[Double, Double] = of[Double, Double] - implicit val framelessAverageableLong: CatalystAverageable[Long, Double] = of[Long, Double] - implicit val framelessAverageableInt: CatalystAverageable[Int, Double] = of[Int, Double] - implicit val framelessAverageableShort: CatalystAverageable[Short, Double] = of[Short, Double] + implicit val framelessAverageableDouble: CatalystAverageable[Double, Double] = of[Double, Double] + implicit val framelessAverageableLong: CatalystAverageable[Long, Double] = of[Long, Double] + implicit val framelessAverageableInt: CatalystAverageable[Int, Double] = of[Int, Double] + implicit val framelessAverageableShort: CatalystAverageable[Short, Double] = of[Short, Double] } diff --git a/core/src/main/scala/frameless/CatalystBitShift.scala b/core/src/main/scala/frameless/CatalystBitShift.scala index 753a61907..67aaccc62 100644 --- a/core/src/main/scala/frameless/CatalystBitShift.scala +++ b/core/src/main/scala/frameless/CatalystBitShift.scala @@ -12,9 +12,9 @@ object CatalystBitShift { private[this] val theInstance = new CatalystBitShift[Any, Any] {} private[this] def of[In, Out]: CatalystBitShift[In, Out] = theInstance.asInstanceOf[CatalystBitShift[In, Out]] - implicit val framelessBitShiftBigDecimal: CatalystBitShift[BigDecimal, Int] = of[BigDecimal, Int] - implicit val framelessBitShiftDouble : CatalystBitShift[Byte, Int] = of[Byte, Int] - implicit val framelessBitShiftInt : CatalystBitShift[Short, Int] = of[Short, Int] - implicit val framelessBitShiftLong : CatalystBitShift[Int, Int] = of[Int, Int] - implicit val framelessBitShiftShort : CatalystBitShift[Long, Long] = of[Long, Long] + implicit val framelessBitShiftBigDecimal: CatalystBitShift[BigDecimal, Int] = of[BigDecimal, Int] + implicit val framelessBitShiftDouble: CatalystBitShift[Byte, Int] = of[Byte, Int] + implicit val framelessBitShiftInt: CatalystBitShift[Short, Int] = of[Short, Int] + implicit val framelessBitShiftLong: CatalystBitShift[Int, Int] = of[Int, Int] + implicit val framelessBitShiftShort: CatalystBitShift[Long, Long] = of[Long, Long] } diff --git a/core/src/main/scala/frameless/CatalystCast.scala b/core/src/main/scala/frameless/CatalystCast.scala index 1a8a21573..7ceb563f7 100644 --- a/core/src/main/scala/frameless/CatalystCast.scala +++ b/core/src/main/scala/frameless/CatalystCast.scala @@ -8,25 +8,25 @@ object CatalystCast { implicit def framelessCastToString[T]: CatalystCast[T, String] = of[T, String] - implicit def framelessNumericToLong [A: CatalystNumeric]: CatalystCast[A, Long] = of[A, Long] - implicit def framelessNumericToInt [A: CatalystNumeric]: CatalystCast[A, Int] = of[A, Int] - implicit def framelessNumericToShort [A: CatalystNumeric]: CatalystCast[A, Short] = of[A, Short] - implicit def framelessNumericToByte [A: CatalystNumeric]: CatalystCast[A, Byte] = of[A, Byte] + implicit def framelessNumericToLong[A: CatalystNumeric]: CatalystCast[A, Long] = of[A, Long] + implicit def framelessNumericToInt[A: CatalystNumeric]: CatalystCast[A, Int] = of[A, Int] + implicit def framelessNumericToShort[A: CatalystNumeric]: CatalystCast[A, Short] = of[A, Short] + implicit def framelessNumericToByte[A: CatalystNumeric]: CatalystCast[A, Byte] = of[A, Byte] implicit def framelessNumericToDecimal[A: CatalystNumeric]: CatalystCast[A, BigDecimal] = of[A, BigDecimal] - implicit def framelessNumericToDouble [A: CatalystNumeric]: CatalystCast[A, Double] = of[A, Double] + implicit def framelessNumericToDouble[A: CatalystNumeric]: CatalystCast[A, Double] = of[A, Double] implicit def framelessBooleanToNumeric[A: CatalystNumeric]: CatalystCast[Boolean, A] = of[Boolean, A] // doesn't make any sense to include: // - sqlDateToBoolean: always None // - sqlTimestampToBoolean: compares us to 0 - implicit val framelessStringToBoolean : CatalystCast[String, Option[Boolean]] = of[String, Option[Boolean]] - implicit val framelessLongToBoolean : CatalystCast[Long, Boolean] = of[Long, Boolean] - implicit val framelessIntToBoolean : CatalystCast[Int, Boolean] = of[Int, Boolean] - implicit val framelessShortToBoolean : CatalystCast[Short, Boolean] = of[Short, Boolean] - implicit val framelessByteToBoolean : CatalystCast[Byte, Boolean] = of[Byte, Boolean] - implicit val framelessBigDecimalToBoolean: CatalystCast[BigDecimal, Boolean] = of[BigDecimal, Boolean] - implicit val framelessDoubleToBoolean : CatalystCast[Double, Boolean] = of[Double, Boolean] + implicit val framelessStringToBoolean: CatalystCast[String, Option[Boolean]] = of[String, Option[Boolean]] + implicit val framelessLongToBoolean: CatalystCast[Long, Boolean] = of[Long, Boolean] + implicit val framelessIntToBoolean: CatalystCast[Int, Boolean] = of[Int, Boolean] + implicit val framelessShortToBoolean: CatalystCast[Short, Boolean] = of[Short, Boolean] + implicit val framelessByteToBoolean: CatalystCast[Byte, Boolean] = of[Byte, Boolean] + implicit val framelessBigDecimalToBoolean: CatalystCast[BigDecimal, Boolean] = of[BigDecimal, Boolean] + implicit val framelessDoubleToBoolean: CatalystCast[Double, Boolean] = of[Double, Boolean] // TODO @@ -38,9 +38,8 @@ object CatalystCast { // implicit object stringToLong extends CatalystCast[String, Option[Long]] // implicit object stringToSqlDate extends CatalystCast[String, Option[SQLDate]] - // needs verification: - //implicit object sqlTimestampToSqlDate extends CatalystCast[SQLTimestamp, SQLDate] + // implicit object sqlTimestampToSqlDate extends CatalystCast[SQLTimestamp, SQLDate] // needs verification: // implicit object sqlTimestampToDecimal extends CatalystCast[SQLTimestamp, BigDecimal] diff --git a/core/src/main/scala/frameless/CatalystCollection.scala b/core/src/main/scala/frameless/CatalystCollection.scala index 3456869a0..9fd4f6ba1 100644 --- a/core/src/main/scala/frameless/CatalystCollection.scala +++ b/core/src/main/scala/frameless/CatalystCollection.scala @@ -9,8 +9,8 @@ object CatalystCollection { private[this] val theInstance = new CatalystCollection[Any] {} private[this] def of[A[_]]: CatalystCollection[A] = theInstance.asInstanceOf[CatalystCollection[A]] - implicit val arrayObject : CatalystCollection[Array] = of[Array] - implicit val seqObject : CatalystCollection[Seq] = of[Seq] - implicit val listObject : CatalystCollection[List] = of[List] + implicit val arrayObject: CatalystCollection[Array] = of[Array] + implicit val seqObject: CatalystCollection[Seq] = of[Seq] + implicit val listObject: CatalystCollection[List] = of[List] implicit val vectorObject: CatalystCollection[Vector] = of[Vector] } diff --git a/core/src/main/scala/frameless/CatalystDivisible.scala b/core/src/main/scala/frameless/CatalystDivisible.scala index c9080a5d8..c78622df2 100644 --- a/core/src/main/scala/frameless/CatalystDivisible.scala +++ b/core/src/main/scala/frameless/CatalystDivisible.scala @@ -13,9 +13,9 @@ object CatalystDivisible { private[this] def of[In, Out]: CatalystDivisible[In, Out] = theInstance.asInstanceOf[CatalystDivisible[In, Out]] implicit val framelessDivisibleBigDecimal: CatalystDivisible[BigDecimal, BigDecimal] = of[BigDecimal, BigDecimal] - implicit val framelessDivisibleDouble : CatalystDivisible[Double, Double] = of[Double, Double] - implicit val framelessDivisibleInt : CatalystDivisible[Int, Double] = of[Int, Double] - implicit val framelessDivisibleLong : CatalystDivisible[Long, Double] = of[Long, Double] - implicit val framelessDivisibleByte : CatalystDivisible[Byte, Double] = of[Byte, Double] - implicit val framelessDivisibleShort : CatalystDivisible[Short, Double] = of[Short, Double] + implicit val framelessDivisibleDouble: CatalystDivisible[Double, Double] = of[Double, Double] + implicit val framelessDivisibleInt: CatalystDivisible[Int, Double] = of[Int, Double] + implicit val framelessDivisibleLong: CatalystDivisible[Long, Double] = of[Long, Double] + implicit val framelessDivisibleByte: CatalystDivisible[Byte, Double] = of[Byte, Double] + implicit val framelessDivisibleShort: CatalystDivisible[Short, Double] = of[Short, Double] } diff --git a/core/src/main/scala/frameless/CatalystIsin.scala b/core/src/main/scala/frameless/CatalystIsin.scala index f630a7155..fe12ab622 100644 --- a/core/src/main/scala/frameless/CatalystIsin.scala +++ b/core/src/main/scala/frameless/CatalystIsin.scala @@ -8,11 +8,11 @@ trait CatalystIsin[A] object CatalystIsin { implicit object framelessBigDecimal extends CatalystIsin[BigDecimal] - implicit object framelessByte extends CatalystIsin[Byte] - implicit object framelessDouble extends CatalystIsin[Double] - implicit object framelessFloat extends CatalystIsin[Float] - implicit object framelessInt extends CatalystIsin[Int] - implicit object framelessLong extends CatalystIsin[Long] - implicit object framelessShort extends CatalystIsin[Short] - implicit object framelesssString extends CatalystIsin[String] + implicit object framelessByte extends CatalystIsin[Byte] + implicit object framelessDouble extends CatalystIsin[Double] + implicit object framelessFloat extends CatalystIsin[Float] + implicit object framelessInt extends CatalystIsin[Int] + implicit object framelessLong extends CatalystIsin[Long] + implicit object framelessShort extends CatalystIsin[Short] + implicit object framelesssString extends CatalystIsin[String] } diff --git a/core/src/main/scala/frameless/CatalystNaN.scala b/core/src/main/scala/frameless/CatalystNaN.scala index 3e7be8263..16db67e4a 100644 --- a/core/src/main/scala/frameless/CatalystNaN.scala +++ b/core/src/main/scala/frameless/CatalystNaN.scala @@ -10,7 +10,6 @@ object CatalystNaN { private[this] val theInstance = new CatalystNaN[Any] {} private[this] def of[A]: CatalystNaN[A] = theInstance.asInstanceOf[CatalystNaN[A]] - implicit val framelessFloatNaN : CatalystNaN[Float] = of[Float] - implicit val framelessDoubleNaN : CatalystNaN[Double] = of[Double] + implicit val framelessFloatNaN: CatalystNaN[Float] = of[Float] + implicit val framelessDoubleNaN: CatalystNaN[Double] = of[Double] } - diff --git a/core/src/main/scala/frameless/CatalystNumeric.scala b/core/src/main/scala/frameless/CatalystNumeric.scala index c819ba2ae..74b399f56 100644 --- a/core/src/main/scala/frameless/CatalystNumeric.scala +++ b/core/src/main/scala/frameless/CatalystNumeric.scala @@ -11,9 +11,9 @@ object CatalystNumeric { private[this] def of[A]: CatalystNumeric[A] = theInstance.asInstanceOf[CatalystNumeric[A]] implicit val framelessbigDecimalNumeric: CatalystNumeric[BigDecimal] = of[BigDecimal] - implicit val framelessbyteNumeric : CatalystNumeric[Byte] = of[Byte] - implicit val framelessdoubleNumeric : CatalystNumeric[Double] = of[Double] - implicit val framelessintNumeric : CatalystNumeric[Int] = of[Int] - implicit val framelesslongNumeric : CatalystNumeric[Long] = of[Long] - implicit val framelessshortNumeric : CatalystNumeric[Short] = of[Short] + implicit val framelessbyteNumeric: CatalystNumeric[Byte] = of[Byte] + implicit val framelessdoubleNumeric: CatalystNumeric[Double] = of[Double] + implicit val framelessintNumeric: CatalystNumeric[Int] = of[Int] + implicit val framelesslongNumeric: CatalystNumeric[Long] = of[Long] + implicit val framelessshortNumeric: CatalystNumeric[Short] = of[Short] } diff --git a/core/src/main/scala/frameless/CatalystNumericWithJavaBigDecimal.scala b/core/src/main/scala/frameless/CatalystNumericWithJavaBigDecimal.scala index 8fee63be2..79d61d965 100644 --- a/core/src/main/scala/frameless/CatalystNumericWithJavaBigDecimal.scala +++ b/core/src/main/scala/frameless/CatalystNumericWithJavaBigDecimal.scala @@ -11,11 +11,11 @@ object CatalystNumericWithJavaBigDecimal { private[this] val theInstance = new CatalystNumericWithJavaBigDecimal[Any, Any] {} private[this] def of[In, Out]: CatalystNumericWithJavaBigDecimal[In, Out] = theInstance.asInstanceOf[CatalystNumericWithJavaBigDecimal[In, Out]] - implicit val framelessAbsoluteBigDecimal: CatalystNumericWithJavaBigDecimal[BigDecimal, java.math.BigDecimal] = of[BigDecimal, java.math.BigDecimal] - implicit val framelessAbsoluteDouble : CatalystNumericWithJavaBigDecimal[Double, Double] = of[Double, Double] - implicit val framelessAbsoluteInt : CatalystNumericWithJavaBigDecimal[Int, Int] = of[Int, Int] - implicit val framelessAbsoluteLong : CatalystNumericWithJavaBigDecimal[Long, Long] = of[Long, Long] - implicit val framelessAbsoluteShort : CatalystNumericWithJavaBigDecimal[Short, Short] = of[Short, Short] - implicit val framelessAbsoluteByte : CatalystNumericWithJavaBigDecimal[Byte, Byte] = of[Byte, Byte] + implicit val framelessAbsoluteBigDecimal: CatalystNumericWithJavaBigDecimal[BigDecimal, java.math.BigDecimal] = of[BigDecimal, java.math.BigDecimal] + implicit val framelessAbsoluteDouble: CatalystNumericWithJavaBigDecimal[Double, Double] = of[Double, Double] + implicit val framelessAbsoluteInt: CatalystNumericWithJavaBigDecimal[Int, Int] = of[Int, Int] + implicit val framelessAbsoluteLong: CatalystNumericWithJavaBigDecimal[Long, Long] = of[Long, Long] + implicit val framelessAbsoluteShort: CatalystNumericWithJavaBigDecimal[Short, Short] = of[Short, Short] + implicit val framelessAbsoluteByte: CatalystNumericWithJavaBigDecimal[Byte, Byte] = of[Byte, Byte] -} \ No newline at end of file +} diff --git a/core/src/main/scala/frameless/CatalystOrdered.scala b/core/src/main/scala/frameless/CatalystOrdered.scala index e73604909..bdf361c1b 100644 --- a/core/src/main/scala/frameless/CatalystOrdered.scala +++ b/core/src/main/scala/frameless/CatalystOrdered.scala @@ -13,30 +13,28 @@ object CatalystOrdered { private[this] val theInstance = new CatalystOrdered[Any] {} private[this] def of[A]: CatalystOrdered[A] = theInstance.asInstanceOf[CatalystOrdered[A]] - implicit val framelessIntOrdered : CatalystOrdered[Int] = of[Int] - implicit val framelessBooleanOrdered : CatalystOrdered[Boolean] = of[Boolean] - implicit val framelessByteOrdered : CatalystOrdered[Byte] = of[Byte] - implicit val framelessShortOrdered : CatalystOrdered[Short] = of[Short] - implicit val framelessLongOrdered : CatalystOrdered[Long] = of[Long] - implicit val framelessFloatOrdered : CatalystOrdered[Float] = of[Float] - implicit val framelessDoubleOrdered : CatalystOrdered[Double] = of[Double] - implicit val framelessBigDecimalOrdered : CatalystOrdered[BigDecimal] = of[BigDecimal] - implicit val framelessSQLDateOrdered : CatalystOrdered[SQLDate] = of[SQLDate] + implicit val framelessIntOrdered: CatalystOrdered[Int] = of[Int] + implicit val framelessBooleanOrdered: CatalystOrdered[Boolean] = of[Boolean] + implicit val framelessByteOrdered: CatalystOrdered[Byte] = of[Byte] + implicit val framelessShortOrdered: CatalystOrdered[Short] = of[Short] + implicit val framelessLongOrdered: CatalystOrdered[Long] = of[Long] + implicit val framelessFloatOrdered: CatalystOrdered[Float] = of[Float] + implicit val framelessDoubleOrdered: CatalystOrdered[Double] = of[Double] + implicit val framelessBigDecimalOrdered: CatalystOrdered[BigDecimal] = of[BigDecimal] + implicit val framelessSQLDateOrdered: CatalystOrdered[SQLDate] = of[SQLDate] implicit val framelessSQLTimestampOrdered: CatalystOrdered[SQLTimestamp] = of[SQLTimestamp] - implicit val framelessStringOrdered : CatalystOrdered[String] = of[String] - implicit val framelessInstantOrdered : CatalystOrdered[Instant] = of[Instant] - implicit val framelessDurationOrdered : CatalystOrdered[Duration] = of[Duration] - implicit val framelessPeriodOrdered : CatalystOrdered[Period] = of[Period] + implicit val framelessStringOrdered: CatalystOrdered[String] = of[String] + implicit val framelessInstantOrdered: CatalystOrdered[Instant] = of[Instant] + implicit val framelessDurationOrdered: CatalystOrdered[Duration] = of[Duration] + implicit val framelessPeriodOrdered: CatalystOrdered[Period] = of[Period] - implicit def injectionOrdered[A, B] - (implicit - i0: Injection[A, B], - i1: CatalystOrdered[B] - ): CatalystOrdered[A] = of[A] + implicit def injectionOrdered[A, B](implicit + i0: Injection[A, B], + i1: CatalystOrdered[B] + ): CatalystOrdered[A] = of[A] - implicit def deriveGeneric[G, H <: HList] - (implicit - i0: Generic.Aux[G, H], - i1: Lazy[LiftAll[CatalystOrdered, H]] - ): CatalystOrdered[G] = of[G] + implicit def deriveGeneric[G, H <: HList](implicit + i0: Generic.Aux[G, H], + i1: Lazy[LiftAll[CatalystOrdered, H]] + ): CatalystOrdered[G] = of[G] } diff --git a/core/src/main/scala/frameless/CatalystPivotable.scala b/core/src/main/scala/frameless/CatalystPivotable.scala index a7b34da64..2ff69b033 100644 --- a/core/src/main/scala/frameless/CatalystPivotable.scala +++ b/core/src/main/scala/frameless/CatalystPivotable.scala @@ -9,8 +9,8 @@ object CatalystPivotable { private[this] val theInstance = new CatalystPivotable[Any] {} private[this] def of[A]: CatalystPivotable[A] = theInstance.asInstanceOf[CatalystPivotable[A]] - implicit val framelessIntPivotable : CatalystPivotable[Int] = of[Int] - implicit val framelessLongPivotable : CatalystPivotable[Long] = of[Long] + implicit val framelessIntPivotable: CatalystPivotable[Int] = of[Int] + implicit val framelessLongPivotable: CatalystPivotable[Long] = of[Long] implicit val framelessBooleanPivotable: CatalystPivotable[Boolean] = of[Boolean] - implicit val framelessStringPivotable : CatalystPivotable[String] = of[String] + implicit val framelessStringPivotable: CatalystPivotable[String] = of[String] } diff --git a/core/src/main/scala/frameless/CatalystRound.scala b/core/src/main/scala/frameless/CatalystRound.scala index ee50b794a..8205945d4 100644 --- a/core/src/main/scala/frameless/CatalystRound.scala +++ b/core/src/main/scala/frameless/CatalystRound.scala @@ -12,8 +12,8 @@ object CatalystRound { private[this] def of[In, Out]: CatalystRound[In, Out] = theInstance.asInstanceOf[CatalystRound[In, Out]] implicit val framelessBigDecimal: CatalystRound[BigDecimal, java.math.BigDecimal] = of[BigDecimal, java.math.BigDecimal] - implicit val framelessDouble : CatalystRound[Double, Long] = of[Double, Long] - implicit val framelessInt : CatalystRound[Int, Long] = of[Int, Long] - implicit val framelessLong : CatalystRound[Long, Long] = of[Long, Long] - implicit val framelessShort : CatalystRound[Short, Long] = of[Short, Long] -} \ No newline at end of file + implicit val framelessDouble: CatalystRound[Double, Long] = of[Double, Long] + implicit val framelessInt: CatalystRound[Int, Long] = of[Int, Long] + implicit val framelessLong: CatalystRound[Long, Long] = of[Long, Long] + implicit val framelessShort: CatalystRound[Short, Long] = of[Short, Long] +} diff --git a/core/src/main/scala/frameless/CatalystSummable.scala b/core/src/main/scala/frameless/CatalystSummable.scala index 94010505e..def5d514a 100644 --- a/core/src/main/scala/frameless/CatalystSummable.scala +++ b/core/src/main/scala/frameless/CatalystSummable.scala @@ -23,9 +23,9 @@ object CatalystSummable { new CatalystSummable[In, Out] { val zero: In = _zero } } - implicit val framelessSummableLong : CatalystSummable[Long, Long] = CatalystSummable(zero = 0L) + implicit val framelessSummableLong: CatalystSummable[Long, Long] = CatalystSummable(zero = 0L) implicit val framelessSummableBigDecimal: CatalystSummable[BigDecimal, BigDecimal] = CatalystSummable(zero = BigDecimal(0)) - implicit val framelessSummableDouble : CatalystSummable[Double, Double] = CatalystSummable(zero = 0.0) - implicit val framelessSummableInt : CatalystSummable[Int, Long] = CatalystSummable(zero = 0) - implicit val framelessSummableShort : CatalystSummable[Short, Long] = CatalystSummable(zero = 0) + implicit val framelessSummableDouble: CatalystSummable[Double, Double] = CatalystSummable(zero = 0.0) + implicit val framelessSummableInt: CatalystSummable[Int, Long] = CatalystSummable(zero = 0) + implicit val framelessSummableShort: CatalystSummable[Short, Long] = CatalystSummable(zero = 0) } diff --git a/core/src/main/scala/frameless/CatalystVariance.scala b/core/src/main/scala/frameless/CatalystVariance.scala index 9e843fa70..ef91b4c00 100644 --- a/core/src/main/scala/frameless/CatalystVariance.scala +++ b/core/src/main/scala/frameless/CatalystVariance.scala @@ -12,9 +12,9 @@ object CatalystVariance { private[this] val theInstance = new CatalystVariance[Any] {} private[this] def of[A]: CatalystVariance[A] = theInstance.asInstanceOf[CatalystVariance[A]] - implicit val framelessIntVariance : CatalystVariance[Int] = of[Int] - implicit val framelessLongVariance : CatalystVariance[Long] = of[Long] - implicit val framelessShortVariance : CatalystVariance[Short] = of[Short] + implicit val framelessIntVariance: CatalystVariance[Int] = of[Int] + implicit val framelessLongVariance: CatalystVariance[Long] = of[Long] + implicit val framelessShortVariance: CatalystVariance[Short] = of[Short] implicit val framelessBigDecimalVariance: CatalystVariance[BigDecimal] = of[BigDecimal] - implicit val framelessDoubleVariance : CatalystVariance[Double] = of[Double] + implicit val framelessDoubleVariance: CatalystVariance[Double] = of[Double] } diff --git a/dataset/src/main/scala/frameless/FramelessSyntax.scala b/dataset/src/main/scala/frameless/FramelessSyntax.scala index 5ba294921..d97fe88ed 100644 --- a/dataset/src/main/scala/frameless/FramelessSyntax.scala +++ b/dataset/src/main/scala/frameless/FramelessSyntax.scala @@ -12,7 +12,7 @@ trait FramelessSyntax { def typed: TypedDataset[T] = TypedDataset.create[T](self) } - implicit class DataframeSyntax(self: DataFrame){ + implicit class DataframeSyntax(self: DataFrame) { def unsafeTyped[T: TypedEncoder]: TypedDataset[T] = TypedDataset.createUnsafe(self) } } diff --git a/dataset/src/main/scala/frameless/InjectionEnum.scala b/dataset/src/main/scala/frameless/InjectionEnum.scala index 4ed1006e3..243cb91ac 100644 --- a/dataset/src/main/scala/frameless/InjectionEnum.scala +++ b/dataset/src/main/scala/frameless/InjectionEnum.scala @@ -16,10 +16,10 @@ trait InjectionEnum { implicit def coproductInjectionEnum[H, T <: Coproduct]( implicit - typeable: Typeable[H] , + typeable: Typeable[H], gen: Generic.Aux[H, HNil], tInjectionEnum: Injection[T, String] - ): Injection[H :+: T, String] = { + ): Injection[H :+: T, String] = { val dataConstructorName = typeable.describe.takeWhile(_ != '.') Injection( @@ -40,7 +40,7 @@ trait InjectionEnum { implicit gen: Generic.Aux[A, R], rInjectionEnum: Injection[R, String] - ): Injection[A, String] = + ): Injection[A, String] = Injection( value => rInjectionEnum(gen.to(value)), name => gen.from(rInjectionEnum.invert(name)) diff --git a/dataset/src/main/scala/frameless/IsValueClass.scala b/dataset/src/main/scala/frameless/IsValueClass.scala index 78605c130..7d5a838e1 100644 --- a/dataset/src/main/scala/frameless/IsValueClass.scala +++ b/dataset/src/main/scala/frameless/IsValueClass.scala @@ -5,13 +5,15 @@ import shapeless.labelled.FieldType /** Evidence that `T` is a Value class */ @annotation.implicitNotFound(msg = "${T} is not a Value class") -final class IsValueClass[T] private() {} +final class IsValueClass[T] private () {} object IsValueClass { + /** Provides an evidence `A` is a Value class */ implicit def apply[A <: AnyVal, G <: ::[_, HNil], H <: ::[_ <: FieldType[_ <: Symbol, _], HNil]]( implicit - i0: LabelledGeneric.Aux[A, G], - i1: DropUnitValues.Aux[G, H]): IsValueClass[A] = new IsValueClass[A] + i0: LabelledGeneric.Aux[A, G], + i1: DropUnitValues.Aux[G, H] + ): IsValueClass[A] = new IsValueClass[A] } diff --git a/dataset/src/main/scala/frameless/Job.scala b/dataset/src/main/scala/frameless/Job.scala index 40931b8b4..85e92ed3e 100644 --- a/dataset/src/main/scala/frameless/Job.scala +++ b/dataset/src/main/scala/frameless/Job.scala @@ -3,6 +3,7 @@ package frameless import org.apache.spark.sql.SparkSession sealed abstract class Job[A](implicit spark: SparkSession) { self => + /** Runs a new Spark job. */ def run(): A @@ -32,7 +33,6 @@ sealed abstract class Job[A](implicit spark: SparkSession) { self => } } - object Job { def apply[A](a: => A)(implicit spark: SparkSession): Job[A] = new Job[A] { def run(): A = a diff --git a/dataset/src/main/scala/frameless/RecordEncoder.scala b/dataset/src/main/scala/frameless/RecordEncoder.scala index 7427d9de0..e0ecdd472 100644 --- a/dataset/src/main/scala/frameless/RecordEncoder.scala +++ b/dataset/src/main/scala/frameless/RecordEncoder.scala @@ -3,9 +3,7 @@ package frameless import org.apache.spark.sql.FramelessInternals import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.expressions.objects.{ - Invoke, NewInstance, UnwrapOption, WrapOption -} +import org.apache.spark.sql.catalyst.expressions.objects.{Invoke, NewInstance, UnwrapOption, WrapOption} import org.apache.spark.sql.types._ import shapeless._ @@ -30,25 +28,24 @@ trait RecordEncoderFields[T <: HList] extends Serializable { object RecordEncoderFields { - implicit def deriveRecordLast[K <: Symbol, H] - (implicit - key: Witness.Aux[K], - head: RecordFieldEncoder[H] - ): RecordEncoderFields[FieldType[K, H] :: HNil] = new RecordEncoderFields[FieldType[K, H] :: HNil] { - def value: List[RecordEncoderField] = fieldEncoder[K, H] :: Nil - } + implicit def deriveRecordLast[K <: Symbol, H](implicit + key: Witness.Aux[K], + head: RecordFieldEncoder[H] + ): RecordEncoderFields[FieldType[K, H] :: HNil] = new RecordEncoderFields[FieldType[K, H] :: HNil] { + def value: List[RecordEncoderField] = fieldEncoder[K, H] :: Nil + } - implicit def deriveRecordCons[K <: Symbol, H, T <: HList] - (implicit - key: Witness.Aux[K], - head: RecordFieldEncoder[H], - tail: RecordEncoderFields[T] - ): RecordEncoderFields[FieldType[K, H] :: T] = new RecordEncoderFields[FieldType[K, H] :: T] { - def value: List[RecordEncoderField] = - fieldEncoder[K, H] :: tail.value.map(x => x.copy(ordinal = x.ordinal + 1)) + implicit def deriveRecordCons[K <: Symbol, H, T <: HList](implicit + key: Witness.Aux[K], + head: RecordFieldEncoder[H], + tail: RecordEncoderFields[T] + ): RecordEncoderFields[FieldType[K, H] :: T] = new RecordEncoderFields[FieldType[K, H] :: T] { + def value: List[RecordEncoderField] = + fieldEncoder[K, H] :: tail.value.map(x => x.copy(ordinal = x.ordinal + 1)) } - private def fieldEncoder[K <: Symbol, H](implicit key: Witness.Aux[K], e: RecordFieldEncoder[H]): RecordEncoderField = RecordEncoderField(0, key.value.name, e.encoder) + private def fieldEncoder[K <: Symbol, H](implicit key: Witness.Aux[K], e: RecordFieldEncoder[H]): RecordEncoderField = + RecordEncoderField(0, key.value.name, e.encoder) } /** @@ -67,21 +64,19 @@ object NewInstanceExprs { def from(exprs: List[Expression]): Seq[Expression] = Nil } - implicit def deriveUnit[K <: Symbol, T <: HList] - (implicit - tail: NewInstanceExprs[T] - ): NewInstanceExprs[FieldType[K, Unit] :: T] = new NewInstanceExprs[FieldType[K, Unit] :: T] { - def from(exprs: List[Expression]): Seq[Expression] = - Literal.fromObject(()) +: tail.from(exprs) - } + implicit def deriveUnit[K <: Symbol, T <: HList](implicit + tail: NewInstanceExprs[T] + ): NewInstanceExprs[FieldType[K, Unit] :: T] = new NewInstanceExprs[FieldType[K, Unit] :: T] { + def from(exprs: List[Expression]): Seq[Expression] = + Literal.fromObject(()) +: tail.from(exprs) + } - implicit def deriveNonUnit[K <: Symbol, V, T <: HList] - (implicit - notUnit: V =:!= Unit, - tail: NewInstanceExprs[T] - ): NewInstanceExprs[FieldType[K, V] :: T] = new NewInstanceExprs[FieldType[K, V] :: T] { - def from(exprs: List[Expression]): Seq[Expression] = exprs.head +: tail.from(exprs.tail) - } + implicit def deriveNonUnit[K <: Symbol, V, T <: HList](implicit + notUnit: V =:!= Unit, + tail: NewInstanceExprs[T] + ): NewInstanceExprs[FieldType[K, V] :: T] = new NewInstanceExprs[FieldType[K, V] :: T] { + def from(exprs: List[Expression]): Seq[Expression] = exprs.head +: tail.from(exprs.tail) + } } /** @@ -101,85 +96,87 @@ object DropUnitValues { def apply(l: HNil): Out = HNil } - implicit def deriveUnit[K <: Symbol, T <: HList, OutT <: HList] - (implicit - dropUnitValues : DropUnitValues.Aux[T, OutT] - ): Aux[FieldType[K, Unit] :: T, OutT] = new DropUnitValues[FieldType[K, Unit] :: T] { - type Out = OutT - def apply(l : FieldType[K, Unit] :: T): Out = dropUnitValues(l.tail) - } + implicit def deriveUnit[K <: Symbol, T <: HList, OutT <: HList](implicit + dropUnitValues: DropUnitValues.Aux[T, OutT] + ): Aux[FieldType[K, Unit] :: T, OutT] = new DropUnitValues[FieldType[K, Unit] :: T] { + type Out = OutT + def apply(l: FieldType[K, Unit] :: T): Out = dropUnitValues(l.tail) + } - implicit def deriveNonUnit[K <: Symbol, V, T <: HList, OutH, OutT <: HList] - (implicit - nonUnit: V =:!= Unit, - dropUnitValues : DropUnitValues.Aux[T, OutT] - ): Aux[FieldType[K, V] :: T, FieldType[K, V] :: OutT] = new DropUnitValues[FieldType[K, V] :: T] { - type Out = FieldType[K, V] :: OutT - def apply(l : FieldType[K, V] :: T): Out = l.head :: dropUnitValues(l.tail) - } + implicit def deriveNonUnit[K <: Symbol, V, T <: HList, OutH, OutT <: HList](implicit + nonUnit: V =:!= Unit, + dropUnitValues: DropUnitValues.Aux[T, OutT] + ): Aux[FieldType[K, V] :: T, FieldType[K, V] :: OutT] = new DropUnitValues[FieldType[K, V] :: T] { + type Out = FieldType[K, V] :: OutT + def apply(l: FieldType[K, V] :: T): Out = l.head :: dropUnitValues(l.tail) + } } -class RecordEncoder[F, G <: HList, H <: HList] - (implicit - i0: LabelledGeneric.Aux[F, G], - i1: DropUnitValues.Aux[G, H], - i2: IsHCons[H], - fields: Lazy[RecordEncoderFields[H]], - newInstanceExprs: Lazy[NewInstanceExprs[G]], - classTag: ClassTag[F] - ) extends TypedEncoder[F] { - def nullable: Boolean = false - - def jvmRepr: DataType = FramelessInternals.objectTypeFor[F] - - def catalystRepr: DataType = { - val structFields = fields.value.value.map { field => - StructField( - name = field.name, - dataType = field.encoder.catalystRepr, - nullable = field.encoder.nullable, - metadata = Metadata.empty - ) - } +class RecordEncoder[F, G <: HList, H <: HList](implicit + i0: LabelledGeneric.Aux[F, G], + i1: DropUnitValues.Aux[G, H], + i2: IsHCons[H], + fields: Lazy[RecordEncoderFields[H]], + newInstanceExprs: Lazy[NewInstanceExprs[G]], + classTag: ClassTag[F] +) extends TypedEncoder[F] { + def nullable: Boolean = false + + def jvmRepr: DataType = FramelessInternals.objectTypeFor[F] + + def catalystRepr: DataType = { + val structFields = fields.value.value.map { field => + StructField( + name = field.name, + dataType = field.encoder.catalystRepr, + nullable = field.encoder.nullable, + metadata = Metadata.empty + ) + } - StructType(structFields) + StructType(structFields) + } + + def toCatalyst(path: Expression): Expression = { + val nameExprs = fields.value.value.map { field => + Literal(field.name) } - def toCatalyst(path: Expression): Expression = { - val nameExprs = fields.value.value.map { field => - Literal(field.name) - } + val valueExprs = fields.value.value.map { field => + val fieldPath = Invoke(path, field.name, field.encoder.jvmRepr, Nil) + field.encoder.toCatalyst(fieldPath) + } - val valueExprs = fields.value.value.map { field => - val fieldPath = Invoke(path, field.name, field.encoder.jvmRepr, Nil) - field.encoder.toCatalyst(fieldPath) - } + // the way exprs are encoded in CreateNamedStruct + val exprs = nameExprs.zip(valueExprs).flatMap { + case (nameExpr, valueExpr) => nameExpr :: valueExpr :: Nil + } - // the way exprs are encoded in CreateNamedStruct - val exprs = nameExprs.zip(valueExprs).flatMap { - case (nameExpr, valueExpr) => nameExpr :: valueExpr :: Nil - } + val createExpr = CreateNamedStruct(exprs) + val nullExpr = Literal.create(null, createExpr.dataType) - val createExpr = CreateNamedStruct(exprs) - val nullExpr = Literal.create(null, createExpr.dataType) + If(IsNull(path), nullExpr, createExpr) + } - If(IsNull(path), nullExpr, createExpr) + def fromCatalyst(path: Expression): Expression = { + val exprs = fields.value.value.map { field => + field.encoder.fromCatalyst( + GetStructField(path, field.ordinal, Some(field.name)) + ) } - def fromCatalyst(path: Expression): Expression = { - val exprs = fields.value.value.map { field => - field.encoder.fromCatalyst( - GetStructField(path, field.ordinal, Some(field.name))) - } - - val newArgs = newInstanceExprs.value.from(exprs) - val newExpr = NewInstance( - classTag.runtimeClass, newArgs, jvmRepr, propagateNull = true) + val newArgs = newInstanceExprs.value.from(exprs) + val newExpr = NewInstance( + classTag.runtimeClass, + newArgs, + jvmRepr, + propagateNull = true + ) - val nullExpr = Literal.create(null, jvmRepr) + val nullExpr = Literal.create(null, jvmRepr) - If(IsNull(path), nullExpr, newExpr) - } + If(IsNull(path), nullExpr, newExpr) + } } final class RecordFieldEncoder[T]( @@ -198,59 +195,70 @@ object RecordFieldEncoder extends RecordFieldEncoderLowPriority { * @tparam K the key type for the fields * @tparam V the inner value type */ - implicit def optionValueClass[F : IsValueClass, G <: ::[_, HNil], H <: ::[_ <: FieldType[_ <: Symbol, _], HNil], K <: Symbol, V, KS <: ::[_ <: Symbol, HNil]] - (implicit - i0: LabelledGeneric.Aux[F, G], - i1: DropUnitValues.Aux[G, H], - i2: IsHCons.Aux[H, _ <: FieldType[K, V], HNil], - i3: Keys.Aux[H, KS], - i4: IsHCons.Aux[KS, K, HNil], - i5: TypedEncoder[V], - i6: ClassTag[F] - ): RecordFieldEncoder[Option[F]] = { - val fieldName = i4.head(i3()).name - val innerJvmRepr = ObjectType(i6.runtimeClass) - - val catalyst: Expression => Expression = { path => - val value = UnwrapOption(innerJvmRepr, path) - val javaValue = Invoke(value, fieldName, i5.jvmRepr, Nil) - - i5.toCatalyst(javaValue) - } + implicit def optionValueClass[F: IsValueClass, + G <: ::[ + _, + HNil + ], + H <: ::[_ <: FieldType[_ <: Symbol, _], HNil], + K <: Symbol, + V, + KS <: ::[_ <: Symbol, HNil] + ](implicit + i0: LabelledGeneric.Aux[F, G], + i1: DropUnitValues.Aux[G, H], + i2: IsHCons.Aux[H, _ <: FieldType[K, V], HNil], + i3: Keys.Aux[H, KS], + i4: IsHCons.Aux[KS, K, HNil], + i5: TypedEncoder[V], + i6: ClassTag[F] + ): RecordFieldEncoder[Option[F]] = { + val fieldName = i4.head(i3()).name + val innerJvmRepr = ObjectType(i6.runtimeClass) + + val catalyst: Expression => Expression = { path => + val value = UnwrapOption(innerJvmRepr, path) + val javaValue = Invoke(value, fieldName, i5.jvmRepr, Nil) + + i5.toCatalyst(javaValue) + } - val fromCatalyst: Expression => Expression = { path => - val javaValue = i5.fromCatalyst(path) - val value = NewInstance(i6.runtimeClass, Seq(javaValue), innerJvmRepr) + val fromCatalyst: Expression => Expression = { path => + val javaValue = i5.fromCatalyst(path) + val value = NewInstance(i6.runtimeClass, Seq(javaValue), innerJvmRepr) - WrapOption(value, innerJvmRepr) - } + WrapOption(value, innerJvmRepr) + } - val jvmr = ObjectType(classOf[Option[F]]) + val jvmr = ObjectType(classOf[Option[F]]) - new RecordFieldEncoder[Option[F]]( - encoder = new TypedEncoder[Option[F]] { - val nullable = true + new RecordFieldEncoder[Option[F]]( + encoder = new TypedEncoder[Option[F]] { + val nullable = true - val jvmRepr = jvmr + val jvmRepr = jvmr - @inline def catalystRepr: DataType = i5.catalystRepr + @inline def catalystRepr: DataType = i5.catalystRepr - def fromCatalyst(path: Expression): Expression = { - val javaValue = i5.fromCatalyst(path) - val value = NewInstance( - i6.runtimeClass, Seq(javaValue), innerJvmRepr) + def fromCatalyst(path: Expression): Expression = { + val javaValue = i5.fromCatalyst(path) + val value = NewInstance( + i6.runtimeClass, + Seq(javaValue), + innerJvmRepr + ) - WrapOption(value, innerJvmRepr) - } + WrapOption(value, innerJvmRepr) + } - def toCatalyst(path: Expression): Expression = catalyst(path) + def toCatalyst(path: Expression): Expression = catalyst(path) - override def toString: String = s"RecordFieldEncoder.optionValueClass[${i6.runtimeClass.getName}]('${fieldName}', $i5)" - }, - jvmRepr = jvmr, - fromCatalyst = fromCatalyst, - toCatalyst = catalyst - ) + override def toString: String = s"RecordFieldEncoder.optionValueClass[${i6.runtimeClass.getName}]('${fieldName}', $i5)" + }, + jvmRepr = jvmr, + fromCatalyst = fromCatalyst, + toCatalyst = catalyst + ) } /** @@ -259,50 +267,59 @@ object RecordFieldEncoder extends RecordFieldEncoderLowPriority { * @tparam H the single field of the value class (with guarantee it's not a `Unit` value) * @tparam V the inner value type */ - implicit def valueClass[F : IsValueClass, G <: ::[_, HNil], H <: ::[_ <: FieldType[_ <: Symbol, _], HNil], K <: Symbol, V, KS <: ::[_ <: Symbol, HNil]] - (implicit - i0: LabelledGeneric.Aux[F, G], - i1: DropUnitValues.Aux[G, H], - i2: IsHCons.Aux[H, _ <: FieldType[K, V], HNil], - i3: Keys.Aux[H, KS], - i4: IsHCons.Aux[KS, K, HNil], - i5: TypedEncoder[V], - i6: ClassTag[F] - ): RecordFieldEncoder[F] = { - val cls = i6.runtimeClass - val jvmr = i5.jvmRepr - val fieldName = i4.head(i3()).name - - new RecordFieldEncoder[F]( - encoder = new TypedEncoder[F] { - def nullable = i5.nullable - - def jvmRepr = jvmr - - def catalystRepr: DataType = i5.catalystRepr - - def fromCatalyst(path: Expression): Expression = - i5.fromCatalyst(path) - - @inline def toCatalyst(path: Expression): Expression = - i5.toCatalyst(path) - - override def toString: String = s"RecordFieldEncoder.valueClass[${cls.getName}]('${fieldName}', ${i5})" - }, - jvmRepr = FramelessInternals.objectTypeFor[F], - fromCatalyst = { expr: Expression => - NewInstance( - i6.runtimeClass, - i5.fromCatalyst(expr) :: Nil, - ObjectType(i6.runtimeClass)) - }, - toCatalyst = { expr: Expression => - i5.toCatalyst(Invoke(expr, fieldName, jvmr)) - } - ) + implicit def valueClass[F: IsValueClass, + G <: ::[ + _, + HNil + ], + H <: ::[_ <: FieldType[_ <: Symbol, _], HNil], + K <: Symbol, + V, + KS <: ::[_ <: Symbol, HNil] + ](implicit + i0: LabelledGeneric.Aux[F, G], + i1: DropUnitValues.Aux[G, H], + i2: IsHCons.Aux[H, _ <: FieldType[K, V], HNil], + i3: Keys.Aux[H, KS], + i4: IsHCons.Aux[KS, K, HNil], + i5: TypedEncoder[V], + i6: ClassTag[F] + ): RecordFieldEncoder[F] = { + val cls = i6.runtimeClass + val jvmr = i5.jvmRepr + val fieldName = i4.head(i3()).name + + new RecordFieldEncoder[F]( + encoder = new TypedEncoder[F] { + def nullable = i5.nullable + + def jvmRepr = jvmr + + def catalystRepr: DataType = i5.catalystRepr + + def fromCatalyst(path: Expression): Expression = + i5.fromCatalyst(path) + + @inline def toCatalyst(path: Expression): Expression = + i5.toCatalyst(path) + + override def toString: String = s"RecordFieldEncoder.valueClass[${cls.getName}]('${fieldName}', ${i5})" + }, + jvmRepr = FramelessInternals.objectTypeFor[F], + fromCatalyst = { expr: Expression => + NewInstance( + i6.runtimeClass, + i5.fromCatalyst(expr) :: Nil, + ObjectType(i6.runtimeClass) + ) + }, + toCatalyst = { expr: Expression => + i5.toCatalyst(Invoke(expr, fieldName, jvmr)) + } + ) } } -private[frameless] sealed trait RecordFieldEncoderLowPriority { +sealed private[frameless] trait RecordFieldEncoderLowPriority { implicit def apply[T](implicit e: TypedEncoder[T]): RecordFieldEncoder[T] = new RecordFieldEncoder[T](e, e.jvmRepr, e.fromCatalyst, e.toCatalyst) } diff --git a/dataset/src/main/scala/frameless/TypedColumn.scala b/dataset/src/main/scala/frameless/TypedColumn.scala index 2888d8608..4140cabe8 100644 --- a/dataset/src/main/scala/frameless/TypedColumn.scala +++ b/dataset/src/main/scala/frameless/TypedColumn.scala @@ -1,16 +1,16 @@ package frameless -import frameless.functions.{ litAggr, lit => flit } +import frameless.functions.{lit => flit, litAggr} import frameless.syntax._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.types.DecimalType -import org.apache.spark.sql.{ Column, FramelessInternals } +import org.apache.spark.sql.{Column, FramelessInternals} // Spark 4 added org.apache.spark.sql.catalyst.expressions.With, which the wildcard import // above would otherwise bind in preference to frameless.With. Alias frameless.With so its // references resolve consistently on every supported Spark version. -import frameless.{ With => FWith } +import frameless.{With => FWith} import shapeless._ import shapeless.ops.record.Selector @@ -30,18 +30,17 @@ sealed trait UntypedExpression[T] { * Expression used in `select`-like constructions. */ sealed class TypedColumn[T, U]( - expr: Expression - )(implicit - val uenc: TypedEncoder[U]) + expr: Expression +)(implicit val uenc: TypedEncoder[U]) extends AbstractTypedColumn[T, U](expr) { type ThisType[A, B] = TypedColumn[A, B] def this( - column: Column - )(implicit - uencoder: TypedEncoder[U] - ) = + column: Column + )(implicit + uencoder: TypedEncoder[U] + ) = this(FramelessInternals.expr(column)) override def typed[W, U1: TypedEncoder](c: Column): TypedColumn[W, U1] = @@ -54,18 +53,17 @@ sealed class TypedColumn[T, U]( * Expression used in `agg`-like constructions. */ sealed class TypedAggregate[T, U]( - expr: Expression - )(implicit - val uenc: TypedEncoder[U]) + expr: Expression +)(implicit val uenc: TypedEncoder[U]) extends AbstractTypedColumn[T, U](expr) { type ThisType[A, B] = TypedAggregate[A, B] def this( - column: Column - )(implicit - uencoder: TypedEncoder[U] - ) = { + column: Column + )(implicit + uencoder: TypedEncoder[U] + ) = { this(FramelessInternals.expr(column)) } @@ -88,9 +86,8 @@ sealed class TypedAggregate[T, U]( * @tparam U type of column */ abstract class AbstractTypedColumn[T, U]( - val expr: Expression - )(implicit - val uencoder: TypedEncoder[U]) + val expr: Expression +)(implicit val uencoder: TypedEncoder[U]) extends UntypedExpression[T] { self => type ThisType[A, B] <: AbstractTypedColumn[A, B] @@ -109,10 +106,10 @@ abstract class AbstractTypedColumn[T, U]( trait Mapper[X] { def map[G, OutputType[_, _]]( - u: ThisType[T, X] => OutputType[T, G] - )(implicit - ev: OutputType[T, G] <:< AbstractTypedColumn[T, G] - ): OutputType[T, Option[G]] = { + u: ThisType[T, X] => OutputType[T, G] + )(implicit + ev: OutputType[T, G] <:< AbstractTypedColumn[T, G] + ): OutputType[T, Option[G]] = { u(self.asInstanceOf[ThisType[T, X]]) .asInstanceOf[OutputType[T, Option[G]]] } @@ -129,18 +126,17 @@ abstract class AbstractTypedColumn[T, U]( * }}} */ def opt[X]( - implicit - x: U <:< Option[X] - ): Mapper[X] = new Mapper[X] {} + implicit x: U <:< Option[X] + ): Mapper[X] = new Mapper[X] {} /** Fall back to an untyped Column */ def untyped: Column = FramelessInternals.column(expr) private def equalsTo[TT, W]( - other: ThisType[TT, U] - )(implicit - w: FWith.Aux[T, TT, W] - ): ThisType[W, Boolean] = typed { + other: ThisType[TT, U] + )(implicit + w: FWith.Aux[T, TT, W] + ): ThisType[W, Boolean] = typed { if (uencoder.nullable) EqualNullSafe(self.expr, other.expr) else EqualTo(self.expr, other.expr) } @@ -175,10 +171,10 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def ===[TT, W]( - other: ThisType[TT, U] - )(implicit - w: FWith.Aux[T, TT, W] - ): ThisType[W, Boolean] = + other: ThisType[TT, U] + )(implicit + w: FWith.Aux[T, TT, W] + ): ThisType[W, Boolean] = equalsTo(other) /** @@ -191,10 +187,10 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def =!=[TT, W]( - other: ThisType[TT, U] - )(implicit - w: FWith.Aux[T, TT, W] - ): ThisType[W, Boolean] = + other: ThisType[TT, U] + )(implicit + w: FWith.Aux[T, TT, W] + ): ThisType[W, Boolean] = typed(Not(equalsTo(other).expr)) /** @@ -214,9 +210,8 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def isNone( - implicit - i0: U <:< Option[_] - ): ThisType[T, Boolean] = + implicit i0: U <:< Option[_] + ): ThisType[T, Boolean] = typed(IsNull(expr)) /** @@ -225,9 +220,8 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def isNotNone( - implicit - i0: U <:< Option[_] - ): ThisType[T, Boolean] = + implicit i0: U <:< Option[_] + ): ThisType[T, Boolean] = typed(IsNotNull(expr)) /** @@ -236,9 +230,8 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def isNaN( - implicit - n: CatalystNaN[U] - ): ThisType[T, Boolean] = + implicit n: CatalystNaN[U] + ): ThisType[T, Boolean] = typed(self.untyped.isNaN) /** @@ -250,10 +243,10 @@ abstract class AbstractTypedColumn[T, U]( * }}} */ def isSome[V]( - exists: ThisType[T, V] => ThisType[T, Boolean] - )(implicit - i0: U <:< Option[V] - ): ThisType[T, Boolean] = someOr[V](exists, false) + exists: ThisType[T, V] => ThisType[T, Boolean] + )(implicit + i0: U <:< Option[V] + ): ThisType[T, Boolean] = someOr[V](exists, false) /** * True if the value for this optional column `exists` as expected, @@ -264,17 +257,17 @@ abstract class AbstractTypedColumn[T, U]( * }}} */ def isSomeOrNone[V]( - exists: ThisType[T, V] => ThisType[T, Boolean] - )(implicit - i0: U <:< Option[V] - ): ThisType[T, Boolean] = someOr[V](exists, true) + exists: ThisType[T, V] => ThisType[T, Boolean] + )(implicit + i0: U <:< Option[V] + ): ThisType[T, Boolean] = someOr[V](exists, true) private def someOr[V]( - exists: ThisType[T, V] => ThisType[T, Boolean], - default: Boolean - )(implicit - i0: U <:< Option[V] - ): ThisType[T, Boolean] = { + exists: ThisType[T, V] => ThisType[T, Boolean], + default: Boolean + )(implicit + i0: U <:< Option[V] + ): ThisType[T, Boolean] = { val defaultExpr = if (default) Literal.TrueLiteral else Literal.FalseLiteral typed(Coalesce(Seq(opt(i0).map(exists).expr, defaultExpr))) @@ -288,11 +281,11 @@ abstract class AbstractTypedColumn[T, U]( * }}} */ def getOrElse[TT, W, Out]( - default: ThisType[TT, Out] - )(implicit - i0: U =:= Option[Out], - i1: FWith.Aux[T, TT, W] - ): ThisType[W, Out] = + default: ThisType[TT, Out] + )(implicit + i0: U =:= Option[Out], + i1: FWith.Aux[T, TT, W] + ): ThisType[W, Out] = typed(Coalesce(Seq(expr, default.expr)))(default.uencoder) /** @@ -303,10 +296,10 @@ abstract class AbstractTypedColumn[T, U]( * }}} */ def getOrElse[Out: TypedEncoder]( - default: Out - )(implicit - i0: U =:= Option[Out] - ): ThisType[T, Out] = + default: Out + )(implicit + i0: U =:= Option[Out] + ): ThisType[T, Out] = getOrElse(lit[Out](default)) /** @@ -320,11 +313,11 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def plus[TT, W]( - other: ThisType[TT, U] - )(implicit - n: CatalystNumeric[U], - w: FWith.Aux[T, TT, W] - ): ThisType[W, U] = + other: ThisType[TT, U] + )(implicit + n: CatalystNumeric[U], + w: FWith.Aux[T, TT, W] + ): ThisType[W, U] = typed(self.untyped.plus(other.untyped)) /** @@ -337,11 +330,11 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def +[TT, W]( - other: ThisType[TT, U] - )(implicit - n: CatalystNumeric[U], - w: FWith.Aux[T, TT, W] - ): ThisType[W, U] = + other: ThisType[TT, U] + )(implicit + n: CatalystNumeric[U], + w: FWith.Aux[T, TT, W] + ): ThisType[W, U] = plus(other) /** @@ -355,10 +348,10 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def +( - u: U - )(implicit - n: CatalystNumeric[U] - ): ThisType[T, U] = + u: U + )(implicit + n: CatalystNumeric[U] + ): ThisType[T, U] = typed(self.untyped.plus(u)) /** @@ -371,9 +364,8 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def unary_!( - implicit - i0: U <:< Boolean - ): ThisType[T, Boolean] = + implicit i0: U <:< Boolean + ): ThisType[T, Boolean] = typed(!untyped) /** @@ -386,9 +378,8 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def unary_-( - implicit - n: CatalystNumeric[U] - ): ThisType[T, U] = + implicit n: CatalystNumeric[U] + ): ThisType[T, U] = typed(-self.untyped) /** @@ -401,11 +392,11 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def minus[TT, W]( - other: ThisType[TT, U] - )(implicit - n: CatalystNumeric[U], - w: FWith.Aux[T, TT, W] - ): ThisType[W, U] = + other: ThisType[TT, U] + )(implicit + n: CatalystNumeric[U], + w: FWith.Aux[T, TT, W] + ): ThisType[W, U] = typed(self.untyped.minus(other.untyped)) /** @@ -418,11 +409,11 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def -[TT, W]( - other: ThisType[TT, U] - )(implicit - n: CatalystNumeric[U], - w: FWith.Aux[T, TT, W] - ): ThisType[W, U] = + other: ThisType[TT, U] + )(implicit + n: CatalystNumeric[U], + w: FWith.Aux[T, TT, W] + ): ThisType[W, U] = minus(other) /** @@ -436,10 +427,10 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def -( - u: U - )(implicit - n: CatalystNumeric[U] - ): ThisType[T, U] = + u: U + )(implicit + n: CatalystNumeric[U] + ): ThisType[T, U] = typed(self.untyped.minus(u)) /** @@ -452,12 +443,12 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def multiply[TT, W]( - other: ThisType[TT, U] - )(implicit - n: CatalystNumeric[U], - w: FWith.Aux[T, TT, W], - t: ClassTag[U] - ): ThisType[W, U] = typed { + other: ThisType[TT, U] + )(implicit + n: CatalystNumeric[U], + w: FWith.Aux[T, TT, W], + t: ClassTag[U] + ): ThisType[W, U] = typed { if (t.runtimeClass == BigDecimal(0).getClass) { // That's apparently the only way to get sound multiplication. // See https://issues.apache.org/jira/browse/SPARK-22036 @@ -478,12 +469,12 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def *[TT, W]( - other: ThisType[TT, U] - )(implicit - n: CatalystNumeric[U], - w: FWith.Aux[T, TT, W], - t: ClassTag[U] - ): ThisType[W, U] = + other: ThisType[TT, U] + )(implicit + n: CatalystNumeric[U], + w: FWith.Aux[T, TT, W], + t: ClassTag[U] + ): ThisType[W, U] = multiply(other) /** @@ -496,10 +487,10 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def *( - u: U - )(implicit - n: CatalystNumeric[U] - ): ThisType[T, U] = + u: U + )(implicit + n: CatalystNumeric[U] + ): ThisType[T, U] = typed(self.untyped.multiply(u)) /** @@ -508,11 +499,11 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def mod[Out: TypedEncoder, TT, W]( - other: ThisType[TT, U] - )(implicit - n: CatalystNumeric[U], - w: FWith.Aux[T, TT, W] - ): ThisType[W, Out] = + other: ThisType[TT, U] + )(implicit + n: CatalystNumeric[U], + w: FWith.Aux[T, TT, W] + ): ThisType[W, Out] = typed(self.untyped.mod(other.untyped)) /** @@ -521,11 +512,11 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def %[TT, W]( - other: ThisType[TT, U] - )(implicit - n: CatalystNumeric[U], - w: FWith.Aux[T, TT, W] - ): ThisType[W, U] = + other: ThisType[TT, U] + )(implicit + n: CatalystNumeric[U], + w: FWith.Aux[T, TT, W] + ): ThisType[W, U] = mod(other) /** @@ -534,10 +525,10 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def %( - u: U - )(implicit - n: CatalystNumeric[U] - ): ThisType[T, U] = + u: U + )(implicit + n: CatalystNumeric[U] + ): ThisType[T, U] = typed(self.untyped.mod(u)) /** @@ -551,11 +542,11 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def divide[Out: TypedEncoder, TT, W]( - other: ThisType[TT, U] - )(implicit - n: CatalystDivisible[U, Out], - w: FWith.Aux[T, TT, W] - ): ThisType[W, Out] = + other: ThisType[TT, U] + )(implicit + n: CatalystDivisible[U, Out], + w: FWith.Aux[T, TT, W] + ): ThisType[W, Out] = typed(self.untyped.divide(other.untyped)) /** @@ -569,12 +560,12 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def /[Out, TT, W]( - other: ThisType[TT, U] - )(implicit - n: CatalystDivisible[U, Out], - e: TypedEncoder[Out], - w: FWith.Aux[T, TT, W] - ): ThisType[W, Out] = + other: ThisType[TT, U] + )(implicit + n: CatalystDivisible[U, Out], + e: TypedEncoder[Out], + w: FWith.Aux[T, TT, W] + ): ThisType[W, Out] = divide(other) /** @@ -588,10 +579,10 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def /( - u: U - )(implicit - n: CatalystNumeric[U] - ): ThisType[T, Double] = + u: U + )(implicit + n: CatalystNumeric[U] + ): ThisType[T, Double] = typed(self.untyped.divide(u)) /** @@ -600,9 +591,8 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def desc( - implicit - catalystOrdered: CatalystOrdered[U] - ): SortedTypedColumn[T, U] = + implicit catalystOrdered: CatalystOrdered[U] + ): SortedTypedColumn[T, U] = new SortedTypedColumn[T, U](untyped.desc) /** @@ -611,9 +601,8 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def asc( - implicit - catalystOrdered: CatalystOrdered[U] - ): SortedTypedColumn[T, U] = + implicit catalystOrdered: CatalystOrdered[U] + ): SortedTypedColumn[T, U] = new SortedTypedColumn[T, U](untyped.asc) /** @@ -626,10 +615,10 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def bitwiseAND( - u: U - )(implicit - n: CatalystBitwise[U] - ): ThisType[T, U] = + u: U + )(implicit + n: CatalystBitwise[U] + ): ThisType[T, U] = typed(self.untyped.bitwiseAND(u)) /** @@ -642,11 +631,11 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def bitwiseAND[TT, W]( - other: ThisType[TT, U] - )(implicit - n: CatalystBitwise[U], - w: FWith.Aux[T, TT, W] - ): ThisType[W, U] = + other: ThisType[TT, U] + )(implicit + n: CatalystBitwise[U], + w: FWith.Aux[T, TT, W] + ): ThisType[W, U] = typed(self.untyped.bitwiseAND(other.untyped)) /** @@ -659,10 +648,10 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def &( - u: U - )(implicit - n: CatalystBitwise[U] - ): ThisType[T, U] = + u: U + )(implicit + n: CatalystBitwise[U] + ): ThisType[T, U] = bitwiseAND(u) /** @@ -675,11 +664,11 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def &[TT, W]( - other: ThisType[TT, U] - )(implicit - n: CatalystBitwise[U], - w: FWith.Aux[T, TT, W] - ): ThisType[W, U] = + other: ThisType[TT, U] + )(implicit + n: CatalystBitwise[U], + w: FWith.Aux[T, TT, W] + ): ThisType[W, U] = bitwiseAND(other) /** @@ -692,10 +681,10 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def bitwiseOR( - u: U - )(implicit - n: CatalystBitwise[U] - ): ThisType[T, U] = + u: U + )(implicit + n: CatalystBitwise[U] + ): ThisType[T, U] = typed(self.untyped.bitwiseOR(u)) /** @@ -708,11 +697,11 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def bitwiseOR[TT, W]( - other: ThisType[TT, U] - )(implicit - n: CatalystBitwise[U], - w: FWith.Aux[T, TT, W] - ): ThisType[W, U] = + other: ThisType[TT, U] + )(implicit + n: CatalystBitwise[U], + w: FWith.Aux[T, TT, W] + ): ThisType[W, U] = typed(self.untyped.bitwiseOR(other.untyped)) /** @@ -725,10 +714,10 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def |( - u: U - )(implicit - n: CatalystBitwise[U] - ): ThisType[T, U] = + u: U + )(implicit + n: CatalystBitwise[U] + ): ThisType[T, U] = bitwiseOR(u) /** @@ -741,11 +730,11 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def |[TT, W]( - other: ThisType[TT, U] - )(implicit - n: CatalystBitwise[U], - w: FWith.Aux[T, TT, W] - ): ThisType[W, U] = + other: ThisType[TT, U] + )(implicit + n: CatalystBitwise[U], + w: FWith.Aux[T, TT, W] + ): ThisType[W, U] = bitwiseOR(other) /** @@ -758,10 +747,10 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def bitwiseXOR( - u: U - )(implicit - n: CatalystBitwise[U] - ): ThisType[T, U] = + u: U + )(implicit + n: CatalystBitwise[U] + ): ThisType[T, U] = typed(self.untyped.bitwiseXOR(u)) /** @@ -774,11 +763,11 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def bitwiseXOR[TT, W]( - other: ThisType[TT, U] - )(implicit - n: CatalystBitwise[U], - w: FWith.Aux[T, TT, W] - ): ThisType[W, U] = + other: ThisType[TT, U] + )(implicit + n: CatalystBitwise[U], + w: FWith.Aux[T, TT, W] + ): ThisType[W, U] = typed(self.untyped.bitwiseXOR(other.untyped)) /** @@ -791,10 +780,10 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def ^( - u: U - )(implicit - n: CatalystBitwise[U] - ): ThisType[T, U] = + u: U + )(implicit + n: CatalystBitwise[U] + ): ThisType[T, U] = bitwiseXOR(u) /** @@ -807,11 +796,11 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def ^[TT, W]( - other: ThisType[TT, U] - )(implicit - n: CatalystBitwise[U], - w: FWith.Aux[T, TT, W] - ): ThisType[W, U] = + other: ThisType[TT, U] + )(implicit + n: CatalystBitwise[U], + w: FWith.Aux[T, TT, W] + ): ThisType[W, U] = bitwiseXOR(other) /** @@ -821,9 +810,8 @@ abstract class AbstractTypedColumn[T, U]( * }}} */ def cast[A: TypedEncoder]( - implicit - c: CatalystCast[U, A] - ): ThisType[T, A] = + implicit c: CatalystCast[U, A] + ): ThisType[T, A] = typed(self.untyped.cast(TypedEncoder[A].catalystRepr)) /** @@ -836,11 +824,11 @@ abstract class AbstractTypedColumn[T, U]( * @param len length of the substring */ def substr( - startPos: Int, - len: Int - )(implicit - ev: U =:= String - ): ThisType[T, String] = + startPos: Int, + len: Int + )(implicit + ev: U =:= String + ): ThisType[T, String] = typed(self.untyped.substr(startPos, len)) /** @@ -853,13 +841,13 @@ abstract class AbstractTypedColumn[T, U]( * @param len expression for the length of the substring */ def substr[TT1, TT2, W1, W2]( - startPos: ThisType[TT1, Int], - len: ThisType[TT2, Int] - )(implicit - ev: U =:= String, - w1: FWith.Aux[T, TT1, W1], - w2: FWith.Aux[W1, TT2, W2] - ): ThisType[W2, String] = + startPos: ThisType[TT1, Int], + len: ThisType[TT2, Int] + )(implicit + ev: U =:= String, + w1: FWith.Aux[T, TT1, W1], + w2: FWith.Aux[W1, TT2, W2] + ): ThisType[W2, String] = typed(self.untyped.substr(startPos.untyped, len.untyped)) /** @@ -875,10 +863,10 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def like( - literal: String - )(implicit - ev: U =:= String - ): ThisType[T, Boolean] = + literal: String + )(implicit + ev: U =:= String + ): ThisType[T, Boolean] = typed(self.untyped.like(literal)) /** @@ -894,10 +882,10 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def rlike( - literal: String - )(implicit - ev: U =:= String - ): ThisType[T, Boolean] = + literal: String + )(implicit + ev: U =:= String + ): ThisType[T, Boolean] = typed(self.untyped.rlike(literal)) /** @@ -910,10 +898,10 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def contains( - other: String - )(implicit - ev: U =:= String - ): ThisType[T, Boolean] = + other: String + )(implicit + ev: U =:= String + ): ThisType[T, Boolean] = typed(self.untyped.contains(other)) /** @@ -926,11 +914,11 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def contains[TT, W]( - other: ThisType[TT, U] - )(implicit - ev: U =:= String, - w: FWith.Aux[T, TT, W] - ): ThisType[W, Boolean] = + other: ThisType[TT, U] + )(implicit + ev: U =:= String, + w: FWith.Aux[T, TT, W] + ): ThisType[W, Boolean] = typed(self.untyped.contains(other.untyped)) /** @@ -943,10 +931,10 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def startsWith( - other: String - )(implicit - ev: U =:= String - ): ThisType[T, Boolean] = + other: String + )(implicit + ev: U =:= String + ): ThisType[T, Boolean] = typed(self.untyped.startsWith(other)) /** @@ -959,11 +947,11 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def startsWith[TT, W]( - other: ThisType[TT, U] - )(implicit - ev: U =:= String, - w: FWith.Aux[T, TT, W] - ): ThisType[W, Boolean] = + other: ThisType[TT, U] + )(implicit + ev: U =:= String, + w: FWith.Aux[T, TT, W] + ): ThisType[W, Boolean] = typed(self.untyped.startsWith(other.untyped)) /** @@ -976,10 +964,10 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def endsWith( - other: String - )(implicit - ev: U =:= String - ): ThisType[T, Boolean] = + other: String + )(implicit + ev: U =:= String + ): ThisType[T, Boolean] = typed(self.untyped.endsWith(other)) /** @@ -992,11 +980,11 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def endsWith[TT, W]( - other: ThisType[TT, U] - )(implicit - ev: U =:= String, - w: FWith.Aux[T, TT, W] - ): ThisType[W, Boolean] = + other: ThisType[TT, U] + )(implicit + ev: U =:= String, + w: FWith.Aux[T, TT, W] + ): ThisType[W, Boolean] = typed(self.untyped.endsWith(other.untyped)) /** @@ -1006,10 +994,10 @@ abstract class AbstractTypedColumn[T, U]( * }}} */ def and[TT, W]( - other: ThisType[TT, Boolean] - )(implicit - w: FWith.Aux[T, TT, W] - ): ThisType[W, Boolean] = + other: ThisType[TT, Boolean] + )(implicit + w: FWith.Aux[T, TT, W] + ): ThisType[W, Boolean] = typed(self.untyped.and(other.untyped)) /** @@ -1019,10 +1007,10 @@ abstract class AbstractTypedColumn[T, U]( * }}} */ def &&[TT, W]( - other: ThisType[TT, Boolean] - )(implicit - w: FWith.Aux[T, TT, W] - ): ThisType[W, Boolean] = + other: ThisType[TT, Boolean] + )(implicit + w: FWith.Aux[T, TT, W] + ): ThisType[W, Boolean] = and(other) /** @@ -1032,10 +1020,10 @@ abstract class AbstractTypedColumn[T, U]( * }}} */ def or[TT, W]( - other: ThisType[TT, Boolean] - )(implicit - w: FWith.Aux[T, TT, W] - ): ThisType[W, Boolean] = + other: ThisType[TT, Boolean] + )(implicit + w: FWith.Aux[T, TT, W] + ): ThisType[W, Boolean] = typed(self.untyped.or(other.untyped)) /** @@ -1045,10 +1033,10 @@ abstract class AbstractTypedColumn[T, U]( * }}} */ def ||[TT, W]( - other: ThisType[TT, Boolean] - )(implicit - w: FWith.Aux[T, TT, W] - ): ThisType[W, Boolean] = + other: ThisType[TT, Boolean] + )(implicit + w: FWith.Aux[T, TT, W] + ): ThisType[W, Boolean] = or(other) /** @@ -1063,11 +1051,11 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def <[TT, W]( - other: ThisType[TT, U] - )(implicit - i0: CatalystOrdered[U], - w: FWith.Aux[T, TT, W] - ): ThisType[W, Boolean] = + other: ThisType[TT, U] + )(implicit + i0: CatalystOrdered[U], + w: FWith.Aux[T, TT, W] + ): ThisType[W, Boolean] = typed(self.untyped < other.untyped) /** @@ -1082,11 +1070,11 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def <=[TT, W]( - other: ThisType[TT, U] - )(implicit - i0: CatalystOrdered[U], - w: FWith.Aux[T, TT, W] - ): ThisType[W, Boolean] = + other: ThisType[TT, U] + )(implicit + i0: CatalystOrdered[U], + w: FWith.Aux[T, TT, W] + ): ThisType[W, Boolean] = typed(self.untyped <= other.untyped) /** @@ -1100,11 +1088,11 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def >[TT, W]( - other: ThisType[TT, U] - )(implicit - i0: CatalystOrdered[U], - w: FWith.Aux[T, TT, W] - ): ThisType[W, Boolean] = + other: ThisType[TT, U] + )(implicit + i0: CatalystOrdered[U], + w: FWith.Aux[T, TT, W] + ): ThisType[W, Boolean] = typed(self.untyped > other.untyped) /** @@ -1118,11 +1106,11 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def >=[TT, W]( - other: ThisType[TT, U] - )(implicit - i0: CatalystOrdered[U], - w: FWith.Aux[T, TT, W] - ): ThisType[W, Boolean] = + other: ThisType[TT, U] + )(implicit + i0: CatalystOrdered[U], + w: FWith.Aux[T, TT, W] + ): ThisType[W, Boolean] = typed(self.untyped >= other.untyped) /** @@ -1136,10 +1124,10 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def <( - u: U - )(implicit - i0: CatalystOrdered[U] - ): ThisType[T, Boolean] = + u: U + )(implicit + i0: CatalystOrdered[U] + ): ThisType[T, Boolean] = typed(self.untyped < lit(u)(self.uencoder).untyped) /** @@ -1153,10 +1141,10 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def <=( - u: U - )(implicit - i0: CatalystOrdered[U] - ): ThisType[T, Boolean] = + u: U + )(implicit + i0: CatalystOrdered[U] + ): ThisType[T, Boolean] = typed(self.untyped <= lit(u)(self.uencoder).untyped) /** @@ -1170,10 +1158,10 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def >( - u: U - )(implicit - i0: CatalystOrdered[U] - ): ThisType[T, Boolean] = + u: U + )(implicit + i0: CatalystOrdered[U] + ): ThisType[T, Boolean] = typed(self.untyped > lit(u)(self.uencoder).untyped) /** @@ -1187,10 +1175,10 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def >=( - u: U - )(implicit - i0: CatalystOrdered[U] - ): ThisType[T, Boolean] = + u: U + )(implicit + i0: CatalystOrdered[U] + ): ThisType[T, Boolean] = typed(self.untyped >= lit(u)(self.uencoder).untyped) /** @@ -1204,10 +1192,10 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def isin( - values: U* - )(implicit - e: CatalystIsin[U] - ): ThisType[T, Boolean] = + values: U* + )(implicit + e: CatalystIsin[U] + ): ThisType[T, Boolean] = typed(self.untyped.isin(values: _*)) /** @@ -1218,11 +1206,11 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def between( - lowerBound: U, - upperBound: U - )(implicit - i0: CatalystOrdered[U] - ): ThisType[T, Boolean] = + lowerBound: U, + upperBound: U + )(implicit + i0: CatalystOrdered[U] + ): ThisType[T, Boolean] = typed( self.untyped.between( lit(lowerBound)(self.uencoder).untyped, @@ -1238,13 +1226,13 @@ abstract class AbstractTypedColumn[T, U]( * apache/spark */ def between[TT1, TT2, W1, W2]( - lowerBound: ThisType[TT1, U], - upperBound: ThisType[TT2, U] - )(implicit - i0: CatalystOrdered[U], - w0: FWith.Aux[T, TT1, W1], - w1: FWith.Aux[TT2, W1, W2] - ): ThisType[W2, Boolean] = + lowerBound: ThisType[TT1, U], + upperBound: ThisType[TT2, U] + )(implicit + i0: CatalystOrdered[U], + w0: FWith.Aux[T, TT1, W1], + w1: FWith.Aux[TT2, W1, W2] + ): ThisType[W2, Boolean] = typed(self.untyped.between(lowerBound.untyped, upperBound.untyped)) /** @@ -1254,26 +1242,25 @@ abstract class AbstractTypedColumn[T, U]( * @tparam V the type of the nested field */ def field[V]( - symbol: Witness.Lt[Symbol] - )(implicit - i0: TypedColumn.Exists[U, symbol.T, V], - i1: TypedEncoder[V] - ): ThisType[T, V] = + symbol: Witness.Lt[Symbol] + )(implicit + i0: TypedColumn.Exists[U, symbol.T, V], + i1: TypedEncoder[V] + ): ThisType[T, V] = typed(self.untyped.getField(symbol.value.name)) } sealed class SortedTypedColumn[T, U]( - val expr: Expression - )(implicit - val uencoder: TypedEncoder[U]) + val expr: Expression +)(implicit val uencoder: TypedEncoder[U]) extends UntypedExpression[T] { def this( - column: Column - )(implicit - e: TypedEncoder[U] - ) = { + column: Column + )(implicit + e: TypedEncoder[U] + ) = { this(FramelessInternals.expr(column)) } @@ -1283,8 +1270,8 @@ sealed class SortedTypedColumn[T, U]( object SortedTypedColumn { implicit def defaultAscending[T, U: CatalystOrdered]( - typedColumn: TypedColumn[T, U] - ): SortedTypedColumn[T, U] = + typedColumn: TypedColumn[T, U] + ): SortedTypedColumn[T, U] = new SortedTypedColumn[T, U](typedColumn.untyped.asc)(typedColumn.uencoder) object defaultAscendingPoly extends Poly1 { @@ -1309,32 +1296,31 @@ object TypedColumn { object ExistsMany { implicit def deriveCons[T, KH, KT <: HList, V0, V1]( - implicit - head: Exists[T, KH, V0], - tail: ExistsMany[V0, KT, V1] - ): ExistsMany[T, KH :: KT, V1] = + implicit + head: Exists[T, KH, V0], + tail: ExistsMany[V0, KT, V1] + ): ExistsMany[T, KH :: KT, V1] = new ExistsMany[T, KH :: KT, V1] {} implicit def deriveHNil[T, K, V]( - implicit - head: Exists[T, K, V] - ): ExistsMany[T, K :: HNil, V] = + implicit head: Exists[T, K, V] + ): ExistsMany[T, K :: HNil, V] = new ExistsMany[T, K :: HNil, V] {} } object Exists { def apply[T, V]( - column: Witness - )(implicit - e: Exists[T, column.T, V] - ): Exists[T, column.T, V] = e + column: Witness + )(implicit + e: Exists[T, column.T, V] + ): Exists[T, column.T, V] = e implicit def deriveRecord[T, H <: HList, K, V]( - implicit - i0: LabelledGeneric.Aux[T, H], - i1: Selector.Aux[H, K, V] - ): Exists[T, K, V] = new Exists[T, K, V] {} + implicit + i0: LabelledGeneric.Aux[T, H], + i1: Selector.Aux[H, K, V] + ): Exists[T, K, V] = new Exists[T, K, V] {} } /** diff --git a/dataset/src/main/scala/frameless/TypedColumnMacroImpl.scala b/dataset/src/main/scala/frameless/TypedColumnMacroImpl.scala index 23502ef3b..49bffdf87 100644 --- a/dataset/src/main/scala/frameless/TypedColumnMacroImpl.scala +++ b/dataset/src/main/scala/frameless/TypedColumnMacroImpl.scala @@ -5,9 +5,8 @@ import scala.reflect.macros.whitebox private[frameless] object TypedColumnMacroImpl { def applyImpl[T: c.WeakTypeTag, U: c.WeakTypeTag]( - c: whitebox.Context - )(x: c.Tree - ): c.Expr[TypedColumn[T, U]] = { + c: whitebox.Context + )(x: c.Tree): c.Expr[TypedColumn[T, U]] = { import c.universe._ val t = c.weakTypeOf[T] @@ -71,7 +70,7 @@ private[frameless] object TypedColumnMacroImpl { path(select, List.empty) match { case root :: tail - if (expectedRoot.forall(_ == root) && check(t, tail)) => { + if expectedRoot.forall(_ == root) && check(t, tail) => { val colPath = tail.mkString(".") c.Expr[TypedColumn[T, U]]( diff --git a/dataset/src/main/scala/frameless/TypedDataset.scala b/dataset/src/main/scala/frameless/TypedDataset.scala index 82a016a3a..6a7780bde 100644 --- a/dataset/src/main/scala/frameless/TypedDataset.scala +++ b/dataset/src/main/scala/frameless/TypedDataset.scala @@ -4,32 +4,15 @@ import java.util import frameless.functions.CatalystExplodableCollection import frameless.ops._ import org.apache.spark.rdd.RDD -import org.apache.spark.sql.{ - Column, - DataFrame, - Dataset, - FramelessInternals, - SparkSession -} -import org.apache.spark.sql.catalyst.expressions.{ - Attribute, - AttributeReference, - Literal -} -import org.apache.spark.sql.catalyst.plans.logical.{ Join, JoinHint } +import org.apache.spark.sql.{Column, DataFrame, Dataset, FramelessInternals, SparkSession} +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Literal} +import org.apache.spark.sql.catalyst.plans.logical.{Join, JoinHint} import org.apache.spark.sql.catalyst.plans.Inner import org.apache.spark.sql.types.StructType import shapeless._ import shapeless.labelled.FieldType -import shapeless.ops.hlist.{ - Diff, - IsHCons, - Mapper, - Prepend, - ToTraversable, - Tupler -} -import shapeless.ops.record.{ Keys, Modifier, Remover, Values } +import shapeless.ops.hlist.{Diff, IsHCons, Mapper, Prepend, ToTraversable, Tupler} +import shapeless.ops.record.{Keys, Modifier, Remover, Values} import scala.language.experimental.macros @@ -44,12 +27,11 @@ import scala.language.experimental.macros * http://www.apache.org/licenses/LICENSE-2.0 */ class TypedDataset[T] protected[frameless] ( - val dataset: Dataset[T] - )(implicit - val encoder: TypedEncoder[T]) + val dataset: Dataset[T] +)(implicit val encoder: TypedEncoder[T]) extends TypedDatasetForwarded[T] { self => - private implicit val spark: SparkSession = dataset.sparkSession + implicit private val spark: SparkSession = dataset.sparkSession /** * Aggregates on the entire Dataset without groups. @@ -80,9 +62,9 @@ class TypedDataset[T] protected[frameless] ( * apache/spark */ def agg[A, B]( - ca: TypedAggregate[T, A], - cb: TypedAggregate[T, B] - ): TypedDataset[(A, B)] = { + ca: TypedAggregate[T, A], + cb: TypedAggregate[T, B] + ): TypedDataset[(A, B)] = { implicit val (ea, eb) = (ca.uencoder, cb.uencoder) aggMany(ca, cb) } @@ -93,10 +75,10 @@ class TypedDataset[T] protected[frameless] ( * apache/spark */ def agg[A, B, C]( - ca: TypedAggregate[T, A], - cb: TypedAggregate[T, B], - cc: TypedAggregate[T, C] - ): TypedDataset[(A, B, C)] = { + ca: TypedAggregate[T, A], + cb: TypedAggregate[T, B], + cc: TypedAggregate[T, C] + ): TypedDataset[(A, B, C)] = { implicit val (ea, eb, ec) = (ca.uencoder, cb.uencoder, cc.uencoder) aggMany(ca, cb, cc) } @@ -107,11 +89,11 @@ class TypedDataset[T] protected[frameless] ( * apache/spark */ def agg[A, B, C, D]( - ca: TypedAggregate[T, A], - cb: TypedAggregate[T, B], - cc: TypedAggregate[T, C], - cd: TypedAggregate[T, D] - ): TypedDataset[(A, B, C, D)] = { + ca: TypedAggregate[T, A], + cb: TypedAggregate[T, B], + cc: TypedAggregate[T, C], + cd: TypedAggregate[T, D] + ): TypedDataset[(A, B, C, D)] = { implicit val (ea, eb, ec, ed) = (ca.uencoder, cb.uencoder, cc.uencoder, cd.uencoder) aggMany(ca, cb, cc, cd) @@ -125,13 +107,13 @@ class TypedDataset[T] protected[frameless] ( object aggMany extends ProductArgs { def applyProduct[U <: HList, Out0 <: HList, Out]( - columns: U - )(implicit - i0: AggregateTypes.Aux[T, U, Out0], - i1: ToTraversable.Aux[U, List, UntypedExpression[T]], - i2: Tupler.Aux[Out0, Out], - i3: TypedEncoder[Out] - ): TypedDataset[Out] = { + columns: U + )(implicit + i0: AggregateTypes.Aux[T, U, Out0], + i1: ToTraversable.Aux[U, List, UntypedExpression[T]], + i2: Tupler.Aux[Out0, Out], + i3: TypedEncoder[Out] + ): TypedDataset[Out] = { val underlyingColumns = columns.toList[UntypedExpression[T]] val cols: Seq[Column] = for { @@ -159,9 +141,9 @@ class TypedDataset[T] protected[frameless] ( /** Returns a new [[TypedDataset]] where each record has been mapped on to the specified type. */ def as[U]( - )(implicit - as: As[T, U] - ): TypedDataset[U] = { + )(implicit + as: As[T, U] + ): TypedDataset[U] = { implicit val uencoder = as.encoder TypedDataset.create(dataset.as[U](TypedExpressionEncoder[U])) } @@ -177,10 +159,10 @@ class TypedDataset[T] protected[frameless] ( * apache/spark */ def checkpoint[F[_]]( - eager: Boolean - )(implicit - F: SparkDelay[F] - ): F[TypedDataset[T]] = + eager: Boolean + )(implicit + F: SparkDelay[F] + ): F[TypedDataset[T]] = F.delay(TypedDataset.create[T](dataset.checkpoint(eager))) /** @@ -200,9 +182,8 @@ class TypedDataset[T] protected[frameless] ( * }}} */ def project[U]( - implicit - projector: SmartProject[T, U] - ): TypedDataset[U] = projector.apply(this) + implicit projector: SmartProject[T, U] + ): TypedDataset[U] = projector.apply(this) /** * Returns a new [[TypedDataset]] that contains the elements of both this and the `other` [[TypedDataset]] @@ -234,10 +215,10 @@ class TypedDataset[T] protected[frameless] ( * apache/spark */ def union[U: TypedEncoder]( - other: TypedDataset[U] - )(implicit - projector: SmartProject[U, T] - ): TypedDataset[T] = + other: TypedDataset[U] + )(implicit + projector: SmartProject[U, T] + ): TypedDataset[T] = TypedDataset.create(dataset.union(other.project[T].dataset)) /** @@ -259,9 +240,9 @@ class TypedDataset[T] protected[frameless] ( * Differs from `Dataset#count` by wrapping its result into an effect-suspending `F[_]`. */ def count[F[_]]( - )(implicit - F: SparkDelay[F] - ): F[Long] = + )(implicit + F: SparkDelay[F] + ): F[Long] = F.delay(dataset.count()) /** @@ -274,11 +255,11 @@ class TypedDataset[T] protected[frameless] ( * It is statically checked that column with such name exists and has type `A`. */ def apply[A]( - column: Witness.Lt[Symbol] - )(implicit - i0: TypedColumn.Exists[T, column.T, A], - i1: TypedEncoder[A] - ): TypedColumn[T, A] = col(column) + column: Witness.Lt[Symbol] + )(implicit + i0: TypedColumn.Exists[T, column.T, A], + i1: TypedEncoder[A] + ): TypedColumn[T, A] = col(column) /** * Returns `TypedColumn` of type `A` given its name. @@ -290,11 +271,11 @@ class TypedDataset[T] protected[frameless] ( * It is statically checked that column with such name exists and has type `A`. */ def col[A]( - column: Witness.Lt[Symbol] - )(implicit - i0: TypedColumn.Exists[T, column.T, A], - i1: TypedEncoder[A] - ): TypedColumn[T, A] = + column: Witness.Lt[Symbol] + )(implicit + i0: TypedColumn.Exists[T, column.T, A], + i1: TypedEncoder[A] + ): TypedColumn[T, A] = new TypedColumn[T, A]( dataset(column.value.name).as[A](TypedExpressionEncoder[A]) ) @@ -341,9 +322,8 @@ class TypedDataset[T] protected[frameless] ( * }}} */ def asJoinColValue( - implicit - i0: IsValueClass[T] - ): TypedColumn[T, T] = { + implicit i0: IsValueClass[T] + ): TypedColumn[T, T] = { import _root_.frameless.syntax._ dataset.col("value").typedColumn @@ -352,12 +332,12 @@ class TypedDataset[T] protected[frameless] ( object colMany extends SingletonProductArgs { def applyProduct[U <: HList, Out]( - columns: U - )(implicit - i0: TypedColumn.ExistsMany[T, U, Out], - i1: TypedEncoder[Out], - i2: ToTraversable.Aux[U, List, Symbol] - ): TypedColumn[T, Out] = { + columns: U + )(implicit + i0: TypedColumn.ExistsMany[T, U, Out], + i1: TypedEncoder[Out], + i2: ToTraversable.Aux[U, List, Symbol] + ): TypedColumn[T, Out] = { val names = columns.toList[Symbol].map(_.name) val colExpr = FramelessInternals.resolveExpr(dataset, names) new TypedColumn[T, Out](colExpr) @@ -372,11 +352,11 @@ class TypedDataset[T] protected[frameless] ( * String based aliases, which is obviously unsafe. */ def colRight[A]( - column: Witness.Lt[Symbol] - )(implicit - i0: TypedColumn.Exists[T, column.T, A], - i1: TypedEncoder[A] - ): TypedColumn[T, A] = + column: Witness.Lt[Symbol] + )(implicit + i0: TypedColumn.Exists[T, column.T, A], + i1: TypedEncoder[A] + ): TypedColumn[T, A] = new TypedColumn[T, A]( FramelessInternals.DisambiguateRight(col(column).expr) ) @@ -389,11 +369,11 @@ class TypedDataset[T] protected[frameless] ( * String based aliases, which is obviously unsafe. */ def colLeft[A]( - column: Witness.Lt[Symbol] - )(implicit - i0: TypedColumn.Exists[T, column.T, A], - i1: TypedEncoder[A] - ): TypedColumn[T, A] = + column: Witness.Lt[Symbol] + )(implicit + i0: TypedColumn.Exists[T, column.T, A], + i1: TypedEncoder[A] + ): TypedColumn[T, A] = new TypedColumn[T, A](FramelessInternals.DisambiguateLeft(col(column).expr)) /** @@ -405,9 +385,9 @@ class TypedDataset[T] protected[frameless] ( * Differs from `Dataset#collect` by wrapping its result into an effect-suspending `F[_]`. */ def collect[F[_]]( - )(implicit - F: SparkDelay[F] - ): F[Seq[T]] = + )(implicit + F: SparkDelay[F] + ): F[Seq[T]] = F.delay(dataset.collect().toSeq) /** @@ -416,9 +396,9 @@ class TypedDataset[T] protected[frameless] ( * Differs from `Dataset#first` by wrapping its result into an `Option` and an effect-suspending `F[_]`. */ def firstOption[F[_]]( - )(implicit - F: SparkDelay[F] - ): F[Option[T]] = + )(implicit + F: SparkDelay[F] + ): F[Option[T]] = F.delay { try { Option(dataset.first()) @@ -438,10 +418,10 @@ class TypedDataset[T] protected[frameless] ( * apache/spark */ def take[F[_]]( - num: Int - )(implicit - F: SparkDelay[F] - ): F[Seq[T]] = + num: Int + )(implicit + F: SparkDelay[F] + ): F[Seq[T]] = F.delay(dataset.take(num).toSeq) /** @@ -458,27 +438,27 @@ class TypedDataset[T] protected[frameless] ( * apache/spark */ def toLocalIterator[F[_]]( - )(implicit - F: SparkDelay[F] - ): F[util.Iterator[T]] = + )(implicit + F: SparkDelay[F] + ): F[util.Iterator[T]] = F.delay(dataset.toLocalIterator()) /** * Alias for firstOption(). */ def headOption[F[_]]( - )(implicit - F: SparkDelay[F] - ): F[Option[T]] = firstOption() + )(implicit + F: SparkDelay[F] + ): F[Option[T]] = firstOption() /** * Alias for take(). */ def head[F[_]]( - num: Int - )(implicit - F: SparkDelay[F] - ): F[Seq[T]] = take(num) + num: Int + )(implicit + F: SparkDelay[F] + ): F[Seq[T]] = take(num) // $COVERAGE-OFF$ /** @@ -520,11 +500,11 @@ class TypedDataset[T] protected[frameless] ( * apache/spark */ def show[F[_]]( - numRows: Int = 20, - truncate: Boolean = true - )(implicit - F: SparkDelay[F] - ): F[Unit] = + numRows: Int = 20, + truncate: Boolean = true + )(implicit + F: SparkDelay[F] + ): F[Unit] = F.delay(dataset.show(numRows, truncate)) /** @@ -547,10 +527,10 @@ class TypedDataset[T] protected[frameless] ( * Differs from `Dataset#foreach` by wrapping its result into an effect-suspending `F[_]`. */ def foreach[F[_]]( - func: T => Unit - )(implicit - F: SparkDelay[F] - ): F[Unit] = + func: T => Unit + )(implicit + F: SparkDelay[F] + ): F[Unit] = F.delay(dataset.foreach(func)) /** @@ -559,10 +539,10 @@ class TypedDataset[T] protected[frameless] ( * Differs from `Dataset#foreachPartition` by wrapping its result into an effect-suspending `F[_]`. */ def foreachPartition[F[_]]( - func: Iterator[T] => Unit - )(implicit - F: SparkDelay[F] - ): F[Unit] = + func: Iterator[T] => Unit + )(implicit + F: SparkDelay[F] + ): F[Unit] = F.delay(dataset.foreachPartition(func)) /** @@ -575,8 +555,8 @@ class TypedDataset[T] protected[frameless] ( * apache/spark */ def cube[K1]( - c1: TypedColumn[T, K1] - ): Cube1Ops[K1, T] = new Cube1Ops[K1, T](this, c1) + c1: TypedColumn[T, K1] + ): Cube1Ops[K1, T] = new Cube1Ops[K1, T](this, c1) /** * Create a multi-dimensional cube for the current [[TypedDataset]] using the specified columns, @@ -588,9 +568,9 @@ class TypedDataset[T] protected[frameless] ( * apache/spark */ def cube[K1, K2]( - c1: TypedColumn[T, K1], - c2: TypedColumn[T, K2] - ): Cube2Ops[K1, K2, T] = new Cube2Ops[K1, K2, T](this, c1, c2) + c1: TypedColumn[T, K1], + c2: TypedColumn[T, K2] + ): Cube2Ops[K1, K2, T] = new Cube2Ops[K1, K2, T](this, c1, c2) /** * Create a multi-dimensional cube for the current [[TypedDataset]] using the specified columns, @@ -628,12 +608,12 @@ class TypedDataset[T] protected[frameless] ( object cubeMany extends ProductArgs { def applyProduct[TK <: HList, K <: HList, KT]( - groupedBy: TK - )(implicit - i0: ColumnTypes.Aux[T, TK, K], - i1: Tupler.Aux[K, KT], - i2: ToTraversable.Aux[TK, List, UntypedExpression[T]] - ): CubeManyOps[T, TK, K, KT] = + groupedBy: TK + )(implicit + i0: ColumnTypes.Aux[T, TK, K], + i1: Tupler.Aux[K, KT], + i2: ToTraversable.Aux[TK, List, UntypedExpression[T]] + ): CubeManyOps[T, TK, K, KT] = new CubeManyOps[T, TK, K, KT](self, groupedBy) } @@ -644,8 +624,8 @@ class TypedDataset[T] protected[frameless] ( * apache/spark */ def groupBy[K1]( - c1: TypedColumn[T, K1] - ): GroupedBy1Ops[K1, T] = new GroupedBy1Ops[K1, T](this, c1) + c1: TypedColumn[T, K1] + ): GroupedBy1Ops[K1, T] = new GroupedBy1Ops[K1, T](this, c1) /** * Groups the [[TypedDataset]] using the specified columns, so that we can run aggregation on them. @@ -654,9 +634,9 @@ class TypedDataset[T] protected[frameless] ( * apache/spark */ def groupBy[K1, K2]( - c1: TypedColumn[T, K1], - c2: TypedColumn[T, K2] - ): GroupedBy2Ops[K1, K2, T] = new GroupedBy2Ops[K1, K2, T](this, c1, c2) + c1: TypedColumn[T, K1], + c2: TypedColumn[T, K2] + ): GroupedBy2Ops[K1, K2, T] = new GroupedBy2Ops[K1, K2, T](this, c1, c2) /** * Groups the [[TypedDataset]] using the specified columns, so that we can run aggregation on them. @@ -686,12 +666,12 @@ class TypedDataset[T] protected[frameless] ( object groupByMany extends ProductArgs { def applyProduct[TK <: HList, K <: HList, KT]( - groupedBy: TK - )(implicit - i0: ColumnTypes.Aux[T, TK, K], - i1: Tupler.Aux[K, KT], - i2: ToTraversable.Aux[TK, List, UntypedExpression[T]] - ): GroupedByManyOps[T, TK, K, KT] = + groupedBy: TK + )(implicit + i0: ColumnTypes.Aux[T, TK, K], + i1: Tupler.Aux[K, KT], + i2: ToTraversable.Aux[TK, List, UntypedExpression[T]] + ): GroupedByManyOps[T, TK, K, KT] = new GroupedByManyOps[T, TK, K, KT](self, groupedBy) } @@ -705,8 +685,8 @@ class TypedDataset[T] protected[frameless] ( * apache/spark */ def rollup[K1]( - c1: TypedColumn[T, K1] - ): Rollup1Ops[K1, T] = new Rollup1Ops[K1, T](this, c1) + c1: TypedColumn[T, K1] + ): Rollup1Ops[K1, T] = new Rollup1Ops[K1, T](this, c1) /** * Create a multi-dimensional rollup for the current [[TypedDataset]] using the specified columns, @@ -718,9 +698,9 @@ class TypedDataset[T] protected[frameless] ( * apache/spark */ def rollup[K1, K2]( - c1: TypedColumn[T, K1], - c2: TypedColumn[T, K2] - ): Rollup2Ops[K1, K2, T] = new Rollup2Ops[K1, K2, T](this, c1, c2) + c1: TypedColumn[T, K1], + c2: TypedColumn[T, K2] + ): Rollup2Ops[K1, K2, T] = new Rollup2Ops[K1, K2, T](this, c1, c2) /** * Create a multi-dimensional rollup for the current [[TypedDataset]] using the specified columns, @@ -756,21 +736,21 @@ class TypedDataset[T] protected[frameless] ( object rollupMany extends ProductArgs { def applyProduct[TK <: HList, K <: HList, KT]( - groupedBy: TK - )(implicit - i0: ColumnTypes.Aux[T, TK, K], - i1: Tupler.Aux[K, KT], - i2: ToTraversable.Aux[TK, List, UntypedExpression[T]] - ): RollupManyOps[T, TK, K, KT] = + groupedBy: TK + )(implicit + i0: ColumnTypes.Aux[T, TK, K], + i1: Tupler.Aux[K, KT], + i2: ToTraversable.Aux[TK, List, UntypedExpression[T]] + ): RollupManyOps[T, TK, K, KT] = new RollupManyOps[T, TK, K, KT](self, groupedBy) } /** Computes the cartesian project of `this` `Dataset` with the `other` `Dataset` */ def joinCross[U]( - other: TypedDataset[U] - )(implicit - e: TypedEncoder[(T, U)] - ): TypedDataset[(T, U)] = + other: TypedDataset[U] + )(implicit + e: TypedEncoder[(T, U)] + ): TypedDataset[(T, U)] = new TypedDataset( self.dataset .joinWith( @@ -786,11 +766,10 @@ class TypedDataset[T] protected[frameless] ( * returning a `Tuple2` for each pair where condition evaluates to true. */ def joinFull[U]( - other: TypedDataset[U] - )(condition: TypedColumn[T with U, Boolean] - )(implicit - e: TypedEncoder[(Option[T], Option[U])] - ): TypedDataset[(Option[T], Option[U])] = + other: TypedDataset[U] + )(condition: TypedColumn[T with U, Boolean])(implicit + e: TypedEncoder[(Option[T], Option[U])] + ): TypedDataset[(Option[T], Option[U])] = new TypedDataset( self.dataset .joinWith(other.dataset, condition.untyped, "full") @@ -804,11 +783,10 @@ class TypedDataset[T] protected[frameless] ( * returning a `Tuple2` for each pair where condition evaluates to true. */ def joinInner[U]( - other: TypedDataset[U] - )(condition: TypedColumn[T with U, Boolean] - )(implicit - e: TypedEncoder[(T, U)] - ): TypedDataset[(T, U)] = { + other: TypedDataset[U] + )(condition: TypedColumn[T with U, Boolean])(implicit + e: TypedEncoder[(T, U)] + ): TypedDataset[(T, U)] = { import FramelessInternals._ val leftPlan = logicalPlan(dataset) @@ -828,11 +806,10 @@ class TypedDataset[T] protected[frameless] ( * returning a `Tuple2` for each pair where condition evaluates to true. */ def joinLeft[U]( - other: TypedDataset[U] - )(condition: TypedColumn[T with U, Boolean] - )(implicit - e: TypedEncoder[(T, Option[U])] - ): TypedDataset[(T, Option[U])] = + other: TypedDataset[U] + )(condition: TypedColumn[T with U, Boolean])(implicit + e: TypedEncoder[(T, Option[U])] + ): TypedDataset[(T, Option[U])] = new TypedDataset( self.dataset .joinWith(other.dataset, condition.untyped, "left_outer") @@ -844,9 +821,8 @@ class TypedDataset[T] protected[frameless] ( * returning a `Tuple2` for each pair where condition evaluates to true. */ def joinLeftSemi[U]( - other: TypedDataset[U] - )(condition: TypedColumn[T with U, Boolean] - ): TypedDataset[T] = + other: TypedDataset[U] + )(condition: TypedColumn[T with U, Boolean]): TypedDataset[T] = new TypedDataset( self.dataset .join(other.dataset, condition.untyped, "leftsemi") @@ -858,9 +834,8 @@ class TypedDataset[T] protected[frameless] ( * returning a `Tuple2` for each pair where condition evaluates to true. */ def joinLeftAnti[U]( - other: TypedDataset[U] - )(condition: TypedColumn[T with U, Boolean] - ): TypedDataset[T] = + other: TypedDataset[U] + )(condition: TypedColumn[T with U, Boolean]): TypedDataset[T] = new TypedDataset( self.dataset .join(other.dataset, condition.untyped, "leftanti") @@ -872,11 +847,10 @@ class TypedDataset[T] protected[frameless] ( * returning a `Tuple2` for each pair where condition evaluates to true. */ def joinRight[U]( - other: TypedDataset[U] - )(condition: TypedColumn[T with U, Boolean] - )(implicit - e: TypedEncoder[(Option[T], U)] - ): TypedDataset[(Option[T], U)] = + other: TypedDataset[U] + )(condition: TypedColumn[T with U, Boolean])(implicit + e: TypedEncoder[(Option[T], U)] + ): TypedDataset[(Option[T], U)] = new TypedDataset( self.dataset .joinWith(other.dataset, condition.untyped, "right_outer") @@ -907,16 +881,16 @@ class TypedDataset[T] protected[frameless] ( * Takes a function from A => R and converts it to a UDF for TypedColumn[T, A] => TypedColumn[T, R]. */ def makeUDF[A: TypedEncoder, R: TypedEncoder]( - f: A => R - ): TypedColumn[T, A] => TypedColumn[T, R] = functions.udf(f) + f: A => R + ): TypedColumn[T, A] => TypedColumn[T, R] = functions.udf(f) /** * Takes a function from (A1, A2) => R and converts it to a UDF for * (TypedColumn[T, A1], TypedColumn[T, A2]) => TypedColumn[T, R]. */ def makeUDF[A1: TypedEncoder, A2: TypedEncoder, R: TypedEncoder]( - f: (A1, A2) => R - ): (TypedColumn[T, A1], TypedColumn[T, A2]) => TypedColumn[T, R] = + f: (A1, A2) => R + ): (TypedColumn[T, A1], TypedColumn[T, A2]) => TypedColumn[T, R] = functions.udf(f) /** @@ -924,12 +898,11 @@ class TypedDataset[T] protected[frameless] ( * (TypedColumn[T, A1], TypedColumn[T, A2], TypedColumn[T, A3]) => TypedColumn[T, R]. */ def makeUDF[ - A1: TypedEncoder, - A2: TypedEncoder, - A3: TypedEncoder, - R: TypedEncoder - ](f: (A1, A2, A3) => R - ): (TypedColumn[T, A1], TypedColumn[T, A2], TypedColumn[T, A3]) => TypedColumn[T, R] = + A1: TypedEncoder, + A2: TypedEncoder, + A3: TypedEncoder, + R: TypedEncoder + ](f: (A1, A2, A3) => R): (TypedColumn[T, A1], TypedColumn[T, A2], TypedColumn[T, A3]) => TypedColumn[T, R] = functions.udf(f) /** @@ -937,38 +910,36 @@ class TypedDataset[T] protected[frameless] ( * (TypedColumn[T, A1], TypedColumn[T, A2], TypedColumn[T, A3], TypedColumn[T, A4]) => TypedColumn[T, R]. */ def makeUDF[ - A1: TypedEncoder, - A2: TypedEncoder, - A3: TypedEncoder, - A4: TypedEncoder, - R: TypedEncoder - ](f: (A1, A2, A3, A4) => R - ): ( - TypedColumn[T, A1], - TypedColumn[T, A2], - TypedColumn[T, A3], - TypedColumn[T, A4] - ) => TypedColumn[T, R] = functions.udf(f) + A1: TypedEncoder, + A2: TypedEncoder, + A3: TypedEncoder, + A4: TypedEncoder, + R: TypedEncoder + ](f: (A1, A2, A3, A4) => R): ( + TypedColumn[T, A1], + TypedColumn[T, A2], + TypedColumn[T, A3], + TypedColumn[T, A4] + ) => TypedColumn[T, R] = functions.udf(f) /** * Takes a function from (A1, A2, A3, A4, A5) => R and converts it to a UDF for * (TypedColumn[T, A1], TypedColumn[T, A2], TypedColumn[T, A3], TypedColumn[T, A4], TypedColumn[T, A5]) => TypedColumn[T, R]. */ def makeUDF[ - A1: TypedEncoder, - A2: TypedEncoder, - A3: TypedEncoder, - A4: TypedEncoder, - A5: TypedEncoder, - R: TypedEncoder - ](f: (A1, A2, A3, A4, A5) => R - ): ( - TypedColumn[T, A1], - TypedColumn[T, A2], - TypedColumn[T, A3], - TypedColumn[T, A4], - TypedColumn[T, A5] - ) => TypedColumn[T, R] = functions.udf(f) + A1: TypedEncoder, + A2: TypedEncoder, + A3: TypedEncoder, + A4: TypedEncoder, + A5: TypedEncoder, + R: TypedEncoder + ](f: (A1, A2, A3, A4, A5) => R): ( + TypedColumn[T, A1], + TypedColumn[T, A2], + TypedColumn[T, A3], + TypedColumn[T, A4], + TypedColumn[T, A5] + ) => TypedColumn[T, R] = functions.udf(f) /** * Type-safe projection from type T to Tuple1[A] @@ -977,8 +948,8 @@ class TypedDataset[T] protected[frameless] ( * }}} */ def select[A]( - ca: TypedColumn[T, A] - ): TypedDataset[A] = { + ca: TypedColumn[T, A] + ): TypedDataset[A] = { implicit val ea = ca.uencoder val tuple1: TypedDataset[Tuple1[A]] = selectMany(ca) @@ -1005,9 +976,9 @@ class TypedDataset[T] protected[frameless] ( * }}} */ def select[A, B]( - ca: TypedColumn[T, A], - cb: TypedColumn[T, B] - ): TypedDataset[(A, B)] = { + ca: TypedColumn[T, A], + cb: TypedColumn[T, B] + ): TypedDataset[(A, B)] = { implicit val (ea, eb) = (ca.uencoder, cb.uencoder) selectMany(ca, cb) } @@ -1019,10 +990,10 @@ class TypedDataset[T] protected[frameless] ( * }}} */ def select[A, B, C]( - ca: TypedColumn[T, A], - cb: TypedColumn[T, B], - cc: TypedColumn[T, C] - ): TypedDataset[(A, B, C)] = { + ca: TypedColumn[T, A], + cb: TypedColumn[T, B], + cc: TypedColumn[T, C] + ): TypedDataset[(A, B, C)] = { implicit val (ea, eb, ec) = (ca.uencoder, cb.uencoder, cc.uencoder) selectMany(ca, cb, cc) } @@ -1034,11 +1005,11 @@ class TypedDataset[T] protected[frameless] ( * }}} */ def select[A, B, C, D]( - ca: TypedColumn[T, A], - cb: TypedColumn[T, B], - cc: TypedColumn[T, C], - cd: TypedColumn[T, D] - ): TypedDataset[(A, B, C, D)] = { + ca: TypedColumn[T, A], + cb: TypedColumn[T, B], + cc: TypedColumn[T, C], + cd: TypedColumn[T, D] + ): TypedDataset[(A, B, C, D)] = { implicit val (ea, eb, ec, ed) = (ca.uencoder, cb.uencoder, cc.uencoder, cd.uencoder) selectMany(ca, cb, cc, cd) @@ -1051,12 +1022,12 @@ class TypedDataset[T] protected[frameless] ( * }}} */ def select[A, B, C, D, E]( - ca: TypedColumn[T, A], - cb: TypedColumn[T, B], - cc: TypedColumn[T, C], - cd: TypedColumn[T, D], - ce: TypedColumn[T, E] - ): TypedDataset[(A, B, C, D, E)] = { + ca: TypedColumn[T, A], + cb: TypedColumn[T, B], + cc: TypedColumn[T, C], + cd: TypedColumn[T, D], + ce: TypedColumn[T, E] + ): TypedDataset[(A, B, C, D, E)] = { implicit val (ea, eb, ec, ed, ee) = (ca.uencoder, cb.uencoder, cc.uencoder, cd.uencoder, ce.uencoder) @@ -1070,13 +1041,13 @@ class TypedDataset[T] protected[frameless] ( * }}} */ def select[A, B, C, D, E, F]( - ca: TypedColumn[T, A], - cb: TypedColumn[T, B], - cc: TypedColumn[T, C], - cd: TypedColumn[T, D], - ce: TypedColumn[T, E], - cf: TypedColumn[T, F] - ): TypedDataset[(A, B, C, D, E, F)] = { + ca: TypedColumn[T, A], + cb: TypedColumn[T, B], + cc: TypedColumn[T, C], + cd: TypedColumn[T, D], + ce: TypedColumn[T, E], + cf: TypedColumn[T, F] + ): TypedDataset[(A, B, C, D, E, F)] = { implicit val (ea, eb, ec, ed, ee, ef) = ( ca.uencoder, @@ -1097,14 +1068,14 @@ class TypedDataset[T] protected[frameless] ( * }}} */ def select[A, B, C, D, E, F, G]( - ca: TypedColumn[T, A], - cb: TypedColumn[T, B], - cc: TypedColumn[T, C], - cd: TypedColumn[T, D], - ce: TypedColumn[T, E], - cf: TypedColumn[T, F], - cg: TypedColumn[T, G] - ): TypedDataset[(A, B, C, D, E, F, G)] = { + ca: TypedColumn[T, A], + cb: TypedColumn[T, B], + cc: TypedColumn[T, C], + cd: TypedColumn[T, D], + ce: TypedColumn[T, E], + cf: TypedColumn[T, F], + cg: TypedColumn[T, G] + ): TypedDataset[(A, B, C, D, E, F, G)] = { implicit val (ea, eb, ec, ed, ee, ef, eg) = ( ca.uencoder, @@ -1126,15 +1097,15 @@ class TypedDataset[T] protected[frameless] ( * }}} */ def select[A, B, C, D, E, F, G, H]( - ca: TypedColumn[T, A], - cb: TypedColumn[T, B], - cc: TypedColumn[T, C], - cd: TypedColumn[T, D], - ce: TypedColumn[T, E], - cf: TypedColumn[T, F], - cg: TypedColumn[T, G], - ch: TypedColumn[T, H] - ): TypedDataset[(A, B, C, D, E, F, G, H)] = { + ca: TypedColumn[T, A], + cb: TypedColumn[T, B], + cc: TypedColumn[T, C], + cd: TypedColumn[T, D], + ce: TypedColumn[T, E], + cf: TypedColumn[T, F], + cg: TypedColumn[T, G], + ch: TypedColumn[T, H] + ): TypedDataset[(A, B, C, D, E, F, G, H)] = { implicit val (ea, eb, ec, ed, ee, ef, eg, eh) = ( ca.uencoder, @@ -1157,16 +1128,16 @@ class TypedDataset[T] protected[frameless] ( * }}} */ def select[A, B, C, D, E, F, G, H, I]( - ca: TypedColumn[T, A], - cb: TypedColumn[T, B], - cc: TypedColumn[T, C], - cd: TypedColumn[T, D], - ce: TypedColumn[T, E], - cf: TypedColumn[T, F], - cg: TypedColumn[T, G], - ch: TypedColumn[T, H], - ci: TypedColumn[T, I] - ): TypedDataset[(A, B, C, D, E, F, G, H, I)] = { + ca: TypedColumn[T, A], + cb: TypedColumn[T, B], + cc: TypedColumn[T, C], + cd: TypedColumn[T, D], + ce: TypedColumn[T, E], + cf: TypedColumn[T, F], + cg: TypedColumn[T, G], + ch: TypedColumn[T, H], + ci: TypedColumn[T, I] + ): TypedDataset[(A, B, C, D, E, F, G, H, I)] = { implicit val (ea, eb, ec, ed, ee, ef, eg, eh, ei) = ( ca.uencoder, @@ -1190,17 +1161,17 @@ class TypedDataset[T] protected[frameless] ( * }}} */ def select[A, B, C, D, E, F, G, H, I, J]( - ca: TypedColumn[T, A], - cb: TypedColumn[T, B], - cc: TypedColumn[T, C], - cd: TypedColumn[T, D], - ce: TypedColumn[T, E], - cf: TypedColumn[T, F], - cg: TypedColumn[T, G], - ch: TypedColumn[T, H], - ci: TypedColumn[T, I], - cj: TypedColumn[T, J] - ): TypedDataset[(A, B, C, D, E, F, G, H, I, J)] = { + ca: TypedColumn[T, A], + cb: TypedColumn[T, B], + cc: TypedColumn[T, C], + cd: TypedColumn[T, D], + ce: TypedColumn[T, E], + cf: TypedColumn[T, F], + cg: TypedColumn[T, G], + ch: TypedColumn[T, H], + ci: TypedColumn[T, I], + cj: TypedColumn[T, J] + ): TypedDataset[(A, B, C, D, E, F, G, H, I, J)] = { implicit val (ea, eb, ec, ed, ee, ef, eg, eh, ei, ej) = ( ca.uencoder, @@ -1220,13 +1191,13 @@ class TypedDataset[T] protected[frameless] ( object selectMany extends ProductArgs { def applyProduct[U <: HList, Out0 <: HList, Out]( - columns: U - )(implicit - i0: ColumnTypes.Aux[T, U, Out0], - i1: ToTraversable.Aux[U, List, UntypedExpression[T]], - i2: Tupler.Aux[Out0, Out], - i3: TypedEncoder[Out] - ): TypedDataset[Out] = { + columns: U + )(implicit + i0: ColumnTypes.Aux[T, U, Out0], + i1: ToTraversable.Aux[U, List, UntypedExpression[T]], + i2: Tupler.Aux[Out0, Out], + i3: TypedEncoder[Out] + ): TypedDataset[Out] = { val base = dataset .toDF() .select( @@ -1242,25 +1213,22 @@ class TypedDataset[T] protected[frameless] ( /** Sort each partition in the dataset using the columns selected. */ def sortWithinPartitions[A: CatalystOrdered]( - ca: SortedTypedColumn[T, A] - ): TypedDataset[T] = + ca: SortedTypedColumn[T, A] + ): TypedDataset[T] = sortWithinPartitionsMany(ca) /** Sort each partition in the dataset using the columns selected. */ def sortWithinPartitions[A: CatalystOrdered, B: CatalystOrdered]( - ca: SortedTypedColumn[T, A], - cb: SortedTypedColumn[T, B] - ): TypedDataset[T] = sortWithinPartitionsMany(ca, cb) + ca: SortedTypedColumn[T, A], + cb: SortedTypedColumn[T, B] + ): TypedDataset[T] = sortWithinPartitionsMany(ca, cb) /** Sort each partition in the dataset using the columns selected. */ def sortWithinPartitions[ - A: CatalystOrdered, - B: CatalystOrdered, - C: CatalystOrdered - ](ca: SortedTypedColumn[T, A], - cb: SortedTypedColumn[T, B], - cc: SortedTypedColumn[T, C] - ): TypedDataset[T] = sortWithinPartitionsMany(ca, cb, cc) + A: CatalystOrdered, + B: CatalystOrdered, + C: CatalystOrdered + ](ca: SortedTypedColumn[T, A], cb: SortedTypedColumn[T, B], cc: SortedTypedColumn[T, C]): TypedDataset[T] = sortWithinPartitionsMany(ca, cb, cc) /** * Sort each partition in the dataset by the given column expressions @@ -1272,11 +1240,11 @@ class TypedDataset[T] protected[frameless] ( object sortWithinPartitionsMany extends ProductArgs { def applyProduct[U <: HList, O <: HList]( - columns: U - )(implicit - i0: Mapper.Aux[SortedTypedColumn.defaultAscendingPoly.type, U, O], - i1: ToTraversable.Aux[O, List, SortedTypedColumn[T, _]] - ): TypedDataset[T] = { + columns: U + )(implicit + i0: Mapper.Aux[SortedTypedColumn.defaultAscendingPoly.type, U, O], + i1: ToTraversable.Aux[O, List, SortedTypedColumn[T, _]] + ): TypedDataset[T] = { val sorted = dataset .toDF() .sortWithinPartitions( @@ -1290,22 +1258,22 @@ class TypedDataset[T] protected[frameless] ( /** Orders the TypedDataset using the column selected. */ def orderBy[A: CatalystOrdered]( - ca: SortedTypedColumn[T, A] - ): TypedDataset[T] = + ca: SortedTypedColumn[T, A] + ): TypedDataset[T] = orderByMany(ca) /** Orders the TypedDataset using the columns selected. */ def orderBy[A: CatalystOrdered, B: CatalystOrdered]( - ca: SortedTypedColumn[T, A], - cb: SortedTypedColumn[T, B] - ): TypedDataset[T] = orderByMany(ca, cb) + ca: SortedTypedColumn[T, A], + cb: SortedTypedColumn[T, B] + ): TypedDataset[T] = orderByMany(ca, cb) /** Orders the TypedDataset using the columns selected. */ def orderBy[A: CatalystOrdered, B: CatalystOrdered, C: CatalystOrdered]( - ca: SortedTypedColumn[T, A], - cb: SortedTypedColumn[T, B], - cc: SortedTypedColumn[T, C] - ): TypedDataset[T] = orderByMany(ca, cb, cc) + ca: SortedTypedColumn[T, A], + cb: SortedTypedColumn[T, B], + cc: SortedTypedColumn[T, C] + ): TypedDataset[T] = orderByMany(ca, cb, cc) /** * Sort the dataset by any number of column expressions. @@ -1317,11 +1285,11 @@ class TypedDataset[T] protected[frameless] ( object orderByMany extends ProductArgs { def applyProduct[U <: HList, O <: HList]( - columns: U - )(implicit - i0: Mapper.Aux[SortedTypedColumn.defaultAscendingPoly.type, U, O], - i1: ToTraversable.Aux[O, List, SortedTypedColumn[T, _]] - ): TypedDataset[T] = { + columns: U + )(implicit + i0: Mapper.Aux[SortedTypedColumn.defaultAscendingPoly.type, U, O], + i1: ToTraversable.Aux[O, List, SortedTypedColumn[T, _]] + ): TypedDataset[T] = { val sorted = dataset .toDF() .orderBy(i0(columns).toList[SortedTypedColumn[T, _]].map(_.untyped): _*) @@ -1354,19 +1322,18 @@ class TypedDataset[T] protected[frameless] ( * @return */ def dropTupled[ - Out, - TRep <: HList, - Removed <: HList, - ValuesFromRemoved <: HList, - V - ](column: Witness.Lt[Symbol] - )(implicit - i0: LabelledGeneric.Aux[T, TRep], - i1: Remover.Aux[TRep, column.T, (V, Removed)], - i2: Values.Aux[Removed, ValuesFromRemoved], - i3: Tupler.Aux[ValuesFromRemoved, Out], - i4: TypedEncoder[Out] - ): TypedDataset[Out] = { + Out, + TRep <: HList, + Removed <: HList, + ValuesFromRemoved <: HList, + V + ](column: Witness.Lt[Symbol])(implicit + i0: LabelledGeneric.Aux[T, TRep], + i1: Remover.Aux[TRep, column.T, (V, Removed)], + i2: Values.Aux[Removed, ValuesFromRemoved], + i3: Tupler.Aux[ValuesFromRemoved, Out], + i4: TypedEncoder[Out] + ): TypedDataset[Out] = { val dropped = dataset .toDF() .drop(column.value.name) @@ -1391,9 +1358,8 @@ class TypedDataset[T] protected[frameless] ( * @see [[frameless.TypedDataset#project]] */ def drop[U]( - implicit - projector: SmartProject[T, U] - ): TypedDataset[U] = project[U] + implicit projector: SmartProject[T, U] + ): TypedDataset[U] = project[U] /** * Prepends a new column to the Dataset. @@ -1405,13 +1371,13 @@ class TypedDataset[T] protected[frameless] ( * }}} */ def withColumnTupled[A: TypedEncoder, H <: HList, FH <: HList, Out]( - ca: TypedColumn[T, A] - )(implicit - i0: Generic.Aux[T, H], - i1: Prepend.Aux[H, A :: HNil, FH], - i2: Tupler.Aux[FH, Out], - i3: TypedEncoder[Out] - ): TypedDataset[Out] = { + ca: TypedColumn[T, A] + )(implicit + i0: Generic.Aux[T, H], + i1: Prepend.Aux[H, A :: HNil, FH], + i2: Tupler.Aux[FH, Out], + i3: TypedEncoder[Out] + ): TypedDataset[Out] = { // Giving a random name to the new column (the proper name will be given by the Tuple-based encoder) val selected = dataset .toDF() @@ -1433,11 +1399,11 @@ class TypedDataset[T] protected[frameless] ( * @param i0 Evidence that a column with the correct type and name exists */ def withColumnReplaced[A]( - column: Witness.Lt[Symbol], - replacement: TypedColumn[T, A] - )(implicit - i0: TypedColumn.Exists[T, column.T, A] - ): TypedDataset[T] = { + column: Witness.Lt[Symbol], + replacement: TypedColumn[T, A] + )(implicit + i0: TypedColumn.Exists[T, column.T, A] + ): TypedDataset[T] = { val updated = dataset .toDF() .withColumn(column.value.name, replacement.untyped) @@ -1485,27 +1451,26 @@ class TypedDataset[T] protected[frameless] ( class WithColumnApply[U] { def apply[ - A, - TRep <: HList, - URep <: HList, - UKeys <: HList, - NewFields <: HList, - NewKeys <: HList, - NewKey <: Symbol - ](ca: TypedColumn[T, A] - )(implicit - i0: TypedEncoder[U], - i1: TypedEncoder[A], - i2: LabelledGeneric.Aux[T, TRep], - i3: LabelledGeneric.Aux[U, URep], - i4: Diff.Aux[TRep, URep, HNil], - i5: Diff.Aux[URep, TRep, NewFields], - i6: Keys.Aux[NewFields, NewKeys], - i7: IsHCons.Aux[NewKeys, NewKey, HNil], - i8: IsHCons.Aux[NewFields, FieldType[NewKey, A], HNil], - i9: Keys.Aux[URep, UKeys], - iA: ToTraversable.Aux[UKeys, Seq, Symbol] - ): TypedDataset[U] = { + A, + TRep <: HList, + URep <: HList, + UKeys <: HList, + NewFields <: HList, + NewKeys <: HList, + NewKey <: Symbol + ](ca: TypedColumn[T, A])(implicit + i0: TypedEncoder[U], + i1: TypedEncoder[A], + i2: LabelledGeneric.Aux[T, TRep], + i3: LabelledGeneric.Aux[U, URep], + i4: Diff.Aux[TRep, URep, HNil], + i5: Diff.Aux[URep, TRep, NewFields], + i6: Keys.Aux[NewFields, NewKeys], + i7: IsHCons.Aux[NewKeys, NewKey, HNil], + i8: IsHCons.Aux[NewFields, FieldType[NewKey, A], HNil], + i9: Keys.Aux[URep, UKeys], + iA: ToTraversable.Aux[UKeys, Seq, Symbol] + ): TypedDataset[U] = { val newColumnName = i7.head(i6()).name @@ -1535,24 +1500,23 @@ class TypedDataset[T] protected[frameless] ( * @param column the column we wish to explode */ def explode[ - A, - TRep <: HList, - V[_], - OutMod <: HList, - OutModValues <: HList, - Out - ](column: Witness.Lt[Symbol] - )(implicit - i0: TypedColumn.Exists[T, column.T, V[A]], - i1: TypedEncoder[A], - i2: CatalystExplodableCollection[V], - i3: LabelledGeneric.Aux[T, TRep], - i4: Modifier.Aux[TRep, column.T, V[A], A, OutMod], - i5: Values.Aux[OutMod, OutModValues], - i6: Tupler.Aux[OutModValues, Out], - i7: TypedEncoder[Out] - ): TypedDataset[Out] = { - import org.apache.spark.sql.functions.{ explode => sparkExplode } + A, + TRep <: HList, + V[_], + OutMod <: HList, + OutModValues <: HList, + Out + ](column: Witness.Lt[Symbol])(implicit + i0: TypedColumn.Exists[T, column.T, V[A]], + i1: TypedEncoder[A], + i2: CatalystExplodableCollection[V], + i3: LabelledGeneric.Aux[T, TRep], + i4: Modifier.Aux[TRep, column.T, V[A], A, OutMod], + i5: Values.Aux[OutMod, OutModValues], + i6: Tupler.Aux[OutModValues, Out], + i7: TypedEncoder[Out] + ): TypedDataset[Out] = { + import org.apache.spark.sql.functions.{explode => sparkExplode} val df = dataset.toDF() val trans = @@ -1576,29 +1540,24 @@ class TypedDataset[T] protected[frameless] ( * @param column the column we wish to explode */ def explodeMap[ - A, - B, - V[_, _], - TRep <: HList, - OutMod <: HList, - OutModValues <: HList, - Out - ](column: Witness.Lt[Symbol] - )(implicit - i0: TypedColumn.Exists[T, column.T, V[A, B]], - i1: TypedEncoder[A], - i2: TypedEncoder[B], - i3: LabelledGeneric.Aux[T, TRep], - i4: Modifier.Aux[TRep, column.T, V[A, B], (A, B), OutMod], - i5: Values.Aux[OutMod, OutModValues], - i6: Tupler.Aux[OutModValues, Out], - i7: TypedEncoder[Out] - ): TypedDataset[Out] = { - import org.apache.spark.sql.functions.{ - explode => sparkExplode, - struct => sparkStruct, - col => sparkCol - } + A, + B, + V[_, _], + TRep <: HList, + OutMod <: HList, + OutModValues <: HList, + Out + ](column: Witness.Lt[Symbol])(implicit + i0: TypedColumn.Exists[T, column.T, V[A, B]], + i1: TypedEncoder[A], + i2: TypedEncoder[B], + i3: LabelledGeneric.Aux[T, TRep], + i4: Modifier.Aux[TRep, column.T, V[A, B], (A, B), OutMod], + i5: Values.Aux[OutMod, OutModValues], + i6: Tupler.Aux[OutModValues, Out], + i7: TypedEncoder[Out] + ): TypedDataset[Out] = { + import org.apache.spark.sql.functions.{explode => sparkExplode, struct => sparkStruct, col => sparkCol} val df = dataset.toDF() // select all columns, all original columns and [key, value] columns appeared after the map explode @@ -1648,23 +1607,22 @@ class TypedDataset[T] protected[frameless] ( * @param column the column we wish to flatten */ def flattenOption[ - A, - TRep <: HList, - V[_], - OutMod <: HList, - OutModValues <: HList, - Out - ](column: Witness.Lt[Symbol] - )(implicit - i0: TypedColumn.Exists[T, column.T, V[A]], - i1: TypedEncoder[A], - i2: V[A] =:= Option[A], - i3: LabelledGeneric.Aux[T, TRep], - i4: Modifier.Aux[TRep, column.T, V[A], A, OutMod], - i5: Values.Aux[OutMod, OutModValues], - i6: Tupler.Aux[OutModValues, Out], - i7: TypedEncoder[Out] - ): TypedDataset[Out] = { + A, + TRep <: HList, + V[_], + OutMod <: HList, + OutModValues <: HList, + Out + ](column: Witness.Lt[Symbol])(implicit + i0: TypedColumn.Exists[T, column.T, V[A]], + i1: TypedEncoder[A], + i2: V[A] =:= Option[A], + i3: LabelledGeneric.Aux[T, TRep], + i4: Modifier.Aux[TRep, column.T, V[A], A, OutMod], + i5: Values.Aux[OutMod, OutModValues], + i6: Tupler.Aux[OutModValues, Out], + i7: TypedEncoder[Out] + ): TypedDataset[Out] = { val df = dataset.toDF() val trans = df .filter(df(column.value.name).isNotNull) @@ -1677,22 +1635,22 @@ class TypedDataset[T] protected[frameless] ( object TypedDataset { def create[A]( - data: Seq[A] - )(implicit - encoder: TypedEncoder[A], - sqlContext: SparkSession - ): TypedDataset[A] = { + data: Seq[A] + )(implicit + encoder: TypedEncoder[A], + sqlContext: SparkSession + ): TypedDataset[A] = { val dataset = sqlContext.createDataset(data)(TypedExpressionEncoder[A]) TypedDataset.create[A](dataset) } def create[A]( - data: RDD[A] - )(implicit - encoder: TypedEncoder[A], - sqlContext: SparkSession - ): TypedDataset[A] = { + data: RDD[A] + )(implicit + encoder: TypedEncoder[A], + sqlContext: SparkSession + ): TypedDataset[A] = { val dataset = sqlContext.createDataset(data)(TypedExpressionEncoder[A]) TypedDataset.create[A](dataset) diff --git a/dataset/src/main/scala/frameless/TypedDatasetForwarded.scala b/dataset/src/main/scala/frameless/TypedDatasetForwarded.scala index 0856732f2..658e5b4e2 100644 --- a/dataset/src/main/scala/frameless/TypedDatasetForwarded.scala +++ b/dataset/src/main/scala/frameless/TypedDatasetForwarded.scala @@ -6,13 +6,7 @@ import org.apache.spark.rdd.RDD import org.apache.spark.sql.execution.QueryExecution import org.apache.spark.sql.streaming.DataStreamWriter import org.apache.spark.sql.types.StructType -import org.apache.spark.sql.{ - DataFrame, - DataFrameWriter, - FramelessInternals, - SQLContext, - SparkSession -} +import org.apache.spark.sql.{DataFrame, DataFrameWriter, FramelessInternals, SQLContext, SparkSession} import org.apache.spark.storage.StorageLevel import scala.util.Random @@ -177,10 +171,10 @@ trait TypedDatasetForwarded[T] { self: TypedDataset[T] => * apache/spark */ def sample( - withReplacement: Boolean, - fraction: Double, - seed: Long = Random.nextLong() - ): TypedDataset[T] = + withReplacement: Boolean, + fraction: Double, + seed: Long = Random.nextLong() + ): TypedDataset[T] = TypedDataset.create(dataset.sample(withReplacement, fraction, seed)) /** @@ -266,9 +260,9 @@ trait TypedDatasetForwarded[T] { self: TypedDataset[T] => * apache/spark */ def randomSplitAsList( - weights: Array[Double], - seed: Long - ): util.List[TypedDataset[T]] = { + weights: Array[Double], + seed: Long + ): util.List[TypedDataset[T]] = { val values = randomSplit(weights, seed) java.util.Arrays.asList(values: _*) } @@ -301,8 +295,8 @@ trait TypedDatasetForwarded[T] { self: TypedDataset[T] => * apache/spark */ def persist( - newLevel: StorageLevel = StorageLevel.MEMORY_AND_DISK - ): TypedDataset[T] = + newLevel: StorageLevel = StorageLevel.MEMORY_AND_DISK + ): TypedDataset[T] = TypedDataset.create(dataset.persist(newLevel)) /** @@ -327,8 +321,8 @@ trait TypedDatasetForwarded[T] { self: TypedDataset[T] => "0.4.0" ) def mapPartitions[U: TypedEncoder]( - func: Iterator[T] => Iterator[U] - ): TypedDataset[U] = + func: Iterator[T] => Iterator[U] + ): TypedDataset[U] = deserialized.mapPartitions(func) @deprecated( @@ -393,8 +387,8 @@ trait TypedDatasetForwarded[T] { self: TypedDataset[T] => * apache/spark */ def mapPartitions[U: TypedEncoder]( - func: Iterator[T] => Iterator[U] - ): TypedDataset[U] = + func: Iterator[T] => Iterator[U] + ): TypedDataset[U] = TypedDataset.create( self.dataset.mapPartitions(func)(TypedExpressionEncoder[U]) ) @@ -406,8 +400,8 @@ trait TypedDatasetForwarded[T] { self: TypedDataset[T] => * apache/spark */ def flatMap[U: TypedEncoder]( - func: T => TraversableOnce[U] - ): TypedDataset[U] = + func: T => TraversableOnce[U] + ): TypedDataset[U] = TypedDataset.create(self.dataset.flatMap(func)(TypedExpressionEncoder[U])) /** @@ -425,10 +419,10 @@ trait TypedDatasetForwarded[T] { self: TypedDataset[T] => * Differs from `Dataset#reduce` by wrapping its result into an `Option` and an effect-suspending `F`. */ def reduceOption[F[_]]( - func: (T, T) => T - )(implicit - F: SparkDelay[F] - ): F[Option[T]] = + func: (T, T) => T + )(implicit + F: SparkDelay[F] + ): F[Option[T]] = F.delay { try { Option(self.dataset.reduce(func)) diff --git a/dataset/src/main/scala/frameless/TypedEncoder.scala b/dataset/src/main/scala/frameless/TypedEncoder.scala index b42b026ee..8525edeed 100644 --- a/dataset/src/main/scala/frameless/TypedEncoder.scala +++ b/dataset/src/main/scala/frameless/TypedEncoder.scala @@ -4,7 +4,7 @@ import java.math.BigInteger import java.util.Date -import java.time.{ Duration, Instant, Period, LocalDate } +import java.time.{Duration, Instant, LocalDate, Period} import java.sql.Timestamp @@ -12,14 +12,10 @@ import scala.reflect.ClassTag import org.apache.spark.sql.FramelessInternals import org.apache.spark.sql.FramelessInternals.UserDefinedType -import org.apache.spark.sql.{ reflection => ScalaReflection } +import org.apache.spark.sql.{reflection => ScalaReflection} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.objects._ -import org.apache.spark.sql.catalyst.util.{ - ArrayBasedMapData, - DateTimeUtils, - GenericArrayData -} +import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, DateTimeUtils, GenericArrayData} import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String @@ -27,9 +23,8 @@ import shapeless._ import shapeless.ops.hlist.IsHCons abstract class TypedEncoder[T]( - implicit - val classTag: ClassTag[T]) - extends Serializable { + implicit val classTag: ClassTag[T] +) extends Serializable { def nullable: Boolean def jvmRepr: DataType @@ -436,9 +431,8 @@ object TypedEncoder { TypedEncoder.usingInjection implicit def arrayEncoder[T: ClassTag]( - implicit - i0: Lazy[RecordFieldEncoder[T]] - ): TypedEncoder[Array[T]] = + implicit i0: Lazy[RecordFieldEncoder[T]] + ): TypedEncoder[Array[T]] = new TypedEncoder[Array[T]] { private lazy val encodeT = i0.value.encoder @@ -502,10 +496,10 @@ object TypedEncoder { } implicit def collectionEncoder[C[X] <: Seq[X], T]( - implicit - i0: Lazy[RecordFieldEncoder[T]], - i1: ClassTag[C[T]] - ): TypedEncoder[C[T]] = new TypedEncoder[C[T]] { + implicit + i0: Lazy[RecordFieldEncoder[T]], + i1: ClassTag[C[T]] + ): TypedEncoder[C[T]] = new TypedEncoder[C[T]] { private lazy val encodeT = i0.value.encoder def nullable: Boolean = false @@ -544,10 +538,10 @@ object TypedEncoder { * @return a `TypedEncoder` instance for `Set[T]`. */ implicit def setEncoder[T]( - implicit - i1: shapeless.Lazy[RecordFieldEncoder[T]], - i2: ClassTag[Set[T]] - ): TypedEncoder[Set[T]] = { + implicit + i1: shapeless.Lazy[RecordFieldEncoder[T]], + i2: ClassTag[Set[T]] + ): TypedEncoder[Set[T]] = { implicit val inj: Injection[Set[T], Seq[T]] = Injection(_.toSeq, _.toSet) TypedEncoder.usingInjection @@ -560,10 +554,10 @@ object TypedEncoder { * @param i1 the values encoder */ implicit def mapEncoder[A: NotCatalystNullable, B]( - implicit - i0: Lazy[RecordFieldEncoder[A]], - i1: Lazy[RecordFieldEncoder[B]] - ): TypedEncoder[Map[A, B]] = new TypedEncoder[Map[A, B]] { + implicit + i0: Lazy[RecordFieldEncoder[A]], + i1: Lazy[RecordFieldEncoder[B]] + ): TypedEncoder[Map[A, B]] = new TypedEncoder[Map[A, B]] { def nullable: Boolean = false def jvmRepr: DataType = FramelessInternals.objectTypeFor[Map[A, B]] @@ -626,9 +620,8 @@ object TypedEncoder { } implicit def optionEncoder[A]( - implicit - underlying: TypedEncoder[A] - ): TypedEncoder[Option[A]] = + implicit underlying: TypedEncoder[A] + ): TypedEncoder[Option[A]] = new TypedEncoder[Option[A]] { def nullable: Boolean = true @@ -706,10 +699,10 @@ object TypedEncoder { /** Encodes things using injection if there is one defined */ implicit def usingInjection[A: ClassTag, B]( - implicit - inj: Injection[A, B], - trb: TypedEncoder[B] - ): TypedEncoder[A] = + implicit + inj: Injection[A, B], + trb: TypedEncoder[B] + ): TypedEncoder[A] = new TypedEncoder[A] { def nullable: Boolean = trb.nullable def jvmRepr: DataType = FramelessInternals.objectTypeFor[A](classTag) @@ -728,19 +721,19 @@ object TypedEncoder { /** Encodes things as records if there is no Injection defined */ implicit def usingDerivation[F, G <: HList, H <: HList]( - implicit - i0: LabelledGeneric.Aux[F, G], - i1: DropUnitValues.Aux[G, H], - i2: IsHCons[H], - i3: Lazy[RecordEncoderFields[H]], - i4: Lazy[NewInstanceExprs[G]], - i5: ClassTag[F] - ): TypedEncoder[F] = new RecordEncoder[F, G, H] + implicit + i0: LabelledGeneric.Aux[F, G], + i1: DropUnitValues.Aux[G, H], + i2: IsHCons[H], + i3: Lazy[RecordEncoderFields[H]], + i4: Lazy[NewInstanceExprs[G]], + i5: ClassTag[F] + ): TypedEncoder[F] = new RecordEncoder[F, G, H] /** Encodes things using a Spark SQL's User Defined Type (UDT) if there is one defined in implicit */ implicit def usingUserDefinedType[ - A >: Null: UserDefinedType: ClassTag - ]: TypedEncoder[A] = { + A >: Null: UserDefinedType: ClassTag + ]: TypedEncoder[A] = { val udt = implicitly[UserDefinedType[A]] val udtInstance = NewInstance(udt.getClass, Nil, dataType = ObjectType(udt.getClass)) diff --git a/dataset/src/main/scala/frameless/TypedExpressionEncoder.scala b/dataset/src/main/scala/frameless/TypedExpressionEncoder.scala index 71fa286a5..c2de827d8 100644 --- a/dataset/src/main/scala/frameless/TypedExpressionEncoder.scala +++ b/dataset/src/main/scala/frameless/TypedExpressionEncoder.scala @@ -3,11 +3,7 @@ package frameless import org.apache.spark.sql.Encoder import org.apache.spark.sql.FramelessInternals import org.apache.spark.sql.catalyst.analysis.GetColumnByOrdinal -import org.apache.spark.sql.catalyst.expressions.{ - BoundReference, - CreateNamedStruct, - If -} +import org.apache.spark.sql.catalyst.expressions.{BoundReference, CreateNamedStruct, If} import org.apache.spark.sql.types.StructType object TypedExpressionEncoder { @@ -28,9 +24,8 @@ object TypedExpressionEncoder { } def apply[T]( - implicit - encoder: TypedEncoder[T] - ): Encoder[T] = { + implicit encoder: TypedEncoder[T] + ): Encoder[T] = { val in = BoundReference(0, encoder.jvmRepr, encoder.nullable) val (out, serializer) = encoder.toCatalyst(in) match { diff --git a/dataset/src/main/scala/frameless/With.scala b/dataset/src/main/scala/frameless/With.scala index 11ceaa35b..571921bb7 100644 --- a/dataset/src/main/scala/frameless/With.scala +++ b/dataset/src/main/scala/frameless/With.scala @@ -15,7 +15,7 @@ object With extends LowPrioWith { implicit def combine[A, B]: Aux[A, B, A with B] = of[A, B, A with B] } -private[frameless] sealed trait LowPrioWith { +sealed private[frameless] trait LowPrioWith { type Aux[A, B, W] = With[A, B] { type Out = W } protected[this] val theInstance = new With[Any, Any] {} diff --git a/dataset/src/main/scala/frameless/functions/AggregateFunctions.scala b/dataset/src/main/scala/frameless/functions/AggregateFunctions.scala index e371ea048..bf9e36331 100644 --- a/dataset/src/main/scala/frameless/functions/AggregateFunctions.scala +++ b/dataset/src/main/scala/frameless/functions/AggregateFunctions.scala @@ -9,6 +9,7 @@ import frameless.syntax._ import scala.annotation.nowarn trait AggregateFunctions { + /** Aggregate function: returns the number of items in a group. * * apache/spark @@ -147,7 +148,7 @@ trait AggregateFunctions { * * apache/spark */ - def stddevSamp[A, T](column: TypedColumn[T, A])(implicit ev: CatalystCast[A, Double] ): TypedAggregate[T, Option[Double]] = { + def stddevSamp[A, T](column: TypedColumn[T, A])(implicit ev: CatalystCast[A, Double]): TypedAggregate[T, Option[Double]] = { new TypedAggregate[T, Option[Double]]( sparkFunctions.stddev_samp(column.cast[Double].untyped) ) @@ -203,15 +204,14 @@ trait AggregateFunctions { * * apache/spark */ - def corr[A, B, T](column1: TypedColumn[T, A], column2: TypedColumn[T, B]) - (implicit - i0: CatalystCast[A, Double], - i1: CatalystCast[B, Double] - ): TypedAggregate[T, Option[Double]] = { - new TypedAggregate[T, Option[Double]]( - sparkFunctions.corr(column1.cast[Double].untyped, column2.cast[Double].untyped) - ) - } + def corr[A, B, T](column1: TypedColumn[T, A], column2: TypedColumn[T, B])(implicit + i0: CatalystCast[A, Double], + i1: CatalystCast[B, Double] + ): TypedAggregate[T, Option[Double]] = { + new TypedAggregate[T, Option[Double]]( + sparkFunctions.corr(column1.cast[Double].untyped, column2.cast[Double].untyped) + ) + } /** * Aggregate function: returns the covariance of two collumns. @@ -221,15 +221,14 @@ trait AggregateFunctions { * * apache/spark */ - def covarPop[A, B, T](column1: TypedColumn[T, A], column2: TypedColumn[T, B]) - (implicit - i0: CatalystCast[A, Double], - i1: CatalystCast[B, Double] - ): TypedAggregate[T, Option[Double]] = { - new TypedAggregate[T, Option[Double]]( - sparkFunctions.covar_pop(column1.cast[Double].untyped, column2.cast[Double].untyped) - ) - } + def covarPop[A, B, T](column1: TypedColumn[T, A], column2: TypedColumn[T, B])(implicit + i0: CatalystCast[A, Double], + i1: CatalystCast[B, Double] + ): TypedAggregate[T, Option[Double]] = { + new TypedAggregate[T, Option[Double]]( + sparkFunctions.covar_pop(column1.cast[Double].untyped, column2.cast[Double].untyped) + ) + } /** * Aggregate function: returns the covariance of two columns. @@ -239,16 +238,14 @@ trait AggregateFunctions { * * apache/spark */ - def covarSamp[A, B, T](column1: TypedColumn[T, A], column2: TypedColumn[T, B]) - (implicit - i0: CatalystCast[A, Double], - i1: CatalystCast[B, Double] - ): TypedAggregate[T, Option[Double]] = { - new TypedAggregate[T, Option[Double]]( - sparkFunctions.covar_samp(column1.cast[Double].untyped, column2.cast[Double].untyped) - ) - } - + def covarSamp[A, B, T](column1: TypedColumn[T, A], column2: TypedColumn[T, B])(implicit + i0: CatalystCast[A, Double], + i1: CatalystCast[B, Double] + ): TypedAggregate[T, Option[Double]] = { + new TypedAggregate[T, Option[Double]]( + sparkFunctions.covar_samp(column1.cast[Double].untyped, column2.cast[Double].untyped) + ) + } /** * Aggregate function: returns the kurtosis of a column. diff --git a/dataset/src/main/scala/frameless/functions/Lit.scala b/dataset/src/main/scala/frameless/functions/Lit.scala index d01467b13..69d6f38a3 100644 --- a/dataset/src/main/scala/frameless/functions/Lit.scala +++ b/dataset/src/main/scala/frameless/functions/Lit.scala @@ -6,10 +6,10 @@ import org.apache.spark.sql.catalyst.expressions.{Expression, NonSQLExpression} import org.apache.spark.sql.types.DataType private[frameless] case class Lit[T <: AnyVal]( - dataType: DataType, - nullable: Boolean, - show: () => String, - catalystExpr: Expression // must be a generated Expression from a literal TypedEncoder's toCatalyst function + dataType: DataType, + nullable: Boolean, + show: () => String, + catalystExpr: Expression // must be a generated Expression from a literal TypedEncoder's toCatalyst function ) extends Expression with NonSQLExpression { override def toString: String = s"FramelessLit(${show()})" @@ -52,7 +52,7 @@ private[frameless] case class Lit[T <: AnyVal]( } def eval(input: InternalRow): Any = codegen(input) - + def children: Seq[Expression] = Nil protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = catalystExpr.genCode(ctx) diff --git a/dataset/src/main/scala/frameless/functions/NonAggregateFunctions.scala b/dataset/src/main/scala/frameless/functions/NonAggregateFunctions.scala index 939bf5b8d..935f369f3 100644 --- a/dataset/src/main/scala/frameless/functions/NonAggregateFunctions.scala +++ b/dataset/src/main/scala/frameless/functions/NonAggregateFunctions.scala @@ -1,12 +1,13 @@ package frameless package functions -import org.apache.spark.sql.{Column, functions => sparkFunctions} +import org.apache.spark.sql.{functions => sparkFunctions, Column} import scala.annotation.nowarn import scala.util.matching.Regex trait NonAggregateFunctions { + /** Non-Aggregate function: calculates the SHA-2 digest of a binary column and returns the value as a 40 character hex string * * apache/spark @@ -27,15 +28,17 @@ trait NonAggregateFunctions { */ def crc32[T](column: AbstractTypedColumn[T, Array[Byte]]): column.ThisType[T, Long] = column.typed(sparkFunctions.crc32(column.untyped)) + /** * Non-Aggregate function: returns the negated value of column. * * apache/spark */ - def negate[A, B, T](column: AbstractTypedColumn[T,A])( - implicit i0: CatalystNumericWithJavaBigDecimal[A, B], + def negate[A, B, T](column: AbstractTypedColumn[T, A])( + implicit + i0: CatalystNumericWithJavaBigDecimal[A, B], i1: TypedEncoder[B] - ): column.ThisType[T,B] = + ): column.ThisType[T, B] = column.typed(sparkFunctions.negate(column.untyped)) /** @@ -43,7 +46,7 @@ trait NonAggregateFunctions { * * apache/spark */ - def not[T](column: AbstractTypedColumn[T,Boolean]): column.ThisType[T,Boolean] = + def not[T](column: AbstractTypedColumn[T, Boolean]): column.ThisType[T, Boolean] = column.typed(sparkFunctions.not(column.untyped)) /** @@ -51,36 +54,34 @@ trait NonAggregateFunctions { * * apache/spark */ - def conv[T](column: AbstractTypedColumn[T,String], fromBase: Int, toBase: Int): column.ThisType[T,String] = - column.typed(sparkFunctions.conv(column.untyped,fromBase,toBase)) + def conv[T](column: AbstractTypedColumn[T, String], fromBase: Int, toBase: Int): column.ThisType[T, String] = + column.typed(sparkFunctions.conv(column.untyped, fromBase, toBase)) /** Non-Aggregate function: Converts an angle measured in radians to an approximately equivalent angle measured in degrees. * * apache/spark */ - def degrees[A,T](column: AbstractTypedColumn[T,A]): column.ThisType[T,Double] = + def degrees[A, T](column: AbstractTypedColumn[T, A]): column.ThisType[T, Double] = column.typed(sparkFunctions.degrees(column.untyped)) /** Non-Aggregate function: returns the ceiling of a numeric column * * apache/spark */ - def ceil[A, B, T](column: AbstractTypedColumn[T, A]) - (implicit - i0: CatalystRound[A, B], - i1: TypedEncoder[B] - ): column.ThisType[T, B] = - column.typed(sparkFunctions.ceil(column.untyped))(i1) + def ceil[A, B, T](column: AbstractTypedColumn[T, A])(implicit + i0: CatalystRound[A, B], + i1: TypedEncoder[B] + ): column.ThisType[T, B] = + column.typed(sparkFunctions.ceil(column.untyped))(i1) /** Non-Aggregate function: returns the floor of a numeric column * * apache/spark */ - def floor[A, B, T](column: AbstractTypedColumn[T, A]) - (implicit + def floor[A, B, T](column: AbstractTypedColumn[T, A])(implicit i0: CatalystRound[A, B], i1: TypedEncoder[B] - ): column.ThisType[T, B] = + ): column.ThisType[T, B] = column.typed(sparkFunctions.floor(column.untyped))(i1) /** Non-Aggregate function: unsigned shift the the given value numBits right. If given long, will return long else it will return an integer. @@ -88,47 +89,43 @@ trait NonAggregateFunctions { * apache/spark */ @nowarn // supress sparkFunctions.shiftRightUnsigned call which is used to maintain Spark 3.1.x backwards compat - def shiftRightUnsigned[A, B, T](column: AbstractTypedColumn[T, A], numBits: Int) - (implicit - i0: CatalystBitShift[A, B], - i1: TypedEncoder[B] - ): column.ThisType[T, B] = - column.typed(sparkFunctions.shiftRightUnsigned(column.untyped, numBits)) + def shiftRightUnsigned[A, B, T](column: AbstractTypedColumn[T, A], numBits: Int)(implicit + i0: CatalystBitShift[A, B], + i1: TypedEncoder[B] + ): column.ThisType[T, B] = + column.typed(sparkFunctions.shiftRightUnsigned(column.untyped, numBits)) /** Non-Aggregate function: shift the the given value numBits right. If given long, will return long else it will return an integer. * * apache/spark */ @nowarn // supress sparkFunctions.shiftReft call which is used to maintain Spark 3.1.x backwards compat - def shiftRight[A, B, T](column: AbstractTypedColumn[T, A], numBits: Int) - (implicit - i0: CatalystBitShift[A, B], - i1: TypedEncoder[B] - ): column.ThisType[T, B] = - column.typed(sparkFunctions.shiftRight(column.untyped, numBits)) + def shiftRight[A, B, T](column: AbstractTypedColumn[T, A], numBits: Int)(implicit + i0: CatalystBitShift[A, B], + i1: TypedEncoder[B] + ): column.ThisType[T, B] = + column.typed(sparkFunctions.shiftRight(column.untyped, numBits)) /** Non-Aggregate function: shift the the given value numBits left. If given long, will return long else it will return an integer. * * apache/spark */ @nowarn // supress sparkFunctions.shiftLeft call which is used to maintain Spark 3.1.x backwards compat - def shiftLeft[A, B, T](column: AbstractTypedColumn[T, A], numBits: Int) - (implicit - i0: CatalystBitShift[A, B], - i1: TypedEncoder[B] - ): column.ThisType[T, B] = + def shiftLeft[A, B, T](column: AbstractTypedColumn[T, A], numBits: Int)(implicit + i0: CatalystBitShift[A, B], + i1: TypedEncoder[B] + ): column.ThisType[T, B] = column.typed(sparkFunctions.shiftLeft(column.untyped, numBits)) - + /** Non-Aggregate function: returns the absolute value of a numeric column * * apache/spark */ - def abs[A, B, T](column: AbstractTypedColumn[T, A]) - (implicit - i0: CatalystNumericWithJavaBigDecimal[A, B], - i1: TypedEncoder[B] - ): column.ThisType[T, B] = - column.typed(sparkFunctions.abs(column.untyped))(i1) + def abs[A, B, T](column: AbstractTypedColumn[T, A])(implicit + i0: CatalystNumericWithJavaBigDecimal[A, B], + i1: TypedEncoder[B] + ): column.ThisType[T, B] = + column.typed(sparkFunctions.abs(column.untyped))(i1) /** Non-Aggregate function: Computes the cosine of the given value. * @@ -136,9 +133,8 @@ trait NonAggregateFunctions { * [[https://github.com/apache/spark/blob/4a3c09601ba69f7d49d1946bb6f20f5cfe453031/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala#L67]] * apache/spark */ - def cos[A, T](column: AbstractTypedColumn[T, A]) - (implicit i0: CatalystCast[A, Double]): column.ThisType[T, Double] = - column.typed(sparkFunctions.cos(column.cast[Double].untyped)) + def cos[A, T](column: AbstractTypedColumn[T, A])(implicit i0: CatalystCast[A, Double]): column.ThisType[T, Double] = + column.typed(sparkFunctions.cos(column.cast[Double].untyped)) /** Non-Aggregate function: Computes the hyperbolic cosine of the given value. * @@ -146,9 +142,8 @@ trait NonAggregateFunctions { * [[https://github.com/apache/spark/blob/4a3c09601ba69f7d49d1946bb6f20f5cfe453031/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala#L67]] * apache/spark */ - def cosh[A, T](column: AbstractTypedColumn[T, A]) - (implicit i0: CatalystCast[A, Double]): column.ThisType[T, Double] = - column.typed(sparkFunctions.cosh(column.cast[Double].untyped)) + def cosh[A, T](column: AbstractTypedColumn[T, A])(implicit i0: CatalystCast[A, Double]): column.ThisType[T, Double] = + column.typed(sparkFunctions.cosh(column.cast[Double].untyped)) /** Non-Aggregate function: Computes the signum of the given value. * @@ -156,8 +151,7 @@ trait NonAggregateFunctions { * [[https://github.com/apache/spark/blob/4a3c09601ba69f7d49d1946bb6f20f5cfe453031/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala#L67]] * apache/spark */ - def signum[A, T](column: AbstractTypedColumn[T, A]) - (implicit i0: CatalystCast[A, Double]): column.ThisType[T, Double] = + def signum[A, T](column: AbstractTypedColumn[T, A])(implicit i0: CatalystCast[A, Double]): column.ThisType[T, Double] = column.typed(sparkFunctions.signum(column.cast[Double].untyped)) /** Non-Aggregate function: Computes the sine of the given value. @@ -166,9 +160,8 @@ trait NonAggregateFunctions { * [[https://github.com/apache/spark/blob/4a3c09601ba69f7d49d1946bb6f20f5cfe453031/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala#L67]] * apache/spark */ - def sin[A, T](column: AbstractTypedColumn[T, A]) - (implicit i0: CatalystCast[A, Double]): column.ThisType[T, Double] = - column.typed(sparkFunctions.sin(column.cast[Double].untyped)) + def sin[A, T](column: AbstractTypedColumn[T, A])(implicit i0: CatalystCast[A, Double]): column.ThisType[T, Double] = + column.typed(sparkFunctions.sin(column.cast[Double].untyped)) /** Non-Aggregate function: Computes the hyperbolic sine of the given value. * @@ -176,9 +169,8 @@ trait NonAggregateFunctions { * [[https://github.com/apache/spark/blob/4a3c09601ba69f7d49d1946bb6f20f5cfe453031/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala#L67]] * apache/spark */ - def sinh[A, T](column: AbstractTypedColumn[T, A]) - (implicit i0: CatalystCast[A, Double]): column.ThisType[T, Double] = - column.typed(sparkFunctions.sinh(column.cast[Double].untyped)) + def sinh[A, T](column: AbstractTypedColumn[T, A])(implicit i0: CatalystCast[A, Double]): column.ThisType[T, Double] = + column.typed(sparkFunctions.sinh(column.cast[Double].untyped)) /** Non-Aggregate function: Computes the tangent of the given column. * @@ -186,9 +178,8 @@ trait NonAggregateFunctions { * [[https://github.com/apache/spark/blob/4a3c09601ba69f7d49d1946bb6f20f5cfe453031/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala#L67]] * apache/spark */ - def tan[A, T](column: AbstractTypedColumn[T, A]) - (implicit i0: CatalystCast[A, Double]): column.ThisType[T, Double] = - column.typed(sparkFunctions.tan(column.cast[Double].untyped)) + def tan[A, T](column: AbstractTypedColumn[T, A])(implicit i0: CatalystCast[A, Double]): column.ThisType[T, Double] = + column.typed(sparkFunctions.tan(column.cast[Double].untyped)) /** Non-Aggregate function: Computes the hyperbolic tangent of the given value. * @@ -196,9 +187,8 @@ trait NonAggregateFunctions { * [[https://github.com/apache/spark/blob/4a3c09601ba69f7d49d1946bb6f20f5cfe453031/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala#L67]] * apache/spark */ - def tanh[A, T](column: AbstractTypedColumn[T, A]) - (implicit i0: CatalystCast[A, Double]): column.ThisType[T, Double] = - column.typed(sparkFunctions.tanh(column.cast[Double].untyped)) + def tanh[A, T](column: AbstractTypedColumn[T, A])(implicit i0: CatalystCast[A, Double]): column.ThisType[T, Double] = + column.typed(sparkFunctions.tanh(column.cast[Double].untyped)) /** Non-Aggregate function: returns the acos of a numeric column * @@ -206,9 +196,8 @@ trait NonAggregateFunctions { * [[https://github.com/apache/spark/blob/4a3c09601ba69f7d49d1946bb6f20f5cfe453031/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala#L67]] * apache/spark */ - def acos[A, T](column: AbstractTypedColumn[T, A]) - (implicit i0: CatalystCast[A, Double]): column.ThisType[T, Double] = - column.typed(sparkFunctions.acos(column.cast[Double].untyped)) + def acos[A, T](column: AbstractTypedColumn[T, A])(implicit i0: CatalystCast[A, Double]): column.ThisType[T, Double] = + column.typed(sparkFunctions.acos(column.cast[Double].untyped)) /** Non-Aggregate function: returns true if value is contained with in the array in the specified column * @@ -223,9 +212,8 @@ trait NonAggregateFunctions { * [[https://github.com/apache/spark/blob/4a3c09601ba69f7d49d1946bb6f20f5cfe453031/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala#L67]] * apache/spark */ - def atan[A, T](column: AbstractTypedColumn[T,A]) - (implicit i0: CatalystCast[A, Double]): column.ThisType[T, Double] = - column.typed(sparkFunctions.atan(column.cast[Double].untyped)) + def atan[A, T](column: AbstractTypedColumn[T, A])(implicit i0: CatalystCast[A, Double]): column.ThisType[T, Double] = + column.typed(sparkFunctions.atan(column.cast[Double].untyped)) /** Non-Aggregate function: returns the asin of a numeric column * @@ -233,9 +221,8 @@ trait NonAggregateFunctions { * [[https://github.com/apache/spark/blob/4a3c09601ba69f7d49d1946bb6f20f5cfe453031/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala#L67]] * apache/spark */ - def asin[A, T](column: AbstractTypedColumn[T, A]) - (implicit i0: CatalystCast[A, Double]): column.ThisType[T, Double] = - column.typed(sparkFunctions.asin(column.cast[Double].untyped)) + def asin[A, T](column: AbstractTypedColumn[T, A])(implicit i0: CatalystCast[A, Double]): column.ThisType[T, Double] = + column.typed(sparkFunctions.asin(column.cast[Double].untyped)) /** Non-Aggregate function: returns the angle theta from the conversion of rectangular coordinates (x, y) to * polar coordinates (r, theta). @@ -244,12 +231,11 @@ trait NonAggregateFunctions { * [[https://github.com/apache/spark/blob/4a3c09601ba69f7d49d1946bb6f20f5cfe453031/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala#L67]] * apache/spark */ - def atan2[A, B, T](l: TypedColumn[T, A], r: TypedColumn[T, B]) - (implicit - i0: CatalystCast[A, Double], - i1: CatalystCast[B, Double] - ): TypedColumn[T, Double] = - r.typed(sparkFunctions.atan2(l.cast[Double].untyped, r.cast[Double].untyped)) + def atan2[A, B, T](l: TypedColumn[T, A], r: TypedColumn[T, B])(implicit + i0: CatalystCast[A, Double], + i1: CatalystCast[B, Double] + ): TypedColumn[T, Double] = + r.typed(sparkFunctions.atan2(l.cast[Double].untyped, r.cast[Double].untyped)) /** Non-Aggregate function: returns the angle theta from the conversion of rectangular coordinates (x, y) to * polar coordinates (r, theta). @@ -258,51 +244,43 @@ trait NonAggregateFunctions { * [[https://github.com/apache/spark/blob/4a3c09601ba69f7d49d1946bb6f20f5cfe453031/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala#L67]] * apache/spark */ - def atan2[A, B, T](l: TypedAggregate[T, A], r: TypedAggregate[T, B]) - (implicit - i0: CatalystCast[A, Double], - i1: CatalystCast[B, Double] - ): TypedAggregate[T, Double] = - r.typed(sparkFunctions.atan2(l.cast[Double].untyped, r.cast[Double].untyped)) + def atan2[A, B, T](l: TypedAggregate[T, A], r: TypedAggregate[T, B])(implicit + i0: CatalystCast[A, Double], + i1: CatalystCast[B, Double] + ): TypedAggregate[T, Double] = + r.typed(sparkFunctions.atan2(l.cast[Double].untyped, r.cast[Double].untyped)) - def atan2[B, T](l: Double, r: TypedColumn[T, B]) - (implicit i0: CatalystCast[B, Double]): TypedColumn[T, Double] = - atan2(r.lit(l), r) + def atan2[B, T](l: Double, r: TypedColumn[T, B])(implicit i0: CatalystCast[B, Double]): TypedColumn[T, Double] = + atan2(r.lit(l), r) - def atan2[A, T](l: TypedColumn[T, A], r: Double) - (implicit i0: CatalystCast[A, Double]): TypedColumn[T, Double] = - atan2(l, l.lit(r)) + def atan2[A, T](l: TypedColumn[T, A], r: Double)(implicit i0: CatalystCast[A, Double]): TypedColumn[T, Double] = + atan2(l, l.lit(r)) - def atan2[B, T](l: Double, r: TypedAggregate[T, B]) - (implicit i0: CatalystCast[B, Double]): TypedAggregate[T, Double] = - atan2(r.lit(l), r) + def atan2[B, T](l: Double, r: TypedAggregate[T, B])(implicit i0: CatalystCast[B, Double]): TypedAggregate[T, Double] = + atan2(r.lit(l), r) - def atan2[A, T](l: TypedAggregate[T, A], r: Double) - (implicit i0: CatalystCast[A, Double]): TypedAggregate[T, Double] = - atan2(l, l.lit(r)) + def atan2[A, T](l: TypedAggregate[T, A], r: Double)(implicit i0: CatalystCast[A, Double]): TypedAggregate[T, Double] = + atan2(l, l.lit(r)) /** Non-Aggregate function: returns the square root value of a numeric column. * * apache/spark */ - def sqrt[A, T](column: AbstractTypedColumn[T, A]) - (implicit i0: CatalystCast[A, Double]): column.ThisType[T, Double] = + def sqrt[A, T](column: AbstractTypedColumn[T, A])(implicit i0: CatalystCast[A, Double]): column.ThisType[T, Double] = column.typed(sparkFunctions.sqrt(column.cast[Double].untyped)) /** Non-Aggregate function: returns the cubic root value of a numeric column. * * apache/spark */ - def cbrt[A, T](column: AbstractTypedColumn[T, A]) - (implicit i0: CatalystCast[A, Double]): column.ThisType[T, Double] = + def cbrt[A, T](column: AbstractTypedColumn[T, A])(implicit i0: CatalystCast[A, Double]): column.ThisType[T, Double] = column.typed(sparkFunctions.cbrt(column.cast[Double].untyped)) /** Non-Aggregate function: returns the exponential value of a numeric column. * * apache/spark */ - def exp[A, T](column: AbstractTypedColumn[T, A]) - (implicit i0: CatalystCast[A, Double]): column.ThisType[T, Double] = + def exp[A, T](column: AbstractTypedColumn[T, A])(implicit i0: CatalystCast[A, Double]): column.ThisType[T, Double] = column.typed(sparkFunctions.exp(column.cast[Double].untyped)) /** Non-Aggregate function: Returns the value of the column `e` rounded to 0 decimal places with HALF_UP round mode. @@ -310,7 +288,9 @@ trait NonAggregateFunctions { * apache/spark */ def round[A, B, T](column: AbstractTypedColumn[T, A])( - implicit i0: CatalystNumericWithJavaBigDecimal[A, B], i1: TypedEncoder[B] + implicit + i0: CatalystNumericWithJavaBigDecimal[A, B], + i1: TypedEncoder[B] ): column.ThisType[T, B] = column.typed(sparkFunctions.round(column.untyped))(i1) @@ -320,7 +300,9 @@ trait NonAggregateFunctions { * apache/spark */ def round[A, B, T](column: AbstractTypedColumn[T, A], scale: Int)( - implicit i0: CatalystNumericWithJavaBigDecimal[A, B], i1: TypedEncoder[B] + implicit + i0: CatalystNumericWithJavaBigDecimal[A, B], + i1: TypedEncoder[B] ): column.ThisType[T, B] = column.typed(sparkFunctions.round(column.untyped, scale))(i1) @@ -330,7 +312,9 @@ trait NonAggregateFunctions { * apache/spark */ def bround[A, B, T](column: AbstractTypedColumn[T, A])( - implicit i0: CatalystNumericWithJavaBigDecimal[A, B], i1: TypedEncoder[B] + implicit + i0: CatalystNumericWithJavaBigDecimal[A, B], + i1: TypedEncoder[B] ): column.ThisType[T, B] = column.typed(sparkFunctions.bround(column.untyped))(i1) @@ -340,7 +324,9 @@ trait NonAggregateFunctions { * apache/spark */ def bround[A, B, T](column: AbstractTypedColumn[T, A], scale: Int)( - implicit i0: CatalystNumericWithJavaBigDecimal[A, B], i1: TypedEncoder[B] + implicit + i0: CatalystNumericWithJavaBigDecimal[A, B], + i1: TypedEncoder[B] ): column.ThisType[T, B] = column.typed(sparkFunctions.bround(column.untyped, scale))(i1) @@ -394,7 +380,6 @@ trait NonAggregateFunctions { ): column.ThisType[T, Double] = column.typed(sparkFunctions.log10(column.untyped)) - /** * Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow. * @@ -465,7 +450,6 @@ trait NonAggregateFunctions { ): column.ThisType[T, A] = column.typed(sparkFunctions.pmod(column.untyped, column2.untyped)) - /** Non-Aggregate function: Returns the string representation of the binary value of the given long * column. For example, bin("12") returns "1100". * @@ -480,7 +464,7 @@ trait NonAggregateFunctions { * * apache/spark */ - def md5[T, A](column: AbstractTypedColumn[T, A])(implicit i0: TypedEncoder[A]): column.ThisType[T, String] = + def md5[T, A](column: AbstractTypedColumn[T, A])(implicit i0: TypedEncoder[A]): column.ThisType[T, String] = column.typed(sparkFunctions.md5(column.untyped)) /** @@ -488,7 +472,7 @@ trait NonAggregateFunctions { * * apache/spark */ - def factorial[T](column: AbstractTypedColumn[T, Long])(implicit i0: TypedEncoder[Long]): column.ThisType[T, Long] = + def factorial[T](column: AbstractTypedColumn[T, Long])(implicit i0: TypedEncoder[Long]): column.ThisType[T, Long] = column.typed(sparkFunctions.factorial(column.untyped)) /** Non-Aggregate function: Computes bitwise NOT. @@ -542,7 +526,6 @@ trait NonAggregateFunctions { // String functions ////////////////////////////////////////////////////////////////////////////////////////////// - /** Non-Aggregate function: takes the first letter of a string column and returns the ascii int value in a new column * * apache/spark @@ -615,7 +598,7 @@ trait NonAggregateFunctions { * * apache/spark */ - //TODO: Also for binary + // TODO: Also for binary def length[T](str: AbstractTypedColumn[T, String]): str.ThisType[T, Int] = str.typed(sparkFunctions.length(str.untyped)) @@ -645,9 +628,7 @@ trait NonAggregateFunctions { * * apache/spark */ - def lpad[T](str: AbstractTypedColumn[T, String], - len: Int, - pad: String): str.ThisType[T, String] = + def lpad[T](str: AbstractTypedColumn[T, String], len: Int, pad: String): str.ThisType[T, String] = str.typed(sparkFunctions.lpad(str.untyped, len, pad)) /** Non-Aggregate function: Trim the spaces from left end for the specified string value. @@ -661,12 +642,9 @@ trait NonAggregateFunctions { * * apache/spark */ - def regexpReplace[T](str: AbstractTypedColumn[T, String], - pattern: Regex, - replacement: String): str.ThisType[T, String] = + def regexpReplace[T](str: AbstractTypedColumn[T, String], pattern: Regex, replacement: String): str.ThisType[T, String] = str.typed(sparkFunctions.regexp_replace(str.untyped, pattern.regex, replacement)) - /** Non-Aggregate function: Reverses the string column and returns it as a new string column. * * apache/spark @@ -693,7 +671,7 @@ trait NonAggregateFunctions { * * apache/spark */ - //TODO: Also for byte array + // TODO: Also for byte array def substring[T](str: AbstractTypedColumn[T, String], pos: Int, len: Int): str.ThisType[T, String] = str.typed(sparkFunctions.substring(str.untyped, pos, len)) diff --git a/dataset/src/main/scala/frameless/functions/Udf.scala b/dataset/src/main/scala/frameless/functions/Udf.scala index 93ba7f118..ca6ce8271 100644 --- a/dataset/src/main/scala/frameless/functions/Udf.scala +++ b/dataset/src/main/scala/frameless/functions/Udf.scala @@ -19,8 +19,7 @@ trait Udf { * * apache/spark */ - def udf[T, A, R: TypedEncoder](f: A => R): - TypedColumn[T, A] => TypedColumn[T, R] = { + def udf[T, A, R: TypedEncoder](f: A => R): TypedColumn[T, A] => TypedColumn[T, R] = { u => val scalaUdf = FramelessUdf(f, List(u), TypedEncoder[R]) new TypedColumn[T, R](scalaUdf) @@ -31,48 +30,46 @@ trait Udf { * * apache/spark */ - def udf[T, A1, A2, R: TypedEncoder](f: (A1,A2) => R): - (TypedColumn[T, A1], TypedColumn[T, A2]) => TypedColumn[T, R] = { + def udf[T, A1, A2, R: TypedEncoder](f: (A1, A2) => R): (TypedColumn[T, A1], TypedColumn[T, A2]) => TypedColumn[T, R] = { case us => val scalaUdf = FramelessUdf(f, us.toList[UntypedExpression[T]], TypedEncoder[R]) new TypedColumn[T, R](scalaUdf) - } + } /** Defines a user-defined function of 3 arguments as user-defined function (UDF). * The data types are automatically inferred based on the function's signature. * * apache/spark */ - def udf[T, A1, A2, A3, R: TypedEncoder](f: (A1,A2,A3) => R): - (TypedColumn[T, A1], TypedColumn[T, A2], TypedColumn[T, A3]) => TypedColumn[T, R] = { + def udf[T, A1, A2, A3, R: TypedEncoder](f: (A1, A2, A3) => R): (TypedColumn[T, A1], TypedColumn[T, A2], TypedColumn[T, A3]) => TypedColumn[T, R] = { case us => val scalaUdf = FramelessUdf(f, us.toList[UntypedExpression[T]], TypedEncoder[R]) new TypedColumn[T, R](scalaUdf) - } + } /** Defines a user-defined function of 4 arguments as user-defined function (UDF). * The data types are automatically inferred based on the function's signature. * * apache/spark */ - def udf[T, A1, A2, A3, A4, R: TypedEncoder](f: (A1,A2,A3,A4) => R): - (TypedColumn[T, A1], TypedColumn[T, A2], TypedColumn[T, A3], TypedColumn[T, A4]) => TypedColumn[T, R] = { + def udf[T, A1, A2, A3, A4, R: TypedEncoder](f: (A1, A2, A3, A4) => R) + : (TypedColumn[T, A1], TypedColumn[T, A2], TypedColumn[T, A3], TypedColumn[T, A4]) => TypedColumn[T, R] = { case us => val scalaUdf = FramelessUdf(f, us.toList[UntypedExpression[T]], TypedEncoder[R]) new TypedColumn[T, R](scalaUdf) - } + } /** Defines a user-defined function of 5 arguments as user-defined function (UDF). * The data types are automatically inferred based on the function's signature. * * apache/spark */ - def udf[T, A1, A2, A3, A4, A5, R: TypedEncoder](f: (A1,A2,A3,A4,A5) => R): - (TypedColumn[T, A1], TypedColumn[T, A2], TypedColumn[T, A3], TypedColumn[T, A4], TypedColumn[T, A5]) => TypedColumn[T, R] = { + def udf[T, A1, A2, A3, A4, A5, R: TypedEncoder](f: (A1, A2, A3, A4, A5) => R) + : (TypedColumn[T, A1], TypedColumn[T, A2], TypedColumn[T, A3], TypedColumn[T, A4], TypedColumn[T, A5]) => TypedColumn[T, R] = { case us => val scalaUdf = FramelessUdf(f, us.toList[UntypedExpression[T]], TypedEncoder[R]) new TypedColumn[T, R](scalaUdf) - } + } } /** @@ -118,7 +115,8 @@ case class FramelessUdf[T, R]( """ val code = CodeFormatter.stripOverlappingComments( - new CodeAndComment(codeBody, ctx.getPlaceHolderToComments())) + new CodeAndComment(codeBody, ctx.getPlaceHolderToComments()) + ) val (clazz, _) = CodeGenerator.compile(code) val codegen = clazz.generate(ctx.references.toArray).asInstanceOf[InternalRow => AnyRef] @@ -139,9 +137,13 @@ case class FramelessUdf[T, R]( val framelessUdfClassName = classOf[FramelessUdf[_, _]].getName val funcClassName = s"scala.Function${children.size}" val funcExpressionIdx = ctx.references.size - 1 - val funcTerm = ctx.addMutableState(funcClassName, ctx.freshName("udf"), - v => s"$v = ($funcClassName)((($framelessUdfClassName)references" + - s"[$funcExpressionIdx]).function());") + val funcTerm = ctx.addMutableState( + funcClassName, + ctx.freshName("udf"), + v => + s"$v = ($funcClassName)((($framelessUdfClassName)references" + + s"[$funcExpressionIdx]).function());" + ) val (argsCode, funcArguments) = encoders.zip(children).map { case (encoder, child) => @@ -161,7 +163,8 @@ case class FramelessUdf[T, R]( val resultEval = rencoder.toCatalyst(internalExpr).genCode(ctx) - ev.copy(code = code""" + ev.copy( + code = code""" ${argsCode.mkString("\n")} $internalTerm = @@ -179,17 +182,17 @@ case class FramelessUdf[T, R]( } case class Spark2_4_LambdaVariable( - value: String, - isNull: String, - dataType: DataType, - nullable: Boolean = true) extends LeafExpression with NonSQLExpression { + value: String, + isNull: String, + dataType: DataType, + nullable: Boolean = true +) extends LeafExpression with NonSQLExpression { private val accessor: (InternalRow, Int) => Any = InternalRow.getAccessor(dataType) // Interpreted execution of `LambdaVariable` always get the 0-index element from input row. override def eval(input: InternalRow): Any = { - assert(input.numFields == 1, - "The input row of interpreted LambdaVariable should have only 1 field.") + assert(input.numFields == 1, "The input row of interpreted LambdaVariable should have only 1 field.") if (nullable && input.isNullAt(0)) { null } else { diff --git a/dataset/src/main/scala/frameless/functions/UnaryFunctions.scala b/dataset/src/main/scala/frameless/functions/UnaryFunctions.scala index 64bdf0ed1..f76b1604e 100644 --- a/dataset/src/main/scala/frameless/functions/UnaryFunctions.scala +++ b/dataset/src/main/scala/frameless/functions/UnaryFunctions.scala @@ -1,16 +1,17 @@ package frameless package functions -import org.apache.spark.sql.{Column, functions => sparkFunctions} +import org.apache.spark.sql.{functions => sparkFunctions, Column} import scala.math.Ordering trait UnaryFunctions { + /** Returns length of array * * apache/spark */ - def size[T, A, V[_] : CatalystSizableCollection](column: TypedColumn[T, V[A]]): TypedColumn[T, Int] = + def size[T, A, V[_]: CatalystSizableCollection](column: TypedColumn[T, V[A]]): TypedColumn[T, Int] = new TypedColumn[T, Int](implicitly[CatalystSizableCollection[V]].sizeOp(column.untyped)) /** Returns length of Map @@ -25,7 +26,7 @@ trait UnaryFunctions { * * apache/spark */ - def sortAscending[T, A: Ordering, V[_] : CatalystSortableCollection](column: TypedColumn[T, V[A]]): TypedColumn[T, V[A]] = + def sortAscending[T, A: Ordering, V[_]: CatalystSortableCollection](column: TypedColumn[T, V[A]]): TypedColumn[T, V[A]] = new TypedColumn[T, V[A]](implicitly[CatalystSortableCollection[V]].sortOp(column.untyped, sortAscending = true))(column.uencoder) /** Sorts the input array for the given column in descending order, according to @@ -33,18 +34,20 @@ trait UnaryFunctions { * * apache/spark */ - def sortDescending[T, A: Ordering, V[_] : CatalystSortableCollection](column: TypedColumn[T, V[A]]): TypedColumn[T, V[A]] = + def sortDescending[T, A: Ordering, V[_]: CatalystSortableCollection](column: TypedColumn[T, V[A]]): TypedColumn[T, V[A]] = new TypedColumn[T, V[A]](implicitly[CatalystSortableCollection[V]].sortOp(column.untyped, sortAscending = false))(column.uencoder) - /** Creates a new row for each element in the given collection. The column types * eligible for this operation are constrained by CatalystExplodableCollection. * * apache/spark */ - @deprecated("Use explode() from the TypedDataset instead. This method will result in " + - "runtime error if applied to two columns in the same select statement.", "0.6.2") - def explode[T, A: TypedEncoder, V[_] : CatalystExplodableCollection](column: TypedColumn[T, V[A]]): TypedColumn[T, A] = + @deprecated( + "Use explode() from the TypedDataset instead. This method will result in " + + "runtime error if applied to two columns in the same select statement.", + "0.6.2" + ) + def explode[T, A: TypedEncoder, V[_]: CatalystExplodableCollection](column: TypedColumn[T, V[A]]): TypedColumn[T, A] = new TypedColumn[T, A](sparkFunctions.explode(column.untyped)) } diff --git a/dataset/src/main/scala/frameless/functions/package.scala b/dataset/src/main/scala/frameless/functions/package.scala index 1a57101e0..291cb4857 100644 --- a/dataset/src/main/scala/frameless/functions/package.scala +++ b/dataset/src/main/scala/frameless/functions/package.scala @@ -5,9 +5,9 @@ import scala.reflect.ClassTag import shapeless._ import shapeless.labelled.FieldType import shapeless.ops.hlist.IsHCons -import shapeless.ops.record.{ Keys, Values } +import shapeless.ops.record.{Keys, Values} -import org.apache.spark.sql.{ reflection => ScalaReflection } +import org.apache.spark.sql.{reflection => ScalaReflection} import org.apache.spark.sql.catalyst.expressions.Literal package object functions extends Udf with UnaryFunctions { @@ -22,11 +22,11 @@ package object functions extends Udf with UnaryFunctions { * apache/spark */ def litAggr[A, T]( - value: A - )(implicit - i0: TypedEncoder[A], - i1: Refute[IsValueClass[A]] - ): TypedAggregate[T, A] = + value: A + )(implicit + i0: TypedEncoder[A], + i1: Refute[IsValueClass[A]] + ): TypedAggregate[T, A] = new TypedAggregate[T, A](lit(value).expr) /** @@ -39,10 +39,10 @@ package object functions extends Udf with UnaryFunctions { * @tparam T the row type */ def lit[A, T]( - value: A - )(implicit - encoder: TypedEncoder[A] - ): TypedColumn[T, A] = { + value: A + )(implicit + encoder: TypedEncoder[A] + ): TypedColumn[T, A] = { if ( ScalaReflection.isNativeType( @@ -74,26 +74,25 @@ package object functions extends Udf with UnaryFunctions { * @tparam T the row type */ def litValue[ - A: IsValueClass, - T, - G <: ::[_, HNil], - H <: ::[_ <: FieldType[_ <: Symbol, _], HNil], - K <: Symbol, - V, - KS <: ::[_ <: Symbol, HNil], - VS <: HList - ](value: A - )(implicit - i0: LabelledGeneric.Aux[A, G], - i1: DropUnitValues.Aux[G, H], - i2: IsHCons.Aux[H, _ <: FieldType[K, V], HNil], - i3: Keys.Aux[H, KS], - i4: Values.Aux[H, VS], - i5: IsHCons.Aux[KS, K, HNil], - i6: IsHCons.Aux[VS, V, HNil], - i7: TypedEncoder[V], - i8: ClassTag[A] - ): TypedColumn[T, A] = { + A: IsValueClass, + T, + G <: ::[_, HNil], + H <: ::[_ <: FieldType[_ <: Symbol, _], HNil], + K <: Symbol, + V, + KS <: ::[_ <: Symbol, HNil], + VS <: HList + ](value: A)(implicit + i0: LabelledGeneric.Aux[A, G], + i1: DropUnitValues.Aux[G, H], + i2: IsHCons.Aux[H, _ <: FieldType[K, V], HNil], + i3: Keys.Aux[H, KS], + i4: Values.Aux[H, VS], + i5: IsHCons.Aux[KS, K, HNil], + i6: IsHCons.Aux[VS, V, HNil], + i7: TypedEncoder[V], + i8: ClassTag[A] + ): TypedColumn[T, A] = { val expr = { val field: H = i1(i0.to(value)) val v: V = i6.head(i4(field)) @@ -122,26 +121,25 @@ package object functions extends Udf with UnaryFunctions { * @tparam T the row type */ def litValue[ - A: IsValueClass, - T, - G <: ::[_, HNil], - H <: ::[_ <: FieldType[_ <: Symbol, _], HNil], - K <: Symbol, - V, - KS <: ::[_ <: Symbol, HNil], - VS <: HList - ](value: Option[A] - )(implicit - i0: LabelledGeneric.Aux[A, G], - i1: DropUnitValues.Aux[G, H], - i2: IsHCons.Aux[H, _ <: FieldType[K, V], HNil], - i3: Keys.Aux[H, KS], - i4: Values.Aux[H, VS], - i5: IsHCons.Aux[KS, K, HNil], - i6: IsHCons.Aux[VS, V, HNil], - i7: TypedEncoder[V], - i8: ClassTag[A] - ): TypedColumn[T, Option[A]] = { + A: IsValueClass, + T, + G <: ::[_, HNil], + H <: ::[_ <: FieldType[_ <: Symbol, _], HNil], + K <: Symbol, + V, + KS <: ::[_ <: Symbol, HNil], + VS <: HList + ](value: Option[A])(implicit + i0: LabelledGeneric.Aux[A, G], + i1: DropUnitValues.Aux[G, H], + i2: IsHCons.Aux[H, _ <: FieldType[K, V], HNil], + i3: Keys.Aux[H, KS], + i4: Values.Aux[H, VS], + i5: IsHCons.Aux[KS, K, HNil], + i6: IsHCons.Aux[VS, V, HNil], + i7: TypedEncoder[V], + i8: ClassTag[A] + ): TypedColumn[T, Option[A]] = { val expr = value match { case Some(some) => { val field: H = i1(i0.to(some)) diff --git a/dataset/src/main/scala/frameless/ops/AggregateTypes.scala b/dataset/src/main/scala/frameless/ops/AggregateTypes.scala index 403c25301..225e88647 100644 --- a/dataset/src/main/scala/frameless/ops/AggregateTypes.scala +++ b/dataset/src/main/scala/frameless/ops/AggregateTypes.scala @@ -17,12 +17,12 @@ trait AggregateTypes[V, U <: HList] { } object AggregateTypes { - type Aux[V, U <: HList, Out0 <: HList] = AggregateTypes[V, U] {type Out = Out0} + type Aux[V, U <: HList, Out0 <: HList] = AggregateTypes[V, U] { type Out = Out0 } implicit def deriveHNil[T]: AggregateTypes.Aux[T, HNil, HNil] = new AggregateTypes[T, HNil] { type Out = HNil } implicit def deriveCons1[T, H, TT <: HList, V <: HList]( implicit tail: AggregateTypes.Aux[T, TT, V] ): AggregateTypes.Aux[T, TypedAggregate[T, H] :: TT, H :: V] = - new AggregateTypes[T, TypedAggregate[T, H] :: TT] {type Out = H :: V} + new AggregateTypes[T, TypedAggregate[T, H] :: TT] { type Out = H :: V } } diff --git a/dataset/src/main/scala/frameless/ops/As.scala b/dataset/src/main/scala/frameless/ops/As.scala index 06b691028..04a49efc4 100644 --- a/dataset/src/main/scala/frameless/ops/As.scala +++ b/dataset/src/main/scala/frameless/ops/As.scala @@ -12,11 +12,10 @@ object As extends LowPriorityAs { implicit def equivIdentity[A] = new Equiv[A, A] - implicit def deriveAs[A, B] - (implicit - i0: TypedEncoder[B], - i1: Equiv[A, B] - ): As[A, B] = new As[A, B] + implicit def deriveAs[A, B](implicit + i0: TypedEncoder[B], + i1: Equiv[A, B] + ): As[A, B] = new As[A, B] } @@ -24,17 +23,15 @@ trait LowPriorityAs { import As.Equiv - implicit def equivHList[AH, AT <: HList, BH, BT <: HList] - (implicit - i0: Lazy[Equiv[AH, BH]], - i1: Equiv[AT, BT] - ): Equiv[AH :: AT, BH :: BT] = new Equiv[AH :: AT, BH :: BT] - - implicit def equivGeneric[A, B, R, S] - (implicit - i0: Generic.Aux[A, R], - i1: Generic.Aux[B, S], - i2: Lazy[Equiv[R, S]] - ): Equiv[A, B] = new Equiv[A, B] + implicit def equivHList[AH, AT <: HList, BH, BT <: HList](implicit + i0: Lazy[Equiv[AH, BH]], + i1: Equiv[AT, BT] + ): Equiv[AH :: AT, BH :: BT] = new Equiv[AH :: AT, BH :: BT] + + implicit def equivGeneric[A, B, R, S](implicit + i0: Generic.Aux[A, R], + i1: Generic.Aux[B, S], + i2: Lazy[Equiv[R, S]] + ): Equiv[A, B] = new Equiv[A, B] } diff --git a/dataset/src/main/scala/frameless/ops/ColumnTypes.scala b/dataset/src/main/scala/frameless/ops/ColumnTypes.scala index e5ae6aea2..4411500da 100644 --- a/dataset/src/main/scala/frameless/ops/ColumnTypes.scala +++ b/dataset/src/main/scala/frameless/ops/ColumnTypes.scala @@ -17,12 +17,12 @@ trait ColumnTypes[T, U <: HList] { } object ColumnTypes { - type Aux[T, U <: HList, Out0 <: HList] = ColumnTypes[T, U] {type Out = Out0} + type Aux[T, U <: HList, Out0 <: HList] = ColumnTypes[T, U] { type Out = Out0 } implicit def deriveHNil[T]: ColumnTypes.Aux[T, HNil, HNil] = new ColumnTypes[T, HNil] { type Out = HNil } implicit def deriveCons[T, H, TT <: HList, V <: HList]( implicit tail: ColumnTypes.Aux[T, TT, V] ): ColumnTypes.Aux[T, TypedColumn[T, H] :: TT, H :: V] = - new ColumnTypes[T, TypedColumn[T, H] :: TT] {type Out = H :: V} + new ColumnTypes[T, TypedColumn[T, H] :: TT] { type Out = H :: V } } diff --git a/dataset/src/main/scala/frameless/ops/GroupByOps.scala b/dataset/src/main/scala/frameless/ops/GroupByOps.scala index e6f51a407..fff67dca0 100644 --- a/dataset/src/main/scala/frameless/ops/GroupByOps.scala +++ b/dataset/src/main/scala/frameless/ops/GroupByOps.scala @@ -3,29 +3,14 @@ package ops import org.apache.spark.sql.catalyst.analysis.UnresolvedAlias import org.apache.spark.sql.catalyst.plans.logical.Project -import org.apache.spark.sql.{ - Column, - Dataset, - FramelessInternals, - RelationalGroupedDataset -} +import org.apache.spark.sql.{Column, Dataset, FramelessInternals, RelationalGroupedDataset} import shapeless._ -import shapeless.ops.hlist.{ - Length, - Mapped, - Prepend, - ToList, - ToTraversable, - Tupler -} +import shapeless.ops.hlist.{Length, Mapped, Prepend, ToList, ToTraversable, Tupler} class GroupedByManyOps[T, TK <: HList, K <: HList, KT]( - self: TypedDataset[T], - groupedBy: TK - )(implicit - i0: ColumnTypes.Aux[T, TK, K], - i1: ToTraversable.Aux[TK, List, UntypedExpression[T]], - i3: Tupler.Aux[K, KT]) + self: TypedDataset[T], + groupedBy: TK +)(implicit i0: ColumnTypes.Aux[T, TK, K], i1: ToTraversable.Aux[TK, List, UntypedExpression[T]], i3: Tupler.Aux[K, KT]) extends AggregatingOps[T, TK, K, KT]( self, groupedBy, @@ -35,24 +20,25 @@ class GroupedByManyOps[T, TK <: HList, K <: HList, KT]( object agg extends ProductArgs { def applyProduct[TC <: HList, C <: HList, Out0 <: HList, Out1]( - columns: TC - )(implicit - i3: AggregateTypes.Aux[T, TC, C], - i4: Prepend.Aux[K, C, Out0], - i5: Tupler.Aux[Out0, Out1], - i6: TypedEncoder[Out1], - i7: ToTraversable.Aux[TC, List, UntypedExpression[T]] - ): TypedDataset[Out1] = { + columns: TC + )(implicit + i3: AggregateTypes.Aux[T, TC, C], + i4: Prepend.Aux[K, C, Out0], + i5: Tupler.Aux[Out0, Out1], + i6: TypedEncoder[Out1], + i7: ToTraversable.Aux[TC, List, UntypedExpression[T]] + ): TypedDataset[Out1] = { aggregate[TC, Out1](columns) } } } class GroupedBy1Ops[K1, V]( - self: TypedDataset[V], - g1: TypedColumn[V, K1]) { + self: TypedDataset[V], + g1: TypedColumn[V, K1] +) { private def underlying = new GroupedByManyOps(self, g1 :: HNil) - private implicit def eg1 = g1.uencoder + implicit private def eg1 = g1.uencoder def agg[U1](c1: TypedAggregate[V, U1]): TypedDataset[(K1, U1)] = { implicit val e1 = c1.uencoder @@ -60,41 +46,41 @@ class GroupedBy1Ops[K1, V]( } def agg[U1, U2]( - c1: TypedAggregate[V, U1], - c2: TypedAggregate[V, U2] - ): TypedDataset[(K1, U1, U2)] = { + c1: TypedAggregate[V, U1], + c2: TypedAggregate[V, U2] + ): TypedDataset[(K1, U1, U2)] = { implicit val e1 = c1.uencoder; implicit val e2 = c2.uencoder underlying.agg(c1, c2) } def agg[U1, U2, U3]( - c1: TypedAggregate[V, U1], - c2: TypedAggregate[V, U2], - c3: TypedAggregate[V, U3] - ): TypedDataset[(K1, U1, U2, U3)] = { + c1: TypedAggregate[V, U1], + c2: TypedAggregate[V, U2], + c3: TypedAggregate[V, U3] + ): TypedDataset[(K1, U1, U2, U3)] = { implicit val e1 = c1.uencoder; implicit val e2 = c2.uencoder; implicit val e3 = c3.uencoder underlying.agg(c1, c2, c3) } def agg[U1, U2, U3, U4]( - c1: TypedAggregate[V, U1], - c2: TypedAggregate[V, U2], - c3: TypedAggregate[V, U3], - c4: TypedAggregate[V, U4] - ): TypedDataset[(K1, U1, U2, U3, U4)] = { + c1: TypedAggregate[V, U1], + c2: TypedAggregate[V, U2], + c3: TypedAggregate[V, U3], + c4: TypedAggregate[V, U4] + ): TypedDataset[(K1, U1, U2, U3, U4)] = { implicit val e1 = c1.uencoder; implicit val e2 = c2.uencoder; implicit val e3 = c3.uencoder; implicit val e4 = c4.uencoder underlying.agg(c1, c2, c3, c4) } def agg[U1, U2, U3, U4, U5]( - c1: TypedAggregate[V, U1], - c2: TypedAggregate[V, U2], - c3: TypedAggregate[V, U3], - c4: TypedAggregate[V, U4], - c5: TypedAggregate[V, U5] - ): TypedDataset[(K1, U1, U2, U3, U4, U5)] = { + c1: TypedAggregate[V, U1], + c2: TypedAggregate[V, U2], + c3: TypedAggregate[V, U3], + c4: TypedAggregate[V, U4], + c5: TypedAggregate[V, U5] + ): TypedDataset[(K1, U1, U2, U3, U4, U5)] = { implicit val e1 = c1.uencoder; implicit val e2 = c2.uencoder; implicit val e3 = c3.uencoder; implicit val e4 = c4.uencoder; implicit val e5 = c5.uencoder @@ -108,31 +94,32 @@ class GroupedBy1Ops[K1, V]( object deserialized { def mapGroups[U: TypedEncoder]( - f: (K1, Iterator[V]) => U - ): TypedDataset[U] = { + f: (K1, Iterator[V]) => U + ): TypedDataset[U] = { underlying.deserialized.mapGroups(AggregatingOps.tuple1(f)) } def flatMapGroups[U: TypedEncoder]( - f: (K1, Iterator[V]) => TraversableOnce[U] - ): TypedDataset[U] = { + f: (K1, Iterator[V]) => TraversableOnce[U] + ): TypedDataset[U] = { underlying.deserialized.flatMapGroups(AggregatingOps.tuple1(f)) } } def pivot[P: CatalystPivotable]( - pivotColumn: TypedColumn[V, P] - ): PivotNotValues[V, TypedColumn[V, K1] :: HNil, P] = + pivotColumn: TypedColumn[V, P] + ): PivotNotValues[V, TypedColumn[V, K1] :: HNil, P] = PivotNotValues(self, g1 :: HNil, pivotColumn) } class GroupedBy2Ops[K1, K2, V]( - self: TypedDataset[V], - g1: TypedColumn[V, K1], - g2: TypedColumn[V, K2]) { + self: TypedDataset[V], + g1: TypedColumn[V, K1], + g2: TypedColumn[V, K2] +) { private def underlying = new GroupedByManyOps(self, g1 :: g2 :: HNil) - private implicit def eg1 = g1.uencoder - private implicit def eg2 = g2.uencoder + implicit private def eg1 = g1.uencoder + implicit private def eg2 = g2.uencoder def agg[U1](c1: TypedAggregate[V, U1]): TypedDataset[(K1, K2, U1)] = { implicit val e1 = c1.uencoder @@ -140,41 +127,41 @@ class GroupedBy2Ops[K1, K2, V]( } def agg[U1, U2]( - c1: TypedAggregate[V, U1], - c2: TypedAggregate[V, U2] - ): TypedDataset[(K1, K2, U1, U2)] = { + c1: TypedAggregate[V, U1], + c2: TypedAggregate[V, U2] + ): TypedDataset[(K1, K2, U1, U2)] = { implicit val e1 = c1.uencoder; implicit val e2 = c2.uencoder underlying.agg(c1, c2) } def agg[U1, U2, U3]( - c1: TypedAggregate[V, U1], - c2: TypedAggregate[V, U2], - c3: TypedAggregate[V, U3] - ): TypedDataset[(K1, K2, U1, U2, U3)] = { + c1: TypedAggregate[V, U1], + c2: TypedAggregate[V, U2], + c3: TypedAggregate[V, U3] + ): TypedDataset[(K1, K2, U1, U2, U3)] = { implicit val e1 = c1.uencoder; implicit val e2 = c2.uencoder; implicit val e3 = c3.uencoder underlying.agg(c1, c2, c3) } def agg[U1, U2, U3, U4]( - c1: TypedAggregate[V, U1], - c2: TypedAggregate[V, U2], - c3: TypedAggregate[V, U3], - c4: TypedAggregate[V, U4] - ): TypedDataset[(K1, K2, U1, U2, U3, U4)] = { + c1: TypedAggregate[V, U1], + c2: TypedAggregate[V, U2], + c3: TypedAggregate[V, U3], + c4: TypedAggregate[V, U4] + ): TypedDataset[(K1, K2, U1, U2, U3, U4)] = { implicit val e1 = c1.uencoder; implicit val e2 = c2.uencoder; implicit val e3 = c3.uencoder; implicit val e4 = c4.uencoder underlying.agg(c1, c2, c3, c4) } def agg[U1, U2, U3, U4, U5]( - c1: TypedAggregate[V, U1], - c2: TypedAggregate[V, U2], - c3: TypedAggregate[V, U3], - c4: TypedAggregate[V, U4], - c5: TypedAggregate[V, U5] - ): TypedDataset[(K1, K2, U1, U2, U3, U4, U5)] = { + c1: TypedAggregate[V, U1], + c2: TypedAggregate[V, U2], + c3: TypedAggregate[V, U3], + c4: TypedAggregate[V, U4], + c5: TypedAggregate[V, U5] + ): TypedDataset[(K1, K2, U1, U2, U3, U4, U5)] = { implicit val e1 = c1.uencoder; implicit val e2 = c2.uencoder; implicit val e3 = c3.uencoder; implicit val e4 = c4.uencoder; implicit val e5 = c5.uencoder @@ -188,39 +175,36 @@ class GroupedBy2Ops[K1, K2, V]( object deserialized { def mapGroups[U: TypedEncoder]( - f: ((K1, K2), Iterator[V]) => U - ): TypedDataset[U] = { + f: ((K1, K2), Iterator[V]) => U + ): TypedDataset[U] = { underlying.deserialized.mapGroups(f) } def flatMapGroups[U: TypedEncoder]( - f: ((K1, K2), Iterator[V]) => TraversableOnce[U] - ): TypedDataset[U] = { + f: ((K1, K2), Iterator[V]) => TraversableOnce[U] + ): TypedDataset[U] = { underlying.deserialized.flatMapGroups(f) } } def pivot[P: CatalystPivotable]( - pivotColumn: TypedColumn[V, P] - ): PivotNotValues[V, TypedColumn[V, K1] :: TypedColumn[V, K2] :: HNil, P] = + pivotColumn: TypedColumn[V, P] + ): PivotNotValues[V, TypedColumn[V, K1] :: TypedColumn[V, K2] :: HNil, P] = PivotNotValues(self, g1 :: g2 :: HNil, pivotColumn) } -private[ops] abstract class AggregatingOps[T, TK <: HList, K <: HList, KT]( - self: TypedDataset[T], - groupedBy: TK, - groupingFunc: (Dataset[T], Seq[Column]) => RelationalGroupedDataset - )(implicit - i0: ColumnTypes.Aux[T, TK, K], - i1: ToTraversable.Aux[TK, List, UntypedExpression[T]], - i2: Tupler.Aux[K, KT]) { +abstract private[ops] class AggregatingOps[T, TK <: HList, K <: HList, KT]( + self: TypedDataset[T], + groupedBy: TK, + groupingFunc: (Dataset[T], Seq[Column]) => RelationalGroupedDataset +)(implicit i0: ColumnTypes.Aux[T, TK, K], i1: ToTraversable.Aux[TK, List, UntypedExpression[T]], i2: Tupler.Aux[K, KT]) { def aggregate[TC <: HList, Out1]( - columns: TC - )(implicit - i7: TypedEncoder[Out1], - i8: ToTraversable.Aux[TC, List, UntypedExpression[T]] - ): TypedDataset[Out1] = { + columns: TC + )(implicit + i7: TypedEncoder[Out1], + i8: ToTraversable.Aux[TC, List, UntypedExpression[T]] + ): TypedDataset[Out1] = { def expr(c: UntypedExpression[T]): Column = FramelessInternals.column(c.expr) @@ -244,19 +228,19 @@ private[ops] abstract class AggregatingOps[T, TK <: HList, K <: HList, KT]( object deserialized { def mapGroups[U: TypedEncoder]( - f: (KT, Iterator[T]) => U - )(implicit - e: TypedEncoder[KT] - ): TypedDataset[U] = { + f: (KT, Iterator[T]) => U + )(implicit + e: TypedEncoder[KT] + ): TypedDataset[U] = { val func = (key: KT, it: Iterator[T]) => Iterator(f(key, it)) flatMapGroups(func) } def flatMapGroups[U: TypedEncoder]( - f: (KT, Iterator[T]) => TraversableOnce[U] - )(implicit - e: TypedEncoder[KT] - ): TypedDataset[U] = { + f: (KT, Iterator[T]) => TraversableOnce[U] + )(implicit + e: TypedEncoder[KT] + ): TypedDataset[U] = { implicit val tendcoder = self.encoder val cols = groupedBy.toList[UntypedExpression[T]] @@ -296,8 +280,8 @@ private[ops] abstract class AggregatingOps[T, TK <: HList, K <: HList, KT]( } def pivot[P: CatalystPivotable]( - pivotColumn: TypedColumn[T, P] - ): PivotNotValues[T, TK, P] = + pivotColumn: TypedColumn[T, P] + ): PivotNotValues[T, TK, P] = PivotNotValues(self, groupedBy, pivotColumn) } @@ -305,8 +289,8 @@ private[ops] object AggregatingOps { /** Utility function to help Spark with serialization of closures */ def tuple1[K1, V, U]( - f: (K1, Iterator[V]) => U - ): (Tuple1[K1], Iterator[V]) => U = { (x: Tuple1[K1], it: Iterator[V]) => + f: (K1, Iterator[V]) => U + ): (Tuple1[K1], Iterator[V]) => U = { (x: Tuple1[K1], it: Iterator[V]) => f(x._1, it) } } @@ -315,37 +299,37 @@ private[ops] object AggregatingOps { * Represents a typed Pivot operation. */ final case class Pivot[T, GroupedColumns <: HList, PivotType, Values <: HList]( - ds: TypedDataset[T], - groupedBy: GroupedColumns, - pivotedBy: TypedColumn[T, PivotType], - values: Values) { + ds: TypedDataset[T], + groupedBy: GroupedColumns, + pivotedBy: TypedColumn[T, PivotType], + values: Values +) { object agg extends ProductArgs { def applyProduct[ - AggrColumns <: HList, - AggrColumnTypes <: HList, - GroupedColumnTypes <: HList, - NumValues <: Nat, - TypesForPivotedValues <: HList, - TypesForPivotedValuesOpt <: HList, - OutAsHList <: HList, - Out - ](aggrColumns: AggrColumns - )(implicit - i0: AggregateTypes.Aux[T, AggrColumns, AggrColumnTypes], - i1: ColumnTypes.Aux[T, GroupedColumns, GroupedColumnTypes], - i2: Length.Aux[Values, NumValues], - i3: Repeat.Aux[AggrColumnTypes, NumValues, TypesForPivotedValues], - i4: Mapped.Aux[TypesForPivotedValues, Option, TypesForPivotedValuesOpt], - i5: Prepend.Aux[ - GroupedColumnTypes, - TypesForPivotedValuesOpt, - OutAsHList - ], - i6: Tupler.Aux[OutAsHList, Out], - i7: TypedEncoder[Out] - ): TypedDataset[Out] = { + AggrColumns <: HList, + AggrColumnTypes <: HList, + GroupedColumnTypes <: HList, + NumValues <: Nat, + TypesForPivotedValues <: HList, + TypesForPivotedValuesOpt <: HList, + OutAsHList <: HList, + Out + ](aggrColumns: AggrColumns)(implicit + i0: AggregateTypes.Aux[T, AggrColumns, AggrColumnTypes], + i1: ColumnTypes.Aux[T, GroupedColumns, GroupedColumnTypes], + i2: Length.Aux[Values, NumValues], + i3: Repeat.Aux[AggrColumnTypes, NumValues, TypesForPivotedValues], + i4: Mapped.Aux[TypesForPivotedValues, Option, TypesForPivotedValuesOpt], + i5: Prepend.Aux[ + GroupedColumnTypes, + TypesForPivotedValuesOpt, + OutAsHList + ], + i6: Tupler.Aux[OutAsHList, Out], + i7: TypedEncoder[Out] + ): TypedDataset[Out] = { def mapAny[X](h: HList)(f: Any => X): List[X] = h match { case HNil => Nil @@ -369,18 +353,18 @@ final case class Pivot[T, GroupedColumns <: HList, PivotType, Values <: HList]( } final case class PivotNotValues[T, GroupedColumns <: HList, PivotType]( - ds: TypedDataset[T], - groupedBy: GroupedColumns, - pivotedBy: TypedColumn[T, PivotType]) - extends ProductArgs { + ds: TypedDataset[T], + groupedBy: GroupedColumns, + pivotedBy: TypedColumn[T, PivotType] +) extends ProductArgs { def onProduct[Values <: HList]( - values: Values - )(implicit - validValues: ToList[ - Values, - PivotType - ] // validValues: FilterNot.Aux[Values, PivotType, HNil] // did not work - ): Pivot[T, GroupedColumns, PivotType, Values] = + values: Values + )(implicit + validValues: ToList[ + Values, + PivotType + ] // validValues: FilterNot.Aux[Values, PivotType, HNil] // did not work + ): Pivot[T, GroupedColumns, PivotType, Values] = Pivot(ds, groupedBy, pivotedBy, values) } diff --git a/dataset/src/main/scala/frameless/ops/RelationalGroupsOps.scala b/dataset/src/main/scala/frameless/ops/RelationalGroupsOps.scala index 569407762..b8f683000 100644 --- a/dataset/src/main/scala/frameless/ops/RelationalGroupsOps.scala +++ b/dataset/src/main/scala/frameless/ops/RelationalGroupsOps.scala @@ -12,15 +12,18 @@ import shapeless.{::, HList, HNil, ProductArgs} * @tparam K individual columns' types as HList * @tparam KT individual columns' types as Tuple */ -private[ops] abstract class RelationalGroupsOps[T, TK <: HList, K <: HList, KT] - (self: TypedDataset[T], groupedBy: TK, groupingFunc: (Dataset[T], Seq[Column]) => RelationalGroupedDataset) - (implicit - i0: ColumnTypes.Aux[T, TK, K], - i1: ToTraversable.Aux[TK, List, UntypedExpression[T]], - i2: Tupler.Aux[K, KT] - ) extends AggregatingOps(self, groupedBy, groupingFunc){ +abstract private[ops] class RelationalGroupsOps[T, TK <: HList, K <: HList, KT]( + self: TypedDataset[T], + groupedBy: TK, + groupingFunc: (Dataset[T], Seq[Column]) => RelationalGroupedDataset +)(implicit + i0: ColumnTypes.Aux[T, TK, K], + i1: ToTraversable.Aux[TK, List, UntypedExpression[T]], + i2: Tupler.Aux[K, KT] +) extends AggregatingOps(self, groupedBy, groupingFunc) { object agg extends ProductArgs { + /** * @tparam TC resulting columns after aggregation function * @tparam C individual columns' types as HList @@ -28,9 +31,7 @@ private[ops] abstract class RelationalGroupsOps[T, TK <: HList, K <: HList, KT] * @tparam Out0 OptK columns appended to C * @tparam Out1 output type */ - def applyProduct[TC <: HList, C <: HList, OptK <: HList, Out0 <: HList, Out1] - (columns: TC) - (implicit + def applyProduct[TC <: HList, C <: HList, OptK <: HList, Out0 <: HList, Out1](columns: TC)(implicit i3: AggregateTypes.Aux[T, TC, C], // shares individual columns' types after agg function as HList i4: Mapped.Aux[K, Option, OptK], // maps all original columns' types to Option i5: Prepend.Aux[OptK, C, Out0], // concatenates Option columns with those resulting from applying agg function @@ -43,9 +44,9 @@ private[ops] abstract class RelationalGroupsOps[T, TK <: HList, K <: HList, KT] } } -private[ops] abstract class RelationalGroups1Ops[K1, V](self: TypedDataset[V], g1: TypedColumn[V, K1]) { +abstract private[ops] class RelationalGroups1Ops[K1, V](self: TypedDataset[V], g1: TypedColumn[V, K1]) { protected def underlying: RelationalGroupsOps[V, ::[TypedColumn[V, K1], HNil], ::[K1, HNil], Tuple1[K1]] - private implicit def eg1 = g1.uencoder + implicit private def eg1 = g1.uencoder def agg[U1](c1: TypedAggregate[V, U1]): TypedDataset[(Option[K1], U1)] = { implicit val e1 = c1.uencoder @@ -62,13 +63,25 @@ private[ops] abstract class RelationalGroups1Ops[K1, V](self: TypedDataset[V], g underlying.agg(c1, c2, c3) } - def agg[U1, U2, U3, U4](c1: TypedAggregate[V, U1], c2: TypedAggregate[V, U2], c3: TypedAggregate[V, U3], c4: TypedAggregate[V, U4]): TypedDataset[(Option[K1], U1, U2, U3, U4)] = { + def agg[U1, U2, U3, U4]( + c1: TypedAggregate[V, U1], + c2: TypedAggregate[V, U2], + c3: TypedAggregate[V, U3], + c4: TypedAggregate[V, U4] + ): TypedDataset[(Option[K1], U1, U2, U3, U4)] = { implicit val e1 = c1.uencoder; implicit val e2 = c2.uencoder; implicit val e3 = c3.uencoder; implicit val e4 = c4.uencoder underlying.agg(c1, c2, c3, c4) } - def agg[U1, U2, U3, U4, U5](c1: TypedAggregate[V, U1], c2: TypedAggregate[V, U2], c3: TypedAggregate[V, U3], c4: TypedAggregate[V, U4], c5: TypedAggregate[V, U5]): TypedDataset[(Option[K1], U1, U2, U3, U4, U5)] = { - implicit val e1 = c1.uencoder; implicit val e2 = c2.uencoder; implicit val e3 = c3.uencoder; implicit val e4 = c4.uencoder; implicit val e5 = c5.uencoder + def agg[U1, U2, U3, U4, U5]( + c1: TypedAggregate[V, U1], + c2: TypedAggregate[V, U2], + c3: TypedAggregate[V, U3], + c4: TypedAggregate[V, U4], + c5: TypedAggregate[V, U5] + ): TypedDataset[(Option[K1], U1, U2, U3, U4, U5)] = { + implicit val e1 = c1.uencoder; implicit val e2 = c2.uencoder; implicit val e3 = c3.uencoder; implicit val e4 = c4.uencoder; + implicit val e5 = c5.uencoder underlying.agg(c1, c2, c3, c4, c5) } @@ -85,14 +98,14 @@ private[ops] abstract class RelationalGroups1Ops[K1, V](self: TypedDataset[V], g } } - def pivot[P: CatalystPivotable](pivotColumn: TypedColumn[V, P]): PivotNotValues[V, TypedColumn[V,K1] :: HNil, P] = + def pivot[P: CatalystPivotable](pivotColumn: TypedColumn[V, P]): PivotNotValues[V, TypedColumn[V, K1] :: HNil, P] = PivotNotValues(self, g1 :: HNil, pivotColumn) } -private[ops] abstract class RelationalGroups2Ops[K1, K2, V](self: TypedDataset[V], g1: TypedColumn[V, K1], g2: TypedColumn[V, K2]) { +abstract private[ops] class RelationalGroups2Ops[K1, K2, V](self: TypedDataset[V], g1: TypedColumn[V, K1], g2: TypedColumn[V, K2]) { protected def underlying: RelationalGroupsOps[V, ::[TypedColumn[V, K1], ::[TypedColumn[V, K2], HNil]], ::[K1, ::[K2, HNil]], (K1, K2)] - private implicit def eg1 = g1.uencoder - private implicit def eg2 = g2.uencoder + implicit private def eg1 = g1.uencoder + implicit private def eg2 = g2.uencoder def agg[U1](c1: TypedAggregate[V, U1]): TypedDataset[(Option[K1], Option[K2], U1)] = { implicit val e1 = c1.uencoder @@ -104,18 +117,34 @@ private[ops] abstract class RelationalGroups2Ops[K1, K2, V](self: TypedDataset[V underlying.agg(c1, c2) } - def agg[U1, U2, U3](c1: TypedAggregate[V, U1], c2: TypedAggregate[V, U2], c3: TypedAggregate[V, U3]): TypedDataset[(Option[K1], Option[K2], U1, U2, U3)] = { + def agg[U1, U2, U3]( + c1: TypedAggregate[V, U1], + c2: TypedAggregate[V, U2], + c3: TypedAggregate[V, U3] + ): TypedDataset[(Option[K1], Option[K2], U1, U2, U3)] = { implicit val e1 = c1.uencoder; implicit val e2 = c2.uencoder; implicit val e3 = c3.uencoder underlying.agg(c1, c2, c3) } - def agg[U1, U2, U3, U4](c1: TypedAggregate[V, U1], c2: TypedAggregate[V, U2], c3: TypedAggregate[V, U3], c4: TypedAggregate[V, U4]): TypedDataset[(Option[K1], Option[K2], U1, U2, U3, U4)] = { + def agg[U1, U2, U3, U4]( + c1: TypedAggregate[V, U1], + c2: TypedAggregate[V, U2], + c3: TypedAggregate[V, U3], + c4: TypedAggregate[V, U4] + ): TypedDataset[(Option[K1], Option[K2], U1, U2, U3, U4)] = { implicit val e1 = c1.uencoder; implicit val e2 = c2.uencoder; implicit val e3 = c3.uencoder; implicit val e4 = c4.uencoder - underlying.agg(c1 , c2 , c3 , c4) + underlying.agg(c1, c2, c3, c4) } - def agg[U1, U2, U3, U4, U5](c1: TypedAggregate[V, U1], c2: TypedAggregate[V, U2], c3: TypedAggregate[V, U3], c4: TypedAggregate[V, U4], c5: TypedAggregate[V, U5]): TypedDataset[(Option[K1], Option[K2], U1, U2, U3, U4, U5)] = { - implicit val e1 = c1.uencoder; implicit val e2 = c2.uencoder; implicit val e3 = c3.uencoder; implicit val e4 = c4.uencoder; implicit val e5 = c5.uencoder + def agg[U1, U2, U3, U4, U5]( + c1: TypedAggregate[V, U1], + c2: TypedAggregate[V, U2], + c3: TypedAggregate[V, U3], + c4: TypedAggregate[V, U4], + c5: TypedAggregate[V, U5] + ): TypedDataset[(Option[K1], Option[K2], U1, U2, U3, U4, U5)] = { + implicit val e1 = c1.uencoder; implicit val e2 = c2.uencoder; implicit val e3 = c3.uencoder; implicit val e4 = c4.uencoder; + implicit val e5 = c5.uencoder underlying.agg(c1, c2, c3, c4, c5) } @@ -132,17 +161,15 @@ private[ops] abstract class RelationalGroups2Ops[K1, K2, V](self: TypedDataset[V } } - def pivot[P: CatalystPivotable](pivotColumn: TypedColumn[V, P]): - PivotNotValues[V, TypedColumn[V,K1] :: TypedColumn[V, K2] :: HNil, P] = + def pivot[P: CatalystPivotable](pivotColumn: TypedColumn[V, P]): PivotNotValues[V, TypedColumn[V, K1] :: TypedColumn[V, K2] :: HNil, P] = PivotNotValues(self, g1 :: g2 :: HNil, pivotColumn) } -class RollupManyOps[T, TK <: HList, K <: HList, KT](self: TypedDataset[T], groupedBy: TK) - (implicit - i0: ColumnTypes.Aux[T, TK, K], - i1: ToTraversable.Aux[TK, List, UntypedExpression[T]], - i2: Tupler.Aux[K, KT] - ) extends RelationalGroupsOps[T, TK, K, KT](self, groupedBy, (dataset, cols) => dataset.rollup(cols: _*)) +class RollupManyOps[T, TK <: HList, K <: HList, KT](self: TypedDataset[T], groupedBy: TK)(implicit + i0: ColumnTypes.Aux[T, TK, K], + i1: ToTraversable.Aux[TK, List, UntypedExpression[T]], + i2: Tupler.Aux[K, KT] +) extends RelationalGroupsOps[T, TK, K, KT](self, groupedBy, (dataset, cols) => dataset.rollup(cols: _*)) class Rollup1Ops[K1, V](self: TypedDataset[V], g1: TypedColumn[V, K1]) extends RelationalGroups1Ops(self, g1) { override protected def underlying = new RollupManyOps(self, g1 :: HNil) @@ -152,12 +179,11 @@ class Rollup2Ops[K1, K2, V](self: TypedDataset[V], g1: TypedColumn[V, K1], g2: T override protected def underlying = new RollupManyOps(self, g1 :: g2 :: HNil) } -class CubeManyOps[T, TK <: HList, K <: HList, KT](self: TypedDataset[T], groupedBy: TK) - (implicit - i0: ColumnTypes.Aux[T, TK, K], - i1: ToTraversable.Aux[TK, List, UntypedExpression[T]], - i2: Tupler.Aux[K, KT] - ) extends RelationalGroupsOps[T, TK, K, KT](self, groupedBy, (dataset, cols) => dataset.cube(cols: _*)) +class CubeManyOps[T, TK <: HList, K <: HList, KT](self: TypedDataset[T], groupedBy: TK)(implicit + i0: ColumnTypes.Aux[T, TK, K], + i1: ToTraversable.Aux[TK, List, UntypedExpression[T]], + i2: Tupler.Aux[K, KT] +) extends RelationalGroupsOps[T, TK, K, KT](self, groupedBy, (dataset, cols) => dataset.cube(cols: _*)) class Cube1Ops[K1, V](self: TypedDataset[V], g1: TypedColumn[V, K1]) extends RelationalGroups1Ops(self, g1) { override protected def underlying = new CubeManyOps(self, g1 :: HNil) diff --git a/dataset/src/main/scala/frameless/ops/Repeat.scala b/dataset/src/main/scala/frameless/ops/Repeat.scala index bde855500..5caad2958 100644 --- a/dataset/src/main/scala/frameless/ops/Repeat.scala +++ b/dataset/src/main/scala/frameless/ops/Repeat.scala @@ -23,11 +23,10 @@ object Repeat { type Out = L } - implicit def succ[L <: HList, Prev <: Nat, PrevOut <: HList, P <: HList] - (implicit - i0: Aux[L, Prev, PrevOut], - i1: Prepend.Aux[L, PrevOut, P] - ): Aux[L, Succ[Prev], P] = new Repeat[L, Succ[Prev]] { - type Out = P - } + implicit def succ[L <: HList, Prev <: Nat, PrevOut <: HList, P <: HList](implicit + i0: Aux[L, Prev, PrevOut], + i1: Prepend.Aux[L, PrevOut, P] + ): Aux[L, Succ[Prev], P] = new Repeat[L, Succ[Prev]] { + type Out = P + } } diff --git a/dataset/src/main/scala/frameless/ops/SmartProject.scala b/dataset/src/main/scala/frameless/ops/SmartProject.scala index ec3628efd..d86f01aaa 100644 --- a/dataset/src/main/scala/frameless/ops/SmartProject.scala +++ b/dataset/src/main/scala/frameless/ops/SmartProject.scala @@ -11,6 +11,7 @@ import scala.annotation.implicitNotFound case class SmartProject[T: TypedEncoder, U: TypedEncoder](apply: TypedDataset[T] => TypedDataset[U]) object SmartProject { + /** * Proofs that there is a type-safe projection from a type T to another type U. It requires that: * (a) both T and U are Products for which a LabelledGeneric can be derived (e.g., case classes), @@ -32,17 +33,16 @@ object SmartProject { * @tparam UKeys the keys of U as an HList * @return a projection if it exists */ - implicit def deriveProduct[T: TypedEncoder, U: TypedEncoder, TRec <: HList, TProj <: HList, URec <: HList, UVals <: HList, UKeys <: HList] - (implicit - i0: LabelledGeneric.Aux[T, TRec], - i1: LabelledGeneric.Aux[U, URec], - i2: Keys.Aux[URec, UKeys], - i3: SelectAll.Aux[TRec, UKeys, TProj], - i4: Values.Aux[URec, UVals], - i5: UVals =:= TProj, - i6: ToTraversable.Aux[UKeys, Seq, Symbol] - ): SmartProject[T,U] = SmartProject[T, U]({ from => - val names = implicitly[Keys.Aux[URec, UKeys]].apply().to[Seq].map(_.name).map(from.dataset.col) - TypedDataset.create(from.dataset.toDF().select(names: _*).as[U](TypedExpressionEncoder[U])) - }) + implicit def deriveProduct[T: TypedEncoder, U: TypedEncoder, TRec <: HList, TProj <: HList, URec <: HList, UVals <: HList, UKeys <: HList](implicit + i0: LabelledGeneric.Aux[T, TRec], + i1: LabelledGeneric.Aux[U, URec], + i2: Keys.Aux[URec, UKeys], + i3: SelectAll.Aux[TRec, UKeys, TProj], + i4: Values.Aux[URec, UVals], + i5: UVals =:= TProj, + i6: ToTraversable.Aux[UKeys, Seq, Symbol] + ): SmartProject[T, U] = SmartProject[T, U] { from => + val names = implicitly[Keys.Aux[URec, UKeys]].apply().to[Seq].map(_.name).map(from.dataset.col) + TypedDataset.create(from.dataset.toDF().select(names: _*).as[U](TypedExpressionEncoder[U])) + } } diff --git a/dataset/src/main/scala/org/apache/spark/sql/reflection/package.scala b/dataset/src/main/scala/org/apache/spark/sql/reflection/package.scala index 07090a8db..8bc651c65 100644 --- a/dataset/src/main/scala/org/apache/spark/sql/reflection/package.scala +++ b/dataset/src/main/scala/org/apache/spark/sql/reflection/package.scala @@ -1,10 +1,6 @@ package org.apache.spark.sql -import org.apache.spark.sql.catalyst.ScalaReflection.{ - cleanUpReflectionObjects, - getClassFromType, - localTypeOf -} +import org.apache.spark.sql.catalyst.ScalaReflection.{cleanUpReflectionObjects, getClassFromType, localTypeOf} import org.apache.spark.sql.types.{ BinaryType, BooleanType, @@ -70,15 +66,15 @@ package object reflection { private def dataTypeFor(tpe: `Type`): DataType = cleanUpReflectionObjects { tpe.dealias match { - case t if isSubtype(t, definitions.NullTpe) => NullType - case t if isSubtype(t, definitions.IntTpe) => IntegerType - case t if isSubtype(t, definitions.LongTpe) => LongType - case t if isSubtype(t, definitions.DoubleTpe) => DoubleType - case t if isSubtype(t, definitions.FloatTpe) => FloatType - case t if isSubtype(t, definitions.ShortTpe) => ShortType - case t if isSubtype(t, definitions.ByteTpe) => ByteType - case t if isSubtype(t, definitions.BooleanTpe) => BooleanType - case t if isSubtype(t, localTypeOf[Array[Byte]]) => BinaryType + case t if isSubtype(t, definitions.NullTpe) => NullType + case t if isSubtype(t, definitions.IntTpe) => IntegerType + case t if isSubtype(t, definitions.LongTpe) => LongType + case t if isSubtype(t, definitions.DoubleTpe) => DoubleType + case t if isSubtype(t, definitions.FloatTpe) => FloatType + case t if isSubtype(t, definitions.ShortTpe) => ShortType + case t if isSubtype(t, definitions.ByteTpe) => ByteType + case t if isSubtype(t, definitions.BooleanTpe) => BooleanType + case t if isSubtype(t, localTypeOf[Array[Byte]]) => BinaryType case t if isSubtype(t, localTypeOf[CalendarInterval]) => CalendarIntervalType case t if isSubtype(t, localTypeOf[Decimal]) => DecimalType.SYSTEM_DEFAULT diff --git a/dataset/src/main/spark-3.4+/org/apache/spark/sql/FramelessInternals.scala b/dataset/src/main/spark-3.4+/org/apache/spark/sql/FramelessInternals.scala index 3022bf23c..79172360d 100644 --- a/dataset/src/main/spark-3.4+/org/apache/spark/sql/FramelessInternals.scala +++ b/dataset/src/main/spark-3.4+/org/apache/spark/sql/FramelessInternals.scala @@ -2,12 +2,12 @@ package org.apache.spark.sql import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.codegen._ -import org.apache.spark.sql.catalyst.expressions.{ Alias, CreateStruct } -import org.apache.spark.sql.catalyst.expressions.{ Expression, NamedExpression } +import org.apache.spark.sql.catalyst.expressions.{Alias, CreateStruct} +import org.apache.spark.sql.catalyst.expressions.{Expression, NamedExpression} import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan -import org.apache.spark.sql.catalyst.plans.logical.{ LogicalPlan, Project } +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project} import org.apache.spark.sql.execution.QueryExecution import org.apache.spark.sql.types._ import org.apache.spark.sql.types.ObjectType @@ -16,9 +16,8 @@ import scala.reflect.ClassTag object FramelessInternals { def objectTypeFor[A]( - implicit - classTag: ClassTag[A] - ): ObjectType = ObjectType(classTag.runtimeClass) + implicit classTag: ClassTag[A] + ): ObjectType = ObjectType(classTag.runtimeClass) def resolveExpr(ds: Dataset[_], colNames: Seq[String]): NamedExpression = { ds.toDF() @@ -49,11 +48,11 @@ object FramelessInternals { ds.sqlContext.getConf(key, default) def joinPlan( - ds: Dataset[_], - plan: LogicalPlan, - leftPlan: LogicalPlan, - rightPlan: LogicalPlan - ): LogicalPlan = { + ds: Dataset[_], + plan: LogicalPlan, + leftPlan: LogicalPlan, + rightPlan: LogicalPlan + ): LogicalPlan = { val joined = executePlan(ds, plan) val leftOutput = joined.analyzed.output.take(leftPlan.output.length) val rightOutput = joined.analyzed.output.takeRight(rightPlan.output.length) @@ -68,10 +67,10 @@ object FramelessInternals { } def mkDataset[T]( - source: Dataset[_], - plan: LogicalPlan, - encoder: Encoder[T] - ): Dataset[T] = + source: Dataset[_], + plan: LogicalPlan, + encoder: Encoder[T] + ): Dataset[T] = new Dataset(source.sparkSession, plan, encoder) def ofRows(sparkSession: SparkSession, logicalPlan: LogicalPlan): DataFrame = @@ -79,10 +78,10 @@ object FramelessInternals { /** Builds an `ExpressionEncoder` from frameless' own serializer/deserializer expressions. */ def expressionEncoder[T]( - objSerializer: Expression, - objDeserializer: Expression, - classTag: ClassTag[T] - ): ExpressionEncoder[T] = + objSerializer: Expression, + objDeserializer: Expression, + classTag: ClassTag[T] + ): ExpressionEncoder[T] = new ExpressionEncoder[T](objSerializer, objDeserializer, classTag) // because org.apache.spark.sql.types.UserDefinedType is private[spark] @@ -104,8 +103,8 @@ object FramelessInternals { tagged.genCode(ctx) protected def withNewChildrenInternal( - newChildren: IndexedSeq[Expression] - ): Expression = copy(newChildren.head) + newChildren: IndexedSeq[Expression] + ): Expression = copy(newChildren.head) } /** Expression to tag columns from the right hand side of join expression. */ @@ -121,7 +120,7 @@ object FramelessInternals { tagged.genCode(ctx) protected def withNewChildrenInternal( - newChildren: IndexedSeq[Expression] - ): Expression = copy(newChildren.head) + newChildren: IndexedSeq[Expression] + ): Expression = copy(newChildren.head) } } diff --git a/dataset/src/main/spark-4/frameless/MapGroups.scala b/dataset/src/main/spark-4/frameless/MapGroups.scala index 25411420b..6a6751bd3 100644 --- a/dataset/src/main/spark-4/frameless/MapGroups.scala +++ b/dataset/src/main/spark-4/frameless/MapGroups.scala @@ -2,19 +2,16 @@ package frameless import org.apache.spark.sql.Encoder import org.apache.spark.sql.catalyst.expressions.Attribute -import org.apache.spark.sql.catalyst.plans.logical.{ - LogicalPlan, - MapGroups => SMapGroups -} +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, MapGroups => SMapGroups} object MapGroups { def apply[K: Encoder, T: Encoder, U: Encoder]( - func: (K, Iterator[T]) => TraversableOnce[U], - groupingAttributes: Seq[Attribute], - dataAttributes: Seq[Attribute], - child: LogicalPlan - ): LogicalPlan = + func: (K, Iterator[T]) => TraversableOnce[U], + groupingAttributes: Seq[Attribute], + dataAttributes: Seq[Attribute], + child: LogicalPlan + ): LogicalPlan = SMapGroups( func, groupingAttributes, diff --git a/dataset/src/main/spark-4/org/apache/spark/sql/FramelessInternals.scala b/dataset/src/main/spark-4/org/apache/spark/sql/FramelessInternals.scala index 6daf2b4e4..850aac9e1 100644 --- a/dataset/src/main/spark-4/org/apache/spark/sql/FramelessInternals.scala +++ b/dataset/src/main/spark-4/org/apache/spark/sql/FramelessInternals.scala @@ -2,19 +2,14 @@ package org.apache.spark.sql import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.codegen._ -import org.apache.spark.sql.catalyst.expressions.{ Alias, CreateStruct } -import org.apache.spark.sql.catalyst.expressions.{ Expression, NamedExpression } +import org.apache.spark.sql.catalyst.expressions.{Alias, CreateStruct} +import org.apache.spark.sql.catalyst.expressions.{Expression, NamedExpression} import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.JavaBeanEncoder import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan -import org.apache.spark.sql.catalyst.plans.logical.{ LogicalPlan, Project } -import org.apache.spark.sql.classic.{ - Dataset => ClassicDataset, - SparkSession => ClassicSparkSession, - ExpressionUtils, - ColumnNodeToExpressionConverter -} +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project} +import org.apache.spark.sql.classic.{ColumnNodeToExpressionConverter, Dataset => ClassicDataset, ExpressionUtils, SparkSession => ClassicSparkSession} import org.apache.spark.sql.execution.QueryExecution import org.apache.spark.sql.types._ import org.apache.spark.sql.types.ObjectType @@ -32,9 +27,8 @@ import scala.reflect.ClassTag object FramelessInternals { def objectTypeFor[A]( - implicit - classTag: ClassTag[A] - ): ObjectType = ObjectType(classTag.runtimeClass) + implicit classTag: ClassTag[A] + ): ObjectType = ObjectType(classTag.runtimeClass) private def classic(ds: Dataset[_]): ClassicDataset[_] = ds.asInstanceOf[ClassicDataset[_]] @@ -76,11 +70,11 @@ object FramelessInternals { classic(ds).sparkSession.conf.get(key, default) def joinPlan( - ds: Dataset[_], - plan: LogicalPlan, - leftPlan: LogicalPlan, - rightPlan: LogicalPlan - ): LogicalPlan = { + ds: Dataset[_], + plan: LogicalPlan, + leftPlan: LogicalPlan, + rightPlan: LogicalPlan + ): LogicalPlan = { val joined = executePlan(ds, plan) val leftOutput = joined.analyzed.output.take(leftPlan.output.length) val rightOutput = joined.analyzed.output.takeRight(rightPlan.output.length) @@ -95,10 +89,10 @@ object FramelessInternals { } def mkDataset[T]( - source: Dataset[_], - plan: LogicalPlan, - encoder: Encoder[T] - ): Dataset[T] = + source: Dataset[_], + plan: LogicalPlan, + encoder: Encoder[T] + ): Dataset[T] = new ClassicDataset[T](classic(source).sparkSession, plan, encoder) def ofRows(sparkSession: SparkSession, logicalPlan: LogicalPlan): DataFrame = @@ -116,10 +110,10 @@ object FramelessInternals { * carrying the right `ClassTag` is therefore a correct, metadata-only stand-in. */ def expressionEncoder[T]( - objSerializer: Expression, - objDeserializer: Expression, - classTag: ClassTag[T] - ): ExpressionEncoder[T] = + objSerializer: Expression, + objDeserializer: Expression, + classTag: ClassTag[T] + ): ExpressionEncoder[T] = new ExpressionEncoder[T]( JavaBeanEncoder(classTag, Nil), objSerializer, @@ -145,8 +139,8 @@ object FramelessInternals { tagged.genCode(ctx) protected def withNewChildrenInternal( - newChildren: IndexedSeq[Expression] - ): Expression = copy(newChildren.head) + newChildren: IndexedSeq[Expression] + ): Expression = copy(newChildren.head) } /** Expression to tag columns from the right hand side of join expression. */ @@ -162,7 +156,7 @@ object FramelessInternals { tagged.genCode(ctx) protected def withNewChildrenInternal( - newChildren: IndexedSeq[Expression] - ): Expression = copy(newChildren.head) + newChildren: IndexedSeq[Expression] + ): Expression = copy(newChildren.head) } } diff --git a/dataset/src/test/scala/frameless/AsTests.scala b/dataset/src/test/scala/frameless/AsTests.scala index c1091f9ca..08620dcd3 100644 --- a/dataset/src/test/scala/frameless/AsTests.scala +++ b/dataset/src/test/scala/frameless/AsTests.scala @@ -12,7 +12,7 @@ class AsTests extends TypedDatasetSuite { ): Prop = { val dataset = TypedDataset.create(data) - val dataset2 = dataset.as[X2[A,B]]().collect().run().toVector + val dataset2 = dataset.as[X2[A, B]]().collect().run().toVector val data2 = data.map { case (a, b) => X2(a, b) } dataset2 ?= data2 @@ -37,7 +37,7 @@ class AsTests extends TypedDatasetSuite { } val dataset = TypedDataset.create(data2) - val dataset2 = dataset.as[X2[X2[A,B], C]]().collect().run().toVector + val dataset2 = dataset.as[X2[X2[A, B], C]]().collect().run().toVector val data3 = data2.map { case ((a, b), c) => X2(X2(a, b), c) } dataset2 ?= data3 diff --git a/dataset/src/test/scala/frameless/BitwiseTests.scala b/dataset/src/test/scala/frameless/BitwiseTests.scala index f58c906a2..bb32b3309 100644 --- a/dataset/src/test/scala/frameless/BitwiseTests.scala +++ b/dataset/src/test/scala/frameless/BitwiseTests.scala @@ -12,7 +12,7 @@ class BitwiseTests extends TypedDatasetSuite with Matchers { * for Numeric it is easy to test since scala comes with Numeric typeclass but there seems * to be no equivalent typeclass for bitwise ops for Byte Short Int and Long types supported in Catalyst */ - trait CatalystBitwise4Tests[A]{ + trait CatalystBitwise4Tests[A] { def bitwiseAnd(a1: A, a2: A): A def bitwiseOr(a1: A, a2: A): A def bitwiseXor(a1: A, a2: A): A @@ -22,23 +22,23 @@ class BitwiseTests extends TypedDatasetSuite with Matchers { } object CatalystBitwise4Tests { - implicit val framelessbyteBitwise : CatalystBitwise4Tests[Byte] = new CatalystBitwise4Tests[Byte] { - def bitwiseOr(a1: Byte, a2: Byte) : Byte = (a1 | a2).toByte + implicit val framelessbyteBitwise: CatalystBitwise4Tests[Byte] = new CatalystBitwise4Tests[Byte] { + def bitwiseOr(a1: Byte, a2: Byte): Byte = (a1 | a2).toByte def bitwiseAnd(a1: Byte, a2: Byte): Byte = (a1 & a2).toByte def bitwiseXor(a1: Byte, a2: Byte): Byte = (a1 ^ a2).toByte } - implicit val framelessshortBitwise : CatalystBitwise4Tests[Short] = new CatalystBitwise4Tests[Short] { - def bitwiseOr(a1: Short, a2: Short) : Short = (a1 | a2).toShort + implicit val framelessshortBitwise: CatalystBitwise4Tests[Short] = new CatalystBitwise4Tests[Short] { + def bitwiseOr(a1: Short, a2: Short): Short = (a1 | a2).toShort def bitwiseAnd(a1: Short, a2: Short): Short = (a1 & a2).toShort def bitwiseXor(a1: Short, a2: Short): Short = (a1 ^ a2).toShort } - implicit val framelessintBitwise : CatalystBitwise4Tests[Int] = new CatalystBitwise4Tests[Int] { - def bitwiseOr(a1: Int, a2: Int) : Int = a1 | a2 + implicit val framelessintBitwise: CatalystBitwise4Tests[Int] = new CatalystBitwise4Tests[Int] { + def bitwiseOr(a1: Int, a2: Int): Int = a1 | a2 def bitwiseAnd(a1: Int, a2: Int): Int = a1 & a2 def bitwiseXor(a1: Int, a2: Int): Int = a1 ^ a2 } - implicit val framelesslongBitwise : CatalystBitwise4Tests[Long] = new CatalystBitwise4Tests[Long] { - def bitwiseOr(a1: Long, a2: Long) : Long = a1 | a2 + implicit val framelesslongBitwise: CatalystBitwise4Tests[Long] = new CatalystBitwise4Tests[Long] { + def bitwiseOr(a1: Long, a2: Long): Long = a1 | a2 def bitwiseAnd(a1: Long, a2: Long): Long = a1 & a2 def bitwiseXor(a1: Long, a2: Long): Long = a1 ^ a2 } diff --git a/dataset/src/test/scala/frameless/CastTests.scala b/dataset/src/test/scala/frameless/CastTests.scala index 5f79f8fa6..cefcce533 100644 --- a/dataset/src/test/scala/frameless/CastTests.scala +++ b/dataset/src/test/scala/frameless/CastTests.scala @@ -6,8 +6,7 @@ import org.scalacheck.Prop._ class CastTests extends TypedDatasetSuite { def prop[A: TypedEncoder, B: TypedEncoder](f: A => B)(a: A)( - implicit - cast: CatalystCast[A, B] + implicit cast: CatalystCast[A, B] ): Prop = { val df = TypedDataset.create(X1(a) :: Nil) val got = df.select(df.col('a).cast[B]).collect().run() @@ -102,7 +101,7 @@ class CastTests extends TypedDatasetSuite { // booleanToNumeric check(prop[Boolean, BigDecimal](x => if (x) BigDecimal(1) else BigDecimal(0)) _) check(prop[Boolean, Byte](x => if (x) 1 else 0) _) - check(prop[Boolean, Double](x => if (x) 1.0f else 0.0f) _) + check(prop[Boolean, Double](x => if (x) 1.0F else 0.0F) _) check(prop[Boolean, Int](x => if (x) 1 else 0) _) check(prop[Boolean, Long](x => if (x) 1L else 0L) _) check(prop[Boolean, Short](x => if (x) 1 else 0) _) diff --git a/dataset/src/test/scala/frameless/CollectTests.scala b/dataset/src/test/scala/frameless/CollectTests.scala index 0ff1e6956..7b4269665 100644 --- a/dataset/src/test/scala/frameless/CollectTests.scala +++ b/dataset/src/test/scala/frameless/CollectTests.scala @@ -1,6 +1,6 @@ package frameless -import frameless.CollectTests.{ prop, propArray } +import frameless.CollectTests.{prop, propArray} import org.apache.spark.sql.SparkSession import org.scalacheck.Prop import org.scalacheck.Prop._ @@ -85,10 +85,10 @@ class CollectTests extends TypedDatasetSuite { object CollectTests { import frameless.syntax._ - def prop[A: TypedEncoder : ClassTag](data: Vector[A])(implicit c: SparkSession): Prop = + def prop[A: TypedEncoder: ClassTag](data: Vector[A])(implicit c: SparkSession): Prop = TypedDataset.create(data).collect().run().toVector ?= data - def propArray[A: TypedEncoder : ClassTag](data: Vector[X1[Array[A]]])(implicit c: SparkSession): Prop = + def propArray[A: TypedEncoder: ClassTag](data: Vector[X1[Array[A]]])(implicit c: SparkSession): Prop = Prop(TypedDataset.create(data).collect().run().toVector.zip(data).forall { case (X1(l), X1(r)) => l.sameElements(r) }) diff --git a/dataset/src/test/scala/frameless/ColumnTests.scala b/dataset/src/test/scala/frameless/ColumnTests.scala index c56cf499c..baee93718 100644 --- a/dataset/src/test/scala/frameless/ColumnTests.scala +++ b/dataset/src/test/scala/frameless/ColumnTests.scala @@ -3,15 +3,15 @@ package frameless import java.util.Date import java.math.BigInteger -import java.time.{ Instant, LocalDate, Period, Duration } +import java.time.{Duration, Instant, LocalDate, Period} import java.time.temporal.ChronoUnit -import java.sql.{ Date => SqlDate, Timestamp } +import java.sql.{Date => SqlDate, Timestamp} import scala.math.Ordering.Implicits._ import scala.util.Try -import org.scalacheck.{ Arbitrary, Gen, Prop }, Arbitrary.arbitrary, Prop._ +import org.scalacheck.{Arbitrary, Gen, Prop}, Arbitrary.arbitrary, Prop._ import org.scalatest.matchers.should.Matchers @@ -29,7 +29,7 @@ final class ColumnTests extends TypedDatasetSuite with Matchers { OrderingImplicits.arbInstant.arbitrary.map(Date from _) } - private implicit object OrderingImplicits { + implicit private object OrderingImplicits { implicit val sqlDateOrdering: Ordering[SQLDate] = Ordering.by(_.days) implicit val sqlTimestmapOrdering: Ordering[SQLTimestamp] = @@ -104,10 +104,10 @@ final class ColumnTests extends TypedDatasetSuite with Matchers { test("between") { import OrderingImplicits._ def prop[A: TypedEncoder: CatalystOrdered: Ordering]( - a: A, - b: A, - c: A - ): Prop = { + a: A, + b: A, + c: A + ): Prop = { val dataset = TypedDataset.create(X3(a, b, c) :: Nil) val A = dataset.col('a) val B = dataset.col('b) @@ -537,7 +537,7 @@ final class ColumnTests extends TypedDatasetSuite with Matchers { } test("reference Value class so can join on") { - import RecordEncoderTests.{ Name, Person } + import RecordEncoderTests.{Name, Person} val bar = new Name("bar") diff --git a/dataset/src/test/scala/frameless/ColumnViaLambdaTests.scala b/dataset/src/test/scala/frameless/ColumnViaLambdaTests.scala index 0a9c532a6..f9b77ad9a 100644 --- a/dataset/src/test/scala/frameless/ColumnViaLambdaTests.scala +++ b/dataset/src/test/scala/frameless/ColumnViaLambdaTests.scala @@ -13,7 +13,8 @@ final class ColumnViaLambdaTests extends TypedDatasetSuite with Matchers { def ds = { TypedDataset.create(Seq( MyClass1(1, "2", MyClass2(3L, MyClass3(7.0D)), Some(MyClass4(true))), - MyClass1(4, "5", MyClass2(6L, MyClass3(8.0D)), None))) + MyClass1(4, "5", MyClass2(6L, MyClass3(8.0D)), None) + )) } test("col(_.a)") { diff --git a/dataset/src/test/scala/frameless/CreateTests.scala b/dataset/src/test/scala/frameless/CreateTests.scala index 4d9b5547d..183828988 100644 --- a/dataset/src/test/scala/frameless/CreateTests.scala +++ b/dataset/src/test/scala/frameless/CreateTests.scala @@ -13,10 +13,11 @@ class CreateTests extends TypedDatasetSuite with Matchers { test("creation using X4 derived DataFrames") { def prop[ - A: TypedEncoder, - B: TypedEncoder, - C: TypedEncoder, - D: TypedEncoder](data: Vector[X4[A, B, C, D]]): Prop = { + A: TypedEncoder, + B: TypedEncoder, + C: TypedEncoder, + D: TypedEncoder + ](data: Vector[X4[A, B, C, D]]): Prop = { val ds = TypedDataset.create(data) TypedDataset.createUnsafe[X4[A, B, C, D]](ds.toDF()).collect().run() ?= data } @@ -29,13 +30,13 @@ class CreateTests extends TypedDatasetSuite with Matchers { Option[Vector[Food]], Vector[Vector[X2[Vector[(Person, X1[Char])], Country]]], X3[Food, Country, String], - Vector[(Food, Country)]] _)) + Vector[(Food, Country)] + ] _)) } test("array fields") { def prop[T: Arbitrary: TypedEncoder: ClassTag] = forAll { - (d1: Array[T], d2: Array[Option[T]], d3: Array[X1[T]], d4: Array[X1[Option[T]]], - d5: X1[Array[T]]) => + (d1: Array[T], d2: Array[Option[T]], d3: Array[X1[T]], d4: Array[X1[Option[T]]], d5: X1[Array[T]]) => TypedDataset.create(Seq(d1)).collect().run().head.sameElements(d1) && TypedDataset.create(Seq(d2)).collect().run().head.sameElements(d2) && TypedDataset.create(Seq(d3)).collect().run().head.sameElements(d3) && @@ -55,13 +56,12 @@ class CreateTests extends TypedDatasetSuite with Matchers { test("vector fields") { def prop[T: Arbitrary: TypedEncoder] = forAll { - (d1: Vector[T], d2: Vector[Option[T]], d3: Vector[X1[T]], d4: Vector[X1[Option[T]]], - d5: X1[Vector[T]]) => - (TypedDataset.create(Seq(d1)).collect().run().head ?= d1) && - (TypedDataset.create(Seq(d2)).collect().run().head ?= d2) && - (TypedDataset.create(Seq(d3)).collect().run().head ?= d3) && - (TypedDataset.create(Seq(d4)).collect().run().head ?= d4) && - (TypedDataset.create(Seq(d5)).collect().run().head ?= d5) + (d1: Vector[T], d2: Vector[Option[T]], d3: Vector[X1[T]], d4: Vector[X1[Option[T]]], d5: X1[Vector[T]]) => + (TypedDataset.create(Seq(d1)).collect().run().head ?= d1) && + (TypedDataset.create(Seq(d2)).collect().run().head ?= d2) && + (TypedDataset.create(Seq(d3)).collect().run().head ?= d3) && + (TypedDataset.create(Seq(d4)).collect().run().head ?= d4) && + (TypedDataset.create(Seq(d5)).collect().run().head ?= d5) } check(prop[Boolean]) @@ -77,9 +77,8 @@ class CreateTests extends TypedDatasetSuite with Matchers { test("list fields") { def prop[T: Arbitrary: TypedEncoder] = forAll { - (d1: List[T], d2: List[Option[T]], d3: List[X1[T]], d4: List[X1[Option[T]]], - d5: X1[List[T]]) => - (TypedDataset.create(Seq(d1)).collect().run().head ?= d1) && + (d1: List[T], d2: List[Option[T]], d3: List[X1[T]], d4: List[X1[Option[T]]], d5: X1[List[T]]) => + (TypedDataset.create(Seq(d1)).collect().run().head ?= d1) && (TypedDataset.create(Seq(d2)).collect().run().head ?= d2) && (TypedDataset.create(Seq(d3)).collect().run().head ?= d3) && (TypedDataset.create(Seq(d4)).collect().run().head ?= d4) && @@ -99,15 +98,13 @@ class CreateTests extends TypedDatasetSuite with Matchers { test("Map fields (scala.Predef.Map / scala.collection.immutable.Map)") { def prop[A: Arbitrary: NotCatalystNullable: TypedEncoder, B: Arbitrary: NotCatalystNullable: TypedEncoder] = forAll { - (d1: Map[A, B], d2: Map[B, A], d3: Map[A, Option[B]], - d4: Map[A, X1[B]], d5: Map[X1[A], B], d6: Map[X1[A], X1[B]]) => - - (TypedDataset.create(Seq(d1)).collect().run().head ?= d1) && - (TypedDataset.create(Seq(d2)).collect().run().head ?= d2) && - (TypedDataset.create(Seq(d3)).collect().run().head ?= d3) && - (TypedDataset.create(Seq(d4)).collect().run().head ?= d4) && - (TypedDataset.create(Seq(d5)).collect().run().head ?= d5) && - (TypedDataset.create(Seq(d6)).collect().run().head ?= d6) + (d1: Map[A, B], d2: Map[B, A], d3: Map[A, Option[B]], d4: Map[A, X1[B]], d5: Map[X1[A], B], d6: Map[X1[A], X1[B]]) => + (TypedDataset.create(Seq(d1)).collect().run().head ?= d1) && + (TypedDataset.create(Seq(d2)).collect().run().head ?= d2) && + (TypedDataset.create(Seq(d3)).collect().run().head ?= d3) && + (TypedDataset.create(Seq(d4)).collect().run().head ?= d4) && + (TypedDataset.create(Seq(d5)).collect().run().head ?= d5) && + (TypedDataset.create(Seq(d6)).collect().run().head ?= d6) } check(prop[String, String]) @@ -127,10 +124,10 @@ class CreateTests extends TypedDatasetSuite with Matchers { } test("not aligned columns should throw an exception") { - val v = Vector(X2(1,2)) + val v = Vector(X2(1, 2)) val df = TypedDataset.create(v).dataset.toDF() - a [IllegalStateException] should be thrownBy { + a[IllegalStateException] should be thrownBy { TypedDataset.createUnsafe[X1[Int]](df).show().run() } } @@ -139,13 +136,14 @@ class CreateTests extends TypedDatasetSuite with Matchers { // e.g. when loading data from partitioned dataset // the partition columns get appended to the end of the underlying relation def prop[A: Arbitrary: TypedEncoder, B: Arbitrary: TypedEncoder] = forAll { - (a1: A, b1: B) => { - val ds = TypedDataset.create( - Vector((b1, a1)) - ).dataset.toDF("b", "a").as[X2[A, B]](TypedExpressionEncoder[X2[A, B]]) - TypedDataset.create(ds).collect().run().head ?= X2(a1, b1) - - } + (a1: A, b1: B) => + { + val ds = TypedDataset.create( + Vector((b1, a1)) + ).dataset.toDF("b", "a").as[X2[A, B]](TypedExpressionEncoder[X2[A, B]]) + TypedDataset.create(ds).collect().run().head ?= X2(a1, b1) + + } } check(prop[X1[Double], X1[X1[SQLDate]]]) check(prop[String, Int]) diff --git a/dataset/src/test/scala/frameless/DropTupledTest.scala b/dataset/src/test/scala/frameless/DropTupledTest.scala index ff0158b91..d23b8a640 100644 --- a/dataset/src/test/scala/frameless/DropTupledTest.scala +++ b/dataset/src/test/scala/frameless/DropTupledTest.scala @@ -7,9 +7,9 @@ class DropTupledTest extends TypedDatasetSuite { test("drop five columns") { def prop[A: TypedEncoder](value: A): Prop = { val d5 = TypedDataset.create(X5(value, value, value, value, value) :: Nil) - val d4 = d5.dropTupled('a) //drops first column - val d3 = d4.dropTupled('_4) //drops last column - val d2 = d3.dropTupled('_2) //drops middle column + val d4 = d5.dropTupled('a) // drops first column + val d3 = d4.dropTupled('_4) // drops last column + val d2 = d3.dropTupled('_2) // drops middle column val d1 = d2.dropTupled('_2) Tuple1(value) ?= d1.collect().run().head diff --git a/dataset/src/test/scala/frameless/ExplodeTests.scala b/dataset/src/test/scala/frameless/ExplodeTests.scala index 3078ceb12..b4c420929 100644 --- a/dataset/src/test/scala/frameless/ExplodeTests.scala +++ b/dataset/src/test/scala/frameless/ExplodeTests.scala @@ -9,12 +9,15 @@ import scala.reflect.ClassTag class ExplodeTests extends TypedDatasetSuite { test("simple explode test") { - val ds = TypedDataset.create(Seq((1,Array(1,2)))) - ds.explode('_2): TypedDataset[(Int,Int)] + val ds = TypedDataset.create(Seq((1, Array(1, 2)))) + ds.explode('_2): TypedDataset[(Int, Int)] } test("explode on vectors/list/seq") { - def prop[F[X] <: Traversable[X] : CatalystExplodableCollection, A: TypedEncoder](xs: List[X1[F[A]]])(implicit arb: Arbitrary[F[A]], enc: TypedEncoder[F[A]]): Prop = { + def prop[F[X] <: Traversable[X]: CatalystExplodableCollection, A: TypedEncoder](xs: List[X1[F[A]]])(implicit + arb: Arbitrary[F[A]], + enc: TypedEncoder[F[A]] + ): Prop = { val tds = TypedDataset.create(xs) val framelessResults = tds.explode('a).collect().run().toVector @@ -79,7 +82,12 @@ class ExplodeTests extends TypedDatasetSuite { } test("explode on maps making sure no key / value naming collision happens") { - def prop[K: TypedEncoder: ClassTag, V: TypedEncoder: ClassTag, A: TypedEncoder: ClassTag, B: TypedEncoder: ClassTag](xs: List[X3KV[K, V, Map[A, B]]]): Prop = { + def prop[ + K: TypedEncoder: ClassTag, + V: TypedEncoder: ClassTag, + A: TypedEncoder: ClassTag, + B: TypedEncoder: ClassTag + ](xs: List[X3KV[K, V, Map[A, B]]]): Prop = { val tds = TypedDataset.create(xs) val framelessResults = tds.explodeMap('c).collect().run().toVector diff --git a/dataset/src/test/scala/frameless/FilterTests.scala b/dataset/src/test/scala/frameless/FilterTests.scala index 56d5d2ec5..6cd4c2e60 100644 --- a/dataset/src/test/scala/frameless/FilterTests.scala +++ b/dataset/src/test/scala/frameless/FilterTests.scala @@ -61,13 +61,13 @@ final class FilterTests extends TypedDatasetSuite with Matchers { } test("filter('a =!= 'b") { - def prop[A: TypedEncoder](elem: A, data: Vector[X2[A,A]]): Prop = { + def prop[A: TypedEncoder](elem: A, data: Vector[X2[A, A]]): Prop = { val dataset = TypedDataset.create(data) val cA = dataset.col('a) val cB = dataset.col('b) val dataset2 = dataset.filter(cA =!= cB).collect().run().toVector - val data2 = data.filter(x => x.a != x.b ) + val data2 = data.filter(x => x.a != x.b) (dataset2 ?= data2).&&(dataset.filter(cA =!= cA).count().run() ?= 0) } @@ -104,7 +104,7 @@ final class FilterTests extends TypedDatasetSuite with Matchers { } test("Option equality/inequality for columns") { - def prop[A <: Option[_] : TypedEncoder](a: A, b: A): Prop = { + def prop[A <: Option[_]: TypedEncoder](a: A, b: A): Prop = { val data = X2(a, b) :: X2(a, a) :: Nil val dataset = TypedDataset.create(data) val A = dataset.col('a) @@ -126,7 +126,7 @@ final class FilterTests extends TypedDatasetSuite with Matchers { } test("Option equality/inequality for lit") { - def prop[A <: Option[_] : TypedEncoder](a: A, b: A, cLit: A): Prop = { + def prop[A <: Option[_]: TypedEncoder](a: A, b: A, cLit: A): Prop = { val data = X2(a, b) :: X2(a, cLit) :: Nil val dataset = TypedDataset.create(data) val colA = dataset.col('a) @@ -162,13 +162,15 @@ final class FilterTests extends TypedDatasetSuite with Matchers { ds.filter(exists).collect().run() shouldEqual Seq(Option(0L) -> Option(1L)) ds.filter(forall).collect().run() shouldEqual Seq( - Option(0L) -> Option(1L), (None -> None)) + Option(0L) -> Option(1L), + None -> None + ) } test("filter with isin values") { - def prop[A: TypedEncoder](data: Vector[X1[A]], values: Vector[A])(implicit a : CatalystIsin[A]): Prop = { + def prop[A: TypedEncoder](data: Vector[X1[A]], values: Vector[A])(implicit a: CatalystIsin[A]): Prop = { val ds = TypedDataset.create(data) - val res = ds.filter(ds('a).isin(values:_*)).collect().run().toVector + val res = ds.filter(ds('a).isin(values: _*)).collect().run().toVector res ?= data.filter(d => values.contains(d.a)) } diff --git a/dataset/src/test/scala/frameless/FlattenTests.scala b/dataset/src/test/scala/frameless/FlattenTests.scala index a65e51b8f..a1c49b37e 100644 --- a/dataset/src/test/scala/frameless/FlattenTests.scala +++ b/dataset/src/test/scala/frameless/FlattenTests.scala @@ -4,11 +4,10 @@ import org.scalacheck.Prop import org.scalacheck.Prop.forAll import org.scalacheck.Prop._ - class FlattenTests extends TypedDatasetSuite { test("simple flatten test") { - val ds: TypedDataset[(Int,Option[Int])] = TypedDataset.create(Seq((1,Option(1)))) - ds.flattenOption('_2): TypedDataset[(Int,Int)] + val ds: TypedDataset[(Int, Option[Int])] = TypedDataset.create(Seq((1, Option(1)))) + ds.flattenOption('_2): TypedDataset[(Int, Int)] } test("different Optional types") { diff --git a/dataset/src/test/scala/frameless/GroupByTests.scala b/dataset/src/test/scala/frameless/GroupByTests.scala index 7178def30..e117c72be 100644 --- a/dataset/src/test/scala/frameless/GroupByTests.scala +++ b/dataset/src/test/scala/frameless/GroupByTests.scala @@ -7,9 +7,9 @@ import org.scalacheck.Prop._ class GroupByTests extends TypedDatasetSuite { test("groupByMany('a).agg(sum('b))") { def prop[ - A: TypedEncoder : Ordering, + A: TypedEncoder: Ordering, B: TypedEncoder, - Out: TypedEncoder : Numeric + Out: TypedEncoder: Numeric ](data: List[X2[A, B]])( implicit summable: CatalystSummable[B, Out], @@ -29,9 +29,8 @@ class GroupByTests extends TypedDatasetSuite { } test("agg(sum('a))") { - def prop[A: TypedEncoder : Numeric](data: List[X1[A]])( - implicit - summable: CatalystSummable[A, A] + def prop[A: TypedEncoder: Numeric](data: List[X1[A]])( + implicit summable: CatalystSummable[A, A] ): Prop = { val dataset = TypedDataset.create(data) val A = dataset.col[A]('a) @@ -47,8 +46,8 @@ class GroupByTests extends TypedDatasetSuite { test("agg(sum('a), sum('b))") { def prop[ - A: TypedEncoder : Numeric, - B: TypedEncoder : Numeric + A: TypedEncoder: Numeric, + B: TypedEncoder: Numeric ](data: List[X2[A, B]])( implicit as: CatalystSummable[A, A], @@ -70,9 +69,9 @@ class GroupByTests extends TypedDatasetSuite { test("agg(sum('a), sum('b), sum('c))") { def prop[ - A: TypedEncoder : Numeric, - B: TypedEncoder : Numeric, - C: TypedEncoder : Numeric + A: TypedEncoder: Numeric, + B: TypedEncoder: Numeric, + C: TypedEncoder: Numeric ](data: List[X3[A, B, C]])( implicit as: CatalystSummable[A, A], @@ -97,10 +96,10 @@ class GroupByTests extends TypedDatasetSuite { test("agg(sum('a), sum('b), min('c), max('d))") { def prop[ - A: TypedEncoder : Numeric, - B: TypedEncoder : Numeric, - C: TypedEncoder : Numeric, - D: TypedEncoder : Numeric + A: TypedEncoder: Numeric, + B: TypedEncoder: Numeric, + C: TypedEncoder: Numeric, + D: TypedEncoder: Numeric ](data: List[X4[A, B, C, D]])( implicit as: CatalystSummable[A, A], @@ -117,8 +116,8 @@ class GroupByTests extends TypedDatasetSuite { val datasetSum = dataset.agg(sum(A), sum(B), min(C), max(D)).collect().run().toVector val listSumA = data.map(_.a).sum val listSumB = data.map(_.b).sum - val listMinC = if(data.isEmpty) implicitly[Numeric[C]].fromInt(0) else data.map(_.c).min - val listMaxD = if(data.isEmpty) implicitly[Numeric[D]].fromInt(0) else data.map(_.d).max + val listMinC = if (data.isEmpty) implicitly[Numeric[C]].fromInt(0) else data.map(_.c).min + val listMaxD = if (data.isEmpty) implicitly[Numeric[D]].fromInt(0) else data.map(_.d).max datasetSum ?= Vector(if (data.isEmpty) null else (listSumA, listSumB, listMinC, listMaxD)) } @@ -130,9 +129,9 @@ class GroupByTests extends TypedDatasetSuite { test("groupBy('a).agg(sum('b))") { def prop[ - A: TypedEncoder : Ordering, + A: TypedEncoder: Ordering, B: TypedEncoder, - Out: TypedEncoder : Numeric + Out: TypedEncoder: Numeric ](data: List[X2[A, B]])( implicit summable: CatalystSummable[B, Out], @@ -153,8 +152,8 @@ class GroupByTests extends TypedDatasetSuite { test("groupBy('a).mapGroups('a, sum('b))") { def prop[ - A: TypedEncoder : Ordering, - B: TypedEncoder : Numeric + A: TypedEncoder: Ordering, + B: TypedEncoder: Numeric ](data: List[X2[A, B]]): Prop = { val dataset = TypedDataset.create(data) val A = dataset.col[A]('a) @@ -173,11 +172,11 @@ class GroupByTests extends TypedDatasetSuite { test("groupBy('a).agg(sum('b), sum('c)) to groupBy('a).agg(sum('a), sum('b), sum('a), sum('b), sum('a))") { def prop[ - A: TypedEncoder : Ordering, - B: TypedEncoder, - C: TypedEncoder, - OutB: TypedEncoder : Numeric, - OutC: TypedEncoder : Numeric + A: TypedEncoder: Ordering, + B: TypedEncoder, + C: TypedEncoder, + OutB: TypedEncoder: Numeric, + OutC: TypedEncoder: Numeric ](data: List[X3[A, B, C]])( implicit summableB: CatalystSummable[B, OutB], @@ -229,7 +228,13 @@ class GroupByTests extends TypedDatasetSuite { .collect().run.toVector.sortBy(_._1) val scalaSumBCBCB = data.groupBy(_.a).mapValues { xs => - (xs.map(_.b).map(widenb).sum, xs.map(_.c).map(widenc).sum, xs.map(_.b).map(widenb).sum, xs.map(_.c).map(widenc).sum, xs.map(_.b).map(widenb).sum) + ( + xs.map(_.b).map(widenb).sum, + xs.map(_.c).map(widenc).sum, + xs.map(_.b).map(widenb).sum, + xs.map(_.c).map(widenc).sum, + xs.map(_.b).map(widenb).sum + ) }.toVector.map { case (a, (b1, c1, b2, c2, b3)) => (a, b1, c1, b2, c2, b3) }.sortBy(_._1) @@ -245,10 +250,10 @@ class GroupByTests extends TypedDatasetSuite { test("groupBy('a, 'b).agg(sum('c)) to groupBy('a, 'b).agg(sum('c),sum('c),sum('c),sum('c),sum('c))") { def prop[ - A: TypedEncoder : Ordering, - B: TypedEncoder : Ordering, - C: TypedEncoder, - OutC: TypedEncoder: Numeric + A: TypedEncoder: Ordering, + B: TypedEncoder: Ordering, + C: TypedEncoder, + OutC: TypedEncoder: Numeric ](data: List[X3[A, B, C]])( implicit summableC: CatalystSummable[C, OutC], @@ -260,55 +265,55 @@ class GroupByTests extends TypedDatasetSuite { val C = dataset.col[C]('c) val framelessSumC = dataset - .groupBy(A,B) + .groupBy(A, B) .agg(sum(C)) - .collect().run.toVector.sortBy(x => (x._1,x._2)) + .collect().run.toVector.sortBy(x => (x._1, x._2)) - val scalaSumC = data.groupBy(x => (x.a,x.b)).mapValues { xs => + val scalaSumC = data.groupBy(x => (x.a, x.b)).mapValues { xs => xs.map(_.c).map(widenc).sum - }.toVector.map { case ((a, b), c) => (a, b, c) }.sortBy(x => (x._1,x._2)) + }.toVector.map { case ((a, b), c) => (a, b, c) }.sortBy(x => (x._1, x._2)) val framelessSumCC = dataset - .groupBy(A,B) + .groupBy(A, B) .agg(sum(C), sum(C)) - .collect().run.toVector.sortBy(x => (x._1,x._2)) + .collect().run.toVector.sortBy(x => (x._1, x._2)) - val scalaSumCC = data.groupBy(x => (x.a,x.b)).mapValues { xs => - val s = xs.map(_.c).map(widenc).sum; (s,s) - }.toVector.map { case ((a, b), (c1, c2)) => (a, b, c1, c2) }.sortBy(x => (x._1,x._2)) + val scalaSumCC = data.groupBy(x => (x.a, x.b)).mapValues { xs => + val s = xs.map(_.c).map(widenc).sum; (s, s) + }.toVector.map { case ((a, b), (c1, c2)) => (a, b, c1, c2) }.sortBy(x => (x._1, x._2)) val framelessSumCCC = dataset - .groupBy(A,B) + .groupBy(A, B) .agg(sum(C), sum(C), sum(C)) - .collect().run.toVector.sortBy(x => (x._1,x._2)) + .collect().run.toVector.sortBy(x => (x._1, x._2)) - val scalaSumCCC = data.groupBy(x => (x.a,x.b)).mapValues { xs => - val s = xs.map(_.c).map(widenc).sum; (s,s,s) - }.toVector.map { case ((a, b), (c1, c2, c3)) => (a, b, c1, c2, c3) }.sortBy(x => (x._1,x._2)) + val scalaSumCCC = data.groupBy(x => (x.a, x.b)).mapValues { xs => + val s = xs.map(_.c).map(widenc).sum; (s, s, s) + }.toVector.map { case ((a, b), (c1, c2, c3)) => (a, b, c1, c2, c3) }.sortBy(x => (x._1, x._2)) val framelessSumCCCC = dataset - .groupBy(A,B) + .groupBy(A, B) .agg(sum(C), sum(C), sum(C), sum(C)) - .collect().run.toVector.sortBy(x => (x._1,x._2)) + .collect().run.toVector.sortBy(x => (x._1, x._2)) - val scalaSumCCCC = data.groupBy(x => (x.a,x.b)).mapValues { xs => - val s = xs.map(_.c).map(widenc).sum; (s,s,s,s) - }.toVector.map { case ((a, b), (c1, c2, c3, c4)) => (a, b, c1, c2, c3, c4) }.sortBy(x => (x._1,x._2)) + val scalaSumCCCC = data.groupBy(x => (x.a, x.b)).mapValues { xs => + val s = xs.map(_.c).map(widenc).sum; (s, s, s, s) + }.toVector.map { case ((a, b), (c1, c2, c3, c4)) => (a, b, c1, c2, c3, c4) }.sortBy(x => (x._1, x._2)) val framelessSumCCCCC = dataset - .groupBy(A,B) + .groupBy(A, B) .agg(sum(C), sum(C), sum(C), sum(C), sum(C)) - .collect().run.toVector.sortBy(x => (x._1,x._2)) + .collect().run.toVector.sortBy(x => (x._1, x._2)) - val scalaSumCCCCC = data.groupBy(x => (x.a,x.b)).mapValues { xs => - val s = xs.map(_.c).map(widenc).sum; (s,s,s,s,s) - }.toVector.map { case ((a, b), (c1, c2, c3, c4, c5)) => (a, b, c1, c2, c3, c4, c5) }.sortBy(x => (x._1,x._2)) + val scalaSumCCCCC = data.groupBy(x => (x.a, x.b)).mapValues { xs => + val s = xs.map(_.c).map(widenc).sum; (s, s, s, s, s) + }.toVector.map { case ((a, b), (c1, c2, c3, c4, c5)) => (a, b, c1, c2, c3, c4, c5) }.sortBy(x => (x._1, x._2)) (framelessSumC ?= scalaSumC) && - (framelessSumCC ?= scalaSumCC) && - (framelessSumCCC ?= scalaSumCCC) && - (framelessSumCCCC ?= scalaSumCCCC) && - (framelessSumCCCCC ?= scalaSumCCCCC) + (framelessSumCC ?= scalaSumCC) && + (framelessSumCCC ?= scalaSumCCC) && + (framelessSumCCCC ?= scalaSumCCCC) && + (framelessSumCCCCC ?= scalaSumCCCCC) } check(forAll(prop[String, Long, BigDecimal, BigDecimal] _)) @@ -316,12 +321,12 @@ class GroupByTests extends TypedDatasetSuite { test("groupBy('a, 'b).agg(sum('c), sum('d))") { def prop[ - A: TypedEncoder : Ordering, - B: TypedEncoder : Ordering, + A: TypedEncoder: Ordering, + B: TypedEncoder: Ordering, C: TypedEncoder, D: TypedEncoder, - OutC: TypedEncoder : Numeric, - OutD: TypedEncoder : Numeric + OutC: TypedEncoder: Numeric, + OutD: TypedEncoder: Numeric ](data: List[X4[A, B, C, D]])( implicit summableC: CatalystSummable[C, OutC], @@ -354,9 +359,9 @@ class GroupByTests extends TypedDatasetSuite { test("groupBy('a, 'b).mapGroups('a, 'b, sum('c))") { def prop[ - A: TypedEncoder : Ordering, - B: TypedEncoder : Ordering, - C: TypedEncoder : Numeric + A: TypedEncoder: Ordering, + B: TypedEncoder: Ordering, + C: TypedEncoder: Numeric ](data: List[X3[A, B, C]]): Prop = { val dataset = TypedDataset.create(data) val A = dataset.col[A]('a) @@ -403,8 +408,8 @@ class GroupByTests extends TypedDatasetSuite { test("groupBy('a).flatMapGroups(('a, toVector(('a, 'b))") { def prop[ - A: TypedEncoder : Ordering, - B: TypedEncoder : Ordering + A: TypedEncoder: Ordering, + B: TypedEncoder: Ordering ](data: Vector[X2[A, B]]): Prop = { val dataset = TypedDataset.create(data) val A = dataset.col[A]('a) @@ -430,9 +435,9 @@ class GroupByTests extends TypedDatasetSuite { test("groupBy('a, 'b).flatMapGroups((('a,'b) toVector((('a,'b), 'c))") { def prop[ - A: TypedEncoder : Ordering, - B: TypedEncoder : Ordering, - C: TypedEncoder : Ordering + A: TypedEncoder: Ordering, + B: TypedEncoder: Ordering, + C: TypedEncoder: Ordering ](data: Vector[X3[A, B, C]]): Prop = { val dataset = TypedDataset.create(data) val cA = dataset.col[A]('a) @@ -445,7 +450,7 @@ class GroupByTests extends TypedDatasetSuite { .sorted val dataGrouped = data - .groupBy(t => (t.a,t.b)).toSeq + .groupBy(t => (t.a, t.b)).toSeq .flatMap { case (a, xs) => xs.map(x => (a, x)) } .sorted diff --git a/dataset/src/test/scala/frameless/InjectionTests.scala b/dataset/src/test/scala/frameless/InjectionTests.scala index c17a52bd7..9ae136409 100644 --- a/dataset/src/test/scala/frameless/InjectionTests.scala +++ b/dataset/src/test/scala/frameless/InjectionTests.scala @@ -30,8 +30,8 @@ object Food { Injection( { case Burger => 0 - case Pasta => 1 - case Rice => 2 + case Pasta => 1 + case Rice => 2 }, { case 0 => Burger diff --git a/dataset/src/test/scala/frameless/JobTests.scala b/dataset/src/test/scala/frameless/JobTests.scala index 9650a020f..a1d37a8db 100644 --- a/dataset/src/test/scala/frameless/JobTests.scala +++ b/dataset/src/test/scala/frameless/JobTests.scala @@ -6,7 +6,6 @@ import org.scalatestplus.scalacheck.ScalaCheckDrivenPropertyChecks import org.scalatest.freespec.AnyFreeSpec import org.scalatest.matchers.should.Matchers - class JobTests extends AnyFreeSpec with BeforeAndAfterAll with SparkTesting with ScalaCheckDrivenPropertyChecks with Matchers { "map" - { @@ -45,10 +44,10 @@ class JobTests extends AnyFreeSpec with BeforeAndAfterAll with SparkTesting with "properties" - { "read back" in forAll { - (k:String, v: String) => + (k: String, v: String) => val scopedKey = "frameless.tests." + k - Job(1).withLocalProperty(scopedKey,v).run() + Job(1).withLocalProperty(scopedKey, v).run() sc.getLocalProperty(scopedKey) shouldBe v } } -} \ No newline at end of file +} diff --git a/dataset/src/test/scala/frameless/JoinTests.scala b/dataset/src/test/scala/frameless/JoinTests.scala index b34911c4f..f6a75f3da 100644 --- a/dataset/src/test/scala/frameless/JoinTests.scala +++ b/dataset/src/test/scala/frameless/JoinTests.scala @@ -7,9 +7,9 @@ import org.scalacheck.Prop._ class JoinTests extends TypedDatasetSuite { test("ab.joinCross(ac)") { def prop[ - A : TypedEncoder : Ordering, - B : TypedEncoder : Ordering, - C : TypedEncoder : Ordering + A: TypedEncoder: Ordering, + B: TypedEncoder: Ordering, + C: TypedEncoder: Ordering ](left: List[X2[A, B]], right: List[X2[A, C]]): Prop = { val leftDs = TypedDataset.create(left) val rightDs = TypedDataset.create(right) @@ -27,7 +27,8 @@ class JoinTests extends TypedDatasetSuite { val equalSchemas = joinedDs.schema ?= StructType(Seq( StructField("_1", leftDs.schema, nullable = false), - StructField("_2", rightDs.schema, nullable = false))) + StructField("_2", rightDs.schema, nullable = false) + )) (joined.sorted ?= joinedData) && equalSchemas } @@ -37,9 +38,9 @@ class JoinTests extends TypedDatasetSuite { test("ab.joinFull(ac)(ab.a == ac.a)") { def prop[ - A : TypedEncoder : Ordering, - B : TypedEncoder : Ordering, - C : TypedEncoder : Ordering + A: TypedEncoder: Ordering, + B: TypedEncoder: Ordering, + C: TypedEncoder: Ordering ](left: List[X2[A, B]], right: List[X2[A, C]]): Prop = { val leftDs = TypedDataset.create(left) val rightDs = TypedDataset.create(right) @@ -49,25 +50,28 @@ class JoinTests extends TypedDatasetSuite { val joinedData = joinedDs.collect().run().toVector.sorted val rightKeys = right.map(_.a).toSet - val leftKeys = left.map(_.a).toSet + val leftKeys = left.map(_.a).toSet val joined = { for { ab <- left ac <- right if ac.a == ab.a } yield (Some(ab), Some(ac)) - }.toVector ++ { - for { - ab <- left if !rightKeys.contains(ab.a) - } yield (Some(ab), None) - }.toVector ++ { - for { - ac <- right if !leftKeys.contains(ac.a) - } yield (None, Some(ac)) - }.toVector + }.toVector ++ + { + for { + ab <- left if !rightKeys.contains(ab.a) + } yield (Some(ab), None) + }.toVector ++ + { + for { + ac <- right if !leftKeys.contains(ac.a) + } yield (None, Some(ac)) + }.toVector val equalSchemas = joinedDs.schema ?= StructType(Seq( StructField("_1", leftDs.schema, nullable = true), - StructField("_2", rightDs.schema, nullable = true))) + StructField("_2", rightDs.schema, nullable = true) + )) (joined.sorted ?= joinedData) && equalSchemas } @@ -77,9 +81,9 @@ class JoinTests extends TypedDatasetSuite { test("ab.joinInner(ac)(ab.a == ac.a)") { def prop[ - A : TypedEncoder : Ordering, - B : TypedEncoder : Ordering, - C : TypedEncoder : Ordering + A: TypedEncoder: Ordering, + B: TypedEncoder: Ordering, + C: TypedEncoder: Ordering ](left: List[X2[A, B]], right: List[X2[A, C]]): Prop = { val leftDs = TypedDataset.create(left) val rightDs = TypedDataset.create(right) @@ -97,7 +101,8 @@ class JoinTests extends TypedDatasetSuite { val equalSchemas = joinedDs.schema ?= StructType(Seq( StructField("_1", leftDs.schema, nullable = false), - StructField("_2", rightDs.schema, nullable = false))) + StructField("_2", rightDs.schema, nullable = false) + )) (joined.sorted ?= joinedData) && equalSchemas } @@ -107,9 +112,9 @@ class JoinTests extends TypedDatasetSuite { test("ab.joinLeft(ac)(ab.a == ac.a)") { def prop[ - A : TypedEncoder : Ordering, - B : TypedEncoder : Ordering, - C : TypedEncoder : Ordering + A: TypedEncoder: Ordering, + B: TypedEncoder: Ordering, + C: TypedEncoder: Ordering ](left: List[X2[A, B]], right: List[X2[A, C]]): Prop = { val leftDs = TypedDataset.create(left) val rightDs = TypedDataset.create(right) @@ -124,15 +129,17 @@ class JoinTests extends TypedDatasetSuite { ab <- left ac <- right if ac.a == ab.a } yield (ab, Some(ac)) - }.toVector ++ { - for { - ab <- left if !rightKeys.contains(ab.a) - } yield (ab, None) - }.toVector + }.toVector ++ + { + for { + ab <- left if !rightKeys.contains(ab.a) + } yield (ab, None) + }.toVector val equalSchemas = joinedDs.schema ?= StructType(Seq( StructField("_1", leftDs.schema, nullable = false), - StructField("_2", rightDs.schema, nullable = true))) + StructField("_2", rightDs.schema, nullable = true) + )) (joined.sorted ?= joinedData) && (joinedData.map(_._1).toSet ?= left.toSet) && equalSchemas } @@ -142,9 +149,9 @@ class JoinTests extends TypedDatasetSuite { test("ab.joinLeftAnti(ac)(ab.a == ac.a)") { def prop[ - A : TypedEncoder : Ordering, - B : TypedEncoder : Ordering, - C : TypedEncoder : Ordering + A: TypedEncoder: Ordering, + B: TypedEncoder: Ordering, + C: TypedEncoder: Ordering ](left: List[X2[A, B]], right: List[X2[A, C]]): Prop = { val leftDs = TypedDataset.create(left) val rightDs = TypedDataset.create(right) @@ -170,9 +177,9 @@ class JoinTests extends TypedDatasetSuite { test("ab.joinLeftSemi(ac)(ab.a == ac.a)") { def prop[ - A : TypedEncoder : Ordering, - B : TypedEncoder : Ordering, - C : TypedEncoder : Ordering + A: TypedEncoder: Ordering, + B: TypedEncoder: Ordering, + C: TypedEncoder: Ordering ](left: List[X2[A, B]], right: List[X2[A, C]]): Prop = { val leftDs = TypedDataset.create(left) val rightDs = TypedDataset.create(right) @@ -198,9 +205,9 @@ class JoinTests extends TypedDatasetSuite { test("ab.joinRight(ac)(ab.a == ac.a)") { def prop[ - A : TypedEncoder : Ordering, - B : TypedEncoder : Ordering, - C : TypedEncoder : Ordering + A: TypedEncoder: Ordering, + B: TypedEncoder: Ordering, + C: TypedEncoder: Ordering ](left: List[X2[A, B]], right: List[X2[A, C]]): Prop = { val leftDs = TypedDataset.create(left) val rightDs = TypedDataset.create(right) @@ -215,15 +222,17 @@ class JoinTests extends TypedDatasetSuite { ab <- left ac <- right if ac.a == ab.a } yield (Some(ab), ac) - }.toVector ++ { - for { - ac <- right if !leftKeys.contains(ac.a) - } yield (None, ac) - }.toVector + }.toVector ++ + { + for { + ac <- right if !leftKeys.contains(ac.a) + } yield (None, ac) + }.toVector val equalSchemas = joinedDs.schema ?= StructType(Seq( StructField("_1", leftDs.schema, nullable = true), - StructField("_2", rightDs.schema, nullable = false))) + StructField("_2", rightDs.schema, nullable = false) + )) (joined.sorted ?= joinedData) && (joinedData.map(_._2).toSet ?= right.toSet) && equalSchemas } diff --git a/dataset/src/test/scala/frameless/LitTests.scala b/dataset/src/test/scala/frameless/LitTests.scala index 50df45220..1ab3c167c 100644 --- a/dataset/src/test/scala/frameless/LitTests.scala +++ b/dataset/src/test/scala/frameless/LitTests.scala @@ -15,7 +15,7 @@ class LitTests extends TypedDatasetSuite with Matchers { val l: TypedColumn[Int, A] = lit(value) // filter forces whole codegen - val elems = df.deserialized.filter((_:Int) => true).select(l) + val elems = df.deserialized.filter((_: Int) => true).select(l) .collect() .run() .toVector @@ -58,7 +58,8 @@ class LitTests extends TypedDatasetSuite with Matchers { test("support value class") { val initial = Seq( Q(name = new Name("Foo"), id = 1), - Q(name = new Name("Bar"), id = 2)) + Q(name = new Name("Bar"), id = 2) + ) val ds = TypedDataset.create(initial) ds.collect.run() shouldBe initial @@ -72,7 +73,8 @@ class LitTests extends TypedDatasetSuite with Matchers { test("support optional value class") { val initial = Seq( R(name = "Foo", id = 1, alias = None), - R(name = "Bar", id = 2, alias = Some(new Name("Lorem")))) + R(name = "Bar", id = 2, alias = Some(new Name("Lorem"))) + ) val ds = TypedDataset.create(initial) ds.collect.run() shouldBe initial @@ -82,7 +84,7 @@ class LitTests extends TypedDatasetSuite with Matchers { val lit = functions.litValue(someIpsum) val tds = ds.withColumnReplaced('alias, functions.litValue(someIpsum)) - tds.queryExecution.toString() should include (lit.toString) + tds.queryExecution.toString() should include(lit.toString) tds. collect.run() shouldBe initial.map(_.copy(alias = someIpsum)) diff --git a/dataset/src/test/scala/frameless/NumericTests.scala b/dataset/src/test/scala/frameless/NumericTests.scala index 0c13ae5a3..60e921ee2 100644 --- a/dataset/src/test/scala/frameless/NumericTests.scala +++ b/dataset/src/test/scala/frameless/NumericTests.scala @@ -43,7 +43,7 @@ class NumericTests extends TypedDatasetSuite with Matchers { } test("multiply") { - def prop[A: TypedEncoder : CatalystNumeric : Numeric : ClassTag](a: A, b: A): Prop = { + def prop[A: TypedEncoder: CatalystNumeric: Numeric: ClassTag](a: A, b: A): Prop = { val df = TypedDataset.create(X2(a, b) :: Nil) val result = implicitly[Numeric[A]].times(a, b) val got = df.select(df.col('a) * df.col('b)).collect().run() @@ -61,7 +61,8 @@ class NumericTests extends TypedDatasetSuite with Matchers { test("divide") { def prop[A: TypedEncoder: CatalystNumeric: Numeric](a: A, b: A)(implicit cd: CatalystDivisible[A, Double]): Prop = { val df = TypedDataset.create(X2(a, b) :: Nil) - if (b == 0) proved else { + if (b == 0) proved + else { val div: Double = implicitly[Numeric[A]].toDouble(a) / implicitly[Numeric[A]].toDouble(b) val got: Seq[Double] = df.select(df.col('a) / df.col('b)).collect().run() @@ -69,17 +70,18 @@ class NumericTests extends TypedDatasetSuite with Matchers { } } - check(prop[Byte ] _) + check(prop[Byte] _) check(prop[Double] _) - check(prop[Int ] _) - check(prop[Long ] _) - check(prop[Short ] _) + check(prop[Int] _) + check(prop[Long] _) + check(prop[Short] _) } test("divide BigDecimals") { def prop(a: BigDecimal, b: BigDecimal): Prop = { val df = TypedDataset.create(X2(a, b) :: Nil) - if (b.doubleValue == 0) proved else { + if (b.doubleValue == 0) proved + else { // Spark performs something in between Double division and BigDecimal division, // we approximate it using double vision and `approximatelyEqual`: val div = BigDecimal(a.doubleValue / b.doubleValue) @@ -133,9 +135,10 @@ class NumericTests extends TypedDatasetSuite with Matchers { test("mod") { import NumericMod._ - def prop[A: TypedEncoder : CatalystNumeric : NumericMod](a: A, b: A): Prop = { + def prop[A: TypedEncoder: CatalystNumeric: NumericMod](a: A, b: A): Prop = { val df = TypedDataset.create(X2(a, b) :: Nil) - if (b == 0) proved else { + if (b == 0) proved + else { val mod: A = implicitly[NumericMod[A]].mod(a, b) val got: Seq[A] = df.select(df.col('a) % df.col('b)).collect().run() @@ -145,19 +148,20 @@ class NumericTests extends TypedDatasetSuite with Matchers { check(prop[Byte] _) check(prop[Double] _) - check(prop[Int ] _) - check(prop[Long ] _) - check(prop[Short ] _) + check(prop[Int] _) + check(prop[Long] _) + check(prop[Short] _) check(prop[BigDecimal] _) } - test("a mod lit(b)"){ + test("a mod lit(b)") { import NumericMod._ - def prop[A: TypedEncoder : CatalystNumeric : NumericMod](elem: A, data: X1[A]): Prop = { + def prop[A: TypedEncoder: CatalystNumeric: NumericMod](elem: A, data: X1[A]): Prop = { val dataset = TypedDataset.create(Seq(data)) val a = dataset.col('a) - if (elem == 0) proved else { + if (elem == 0) proved + else { val mod: A = implicitly[NumericMod[A]].mod(data.a, elem) val got: Seq[A] = dataset.select(a % elem).collect().run() @@ -167,9 +171,9 @@ class NumericTests extends TypedDatasetSuite with Matchers { check(prop[Byte] _) check(prop[Double] _) - check(prop[Int ] _) - check(prop[Long ] _) - check(prop[Short ] _) + check(prop[Int] _) + check(prop[Long] _) + check(prop[Short] _) check(prop[BigDecimal] _) } @@ -180,9 +184,9 @@ class NumericTests extends TypedDatasetSuite with Matchers { implicit val doubleWithNaN = Arbitrary { implicitly[Arbitrary[Double]].arbitrary.flatMap(Gen.oneOf(_, Double.NaN)) } - implicit val x1 = Arbitrary{ doubleWithNaN.arbitrary.map(X1(_)) } + implicit val x1 = Arbitrary { doubleWithNaN.arbitrary.map(X1(_)) } - def prop[A : TypedEncoder : Encoder : CatalystNaN](data: List[X1[A]]): Prop = { + def prop[A: TypedEncoder: Encoder: CatalystNaN](data: List[X1[A]]): Prop = { val ds = TypedDataset.create(data) val expected = ds.toDF().filter(!$"a".isNaN).map(_.getAs[A](0)).collect().toSeq diff --git a/dataset/src/test/scala/frameless/OrderByTests.scala b/dataset/src/test/scala/frameless/OrderByTests.scala index 98bd7442d..20137819a 100644 --- a/dataset/src/test/scala/frameless/OrderByTests.scala +++ b/dataset/src/test/scala/frameless/OrderByTests.scala @@ -7,19 +7,20 @@ import org.apache.spark.sql.Column import org.scalatest.matchers.should.Matchers class OrderByTests extends TypedDatasetSuite with Matchers { - def sortings[A : CatalystOrdered, T]: Seq[(TypedColumn[T, A] => SortedTypedColumn[T, A], Column => Column)] = Seq( + def sortings[A: CatalystOrdered, T]: Seq[(TypedColumn[T, A] => SortedTypedColumn[T, A], Column => Column)] = Seq( (_.desc, _.desc), (_.asc, _.asc), - (t => t, t => t) //default ascending + (t => t, t => t) // default ascending ) test("single column non nullable orderBy") { - def prop[A: TypedEncoder : CatalystOrdered](data: Vector[X1[A]]): Prop = { + def prop[A: TypedEncoder: CatalystOrdered](data: Vector[X1[A]]): Prop = { val ds = TypedDataset.create(data) sortings[A, X1[A]].map { case (typ, untyp) => ds.dataset.orderBy(untyp(ds.dataset.col("a"))).collect().toVector.?=( - ds.orderBy(typ(ds('a))).collect().run().toVector) + ds.orderBy(typ(ds('a))).collect().run().toVector + ) }.reduce(_ && _) } @@ -36,12 +37,13 @@ class OrderByTests extends TypedDatasetSuite with Matchers { } test("single column non nullable partition sorting") { - def prop[A: TypedEncoder : CatalystOrdered](data: Vector[X1[A]]): Prop = { + def prop[A: TypedEncoder: CatalystOrdered](data: Vector[X1[A]]): Prop = { val ds = TypedDataset.create(data) sortings[A, X1[A]].map { case (typ, untyp) => ds.dataset.sortWithinPartitions(untyp(ds.dataset.col("a"))).collect().toVector.?=( - ds.sortWithinPartitions(typ(ds('a))).collect().run().toVector) + ds.sortWithinPartitions(typ(ds('a))).collect().run().toVector + ) }.reduce(_ && _) } @@ -58,7 +60,7 @@ class OrderByTests extends TypedDatasetSuite with Matchers { } test("two columns non nullable orderBy") { - def prop[A: TypedEncoder : CatalystOrdered, B: TypedEncoder : CatalystOrdered](data: Vector[X2[A,B]]): Prop = { + def prop[A: TypedEncoder: CatalystOrdered, B: TypedEncoder: CatalystOrdered](data: Vector[X2[A, B]]): Prop = { val ds = TypedDataset.create(data) sortings[A, X2[A, B]].reverse.zip(sortings[B, X2[A, B]]).map { case ((typA, untypA), (typB, untypB)) => @@ -75,7 +77,7 @@ class OrderByTests extends TypedDatasetSuite with Matchers { } test("two columns non nullable partition sorting") { - def prop[A: TypedEncoder : CatalystOrdered, B: TypedEncoder : CatalystOrdered](data: Vector[X2[A,B]]): Prop = { + def prop[A: TypedEncoder: CatalystOrdered, B: TypedEncoder: CatalystOrdered](data: Vector[X2[A, B]]): Prop = { val ds = TypedDataset.create(data) sortings[A, X2[A, B]].reverse.zip(sortings[B, X2[A, B]]).map { case ((typA, untypA), (typB, untypB)) => @@ -92,7 +94,7 @@ class OrderByTests extends TypedDatasetSuite with Matchers { } test("three columns non nullable orderBy") { - def prop[A: TypedEncoder : CatalystOrdered, B: TypedEncoder : CatalystOrdered](data: Vector[X3[A,B,A]]): Prop = { + def prop[A: TypedEncoder: CatalystOrdered, B: TypedEncoder: CatalystOrdered](data: Vector[X3[A, B, A]]): Prop = { val ds = TypedDataset.create(data) sortings[A, X3[A, B, A]].reverse @@ -115,7 +117,7 @@ class OrderByTests extends TypedDatasetSuite with Matchers { } test("three columns non nullable partition sorting") { - def prop[A: TypedEncoder : CatalystOrdered, B: TypedEncoder : CatalystOrdered](data: Vector[X3[A,B,A]]): Prop = { + def prop[A: TypedEncoder: CatalystOrdered, B: TypedEncoder: CatalystOrdered](data: Vector[X3[A, B, A]]): Prop = { val ds = TypedDataset.create(data) sortings[A, X3[A, B, A]].reverse @@ -138,13 +140,15 @@ class OrderByTests extends TypedDatasetSuite with Matchers { } test("sort support for mixed default and explicit ordering") { - def prop[A: TypedEncoder : CatalystOrdered, B: TypedEncoder : CatalystOrdered](data: Vector[X2[A, B]]): Prop = { + def prop[A: TypedEncoder: CatalystOrdered, B: TypedEncoder: CatalystOrdered](data: Vector[X2[A, B]]): Prop = { val ds = TypedDataset.create(data) ds.dataset.orderBy(ds.dataset.col("a"), ds.dataset.col("b").desc).collect().toVector.?=( - ds.orderByMany(ds('a), ds('b).desc).collect().run().toVector) && + ds.orderByMany(ds('a), ds('b).desc).collect().run().toVector + ) && ds.dataset.sortWithinPartitions(ds.dataset.col("a"), ds.dataset.col("b").desc).collect().toVector.?=( - ds.sortWithinPartitionsMany(ds('a), ds('b).desc).collect().run().toVector) + ds.sortWithinPartitionsMany(ds('a), ds('b).desc).collect().run().toVector + ) } check(forAll(prop[SQLDate, Long] _)) @@ -162,13 +166,13 @@ class OrderByTests extends TypedDatasetSuite with Matchers { test("derives a CatalystOrdered for case classes when all fields are comparable") { type T[A, B] = X3[Int, Boolean, X2[A, B]] def prop[ - A: TypedEncoder : CatalystOrdered, - B: TypedEncoder : CatalystOrdered + A: TypedEncoder: CatalystOrdered, + B: TypedEncoder: CatalystOrdered ](data: Vector[T[A, B]]): Prop = { val ds = TypedDataset.create(data) sortings[X2[A, B], T[A, B]].map { case (typX2, untypX2) => - val vanilla = ds.dataset.orderBy(untypX2(ds.dataset.col("c"))).collect().toVector + val vanilla = ds.dataset.orderBy(untypX2(ds.dataset.col("c"))).collect().toVector val frameless = ds.orderBy(typX2(ds('c))).collect().run.toVector vanilla ?= frameless }.reduce(_ && _) @@ -183,13 +187,13 @@ class OrderByTests extends TypedDatasetSuite with Matchers { test("derives a CatalystOrdered for tuples when all fields are comparable") { type T[A, B] = X2[Int, (A, B)] def prop[ - A: TypedEncoder : CatalystOrdered, - B: TypedEncoder : CatalystOrdered + A: TypedEncoder: CatalystOrdered, + B: TypedEncoder: CatalystOrdered ](data: Vector[T[A, B]]): Prop = { val ds = TypedDataset.create(data) sortings[(A, B), T[A, B]].map { case (typX2, untypX2) => - val vanilla = ds.dataset.orderBy(untypX2(ds.dataset.col("b"))).collect().toVector + val vanilla = ds.dataset.orderBy(untypX2(ds.dataset.col("b"))).collect().toVector val frameless = ds.orderBy(typX2(ds('b))).collect().run.toVector vanilla ?= frameless }.reduce(_ && _) diff --git a/dataset/src/test/scala/frameless/RecordEncoderTests.scala b/dataset/src/test/scala/frameless/RecordEncoderTests.scala index 98274cf01..206c2d6e8 100644 --- a/dataset/src/test/scala/frameless/RecordEncoderTests.scala +++ b/dataset/src/test/scala/frameless/RecordEncoderTests.scala @@ -1,6 +1,6 @@ package frameless -import org.apache.spark.sql.{Row, functions => F} +import org.apache.spark.sql.{functions => F, Row} import org.apache.spark.sql.types.{ ArrayType, BinaryType, @@ -95,7 +95,8 @@ final class RecordEncoderTests extends TypedDatasetSuite with Matchers { encoder.jvmRepr shouldBe ObjectType(classOf[Name]) encoder.catalystRepr shouldBe StructType( - Seq(StructField("value", StringType, false))) + Seq(StructField("value", StringType, false)) + ) val sqlContext = session.sqlContext import sqlContext.implicits._ @@ -111,7 +112,8 @@ final class RecordEncoderTests extends TypedDatasetSuite with Matchers { illTyped( // As `Person` is not a Value class - "val _: RecordFieldEncoder[Person] = RecordFieldEncoder.valueClass") + "val _: RecordFieldEncoder[Person] = RecordFieldEncoder.valueClass" + ) val fieldEncoder: RecordFieldEncoder[Name] = RecordFieldEncoder.valueClass @@ -125,7 +127,8 @@ final class RecordEncoderTests extends TypedDatasetSuite with Matchers { val expectedPersonStructType = StructType(Seq( StructField("name", StringType, false), - StructField("age", IntegerType, false))) + StructField("age", IntegerType, false) + )) encoder.catalystRepr shouldBe expectedPersonStructType @@ -140,7 +143,9 @@ final class RecordEncoderTests extends TypedDatasetSuite with Matchers { } val expected = Seq( - Person(new Name("Foo"), 2), Person(new Name("Bar"), 3)) + Person(new Name("Foo"), 2), + Person(new Name("Bar"), 3) + ) unsafeDs.collect.run() shouldBe expected @@ -160,7 +165,8 @@ final class RecordEncoderTests extends TypedDatasetSuite with Matchers { illTyped( // As `Person` is not a Value class """val _: RecordFieldEncoder[Option[Person]] = - RecordFieldEncoder.optionValueClass""") + RecordFieldEncoder.optionValueClass""" + ) val fieldEncoder: RecordFieldEncoder[Option[Name]] = RecordFieldEncoder.optionValueClass @@ -177,7 +183,8 @@ final class RecordEncoderTests extends TypedDatasetSuite with Matchers { val expectedPersonStructType = StructType(Seq( StructField("id", LongType, false), - StructField("name", StringType, true))) + StructField("name", StringType, true) + )) encoder.catalystRepr shouldBe expectedPersonStructType @@ -194,7 +201,8 @@ final class RecordEncoderTests extends TypedDatasetSuite with Matchers { ds1.collect.run() shouldBe Seq( User(1L, None), - User(2L, Some(new Name("Foo")))) + User(2L, Some(new Name("Foo"))) + ) val ds2: TypedDataset[User] = { val sqlContext = session.sqlContext @@ -208,7 +216,8 @@ final class RecordEncoderTests extends TypedDatasetSuite with Matchers { val df2 = df1.withColumn( "jsonValue", - F.from_json(df1.col("value"), expectedPersonStructType)). + F.from_json(df1.col("value"), expectedPersonStructType) + ). select("jsonValue.id", "jsonValue.name") TypedDataset.createUnsafe[User](df2) @@ -217,7 +226,8 @@ final class RecordEncoderTests extends TypedDatasetSuite with Matchers { val expected = Seq( User(3L, None), User(4L, Some(new Name("Lorem"))), - User(5L, None)) + User(5L, None) + ) ds2.collect.run() shouldBe expected @@ -233,10 +243,16 @@ final class RecordEncoderTests extends TypedDatasetSuite with Matchers { encoder.jvmRepr shouldBe ObjectType(classOf[D]) val expectedStructType = StructType(Seq( - StructField("m", MapType( - keyType = StringType, - valueType = IntegerType, - valueContainsNull = false), false))) + StructField( + "m", + MapType( + keyType = StringType, + valueType = IntegerType, + valueContainsNull = false + ), + false + ) + )) encoder.catalystRepr shouldBe expectedStructType @@ -246,18 +262,20 @@ final class RecordEncoderTests extends TypedDatasetSuite with Matchers { val ds1 = TypedDataset.createUnsafe[D] { val df = Seq( """{"m":{"pizza":1,"sushi":2}}""", - """{"m":{"red":3,"blue":4}}""", + """{"m":{"red":3,"blue":4}}""" ).toDF df.withColumn( "jsonValue", - F.from_json(df.col("value"), expectedStructType)). + F.from_json(df.col("value"), expectedStructType) + ). select("jsonValue.*") } val expected = Seq( D(m = Map("pizza" -> 1, "sushi" -> 2)), - D(m = Map("red" -> 3, "blue" -> 4))) + D(m = Map("red" -> 3, "blue" -> 4)) + ) ds1.collect.run() shouldBe expected @@ -277,10 +295,16 @@ final class RecordEncoderTests extends TypedDatasetSuite with Matchers { val expectedStudentStructType = StructType(Seq( StructField("name", StringType, false), - StructField("grades", MapType( - keyType = StringType, - valueType = DecimalType.SYSTEM_DEFAULT, - valueContainsNull = false), false))) + StructField( + "grades", + MapType( + keyType = StringType, + valueType = DecimalType.SYSTEM_DEFAULT, + valueContainsNull = false + ), + false + ) + )) encoder.catalystRepr shouldBe expectedStudentStructType @@ -290,43 +314,58 @@ final class RecordEncoderTests extends TypedDatasetSuite with Matchers { val ds1 = TypedDataset.createUnsafe[Student] { val df = Seq( """{"name":"Foo","grades":{"math":1,"physics":"23.4"}}""", - """{"name":"Bar","grades":{"biology":18.5,"geography":4}}""", + """{"name":"Bar","grades":{"biology":18.5,"geography":4}}""" ).toDF df.withColumn( "jsonValue", - F.from_json(df.col("value"), expectedStudentStructType)). + F.from_json(df.col("value"), expectedStudentStructType) + ). select("jsonValue.*") } val expected = Seq( - Student(name = "Foo", grades = Map( - new Subject("math") -> new Grade(BigDecimal(1)), - new Subject("physics") -> new Grade(BigDecimal(23.4D)))), - Student(name = "Bar", grades = Map( - new Subject("biology") -> new Grade(BigDecimal(18.5)), - new Subject("geography") -> new Grade(BigDecimal(4L))))) + Student( + name = "Foo", + grades = Map( + new Subject("math") -> new Grade(BigDecimal(1)), + new Subject("physics") -> new Grade(BigDecimal(23.4D)) + ) + ), + Student( + name = "Bar", + grades = Map( + new Subject("biology") -> new Grade(BigDecimal(18.5)), + new Subject("geography") -> new Grade(BigDecimal(4L)) + ) + ) + ) ds1.collect.run() shouldBe expected val grades = Map[Subject, Grade]( - new Subject("any") -> new Grade(BigDecimal(Long.MaxValue) + 1L)) + new Subject("any") -> new Grade(BigDecimal(Long.MaxValue) + 1L) + ) val ds2 = ds1.withColumnReplaced('grades, functions.lit(grades)) ds2.collect.run() shouldBe Seq( - Student("Foo", grades), Student("Bar", grades)) + Student("Foo", grades), + Student("Bar", grades) + ) } test("Encode binary array") { val encoder = TypedEncoder[Tuple2[String, Array[Byte]]] encoder.jvmRepr shouldBe ObjectType( - classOf[Tuple2[String, Array[Byte]]]) + classOf[Tuple2[String, Array[Byte]]] + ) val expectedStructType = StructType(Seq( StructField("_1", StringType, false), - StructField("_2", BinaryType, false))) + StructField("_2", BinaryType, false) + )) encoder.catalystRepr shouldBe expectedStructType @@ -359,11 +398,13 @@ final class RecordEncoderTests extends TypedDatasetSuite with Matchers { val encoder = TypedEncoder[Tuple2[String, Array[Int]]] encoder.jvmRepr shouldBe ObjectType( - classOf[Tuple2[String, Array[Int]]]) + classOf[Tuple2[String, Array[Int]]] + ) val expectedStructType = StructType(Seq( StructField("_1", StringType, false), - StructField("_2", ArrayType(IntegerType, false), false))) + StructField("_2", ArrayType(IntegerType, false), false) + )) encoder.catalystRepr shouldBe expectedStructType @@ -373,12 +414,13 @@ final class RecordEncoderTests extends TypedDatasetSuite with Matchers { val ds1 = TypedDataset.createUnsafe[(String, Array[Int])] { val df = Seq( """{"_1":"Foo", "_2":[3, 4]}""", - """{"_1":"Bar", "_2":[5]}""", + """{"_1":"Bar", "_2":[5]}""" ).toDF df.withColumn( "jsonValue", - F.from_json(df.col("value"), expectedStructType)). + F.from_json(df.col("value"), expectedStructType) + ). select("jsonValue.*") } @@ -403,11 +445,13 @@ final class RecordEncoderTests extends TypedDatasetSuite with Matchers { val encoder = TypedEncoder[Tuple2[String, Array[Subject]]] encoder.jvmRepr shouldBe ObjectType( - classOf[Tuple2[String, Array[Subject]]]) + classOf[Tuple2[String, Array[Subject]]] + ) val expectedStructType = StructType(Seq( StructField("_1", StringType, false), - StructField("_2", ArrayType(StringType, false), false))) + StructField("_2", ArrayType(StringType, false), false) + )) encoder.catalystRepr shouldBe expectedStructType @@ -417,18 +461,20 @@ final class RecordEncoderTests extends TypedDatasetSuite with Matchers { val ds1 = TypedDataset.createUnsafe[(String, Array[Subject])] { val df = Seq( """{"_1":"Foo", "_2":["math","physics"]}""", - """{"_1":"Bar", "_2":["biology","geography"]}""", + """{"_1":"Bar", "_2":["biology","geography"]}""" ).toDF df.withColumn( "jsonValue", - F.from_json(df.col("value"), expectedStructType)). + F.from_json(df.col("value"), expectedStructType) + ). select("jsonValue.*") } val expected = Seq( "Foo" -> Seq(new Subject("math"), new Subject("physics")), - "Bar" -> Seq(new Subject("biology"), new Subject("geography"))) + "Bar" -> Seq(new Subject("biology"), new Subject("geography")) + ) ds1.collect.run().map { case (_1, _2) => _1 -> _2.toSeq @@ -451,8 +497,17 @@ final class RecordEncoderTests extends TypedDatasetSuite with Matchers { encoder.jvmRepr shouldBe ObjectType(classOf[B]) val expectedStructType = StructType(Seq( - StructField("a", ArrayType(StructType(Seq( - StructField("x", IntegerType, false))), false), false))) + StructField( + "a", + ArrayType( + StructType(Seq( + StructField("x", IntegerType, false) + )), + false + ), + false + ) + )) encoder.catalystRepr shouldBe expectedStructType @@ -491,7 +546,8 @@ final class RecordEncoderTests extends TypedDatasetSuite with Matchers { val expectedStructType = StructType(Seq( StructField("_1", IntegerType, false), - StructField("_2", ArrayType(StringType, false), false))) + StructField("_2", ArrayType(StringType, false), false) + )) encoder.catalystRepr shouldBe expectedStructType @@ -501,18 +557,20 @@ final class RecordEncoderTests extends TypedDatasetSuite with Matchers { val df = Seq( """{"_1":1, "_2":["foo", "bar"]}""", - """{"_1":2, "_2":["lorem"]}""", + """{"_1":2, "_2":["lorem"]}""" ).toDF df.withColumn( "jsonValue", - F.from_json(df.col("value"), expectedStructType)). + F.from_json(df.col("value"), expectedStructType) + ). select("jsonValue.*") } val expected = Seq( 1 -> Seq(new Name("foo"), new Name("bar")), - 2 -> Seq(new Name("lorem"))) + 2 -> Seq(new Name("lorem")) + ) ds1.collect.run() shouldBe expected } @@ -523,7 +581,13 @@ final class RecordEncoderTests extends TypedDatasetSuite with Matchers { case class UnitsOnly(a: Unit, b: Unit) case class TupleWithUnits( - u0: Unit, _1: Int, u1: Unit, u2: Unit, _2: String, u3: Unit) + u0: Unit, + _1: Int, + u1: Unit, + u2: Unit, + _2: String, + u3: Unit +) object TupleWithUnits { def apply(_1: Int, _2: String): TupleWithUnits = diff --git a/dataset/src/test/scala/frameless/SchemaTests.scala b/dataset/src/test/scala/frameless/SchemaTests.scala index 89fed7f86..520ee195e 100644 --- a/dataset/src/test/scala/frameless/SchemaTests.scala +++ b/dataset/src/test/scala/frameless/SchemaTests.scala @@ -2,7 +2,7 @@ package frameless import frameless.functions.aggregate._ import frameless.functions._ -import org.apache.spark.sql.types.{ Metadata, StructType } +import org.apache.spark.sql.types.{Metadata, StructType} import org.scalacheck.Prop import org.scalacheck.Prop._ import org.scalatest.matchers.should.Matchers @@ -20,9 +20,9 @@ class SchemaTests extends TypedDatasetSuite with Matchers { } def prop[A]( - dataset: TypedDataset[A], - ignoreNullable: Boolean = false - ): Prop = { + dataset: TypedDataset[A], + ignoreNullable: Boolean = false + ): Prop = { val schema = dataset.dataset.schema Prop.all( diff --git a/dataset/src/test/scala/frameless/SelfJoinTests.scala b/dataset/src/test/scala/frameless/SelfJoinTests.scala index 742429108..7c8a4f68b 100644 --- a/dataset/src/test/scala/frameless/SelfJoinTests.scala +++ b/dataset/src/test/scala/frameless/SelfJoinTests.scala @@ -2,7 +2,7 @@ package frameless import org.scalacheck.Prop import org.scalacheck.Prop._ -import org.apache.spark.sql.{ SparkSession, functions => sparkFunctions } +import org.apache.spark.sql.{functions => sparkFunctions, SparkSession} class SelfJoinTests extends TypedDatasetSuite { @@ -10,10 +10,10 @@ class SelfJoinTests extends TypedDatasetSuite { // [error] Join condition is missing or trivial. // [error] Use the CROSS JOIN syntax to allow cartesian products between these relations. def allowTrivialJoin[T]( - body: => T - )(implicit - session: SparkSession - ): T = { + body: => T + )(implicit + session: SparkSession + ): T = { val crossJoin = "spark.sql.crossJoin.enabled" val oldSetting = session.conf.get(crossJoin) session.conf.set(crossJoin, "true") @@ -23,10 +23,10 @@ class SelfJoinTests extends TypedDatasetSuite { } def allowAmbiguousJoin[T]( - body: => T - )(implicit - session: SparkSession - ): T = { + body: => T + )(implicit + session: SparkSession + ): T = { val crossJoin = "spark.sql.analyzer.failAmbiguousSelfJoin" val oldSetting = session.conf.get(crossJoin) session.conf.set(crossJoin, "false") @@ -37,9 +37,9 @@ class SelfJoinTests extends TypedDatasetSuite { test("self join with colLeft/colRight disambiguation") { def prop[A: TypedEncoder: Ordering, B: TypedEncoder: Ordering]( - dx: List[X2[A, B]], - d: X2[A, B] - ): Prop = allowAmbiguousJoin { + dx: List[X2[A, B]], + d: X2[A, B] + ): Prop = allowAmbiguousJoin { val data = d :: dx val ds = TypedDataset.create(data) @@ -65,9 +65,9 @@ class SelfJoinTests extends TypedDatasetSuite { test("self join collects correct values via colLeft/colRight") { def prop[A: TypedEncoder: Ordering, B: TypedEncoder: Ordering]( - dx: List[X2[A, B]], - d: X2[A, B] - ): Prop = allowAmbiguousJoin { + dx: List[X2[A, B]], + d: X2[A, B] + ): Prop = allowAmbiguousJoin { val data = d :: dx val ds = TypedDataset.create(data) @@ -96,9 +96,9 @@ class SelfJoinTests extends TypedDatasetSuite { test("trivial self join") { def prop[A: TypedEncoder: Ordering, B: TypedEncoder: Ordering]( - dx: List[X2[A, B]], - d: X2[A, B] - ): Prop = + dx: List[X2[A, B]], + d: X2[A, B] + ): Prop = allowTrivialJoin { allowAmbiguousJoin { @@ -125,10 +125,9 @@ class SelfJoinTests extends TypedDatasetSuite { test("self join with unambiguous expression") { def prop[ - A: TypedEncoder: CatalystNumeric: Ordering, - B: TypedEncoder: Ordering - ](data: List[X3[A, A, B]] - ): Prop = allowAmbiguousJoin { + A: TypedEncoder: CatalystNumeric: Ordering, + B: TypedEncoder: Ordering + ](data: List[X3[A, A, B]]): Prop = allowAmbiguousJoin { val ds = TypedDataset.create(data) val df1 = ds.dataset.alias("df1") @@ -144,8 +143,10 @@ class SelfJoinTests extends TypedDatasetSuite { val typed = ds .joinInner(ds)( - (ds.colLeft('a) + ds.colLeft('b)) === (ds.colRight('a) + ds - .colRight('b)) + (ds.colLeft('a) + ds.colLeft('b)) === + (ds.colRight('a) + + ds + .colRight('b)) ) .count() .run() @@ -160,10 +161,9 @@ class SelfJoinTests extends TypedDatasetSuite { "Do you want ambiguous self join? This is how you get ambiguous self join." ) { def prop[ - A: TypedEncoder: CatalystNumeric: Ordering, - B: TypedEncoder: Ordering - ](data: List[X3[A, A, B]] - ): Prop = + A: TypedEncoder: CatalystNumeric: Ordering, + B: TypedEncoder: Ordering + ](data: List[X3[A, A, B]]): Prop = allowTrivialJoin { allowAmbiguousJoin { val ds = TypedDataset.create(data) @@ -195,11 +195,11 @@ class SelfJoinTests extends TypedDatasetSuite { test("colLeft and colRight are equivalent to col outside of joins") { def prop[A, B, C, D]( - data: Vector[X4[A, B, C, D]] - )(implicit - ea: TypedEncoder[A], - ex4: TypedEncoder[X4[A, B, C, D]] - ): Prop = { + data: Vector[X4[A, B, C, D]] + )(implicit + ea: TypedEncoder[A], + ex4: TypedEncoder[X4[A, B, C, D]] + ): Prop = { val dataset = TypedDataset.create(data) val selectedCol = dataset.select(dataset.col[A]('a)).collect().run().toVector @@ -219,11 +219,11 @@ class SelfJoinTests extends TypedDatasetSuite { test("colLeft and colRight are equivalent to col outside of joins - via files (codegen)") { def prop[A, B, C, D]( - data: Vector[X4[A, B, C, D]] - )(implicit - ea: TypedEncoder[A], - ex4: TypedEncoder[X4[A, B, C, D]] - ): Prop = { + data: Vector[X4[A, B, C, D]] + )(implicit + ea: TypedEncoder[A], + ex4: TypedEncoder[X4[A, B, C, D]] + ): Prop = { TypedDataset .create(data) .write diff --git a/dataset/src/test/scala/frameless/TypedDatasetSuite.scala b/dataset/src/test/scala/frameless/TypedDatasetSuite.scala index e31be7cbc..a602add90 100644 --- a/dataset/src/test/scala/frameless/TypedDatasetSuite.scala +++ b/dataset/src/test/scala/frameless/TypedDatasetSuite.scala @@ -2,23 +2,24 @@ package frameless import com.globalmentor.apache.hadoop.fs.BareLocalFileSystem import org.apache.hadoop.fs.local.StreamingFS -import org.apache.spark.{ SparkConf, SparkContext } -import org.apache.spark.sql.{ SQLContext, SparkSession } +import org.apache.spark.{SparkConf, SparkContext} +import org.apache.spark.sql.{SQLContext, SparkSession} import org.scalactic.anyvals.PosZInt import org.scalatest.BeforeAndAfterAll import org.scalatestplus.scalacheck.Checkers import org.scalacheck.Prop import org.scalacheck.Prop._ -import scala.util.{ Properties, Try } +import scala.util.{Properties, Try} import org.scalatest.funsuite.AnyFunSuite trait SparkTesting { self: BeforeAndAfterAll => - val appID: String = new java.util.Date().toString + math - .floor(math.random * 10e4) - .toLong - .toString + val appID: String = new java.util.Date().toString + + math + .floor(math.random * 10e4) + .toLong + .toString /** * Allows bare naked to be used instead of winutils for testing / dev @@ -93,11 +94,11 @@ class TypedDatasetSuite implicit val sparkDelay: SparkDelay[Job] = Job.framelessSparkDelayForJob def approximatelyEqual[A]( - a: A, - b: A - )(implicit - numeric: Numeric[A] - ): Prop = { + a: A, + b: A + )(implicit + numeric: Numeric[A] + ): Prop = { val da = numeric.toDouble(a) val db = numeric.toDouble(b) val epsilon = 1e-6 diff --git a/dataset/src/test/scala/frameless/UdtEncodedClass.scala b/dataset/src/test/scala/frameless/UdtEncodedClass.scala index 4e5c2c6d9..e154428be 100644 --- a/dataset/src/test/scala/frameless/UdtEncodedClass.scala +++ b/dataset/src/test/scala/frameless/UdtEncodedClass.scala @@ -9,7 +9,7 @@ import org.apache.spark.sql.FramelessInternals.UserDefinedType class UdtEncodedClass(val a: Int, val b: Array[Double]) { override def equals(other: Any): Boolean = other match { case that: UdtEncodedClass => a == that.a && java.util.Arrays.equals(b, that.b) - case _ => false + case _ => false } override def hashCode(): Int = { diff --git a/dataset/src/test/scala/frameless/WithColumnTest.scala b/dataset/src/test/scala/frameless/WithColumnTest.scala index c41c4e726..2950dc42d 100644 --- a/dataset/src/test/scala/frameless/WithColumnTest.scala +++ b/dataset/src/test/scala/frameless/WithColumnTest.scala @@ -8,28 +8,28 @@ class WithColumnTest extends TypedDatasetSuite { import WithColumnTest._ test("fail to compile on missing value") { - val f: TypedDataset[X] = TypedDataset.create(X(1,1) :: X(1,1) :: X(1,10) :: Nil) + val f: TypedDataset[X] = TypedDataset.create(X(1, 1) :: X(1, 1) :: X(1, 10) :: Nil) illTyped { """val fNew: TypedDataset[XMissing] = f.withColumn[XMissing](f('j) === 10)""" } } test("fail to compile on different column name") { - val f: TypedDataset[X] = TypedDataset.create(X(1,1) :: X(1,1) :: X(1,10) :: Nil) + val f: TypedDataset[X] = TypedDataset.create(X(1, 1) :: X(1, 1) :: X(1, 10) :: Nil) illTyped { """val fNew: TypedDataset[XDifferentColumnName] = f.withColumn[XDifferentColumnName](f('j) === 10)""" } } test("fail to compile on added column name") { - val f: TypedDataset[X] = TypedDataset.create(X(1,1) :: X(1,1) :: X(1,10) :: Nil) + val f: TypedDataset[X] = TypedDataset.create(X(1, 1) :: X(1, 1) :: X(1, 10) :: Nil) illTyped { """val fNew: TypedDataset[XAdded] = f.withColumn[XAdded](f('j) === 10)""" } } test("fail to compile on wrong typed column") { - val f: TypedDataset[X] = TypedDataset.create(X(1,1) :: X(1,1) :: X(1,10) :: Nil) + val f: TypedDataset[X] = TypedDataset.create(X(1, 1) :: X(1, 1) :: X(1, 10) :: Nil) illTyped { """val fNew: TypedDataset[XWrongType] = f.withColumn[XWrongType](f('j) === 10)""" } @@ -54,7 +54,7 @@ class WithColumnTest extends TypedDatasetSuite { } test("update in place") { - def prop[A : TypedEncoder](startValue: A, replaceValue: A): Prop = { + def prop[A: TypedEncoder](startValue: A, replaceValue: A): Prop = { val d = TypedDataset.create(X2(startValue, replaceValue) :: Nil) val X2(a, b) = d.withColumnReplaced('a, d('b)) diff --git a/dataset/src/test/scala/frameless/XN.scala b/dataset/src/test/scala/frameless/XN.scala index c23d4b45d..ffcc59a76 100644 --- a/dataset/src/test/scala/frameless/XN.scala +++ b/dataset/src/test/scala/frameless/XN.scala @@ -97,9 +97,16 @@ object X6 { implicit def arbitrary[A: Arbitrary, B: Arbitrary, C: Arbitrary, D: Arbitrary, E: Arbitrary, F: Arbitrary]: Arbitrary[X6[A, B, C, D, E, F]] = Arbitrary(Arbitrary.arbTuple6[A, B, C, D, E, F].arbitrary.map((X6.apply[A, B, C, D, E, F] _).tupled)) - implicit def cogen[A, B, C, D, E, F](implicit A: Cogen[A], B: Cogen[B], C: Cogen[C], D: Cogen[D], E: Cogen[E], F: Cogen[F]): Cogen[X6[A, B, C, D, E, F]] = + implicit def cogen[A, B, C, D, E, F](implicit + A: Cogen[A], + B: Cogen[B], + C: Cogen[C], + D: Cogen[D], + E: Cogen[E], + F: Cogen[F] + ): Cogen[X6[A, B, C, D, E, F]] = Cogen.tuple6(A, B, C, D, E, F).contramap(x => (x.a, x.b, x.c, x.d, x.e, x.f)) implicit def ordering[A: Ordering, B: Ordering, C: Ordering, D: Ordering, E: Ordering, F: Ordering]: Ordering[X6[A, B, C, D, E, F]] = Ordering.Tuple6[A, B, C, D, E, F].on(x => (x.a, x.b, x.c, x.d, x.e, x.f)) -} \ No newline at end of file +} diff --git a/dataset/src/test/scala/frameless/forward/CheckpointTests.scala b/dataset/src/test/scala/frameless/forward/CheckpointTests.scala index 9a1ff8b44..91ff570a9 100644 --- a/dataset/src/test/scala/frameless/forward/CheckpointTests.scala +++ b/dataset/src/test/scala/frameless/forward/CheckpointTests.scala @@ -3,7 +3,6 @@ package frameless import org.scalacheck.Prop import org.scalacheck.Prop.{forAll, _} - class CheckpointTests extends TypedDatasetSuite { test("checkpoint") { def prop[A: TypedEncoder](data: Vector[A], isEager: Boolean): Prop = { @@ -18,4 +17,4 @@ class CheckpointTests extends TypedDatasetSuite { check(forAll(prop[Int] _)) check(forAll(prop[String] _)) } -} \ No newline at end of file +} diff --git a/dataset/src/test/scala/frameless/forward/ColumnsTests.scala b/dataset/src/test/scala/frameless/forward/ColumnsTests.scala index 282a72c9a..ec7c4c3f8 100644 --- a/dataset/src/test/scala/frameless/forward/ColumnsTests.scala +++ b/dataset/src/test/scala/frameless/forward/ColumnsTests.scala @@ -13,9 +13,14 @@ class ColumnsTests extends TypedDatasetSuite { val x5 = X5(i, s, b, l, d) :: Nil val x6 = X6(i, s, b, l, d, by) :: Nil - val datasets = Seq(TypedDataset.create(x1), TypedDataset.create(x2), - TypedDataset.create(x3), TypedDataset.create(x4), - TypedDataset.create(x5), TypedDataset.create(x6)) + val datasets = Seq( + TypedDataset.create(x1), + TypedDataset.create(x2), + TypedDataset.create(x3), + TypedDataset.create(x4), + TypedDataset.create(x5), + TypedDataset.create(x6) + ) Prop.all(datasets.flatMap { dataset => val columns = dataset.dataset.columns @@ -27,4 +32,4 @@ class ColumnsTests extends TypedDatasetSuite { check(forAll(prop _)) } -} \ No newline at end of file +} diff --git a/dataset/src/test/scala/frameless/forward/DistinctTests.scala b/dataset/src/test/scala/frameless/forward/DistinctTests.scala index 44da5e59e..fd8ac0719 100644 --- a/dataset/src/test/scala/frameless/forward/DistinctTests.scala +++ b/dataset/src/test/scala/frameless/forward/DistinctTests.scala @@ -7,7 +7,7 @@ import math.Ordering class DistinctTests extends TypedDatasetSuite { test("distinct") { // Comparison done with `.sorted` because order is not preserved by Spark for this operation. - def prop[A: TypedEncoder : Ordering](data: Vector[A]): Prop = + def prop[A: TypedEncoder: Ordering](data: Vector[A]): Prop = TypedDataset.create(data).distinct.collect().run().toVector.sorted ?= data.distinct.sorted check(forAll(prop[Int] _)) diff --git a/dataset/src/test/scala/frameless/forward/HeadTests.scala b/dataset/src/test/scala/frameless/forward/HeadTests.scala index 63f76e003..96c13a3ea 100644 --- a/dataset/src/test/scala/frameless/forward/HeadTests.scala +++ b/dataset/src/test/scala/frameless/forward/HeadTests.scala @@ -9,14 +9,14 @@ import scala.reflect.ClassTag import org.scalatest.matchers.should.Matchers class HeadTests extends TypedDatasetSuite with Matchers { - def propArray[A: TypedEncoder : ClassTag : Ordering](data: Vector[X1[A]])(implicit c: SparkSession): Prop = { + def propArray[A: TypedEncoder: ClassTag: Ordering](data: Vector[X1[A]])(implicit c: SparkSession): Prop = { import c.implicits._ - if(data.nonEmpty) { + if (data.nonEmpty) { val tds = TypedDataset. create(c.createDataset(data)( TypedExpressionEncoder.apply[X1[A]] ).orderBy($"a".desc)) - (tds.headOption().run().get ?= data.max). + (tds.headOption().run().get ?= data.max). &&(tds.head(1).run().head ?= data.max). &&(tds.head(4).run().toVector ?= data.sortBy(_.a)(implicitly[Ordering[A]].reverse).take(4)) diff --git a/dataset/src/test/scala/frameless/forward/InputFilesTests.scala b/dataset/src/test/scala/frameless/forward/InputFilesTests.scala index 246867e63..306070e20 100644 --- a/dataset/src/test/scala/frameless/forward/InputFilesTests.scala +++ b/dataset/src/test/scala/frameless/forward/InputFilesTests.scala @@ -25,7 +25,8 @@ class InputFilesTests extends TypedDatasetSuite with Matchers { inputDataset.dataset.write.csv(filePath) val dataset = TypedDataset.createUnsafe( - implicitly[SparkSession].sqlContext.read.schema(inputDataset.schema).csv(filePath)) + implicitly[SparkSession].sqlContext.read.schema(inputDataset.schema).csv(filePath) + ) dataset.inputFiles sameElements dataset.dataset.inputFiles } @@ -36,7 +37,8 @@ class InputFilesTests extends TypedDatasetSuite with Matchers { inputDataset.dataset.write.json(filePath) val dataset = TypedDataset.createUnsafe( - implicitly[SparkSession].sqlContext.read.schema(inputDataset.schema).json(filePath)) + implicitly[SparkSession].sqlContext.read.schema(inputDataset.schema).json(filePath) + ) dataset.inputFiles sameElements dataset.dataset.inputFiles } @@ -45,4 +47,4 @@ class InputFilesTests extends TypedDatasetSuite with Matchers { check(forAll(propCsv[String] _)) check(forAll(propJson[String] _)) } -} \ No newline at end of file +} diff --git a/dataset/src/test/scala/frameless/forward/IntersectTests.scala b/dataset/src/test/scala/frameless/forward/IntersectTests.scala index f0edb856e..c6bc110d0 100644 --- a/dataset/src/test/scala/frameless/forward/IntersectTests.scala +++ b/dataset/src/test/scala/frameless/forward/IntersectTests.scala @@ -6,7 +6,7 @@ import math.Ordering class IntersectTests extends TypedDatasetSuite { test("intersect") { - def prop[A: TypedEncoder : Ordering](data1: Vector[A], data2: Vector[A]): Prop = { + def prop[A: TypedEncoder: Ordering](data1: Vector[A], data2: Vector[A]): Prop = { val dataset1 = TypedDataset.create(data1) val dataset2 = TypedDataset.create(data2) val datasetIntersect = dataset1.intersect(dataset2).collect().run().toVector diff --git a/dataset/src/test/scala/frameless/forward/IsLocalTests.scala b/dataset/src/test/scala/frameless/forward/IsLocalTests.scala index f61d25cd1..71fbd27ce 100644 --- a/dataset/src/test/scala/frameless/forward/IsLocalTests.scala +++ b/dataset/src/test/scala/frameless/forward/IsLocalTests.scala @@ -14,4 +14,4 @@ class IsLocalTests extends TypedDatasetSuite { check(forAll(prop[Int] _)) check(forAll(prop[String] _)) } -} \ No newline at end of file +} diff --git a/dataset/src/test/scala/frameless/forward/IsStreamingTests.scala b/dataset/src/test/scala/frameless/forward/IsStreamingTests.scala index dd1874977..b056bc409 100644 --- a/dataset/src/test/scala/frameless/forward/IsStreamingTests.scala +++ b/dataset/src/test/scala/frameless/forward/IsStreamingTests.scala @@ -14,4 +14,4 @@ class IsStreamingTests extends TypedDatasetSuite { check(forAll(prop[Int] _)) check(forAll(prop[String] _)) } -} \ No newline at end of file +} diff --git a/dataset/src/test/scala/frameless/forward/QueryExecutionTests.scala b/dataset/src/test/scala/frameless/forward/QueryExecutionTests.scala index d59e250df..6fc9a5750 100644 --- a/dataset/src/test/scala/frameless/forward/QueryExecutionTests.scala +++ b/dataset/src/test/scala/frameless/forward/QueryExecutionTests.scala @@ -14,4 +14,4 @@ class QueryExecutionTests extends TypedDatasetSuite { check(forAll(prop[Int] _)) check(forAll(prop[String] _)) } -} \ No newline at end of file +} diff --git a/dataset/src/test/scala/frameless/forward/RandomSplitTests.scala b/dataset/src/test/scala/frameless/forward/RandomSplitTests.scala index 4cc9a4fde..24c5ab42e 100644 --- a/dataset/src/test/scala/frameless/forward/RandomSplitTests.scala +++ b/dataset/src/test/scala/frameless/forward/RandomSplitTests.scala @@ -12,7 +12,7 @@ class RandomSplitTests extends TypedDatasetSuite with Matchers { val nonEmptyPositiveArray: Gen[Array[Double]] = Gen.nonEmptyListOf(Gen.posNum[Double]).map(_.toArray) test("randomSplit(weight, seed)") { - def prop[A: TypedEncoder : Arbitrary] = forAll(vectorGen[A], nonEmptyPositiveArray, arbitrary[Long]) { + def prop[A: TypedEncoder: Arbitrary] = forAll(vectorGen[A], nonEmptyPositiveArray, arbitrary[Long]) { (data: Vector[A], weights: Array[Double], seed: Long) => val dataset = TypedDataset.create(data) @@ -25,7 +25,7 @@ class RandomSplitTests extends TypedDatasetSuite with Matchers { } test("randomSplitAsList(weight, seed)") { - def prop[A: TypedEncoder : Arbitrary] = forAll(vectorGen[A], nonEmptyPositiveArray, arbitrary[Long]) { + def prop[A: TypedEncoder: Arbitrary] = forAll(vectorGen[A], nonEmptyPositiveArray, arbitrary[Long]) { (data: Vector[A], weights: Array[Double], seed: Long) => val dataset = TypedDataset.create(data) diff --git a/dataset/src/test/scala/frameless/forward/SQLContextTests.scala b/dataset/src/test/scala/frameless/forward/SQLContextTests.scala index 06c14f651..6a9456d56 100644 --- a/dataset/src/test/scala/frameless/forward/SQLContextTests.scala +++ b/dataset/src/test/scala/frameless/forward/SQLContextTests.scala @@ -1,15 +1,16 @@ package frameless import org.scalacheck.Prop -import org.scalacheck.Prop.{ forAll, _ } +import org.scalacheck.Prop.{forAll, _} class SQLContextTests extends TypedDatasetSuite { test("sqlContext") { def prop[A: TypedEncoder](data: Vector[A]): Prop = { val dataset = TypedDataset.create[A](data) - dataset.sqlContext =? org.apache.spark.sql.FramelessInternals - .sqlContext(dataset.dataset) + dataset.sqlContext =? + org.apache.spark.sql.FramelessInternals + .sqlContext(dataset.dataset) } check(forAll(prop[Int] _)) diff --git a/dataset/src/test/scala/frameless/forward/SparkSessionTests.scala b/dataset/src/test/scala/frameless/forward/SparkSessionTests.scala index c5d0da338..ce3130d3b 100644 --- a/dataset/src/test/scala/frameless/forward/SparkSessionTests.scala +++ b/dataset/src/test/scala/frameless/forward/SparkSessionTests.scala @@ -14,4 +14,4 @@ class SparkSessionTests extends TypedDatasetSuite { check(forAll(prop[Int] _)) check(forAll(prop[String] _)) } -} \ No newline at end of file +} diff --git a/dataset/src/test/scala/frameless/forward/StorageLevelTests.scala b/dataset/src/test/scala/frameless/forward/StorageLevelTests.scala index 3ac93773e..63bd9be5e 100644 --- a/dataset/src/test/scala/frameless/forward/StorageLevelTests.scala +++ b/dataset/src/test/scala/frameless/forward/StorageLevelTests.scala @@ -7,12 +7,23 @@ import org.scalacheck.{Arbitrary, Gen} class StorageLevelTests extends TypedDatasetSuite { - val storageLevelGen: Gen[StorageLevel] = Gen.oneOf(Seq(NONE, DISK_ONLY, DISK_ONLY_2, MEMORY_ONLY, - MEMORY_ONLY_2, MEMORY_ONLY_SER, MEMORY_ONLY_SER_2, MEMORY_AND_DISK, - MEMORY_AND_DISK_2, MEMORY_AND_DISK_SER, MEMORY_AND_DISK_SER_2, OFF_HEAP)) + val storageLevelGen: Gen[StorageLevel] = Gen.oneOf(Seq( + NONE, + DISK_ONLY, + DISK_ONLY_2, + MEMORY_ONLY, + MEMORY_ONLY_2, + MEMORY_ONLY_SER, + MEMORY_ONLY_SER_2, + MEMORY_AND_DISK, + MEMORY_AND_DISK_2, + MEMORY_AND_DISK_SER, + MEMORY_AND_DISK_SER_2, + OFF_HEAP + )) test("storageLevel") { - def prop[A: TypedEncoder : Arbitrary] = forAll(vectorGen[A], storageLevelGen) { + def prop[A: TypedEncoder: Arbitrary] = forAll(vectorGen[A], storageLevelGen) { (data: Vector[A], storageLevel: StorageLevel) => val dataset = TypedDataset.create(data) if (storageLevel != StorageLevel.NONE) @@ -26,4 +37,4 @@ class StorageLevelTests extends TypedDatasetSuite { check(prop[Int]) check(prop[String]) } -} \ No newline at end of file +} diff --git a/dataset/src/test/scala/frameless/forward/TakeTests.scala b/dataset/src/test/scala/frameless/forward/TakeTests.scala index eec77bc80..98a877f87 100644 --- a/dataset/src/test/scala/frameless/forward/TakeTests.scala +++ b/dataset/src/test/scala/frameless/forward/TakeTests.scala @@ -6,17 +6,15 @@ import scala.reflect.ClassTag class TakeTests extends TypedDatasetSuite { test("take") { - def prop[A: TypedEncoder](n: Int, data: Vector[A]): Prop = - (n >= 0) ==> (TypedDataset.create(data).take(n).run().toVector =? data.take(n)) + def prop[A: TypedEncoder](n: Int, data: Vector[A]): Prop = (n >= 0) ==> (TypedDataset.create(data).take(n).run().toVector =? data.take(n)) - def propArray[A: TypedEncoder: ClassTag](n: Int, data: Vector[X1[Array[A]]]): Prop = - (n >= 0) ==> { - Prop { - TypedDataset.create(data).take(n).run().toVector.zip(data.take(n)).forall { - case (X1(l), X1(r)) => l sameElements r - } + def propArray[A: TypedEncoder: ClassTag](n: Int, data: Vector[X1[Array[A]]]): Prop = (n >= 0) ==> { + Prop { + TypedDataset.create(data).take(n).run().toVector.zip(data.take(n)).forall { + case (X1(l), X1(r)) => l sameElements r } } + } check(forAll(prop[Int] _)) check(forAll(prop[String] _)) diff --git a/dataset/src/test/scala/frameless/forward/ToJSONTests.scala b/dataset/src/test/scala/frameless/forward/ToJSONTests.scala index 5ed79a9c9..5e78ea6d0 100644 --- a/dataset/src/test/scala/frameless/forward/ToJSONTests.scala +++ b/dataset/src/test/scala/frameless/forward/ToJSONTests.scala @@ -14,4 +14,4 @@ class ToJSONTests extends TypedDatasetSuite { check(forAll(prop[Int] _)) check(forAll(prop[String] _)) } -} \ No newline at end of file +} diff --git a/dataset/src/test/scala/frameless/forward/UnionTests.scala b/dataset/src/test/scala/frameless/forward/UnionTests.scala index 6cd8f4005..45860046e 100644 --- a/dataset/src/test/scala/frameless/forward/UnionTests.scala +++ b/dataset/src/test/scala/frameless/forward/UnionTests.scala @@ -63,4 +63,4 @@ class UnionTests extends TypedDatasetSuite { final case class Foo[A, B](x: A, y: B) final case class Bar[A, B](y: B, x: A) final case class Baz[A, B, C](z: C, y: B, x: A) -final case class Wrong[A, B, C](a: A, b: B, c: C) \ No newline at end of file +final case class Wrong[A, B, C](a: A, b: B, c: C) diff --git a/dataset/src/test/scala/frameless/forward/WriteStreamTests.scala b/dataset/src/test/scala/frameless/forward/WriteStreamTests.scala index 368147c93..c3b2d6dee 100644 --- a/dataset/src/test/scala/frameless/forward/WriteStreamTests.scala +++ b/dataset/src/test/scala/frameless/forward/WriteStreamTests.scala @@ -49,7 +49,7 @@ class WriteStreamTests extends TypedDatasetSuite { .start() tester.processAllAvailable() val output = spark.table(s"testCsv_$uidNoHyphens").as[A] - TypedDataset.create(data).collect().run().groupBy(identity) ?= output.collect().groupBy(identity).map { case (k, arr) => (k, arr.toSeq) } + TypedDataset.create(data).collect().run().groupBy(identity) ?= output.collect().groupBy(identity).map { case (k, arr) => (k, arr.toSeq) } } check(forAll(Gen.nonEmptyListOf(Gen.alphaNumStr.suchThat(_.nonEmpty)))(prop[String])) @@ -79,7 +79,7 @@ class WriteStreamTests extends TypedDatasetSuite { .start() tester.processAllAvailable() val output = spark.table(s"testParquet_$uidNoHyphens").as[A] - TypedDataset.create(data).collect().run().groupBy(identity) ?= output.collect().groupBy(identity).map { case (k, arr) => (k, arr.toSeq) } + TypedDataset.create(data).collect().run().groupBy(identity) ?= output.collect().groupBy(identity).map { case (k, arr) => (k, arr.toSeq) } } check(forAll(Gen.nonEmptyListOf(genWriteExample))(prop[WriteExample])) diff --git a/dataset/src/test/scala/frameless/functions/AggregateFunctionsTests.scala b/dataset/src/test/scala/frameless/functions/AggregateFunctionsTests.scala index 201d93c63..d469608ba 100644 --- a/dataset/src/test/scala/frameless/functions/AggregateFunctionsTests.scala +++ b/dataset/src/test/scala/frameless/functions/AggregateFunctionsTests.scala @@ -21,7 +21,7 @@ class AggregateFunctionsTests extends TypedDatasetSuite { test("sum") { case class Sum4Tests[A, B](sum: Seq[A] => B) - def prop[A: TypedEncoder, Out: TypedEncoder : Numeric](xs: List[A])( + def prop[A: TypedEncoder, Out: TypedEncoder: Numeric](xs: List[A])( implicit summable: CatalystSummable[A, Out], summer: Sum4Tests[A, Out] @@ -33,7 +33,7 @@ class AggregateFunctionsTests extends TypedDatasetSuite { datasetSum match { case x :: Nil => approximatelyEqual(summer.sum(xs), x) - case other => falsified + case other => falsified } } @@ -61,7 +61,7 @@ class AggregateFunctionsTests extends TypedDatasetSuite { test("sumDistinct") { case class Sum4Tests[A, B](sum: Seq[A] => B) - def prop[A: TypedEncoder, Out: TypedEncoder : Numeric](xs: List[A])( + def prop[A: TypedEncoder, Out: TypedEncoder: Numeric](xs: List[A])( implicit summable: CatalystSummable[A, Out], summer: Sum4Tests[A, Out] @@ -73,15 +73,15 @@ class AggregateFunctionsTests extends TypedDatasetSuite { datasetSum match { case x :: Nil => approximatelyEqual(summer.sum(xs), x) - case other => falsified + case other => falsified } } // Replicate Spark's behaviour : Ints and Shorts are cast to Long // https://github.com/apache/spark/blob/7eb2ca8/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala#L37 implicit def summerLong = Sum4Tests[Long, Long](_.toSet.sum) - implicit def summerInt = Sum4Tests[Int, Long]( x => x.toSet.map((_:Int).toLong).sum) - implicit def summerShort = Sum4Tests[Short, Long](x => x.toSet.map((_:Short).toLong).sum) + implicit def summerInt = Sum4Tests[Int, Long](x => x.toSet.map((_: Int).toLong).sum) + implicit def summerShort = Sum4Tests[Short, Long](x => x.toSet.map((_: Short).toLong).sum) check(forAll(prop[Long, Long] _)) check(forAll(prop[Int, Long] _)) @@ -95,7 +95,7 @@ class AggregateFunctionsTests extends TypedDatasetSuite { test("avg") { case class Averager4Tests[A, B](avg: Seq[A] => B) - def prop[A: TypedEncoder, Out: TypedEncoder : Numeric](xs: List[A])( + def prop[A: TypedEncoder, Out: TypedEncoder: Numeric](xs: List[A])( implicit averageable: CatalystAverageable[A, Out], averager: Averager4Tests[A, Out] @@ -107,21 +107,21 @@ class AggregateFunctionsTests extends TypedDatasetSuite { if (datasetAvg.size > 2) falsified else xs match { - case Nil => datasetAvg ?= Vector() + case Nil => datasetAvg ?= Vector() case _ :: _ => datasetAvg.headOption match { - case Some(x) => approximatelyEqual(averager.avg(xs), x) - case None => falsified - } + case Some(x) => approximatelyEqual(averager.avg(xs), x) + case None => falsified + } } } // Replicate Spark's behaviour : If the datatype isn't BigDecimal cast type to Double // https://github.com/apache/spark/blob/7eb2ca8/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala#L50 - implicit def averageDecimal = Averager4Tests[BigDecimal, BigDecimal](as => as.sum/as.size) - implicit def averageDouble = Averager4Tests[Double, Double](as => as.sum/as.size) - implicit def averageLong = Averager4Tests[Long, Double](as => as.map(_.toDouble).sum/as.size) - implicit def averageInt = Averager4Tests[Int, Double](as => as.map(_.toDouble).sum/as.size) - implicit def averageShort = Averager4Tests[Short, Double](as => as.map(_.toDouble).sum/as.size) + implicit def averageDecimal = Averager4Tests[BigDecimal, BigDecimal](as => as.sum / as.size) + implicit def averageDouble = Averager4Tests[Double, Double](as => as.sum / as.size) + implicit def averageLong = Averager4Tests[Long, Double](as => as.map(_.toDouble).sum / as.size) + implicit def averageInt = Averager4Tests[Int, Double](as => as.map(_.toDouble).sum / as.size) + implicit def averageShort = Averager4Tests[Short, Double](as => as.map(_.toDouble).sum / as.size) /* under 3.4 an oddity was detected: Falsified after 2 successful property evaluations. @@ -141,7 +141,7 @@ class AggregateFunctionsTests extends TypedDatasetSuite { } test("stddev and variance") { - def prop[A: TypedEncoder : CatalystVariance : Numeric](xs: List[A]): Prop = { + def prop[A: TypedEncoder: CatalystVariance: Numeric](xs: List[A]): Prop = { val numeric = implicitly[Numeric[A]] val dataset = TypedDataset.create(xs.map(X1(_))) val A = dataset.col[A]('a) @@ -225,7 +225,7 @@ class AggregateFunctionsTests extends TypedDatasetSuite { val A = dataset.col[Long]('a) val datasetMax = dataset.agg(max(A) * 2).collect().run().headOption - datasetMax ?= (if(xs.isEmpty) None else Some(xs.max * 2)) + datasetMax ?= (if (xs.isEmpty) None else Some(xs.max * 2)) } check(forAll(prop _)) @@ -336,7 +336,7 @@ class AggregateFunctionsTests extends TypedDatasetSuite { } test("collectList") { - def prop[A: TypedEncoder : Ordering](xs: List[X2[A, A]]): Prop = { + def prop[A: TypedEncoder: Ordering](xs: List[X2[A, A]]): Prop = { val tds = TypedDataset.create(xs) val tdsRes: Seq[(A, Vector[A])] = tds.groupBy(tds('a)).agg(collectList(tds('b))).collect().run() @@ -350,7 +350,7 @@ class AggregateFunctionsTests extends TypedDatasetSuite { } test("collectSet") { - def prop[A: TypedEncoder : Ordering](xs: List[X2[A, A]]): Prop = { + def prop[A: TypedEncoder: Ordering](xs: List[X2[A, A]]): Prop = { val tds = TypedDataset.create(xs) val tdsRes: Seq[(A, Vector[A])] = tds.groupBy(tds('a)).agg(collectSet(tds('b))).collect().run() @@ -379,19 +379,15 @@ class AggregateFunctionsTests extends TypedDatasetSuite { check(forAll(prop[BigDecimal] _)) } - - def bivariatePropTemplate[A: TypedEncoder, B: TypedEncoder] - ( + def bivariatePropTemplate[A: TypedEncoder, B: TypedEncoder]( xs: List[X3[Int, A, B]] - ) - ( + )( framelessFun: (TypedColumn[X3[Int, A, B], A], TypedColumn[X3[Int, A, B], B]) => TypedAggregate[X3[Int, A, B], Option[Double]], sparkFun: (Column, Column) => Column - ) - ( + )( implicit encEv: Encoder[(Int, A, B)], - encEv2: Encoder[(Int,Option[Double])], + encEv2: Encoder[(Int, Option[Double])], evCanBeDoubleA: CatalystCast[A, Double], evCanBeDoubleB: CatalystCast[B, Double] ): Prop = { @@ -407,34 +403,29 @@ class AggregateFunctionsTests extends TypedDatasetSuite { val compBivar = cDF .groupBy(cDF("_1")) .agg(sparkFun(cDF("_2"), cDF("_3"))) - .map( - row => { - val grp = row.getInt(0) - (grp, DoubleBehaviourUtils.nanNullHandler(row.get(1))) - } - ) + .map(row => { + val grp = row.getInt(0) + (grp, DoubleBehaviourUtils.nanNullHandler(row.get(1))) + }) // Should be the same tdBivar.toMap ?= compBivar.collect().toMap } - def univariatePropTemplate[A: TypedEncoder] - ( + def univariatePropTemplate[A: TypedEncoder]( xs: List[X2[Int, A]] - ) - ( + )( framelessFun: (TypedColumn[X2[Int, A], A]) => TypedAggregate[X2[Int, A], Option[Double]], sparkFun: (Column) => Column - ) - ( + )( implicit encEv: Encoder[(Int, A)], - encEv2: Encoder[(Int,Option[Double])], + encEv2: Encoder[(Int, Option[Double])], evCanBeDoubleA: CatalystCast[A, Double] ): Prop = { val tds = TypedDataset.create(xs) - //typed implementation of univariate stats function + // typed implementation of univariate stats function val tdUnivar = tds.groupBy(tds('a)).agg(framelessFun(tds('b))).deserialized.map(kv => (kv._1, kv._2.flatMap(DoubleBehaviourUtils.nanNullHandler)) ).collect().run() @@ -444,12 +435,10 @@ class AggregateFunctionsTests extends TypedDatasetSuite { val compUnivar = cDF .groupBy(cDF("_1")) .agg(sparkFun(cDF("_2"))) - .map( - row => { - val grp = row.getInt(0) - (grp, DoubleBehaviourUtils.nanNullHandler(row.get(1))) - } - ) + .map(row => { + val grp = row.getInt(0) + (grp, DoubleBehaviourUtils.nanNullHandler(row.get(1))) + }) // Should be the same tdUnivar.toMap ?= compUnivar.collect().toMap @@ -464,7 +453,7 @@ class AggregateFunctionsTests extends TypedDatasetSuite { encEv: Encoder[(Int, A, B)], evCanBeDoubleA: CatalystCast[A, Double], evCanBeDoubleB: CatalystCast[B, Double] - ): Prop = bivariatePropTemplate(xs)(corr[A,B,X3[Int, A, B]],org.apache.spark.sql.functions.corr) + ): Prop = bivariatePropTemplate(xs)(corr[A, B, X3[Int, A, B]], org.apache.spark.sql.functions.corr) check(forAll(prop[Double, Double] _)) check(forAll(prop[Double, Int] _)) diff --git a/dataset/src/test/scala/frameless/functions/DateTimeStringBehaviourUtils.scala b/dataset/src/test/scala/frameless/functions/DateTimeStringBehaviourUtils.scala index e22fe4337..3f90e4d2d 100644 --- a/dataset/src/test/scala/frameless/functions/DateTimeStringBehaviourUtils.scala +++ b/dataset/src/test/scala/frameless/functions/DateTimeStringBehaviourUtils.scala @@ -5,6 +5,6 @@ import org.apache.spark.sql.Row object DateTimeStringBehaviourUtils { val nullHandler: Row => Option[Int] = _.get(0) match { case i: Int => Some(i) - case _ => None + case _ => None } } diff --git a/dataset/src/test/scala/frameless/functions/DoubleBehaviourUtils.scala b/dataset/src/test/scala/frameless/functions/DoubleBehaviourUtils.scala index f3a8be581..591edcf77 100644 --- a/dataset/src/test/scala/frameless/functions/DoubleBehaviourUtils.scala +++ b/dataset/src/test/scala/frameless/functions/DoubleBehaviourUtils.scala @@ -13,8 +13,8 @@ object DoubleBehaviourUtils { private val nanHandler: Double => Option[Double] = value => if (!value.equals(Double.NaN)) Option(value) else None // Making sure that null => None and does not result in 0.0d because of row.getAs[Double]'s use of .asInstanceOf val nanNullHandler: Any => Option[Double] = { - case null => None + case null => None case d: Double => nanHandler(d) - case _ => ??? + case _ => ??? } } diff --git a/dataset/src/test/scala/frameless/functions/NonAggregateFunctionsTests.scala b/dataset/src/test/scala/frameless/functions/NonAggregateFunctionsTests.scala index 470d58e5f..1ddc8483e 100644 --- a/dataset/src/test/scala/frameless/functions/NonAggregateFunctionsTests.scala +++ b/dataset/src/test/scala/frameless/functions/NonAggregateFunctionsTests.scala @@ -7,7 +7,7 @@ import java.nio.charset.StandardCharsets import frameless.functions.nonAggregate._ import org.apache.commons.io.FileUtils -import org.apache.spark.sql.{Column, Encoder, SaveMode, functions => sparkFunctions} +import org.apache.spark.sql.{functions => sparkFunctions, Column, Encoder, SaveMode} import org.scalacheck.Prop._ import org.scalacheck.{Arbitrary, Gen, Prop} @@ -18,24 +18,24 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { object NonNegativeGenerators { val doubleGen = for { - s <- Gen.chooseNum(1, Int.MaxValue) - e <- Gen.chooseNum(1, Int.MaxValue) + s <- Gen.chooseNum(1, Int.MaxValue) + e <- Gen.chooseNum(1, Int.MaxValue) res: Double = s.toDouble / e.toDouble } yield res - val intGen: Gen[Int] = Gen.chooseNum(1, Int.MaxValue) + val intGen: Gen[Int] = Gen.chooseNum(1, Int.MaxValue) val shortGen: Gen[Short] = Gen.chooseNum(1, Short.MaxValue) - val longGen: Gen[Long] = Gen.chooseNum(1, Long.MaxValue) - val byteGen: Gen[Byte] = Gen.chooseNum(1, Byte.MaxValue) + val longGen: Gen[Long] = Gen.chooseNum(1, Long.MaxValue) + val byteGen: Gen[Byte] = Gen.chooseNum(1, Byte.MaxValue) } object NonNegativeArbitraryNumericValues { import NonNegativeGenerators._ - implicit val arbInt: Arbitrary[Int] = Arbitrary(intGen) - implicit val arbDouble: Arbitrary[Double] = Arbitrary(doubleGen) - implicit val arbLong: Arbitrary[Long] = Arbitrary(longGen) - implicit val arbShort: Arbitrary[Short] = Arbitrary(shortGen) - implicit val arbByte: Arbitrary[Byte] = Arbitrary(byteGen) + implicit val arbInt: Arbitrary[Int] = Arbitrary(intGen) + implicit val arbDouble: Arbitrary[Double] = Arbitrary(doubleGen) + implicit val arbLong: Arbitrary[Long] = Arbitrary(longGen) + implicit val arbShort: Arbitrary[Short] = Arbitrary(shortGen) + implicit val arbByte: Arbitrary[Byte] = Arbitrary(byteGen) } private val base64Encoder = Base64.getEncoder @@ -53,9 +53,11 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { val spark = session import spark.implicits._ - def prop[A: TypedEncoder : Encoder, B: TypedEncoder : Encoder](values: List[X1[A]])( - implicit encX1:Encoder[X1[A]], - catalystAbsolute: CatalystNumericWithJavaBigDecimal[A, B]) = { + def prop[A: TypedEncoder: Encoder, B: TypedEncoder: Encoder](values: List[X1[A]])( + implicit + encX1: Encoder[X1[A]], + catalystAbsolute: CatalystNumericWithJavaBigDecimal[A, B] + ) = { val cDS = session.createDataset(values) val resCompare = cDS .select(sparkFunctions.negate(cDS("a"))) @@ -77,7 +79,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { check(forAll(prop[Byte, Byte] _)) check(forAll(prop[Short, Short] _)) check(forAll(prop[Int, Int] _)) - check(forAll(prop[Long, Long] _)) + check(forAll(prop[Long, Long] _)) check(forAll(prop[BigDecimal, java.math.BigDecimal] _)) } @@ -85,7 +87,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { val spark = session import spark.implicits._ - def prop(values: List[X1[Boolean]], fromBase: Int, toBase: Int)(implicit encX1:Encoder[X1[Boolean]]) = { + def prop(values: List[X1[Boolean]], fromBase: Int, toBase: Int)(implicit encX1: Encoder[X1[Boolean]]) = { val cDS = session.createDataset(values) val resCompare = cDS @@ -112,7 +114,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { val spark = session import spark.implicits._ - def prop(values: List[X1[String]], fromBase: Int, toBase: Int)(implicit encX1:Encoder[X1[String]]) = { + def prop(values: List[X1[String]], fromBase: Int, toBase: Int)(implicit encX1: Encoder[X1[String]]) = { val cDS = session.createDataset(values) val resCompare = cDS @@ -139,7 +141,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { val spark = session import spark.implicits._ - def prop[A: TypedEncoder : Encoder](values: List[X1[A]])(implicit encX1:Encoder[X1[A]]) = { + def prop[A: TypedEncoder: Encoder](values: List[X1[A]])(implicit encX1: Encoder[X1[A]]) = { val cDS = session.createDataset(values) val resCompare = cDS .select(sparkFunctions.degrees(cDS("a"))) @@ -161,12 +163,15 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { check(forAll(prop[Byte] _)) check(forAll(prop[Short] _)) check(forAll(prop[Int] _)) - check(forAll(prop[Long] _)) + check(forAll(prop[Long] _)) check(forAll(prop[BigDecimal] _)) } - def propBitShift[A: TypedEncoder : Encoder, B: TypedEncoder : Encoder](typedDS: TypedDataset[X1[A]]) - (typedCol: TypedColumn[X1[A], B], sparkFunc: (Column,Int) => Column, numBits: Int): Prop = { + def propBitShift[A: TypedEncoder: Encoder, B: TypedEncoder: Encoder](typedDS: TypedDataset[X1[A]])( + typedCol: TypedColumn[X1[A], B], + sparkFunc: (Column, Int) => Column, + numBits: Int + ): Prop = { val spark = session import spark.implicits._ @@ -190,9 +195,10 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { import spark.implicits._ @nowarn // supress sparkFunctions.shiftRightUnsigned call which is used to maintain Spark 3.1.x backwards compat - def prop[A: TypedEncoder : Encoder, B: TypedEncoder : Encoder] - (values: List[X1[A]], numBits: Int) - (implicit catalystBitShift: CatalystBitShift[A, B], encX1: Encoder[X1[A]]) = { + def prop[A: TypedEncoder: Encoder, B: TypedEncoder: Encoder](values: List[X1[A]], numBits: Int)(implicit + catalystBitShift: CatalystBitShift[A, B], + encX1: Encoder[X1[A]] + ) = { val typedDS = TypedDataset.create(values) propBitShift(typedDS)(shiftRightUnsigned(typedDS('a), numBits), sparkFunctions.shiftRightUnsigned, numBits) } @@ -209,9 +215,10 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { import spark.implicits._ @nowarn // supress sparkFunctions.shiftRight call which is used to maintain Spark 3.1.x backwards compat - def prop[A: TypedEncoder : Encoder, B: TypedEncoder : Encoder] - (values: List[X1[A]], numBits: Int) - (implicit catalystBitShift: CatalystBitShift[A, B], encX1: Encoder[X1[A]]) = { + def prop[A: TypedEncoder: Encoder, B: TypedEncoder: Encoder](values: List[X1[A]], numBits: Int)(implicit + catalystBitShift: CatalystBitShift[A, B], + encX1: Encoder[X1[A]] + ) = { val typedDS = TypedDataset.create(values) propBitShift(typedDS)(shiftRight(typedDS('a), numBits), sparkFunctions.shiftRight, numBits) } @@ -228,9 +235,10 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { import spark.implicits._ @nowarn // supress sparkFunctions.shiftLeft call which is used to maintain Spark 3.1.x backwards compat - def prop[A: TypedEncoder : Encoder, B: TypedEncoder : Encoder] - (values: List[X1[A]], numBits: Int) - (implicit catalystBitShift: CatalystBitShift[A, B], encX1: Encoder[X1[A]]) = { + def prop[A: TypedEncoder: Encoder, B: TypedEncoder: Encoder](values: List[X1[A]], numBits: Int)(implicit + catalystBitShift: CatalystBitShift[A, B], + encX1: Encoder[X1[A]] + ) = { val typedDS = TypedDataset.create(values) propBitShift(typedDS)(shiftLeft(typedDS('a), numBits), sparkFunctions.shiftLeft, numBits) } @@ -246,21 +254,21 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { val spark = session import spark.implicits._ - def prop[A: TypedEncoder : Encoder, B: TypedEncoder : Encoder] - (values: List[X1[A]])( - implicit catalystAbsolute: CatalystRound[A, B], encX1: Encoder[X1[A]] + def prop[A: TypedEncoder: Encoder, B: TypedEncoder: Encoder](values: List[X1[A]])( + implicit + catalystAbsolute: CatalystRound[A, B], + encX1: Encoder[X1[A]] ) = { val cDS = session.createDataset(values) val resCompare = cDS .select(sparkFunctions.ceil(cDS("a"))) .map(_.getAs[B](0)) .collect() - .toList.map{ - case bigDecimal : java.math.BigDecimal => bigDecimal.setScale(0) - case other => other + .toList.map { + case bigDecimal: java.math.BigDecimal => bigDecimal.setScale(0) + case other => other }.asInstanceOf[List[B]] - val typedDS = TypedDataset.create(values) val res = typedDS .select(ceil(typedDS('a))) @@ -356,20 +364,20 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { val spark = session import spark.implicits._ - def prop[A: TypedEncoder : Encoder, B: TypedEncoder : Encoder] - (values: List[X1[A]])( - implicit catalystAbsolute: CatalystRound[A, B], encX1: Encoder[X1[A]] + def prop[A: TypedEncoder: Encoder, B: TypedEncoder: Encoder](values: List[X1[A]])( + implicit + catalystAbsolute: CatalystRound[A, B], + encX1: Encoder[X1[A]] ) = { val cDS = session.createDataset(values) val resCompare = cDS .select(sparkFunctions.floor(cDS("a"))) .map(_.getAs[B](0)) .collect() - .toList.map{ - case bigDecimal : java.math.BigDecimal => bigDecimal.setScale(0) - case other => other - }.asInstanceOf[List[B]] - + .toList.map { + case bigDecimal: java.math.BigDecimal => bigDecimal.setScale(0) + case other => other + }.asInstanceOf[List[B]] val typedDS = TypedDataset.create(values) val res = typedDS @@ -387,35 +395,33 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { check(forAll(prop[BigDecimal, java.math.BigDecimal] _)) } - test("abs big decimal") { val spark = session import spark.implicits._ - def prop[A: TypedEncoder: Encoder, B: TypedEncoder: Encoder] - (values: List[X1[A]]) - ( - implicit catalystAbsolute: CatalystNumericWithJavaBigDecimal[A, B], - encX1:Encoder[X1[A]] - )= { - val cDS = session.createDataset(values) - val resCompare = cDS - .select(sparkFunctions.abs(cDS("a"))) - .map(_.getAs[B](0)) - .collect().toList + def prop[A: TypedEncoder: Encoder, B: TypedEncoder: Encoder](values: List[X1[A]])( + implicit + catalystAbsolute: CatalystNumericWithJavaBigDecimal[A, B], + encX1: Encoder[X1[A]] + ) = { + val cDS = session.createDataset(values) + val resCompare = cDS + .select(sparkFunctions.abs(cDS("a"))) + .map(_.getAs[B](0)) + .collect().toList - val typedDS = TypedDataset.create(values) - val col = typedDS('a) - val res = typedDS - .select( - abs(col) - ) - .collect() - .run() - .toList + val typedDS = TypedDataset.create(values) + val col = typedDS('a) + val res = typedDS + .select( + abs(col) + ) + .collect() + .run() + .toList - res ?= resCompare - } + res ?= resCompare + } check(forAll(prop[BigDecimal, java.math.BigDecimal] _)) } @@ -424,10 +430,9 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { val spark = session import spark.implicits._ - def prop[A: TypedEncoder : Encoder] - (values: List[X1[A]]) - ( - implicit catalystAbsolute: CatalystNumericWithJavaBigDecimal[A, A], + def prop[A: TypedEncoder: Encoder](values: List[X1[A]])( + implicit + catalystAbsolute: CatalystNumericWithJavaBigDecimal[A, A], encX1: Encoder[X1[A]] ) = { val cDS = session.createDataset(values) @@ -436,7 +441,6 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { .map(_.getAs[A](0)) .collect().toList - val typedDS = TypedDataset.create(values) val res = typedDS .select(abs(typedDS('a))) @@ -453,36 +457,37 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { check(forAll(prop[Double] _)) } - def propTrigonometric[A: CatalystNumeric: TypedEncoder : Encoder](typedDS: TypedDataset[X1[A]]) - (typedCol: TypedColumn[X1[A], Double], sparkFunc: Column => Column): Prop = { - val spark = session - import spark.implicits._ + def propTrigonometric[A: CatalystNumeric: TypedEncoder: Encoder](typedDS: TypedDataset[X1[A]])( + typedCol: TypedColumn[X1[A], Double], + sparkFunc: Column => Column + ): Prop = { + val spark = session + import spark.implicits._ - val resCompare = typedDS.dataset - .select(sparkFunc($"a")) - .map(_.getAs[Double](0)) - .map(DoubleBehaviourUtils.nanNullHandler) - .collect().toList + val resCompare = typedDS.dataset + .select(sparkFunc($"a")) + .map(_.getAs[Double](0)) + .map(DoubleBehaviourUtils.nanNullHandler) + .collect().toList - val res = typedDS - .select(typedCol) - .deserialized - .map(DoubleBehaviourUtils.nanNullHandler) - .collect() - .run() - .toList + val res = typedDS + .select(typedCol) + .deserialized + .map(DoubleBehaviourUtils.nanNullHandler) + .collect() + .run() + .toList - res ?= resCompare + res ?= resCompare } test("cos") { val spark = session import spark.implicits._ - def prop[A: CatalystNumeric : TypedEncoder : Encoder](values: List[X1[A]]) - (implicit encX1:Encoder[X1[A]]) = { - val typedDS = TypedDataset.create(values) - propTrigonometric(typedDS)(cos(typedDS('a)), sparkFunctions.cos) + def prop[A: CatalystNumeric: TypedEncoder: Encoder](values: List[X1[A]])(implicit encX1: Encoder[X1[A]]) = { + val typedDS = TypedDataset.create(values) + propTrigonometric(typedDS)(cos(typedDS('a)), sparkFunctions.cos) } check(forAll(prop[Int] _)) @@ -497,10 +502,9 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { val spark = session import spark.implicits._ - def prop[A: CatalystNumeric : TypedEncoder : Encoder](values: List[X1[A]]) - (implicit encX1:Encoder[X1[A]]) = { - val typedDS = TypedDataset.create(values) - propTrigonometric(typedDS)(cosh(typedDS('a)), sparkFunctions.cosh) + def prop[A: CatalystNumeric: TypedEncoder: Encoder](values: List[X1[A]])(implicit encX1: Encoder[X1[A]]) = { + val typedDS = TypedDataset.create(values) + propTrigonometric(typedDS)(cosh(typedDS('a)), sparkFunctions.cosh) } check(forAll(prop[Int] _)) @@ -515,10 +519,9 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { val spark = session import spark.implicits._ - def prop[A: CatalystNumeric : TypedEncoder : Encoder](values: List[X1[A]]) - (implicit encX1:Encoder[X1[A]]) = { - val typedDS = TypedDataset.create(values) - propTrigonometric(typedDS)(acos(typedDS('a)), sparkFunctions.acos) + def prop[A: CatalystNumeric: TypedEncoder: Encoder](values: List[X1[A]])(implicit encX1: Encoder[X1[A]]) = { + val typedDS = TypedDataset.create(values) + propTrigonometric(typedDS)(acos(typedDS('a)), sparkFunctions.acos) } check(forAll(prop[Int] _)) @@ -529,16 +532,13 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { check(forAll(prop[Double] _)) } - - test("signum") { val spark = session import spark.implicits._ - def prop[A: CatalystNumeric : TypedEncoder : Encoder](values: List[X1[A]]) - (implicit encX1:Encoder[X1[A]]) = { - val typedDS = TypedDataset.create(values) - propTrigonometric(typedDS)(signum(typedDS('a)), sparkFunctions.signum) + def prop[A: CatalystNumeric: TypedEncoder: Encoder](values: List[X1[A]])(implicit encX1: Encoder[X1[A]]) = { + val typedDS = TypedDataset.create(values) + propTrigonometric(typedDS)(signum(typedDS('a)), sparkFunctions.signum) } check(forAll(prop[Int] _)) @@ -553,10 +553,9 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { val spark = session import spark.implicits._ - def prop[A: CatalystNumeric : TypedEncoder : Encoder](values: List[X1[A]]) - (implicit encX1:Encoder[X1[A]]) = { - val typedDS = TypedDataset.create(values) - propTrigonometric(typedDS)(sin(typedDS('a)), sparkFunctions.sin) + def prop[A: CatalystNumeric: TypedEncoder: Encoder](values: List[X1[A]])(implicit encX1: Encoder[X1[A]]) = { + val typedDS = TypedDataset.create(values) + propTrigonometric(typedDS)(sin(typedDS('a)), sparkFunctions.sin) } check(forAll(prop[Int] _)) @@ -571,10 +570,9 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { val spark = session import spark.implicits._ - def prop[A: CatalystNumeric : TypedEncoder : Encoder](values: List[X1[A]]) - (implicit encX1:Encoder[X1[A]]) = { - val typedDS = TypedDataset.create(values) - propTrigonometric(typedDS)(sinh(typedDS('a)), sparkFunctions.sinh) + def prop[A: CatalystNumeric: TypedEncoder: Encoder](values: List[X1[A]])(implicit encX1: Encoder[X1[A]]) = { + val typedDS = TypedDataset.create(values) + propTrigonometric(typedDS)(sinh(typedDS('a)), sparkFunctions.sinh) } check(forAll(prop[Int] _)) @@ -589,10 +587,9 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { val spark = session import spark.implicits._ - def prop[A: CatalystNumeric : TypedEncoder : Encoder](values: List[X1[A]]) - (implicit encX1:Encoder[X1[A]]) = { - val typedDS = TypedDataset.create(values) - propTrigonometric(typedDS)(asin(typedDS('a)), sparkFunctions.asin) + def prop[A: CatalystNumeric: TypedEncoder: Encoder](values: List[X1[A]])(implicit encX1: Encoder[X1[A]]) = { + val typedDS = TypedDataset.create(values) + propTrigonometric(typedDS)(asin(typedDS('a)), sparkFunctions.asin) } check(forAll(prop[Int] _)) @@ -607,10 +604,9 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { val spark = session import spark.implicits._ - def prop[A: CatalystNumeric : TypedEncoder : Encoder](values: List[X1[A]]) - (implicit encX1:Encoder[X1[A]]) = { - val typedDS = TypedDataset.create(values) - propTrigonometric(typedDS)(tan(typedDS('a)), sparkFunctions.tan) + def prop[A: CatalystNumeric: TypedEncoder: Encoder](values: List[X1[A]])(implicit encX1: Encoder[X1[A]]) = { + val typedDS = TypedDataset.create(values) + propTrigonometric(typedDS)(tan(typedDS('a)), sparkFunctions.tan) } check(forAll(prop[Int] _)) @@ -625,10 +621,9 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { val spark = session import spark.implicits._ - def prop[A: CatalystNumeric : TypedEncoder : Encoder](values: List[X1[A]]) - (implicit encX1:Encoder[X1[A]]) = { - val typedDS = TypedDataset.create(values) - propTrigonometric(typedDS)(tanh(typedDS('a)), sparkFunctions.tanh) + def prop[A: CatalystNumeric: TypedEncoder: Encoder](values: List[X1[A]])(implicit encX1: Encoder[X1[A]]) = { + val typedDS = TypedDataset.create(values) + propTrigonometric(typedDS)(tanh(typedDS('a)), sparkFunctions.tanh) } check(forAll(prop[Int] _)) @@ -639,51 +634,50 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { check(forAll(prop[Double] _)) } - /* - * Currently not all Collection types play nice with the Encoders. - * This test needs to be readressed and Set readded to the Collection Typeclass once these issues are resolved. - * - * [[https://issues.apache.org/jira/browse/SPARK-18891]] - * [[https://issues.apache.org/jira/browse/SPARK-21204]] - */ - test("arrayContains"){ + /* + * Currently not all Collection types play nice with the Encoders. + * This test needs to be readressed and Set readded to the Collection Typeclass once these issues are resolved. + * + * [[https://issues.apache.org/jira/browse/SPARK-18891]] + * [[https://issues.apache.org/jira/browse/SPARK-21204]] + */ + test("arrayContains") { val spark = session import spark.implicits._ val listLength = 10 val idxs = Stream.continually(Range(0, listLength)).flatten.toIterator - abstract class Nth[A, C[A]:CatalystCollection] { + abstract class Nth[A, C[A]: CatalystCollection] { - def nth(c:C[A], idx:Int):A + def nth(c: C[A], idx: Int): A } - implicit def deriveListNth[A] : Nth[A, List] = new Nth[A, List] { + implicit def deriveListNth[A]: Nth[A, List] = new Nth[A, List] { override def nth(c: List[A], idx: Int): A = c(idx) } - implicit def deriveSeqNth[A] : Nth[A, Seq] = new Nth[A, Seq] { + implicit def deriveSeqNth[A]: Nth[A, Seq] = new Nth[A, Seq] { override def nth(c: Seq[A], idx: Int): A = c(idx) } - implicit def deriveVectorNth[A] : Nth[A, Vector] = new Nth[A, Vector] { + implicit def deriveVectorNth[A]: Nth[A, Vector] = new Nth[A, Vector] { override def nth(c: Vector[A], idx: Int): A = c(idx) } - implicit def deriveArrayNth[A] : Nth[A, Array] = new Nth[A, Array] { + implicit def deriveArrayNth[A]: Nth[A, Array] = new Nth[A, Array] { override def nth(c: Array[A], idx: Int): A = c(idx) } - - def prop[C[_] : CatalystCollection] - ( - values: C[Int], - shouldBeIn:Boolean) - ( - implicit nth:Nth[Int, C], - encEv: Encoder[C[Int]], - tEncEv: TypedEncoder[C[Int]] - ) = { + def prop[C[_]: CatalystCollection]( + values: C[Int], + shouldBeIn: Boolean + )( + implicit + nth: Nth[Int, C], + encEv: Encoder[C[Int]], + tEncEv: TypedEncoder[C[Int]] + ) = { val contained = if (shouldBeIn) nth.nth(values, idxs.next) else -1 @@ -705,10 +699,9 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { check( forAll( - Gen.listOfN(listLength, Gen.choose(0,100)), - Gen.oneOf(true,false) - ) - (prop[List]) + Gen.listOfN(listLength, Gen.choose(0, 100)), + Gen.oneOf(true, false) + )(prop[List]) ) /*check( Looks like there is no Typed Encoder for Seq type yet @@ -721,18 +714,16 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { check( forAll( - Gen.listOfN(listLength, Gen.choose(0,100)).map(_.toVector), - Gen.oneOf(true,false) - ) - (prop[Vector]) + Gen.listOfN(listLength, Gen.choose(0, 100)).map(_.toVector), + Gen.oneOf(true, false) + )(prop[Vector]) ) check( forAll( - Gen.listOfN(listLength, Gen.choose(0,100)).map(_.toArray), - Gen.oneOf(true,false) - ) - (prop[Array]) + Gen.listOfN(listLength, Gen.choose(0, 100)).map(_.toArray), + Gen.oneOf(true, false) + )(prop[Array]) ) } @@ -740,8 +731,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { val spark = session import spark.implicits._ - def prop[A: CatalystNumeric : TypedEncoder : Encoder] - (na: A, values: List[X1[A]])(implicit encX1: Encoder[X1[A]]) = { + def prop[A: CatalystNumeric: TypedEncoder: Encoder](na: A, values: List[X1[A]])(implicit encX1: Encoder[X1[A]]) = { val cDS = session.createDataset(X1(na) :: values) val resCompare = cDS .select(sparkFunctions.atan(cDS("a"))) @@ -759,8 +749,8 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { .toList val aggrTyped = typedDS.agg(atan( - frameless.functions.aggregate.first(typedDS('a))) - ).firstOption().run().get + frameless.functions.aggregate.first(typedDS('a)) + )).firstOption().run().get val aggrSpark = cDS.select( sparkFunctions.atan(sparkFunctions.first("a")).as[Double] @@ -781,9 +771,9 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { val spark = session import spark.implicits._ - def prop[A: CatalystNumeric : TypedEncoder : Encoder, - B: CatalystNumeric : TypedEncoder : Encoder](na: X2[A, B], values: List[X2[A, B]]) - (implicit encEv: Encoder[X2[A,B]]) = { + def prop[A: CatalystNumeric: TypedEncoder: Encoder, B: CatalystNumeric: TypedEncoder: Encoder](na: X2[A, B], values: List[X2[A, B]])(implicit + encEv: Encoder[X2[A, B]] + ) = { val cDS = session.createDataset(na +: values) val resCompare = cDS .select(sparkFunctions.atan2(cDS("a"), cDS("b"))) @@ -791,7 +781,6 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { .map(DoubleBehaviourUtils.nanNullHandler) .collect().toList - val typedDS = TypedDataset.create(cDS) val res = typedDS .select(atan2(typedDS('a), typedDS('b))) @@ -803,17 +792,16 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { val aggrTyped = typedDS.agg(atan2( frameless.functions.aggregate.first(typedDS('a)), - frameless.functions.aggregate.first(typedDS('b))) - ).firstOption().run().get + frameless.functions.aggregate.first(typedDS('b)) + )).firstOption().run().get val aggrSpark = cDS.select( - sparkFunctions.atan2(sparkFunctions.first("a"),sparkFunctions.first("b")).as[Double] + sparkFunctions.atan2(sparkFunctions.first("a"), sparkFunctions.first("b")).as[Double] ).first() (res ?= resCompare).&&(aggrTyped ?= aggrSpark) } - check(forAll(prop[Int, Long] _)) check(forAll(prop[Long, Int] _)) check(forAll(prop[Short, Byte] _)) @@ -826,8 +814,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { val spark = session import spark.implicits._ - def prop[A: CatalystNumeric : TypedEncoder : Encoder] - (na: X1[A], value: List[X1[A]], lit:Double)(implicit encX1:Encoder[X1[A]]) = { + def prop[A: CatalystNumeric: TypedEncoder: Encoder](na: X1[A], value: List[X1[A]], lit: Double)(implicit encX1: Encoder[X1[A]]) = { val cDS = session.createDataset(na +: value) val resCompare = cDS .select(sparkFunctions.atan2(lit, cDS("a"))) @@ -835,7 +822,6 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { .map(DoubleBehaviourUtils.nanNullHandler) .collect().toList - val typedDS = TypedDataset.create(cDS) val res = typedDS .select(atan2(lit, typedDS('a))) @@ -847,8 +833,8 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { val aggrTyped = typedDS.agg(atan2( lit, - frameless.functions.aggregate.first(typedDS('a))) - ).firstOption().run().get + frameless.functions.aggregate.first(typedDS('a)) + )).firstOption().run().get val aggrSpark = cDS.select( sparkFunctions.atan2(lit, sparkFunctions.first("a")).as[Double] @@ -869,8 +855,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { val spark = session import spark.implicits._ - def prop[A: CatalystNumeric : TypedEncoder : Encoder] - (na: X1[A], value: List[X1[A]], lit:Double)(implicit encX1:Encoder[X1[A]]) = { + def prop[A: CatalystNumeric: TypedEncoder: Encoder](na: X1[A], value: List[X1[A]], lit: Double)(implicit encX1: Encoder[X1[A]]) = { val cDS = session.createDataset(na +: value) val resCompare = cDS .select(sparkFunctions.atan2(cDS("a"), lit)) @@ -878,7 +863,6 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { .map(DoubleBehaviourUtils.nanNullHandler) .collect().toList - val typedDS = TypedDataset.create(cDS) val res = typedDS .select(atan2(typedDS('a), lit)) @@ -890,8 +874,8 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { val aggrTyped = typedDS.agg(atan2( frameless.functions.aggregate.first(typedDS('a)), - lit) - ).firstOption().run().get + lit + )).firstOption().run().get val aggrSpark = cDS.select( sparkFunctions.atan2(sparkFunctions.first("a"), lit).as[Double] @@ -900,7 +884,6 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { (res ?= resCompare).&&(aggrTyped ?= aggrSpark) } - check(forAll(prop[Int] _)) check(forAll(prop[Long] _)) check(forAll(prop[Short] _)) @@ -909,8 +892,9 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { check(forAll(prop[Double] _)) } - def mathProp[A: CatalystNumeric: TypedEncoder : Encoder](typedDS: TypedDataset[X1[A]])( - typedCol: TypedColumn[X1[A], Double], sparkFunc: Column => Column + def mathProp[A: CatalystNumeric: TypedEncoder: Encoder](typedDS: TypedDataset[X1[A]])( + typedCol: TypedColumn[X1[A], Double], + sparkFunc: Column => Column ): Prop = { val spark = session import spark.implicits._ @@ -936,7 +920,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { val spark = session import spark.implicits._ - def prop[A: CatalystNumeric : TypedEncoder : Encoder](values: List[X1[A]])(implicit encX1:Encoder[X1[A]]) = { + def prop[A: CatalystNumeric: TypedEncoder: Encoder](values: List[X1[A]])(implicit encX1: Encoder[X1[A]]) = { val typedDS = TypedDataset.create(values) mathProp(typedDS)(sqrt(typedDS('a)), sparkFunctions.sqrt) } @@ -953,7 +937,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { val spark = session import spark.implicits._ - def prop[A: CatalystNumeric : TypedEncoder : Encoder](values: List[X1[A]])(implicit encX1:Encoder[X1[A]]) = { + def prop[A: CatalystNumeric: TypedEncoder: Encoder](values: List[X1[A]])(implicit encX1: Encoder[X1[A]]) = { val typedDS = TypedDataset.create(values) mathProp(typedDS)(cbrt(typedDS('a)), sparkFunctions.cbrt) } @@ -970,7 +954,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { val spark = session import spark.implicits._ - def prop[A: CatalystNumeric : TypedEncoder : Encoder](values: List[X1[A]])(implicit encX1:Encoder[X1[A]]) = { + def prop[A: CatalystNumeric: TypedEncoder: Encoder](values: List[X1[A]])(implicit encX1: Encoder[X1[A]]) = { val typedDS = TypedDataset.create(values) mathProp(typedDS)(exp(typedDS('a)), sparkFunctions.exp) } @@ -987,7 +971,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { val spark = session import spark.implicits._ - def prop[A: TypedEncoder : Encoder](values: List[X1[A]]): Prop = { + def prop[A: TypedEncoder: Encoder](values: List[X1[A]]): Prop = { val spark = session import spark.implicits._ @@ -1040,8 +1024,9 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { val spark = session import spark.implicits._ - def prop[A: TypedEncoder : Encoder](values: List[X1[A]])( - implicit catalystNumericWithJavaBigDecimal: CatalystNumericWithJavaBigDecimal[A, A], + def prop[A: TypedEncoder: Encoder](values: List[X1[A]])( + implicit + catalystNumericWithJavaBigDecimal: CatalystNumericWithJavaBigDecimal[A, A], encX1: Encoder[X1[A]] ) = { val cDS = session.createDataset(values) @@ -1050,7 +1035,6 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { .map(_.getAs[A](0)) .collect().toList - val typedDS = TypedDataset.create(values) val res = typedDS .select(round(typedDS('a))) @@ -1072,8 +1056,9 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { import spark.implicits._ def prop[A: TypedEncoder: Encoder](values: List[X1[A]])( - implicit catalystAbsolute: CatalystNumericWithJavaBigDecimal[A, java.math.BigDecimal], - encX1:Encoder[X1[A]] + implicit + catalystAbsolute: CatalystNumericWithJavaBigDecimal[A, java.math.BigDecimal], + encX1: Encoder[X1[A]] ) = { val cDS = session.createDataset(values) @@ -1101,8 +1086,9 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { val spark = session import spark.implicits._ - def prop[A: TypedEncoder : Encoder](values: List[X1[A]])( - implicit catalystNumericWithJavaBigDecimal: CatalystNumericWithJavaBigDecimal[A, A], + def prop[A: TypedEncoder: Encoder](values: List[X1[A]])( + implicit + catalystNumericWithJavaBigDecimal: CatalystNumericWithJavaBigDecimal[A, A], encX1: Encoder[X1[A]] ) = { val cDS = session.createDataset(values) @@ -1111,7 +1097,6 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { .map(_.getAs[A](0)) .collect().toList - val typedDS = TypedDataset.create(values) val res = typedDS .select(round(typedDS('a), 1)) @@ -1133,8 +1118,9 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { import spark.implicits._ def prop[A: TypedEncoder: Encoder](values: List[X1[A]])( - implicit catalystAbsolute: CatalystNumericWithJavaBigDecimal[A, java.math.BigDecimal], - encX1:Encoder[X1[A]] + implicit + catalystAbsolute: CatalystNumericWithJavaBigDecimal[A, java.math.BigDecimal], + encX1: Encoder[X1[A]] ) = { val cDS = session.createDataset(values) @@ -1162,8 +1148,9 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { val spark = session import spark.implicits._ - def prop[A: TypedEncoder : Encoder](values: List[X1[A]])( - implicit catalystNumericWithJavaBigDecimal: CatalystNumericWithJavaBigDecimal[A, A], + def prop[A: TypedEncoder: Encoder](values: List[X1[A]])( + implicit + catalystNumericWithJavaBigDecimal: CatalystNumericWithJavaBigDecimal[A, A], encX1: Encoder[X1[A]] ) = { val cDS = session.createDataset(values) @@ -1172,7 +1159,6 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { .map(_.getAs[A](0)) .collect().toList - val typedDS = TypedDataset.create(values) val res = typedDS .select(bround(typedDS('a))) @@ -1187,15 +1173,16 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { check(forAll(prop[Long] _)) check(forAll(prop[Short] _)) check(forAll(prop[Double] _)) - } + } test("bround big decimal") { val spark = session import spark.implicits._ def prop[A: TypedEncoder: Encoder](values: List[X1[A]])( - implicit catalystAbsolute: CatalystNumericWithJavaBigDecimal[A, java.math.BigDecimal], - encX1:Encoder[X1[A]] + implicit + catalystAbsolute: CatalystNumericWithJavaBigDecimal[A, java.math.BigDecimal], + encX1: Encoder[X1[A]] ) = { val cDS = session.createDataset(values) @@ -1219,63 +1206,64 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { check(forAll(prop[BigDecimal] _)) } - test("bround with scale") { - val spark = session - import spark.implicits._ + test("bround with scale") { + val spark = session + import spark.implicits._ - def prop[A: TypedEncoder : Encoder](values: List[X1[A]])( - implicit catalystNumericWithJavaBigDecimal: CatalystNumericWithJavaBigDecimal[A, A], - encX1: Encoder[X1[A]] - ) = { - val cDS = session.createDataset(values) - val resCompare = cDS - .select(sparkFunctions.bround(cDS("a"), 1)) - .map(_.getAs[A](0)) - .collect().toList - - - val typedDS = TypedDataset.create(values) - val res = typedDS - .select(bround(typedDS('a), 1)) - .collect() - .run() - .toList - - res ?= resCompare - } + def prop[A: TypedEncoder: Encoder](values: List[X1[A]])( + implicit + catalystNumericWithJavaBigDecimal: CatalystNumericWithJavaBigDecimal[A, A], + encX1: Encoder[X1[A]] + ) = { + val cDS = session.createDataset(values) + val resCompare = cDS + .select(sparkFunctions.bround(cDS("a"), 1)) + .map(_.getAs[A](0)) + .collect().toList - check(forAll(prop[Int] _)) - check(forAll(prop[Long] _)) - check(forAll(prop[Short] _)) - check(forAll(prop[Double] _)) + val typedDS = TypedDataset.create(values) + val res = typedDS + .select(bround(typedDS('a), 1)) + .collect() + .run() + .toList + + res ?= resCompare } - test("bround big decimal with scale") { - val spark = session - import spark.implicits._ + check(forAll(prop[Int] _)) + check(forAll(prop[Long] _)) + check(forAll(prop[Short] _)) + check(forAll(prop[Double] _)) + } - def prop[A: TypedEncoder: Encoder](values: List[X1[A]])( - implicit catalystAbsolute: CatalystNumericWithJavaBigDecimal[A, java.math.BigDecimal], - encX1:Encoder[X1[A]] - ) = { - val cDS = session.createDataset(values) - - val resCompare = cDS - .select(sparkFunctions.bround(cDS("a"), 0)) - .map(_.getAs[java.math.BigDecimal](0)) - .collect() - .toList.map(_.setScale(0)) - - val typedDS = TypedDataset.create(values) - val col = typedDS('a) - val res = typedDS - .select(bround(col, 0)) - .collect() - .run() - .toList - - res ?= resCompare - } + test("bround big decimal with scale") { + val spark = session + import spark.implicits._ + + def prop[A: TypedEncoder: Encoder](values: List[X1[A]])( + implicit + catalystAbsolute: CatalystNumericWithJavaBigDecimal[A, java.math.BigDecimal], + encX1: Encoder[X1[A]] + ) = { + val cDS = session.createDataset(values) + + val resCompare = cDS + .select(sparkFunctions.bround(cDS("a"), 0)) + .map(_.getAs[java.math.BigDecimal](0)) + .collect() + .toList.map(_.setScale(0)) + + val typedDS = TypedDataset.create(values) + val col = typedDS('a) + val res = typedDS + .select(bround(col, 0)) + .collect() + .run() + .toList + + res ?= resCompare + } check(forAll(prop[BigDecimal] _)) } @@ -1285,7 +1273,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { import spark.implicits._ import NonNegativeArbitraryNumericValues._ - def prop[A: CatalystNumeric: TypedEncoder : Encoder]( + def prop[A: CatalystNumeric: TypedEncoder: Encoder]( values: List[X1[A]], base: Double ): Prop = { @@ -1322,7 +1310,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { import spark.implicits._ import NonNegativeArbitraryNumericValues._ - def prop[A: CatalystNumeric : TypedEncoder : Encoder](values: List[X1[A]])(implicit encX1:Encoder[X1[A]]) = { + def prop[A: CatalystNumeric: TypedEncoder: Encoder](values: List[X1[A]])(implicit encX1: Encoder[X1[A]]) = { val typedDS = TypedDataset.create(values) mathProp(typedDS)(log(typedDS('a)), sparkFunctions.log) } @@ -1339,7 +1327,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { import spark.implicits._ import NonNegativeArbitraryNumericValues._ - def prop[A: CatalystNumeric : TypedEncoder : Encoder](values: List[X1[A]])(implicit encX1:Encoder[X1[A]]) = { + def prop[A: CatalystNumeric: TypedEncoder: Encoder](values: List[X1[A]])(implicit encX1: Encoder[X1[A]]) = { val typedDS = TypedDataset.create(values) mathProp(typedDS)(log2(typedDS('a)), sparkFunctions.log2) } @@ -1356,7 +1344,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { import spark.implicits._ import NonNegativeArbitraryNumericValues._ - def prop[A: CatalystNumeric : TypedEncoder : Encoder](values: List[X1[A]])(implicit encX1:Encoder[X1[A]]) = { + def prop[A: CatalystNumeric: TypedEncoder: Encoder](values: List[X1[A]])(implicit encX1: Encoder[X1[A]]) = { val typedDS = TypedDataset.create(values) mathProp(typedDS)(log1p(typedDS('a)), sparkFunctions.log1p) } @@ -1373,7 +1361,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { import spark.implicits._ import NonNegativeArbitraryNumericValues._ - def prop[A: CatalystNumeric : TypedEncoder : Encoder](values: List[X1[A]])(implicit encX1:Encoder[X1[A]]) = { + def prop[A: CatalystNumeric: TypedEncoder: Encoder](values: List[X1[A]])(implicit encX1: Encoder[X1[A]]) = { val typedDS = TypedDataset.create(values) mathProp(typedDS)(log10(typedDS('a)), sparkFunctions.log10) } @@ -1389,7 +1377,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { val spark = session import spark.implicits._ - def prop(values:List[X1[Array[Byte]]])(implicit encX1:Encoder[X1[Array[Byte]]]) = { + def prop(values: List[X1[Array[Byte]]])(implicit encX1: Encoder[X1[Array[Byte]]]) = { val cDS = session.createDataset(values) val resCompare = cDS .select(sparkFunctions.base64(cDS("a"))) @@ -1419,7 +1407,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { val spark = session import spark.implicits._ - def prop[A: CatalystNumeric: TypedEncoder : Encoder]( + def prop[A: CatalystNumeric: TypedEncoder: Encoder]( values: List[X1[A]], base: Double ): Prop = { @@ -1463,7 +1451,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { val spark = session import spark.implicits._ - def prop[A: CatalystNumeric: TypedEncoder : Encoder]( + def prop[A: CatalystNumeric: TypedEncoder: Encoder]( values: List[X2[A, A]] ): Prop = { val spark = session @@ -1498,7 +1486,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { val spark = session import spark.implicits._ - def prop[A: CatalystNumeric: TypedEncoder : Encoder]( + def prop[A: CatalystNumeric: TypedEncoder: Encoder]( values: List[X1[A]], base: Double ): Prop = { @@ -1534,7 +1522,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { .run() .toList - (res ?= resCompare) && (res2 ?= resCompare2) + (res ?= resCompare) && (res2 ?= resCompare2) } check(forAll(prop[Int] _)) @@ -1548,7 +1536,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { val spark = session import spark.implicits._ - def prop[A: CatalystNumeric: TypedEncoder : Encoder]( + def prop[A: CatalystNumeric: TypedEncoder: Encoder]( values: List[X2[A, A]] ): Prop = { val spark = session @@ -1584,7 +1572,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { import spark.implicits._ import NonNegativeArbitraryNumericValues._ - def prop[A: CatalystNumeric: TypedEncoder : Encoder]( + def prop[A: CatalystNumeric: TypedEncoder: Encoder]( values: List[X2[A, A]] ): Prop = { val spark = session @@ -1637,11 +1625,11 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { check(forAll(prop _)) } - test("bin"){ + test("bin") { val spark = session import spark.implicits._ - def prop(values:List[X1[Long]])(implicit encX1:Encoder[X1[Long]]) = { + def prop(values: List[X1[Long]])(implicit encX1: Encoder[X1[Long]]) = { val cDS = session.createDataset(values) val resCompare = cDS .select(sparkFunctions.bin(cDS("a"))) @@ -1661,13 +1649,12 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { check(forAll(prop _)) } - test("bitwiseNOT"){ + test("bitwiseNOT") { val spark = session import spark.implicits._ @nowarn // supress sparkFunctions.bitwiseNOT call which is used to maintain Spark 3.1.x backwards compat - def prop[A: CatalystBitwise : TypedEncoder : Encoder] - (values:List[X1[A]])(implicit encX1:Encoder[X1[A]]) = { + def prop[A: CatalystBitwise: TypedEncoder: Encoder](values: List[X1[A]])(implicit encX1: Encoder[X1[A]]) = { val cDS = session.createDataset(values) val resCompare = cDS .select(sparkFunctions.bitwiseNOT(cDS("a"))) @@ -1694,7 +1681,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { val spark = session import spark.implicits._ - def prop[A : TypedEncoder]( + def prop[A: TypedEncoder]( toFile1: List[X1[A]], toFile2: List[X1[A]], inMem: List[X1[A]] @@ -1727,10 +1714,13 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { val grouped = withFileName.groupBy(_.b).mapValues(_.map(_.c).toSet) grouped.foldLeft(passed) { (p, g) => - p && secure { g._1 match { - case "" => g._2.head == "" //Empty string if didn't come from file - case f => g._2.forall(_.contains(f)) - }}} + p && secure { + g._1 match { + case "" => g._2.head == "" // Empty string if didn't come from file + case f => g._2.forall(_.contains(f)) + } + } + } } check(forAll(prop[String] _)) @@ -1740,7 +1730,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { val spark = session import spark.implicits._ - def prop[A : TypedEncoder](xs: List[X1[A]])(implicit x2en: Encoder[X2[A, Long]]) = { + def prop[A: TypedEncoder](xs: List[X1[A]])(implicit x2en: Encoder[X2[A, Long]]) = { val ds = TypedDataset.create(xs) val result = ds.withColumn[X2[A, Long]](monotonicallyIncreasingId()) @@ -1750,7 +1740,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { val ids = result.map(_.b) (ids.toSet.size ?= ids.length) && - (ids.sorted ?= ids) + (ids.sorted ?= ids) } check(forAll(prop[String] _)) @@ -1760,8 +1750,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { val spark = session import spark.implicits._ - def prop[A : TypedEncoder : Encoder] - (condition1: Boolean, condition2: Boolean, value1: A, value2: A, otherwise: A) = { + def prop[A: TypedEncoder: Encoder](condition1: Boolean, condition2: Boolean, value1: A, value2: A, otherwise: A) = { val ds = TypedDataset.create(X5(condition1, condition2, value1, value2, otherwise) :: Nil) val untypedWhen = ds.toDF() @@ -1855,10 +1844,11 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { check(forAll(pairs) { values: List[X2[String, String]] => val ds = TypedDataset.create(values) - val td = ds.agg(concat(first(ds('a)),first(ds('b)))).collect().run().toVector + val td = ds.agg(concat(first(ds('a)), first(ds('b)))).collect().run().toVector val spark = ds.dataset.select(sparkFunctions.concat( sparkFunctions.first($"a").as[String], - sparkFunctions.first($"b").as[String])).as[String].collect().toVector + sparkFunctions.first($"b").as[String] + )).as[String].collect().toVector td ?= spark }) } @@ -1902,11 +1892,13 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { check(forAll(pairs) { values: List[X2[String, String]] => val ds = TypedDataset.create(values) - val td = ds.agg(concatWs(",",first(ds('a)),first(ds('b)), last(ds('b)))).collect().run().toVector - val spark = ds.dataset.select(sparkFunctions.concat_ws(",", + val td = ds.agg(concatWs(",", first(ds('a)), first(ds('b)), last(ds('b)))).collect().run().toVector + val spark = ds.dataset.select(sparkFunctions.concat_ws( + ",", sparkFunctions.first($"a").as[String], sparkFunctions.first($"b").as[String], - sparkFunctions.last($"b").as[String])).as[String].collect().toVector + sparkFunctions.last($"b").as[String] + )).as[String].collect().toVector td ?= spark }) } @@ -1962,13 +1954,13 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { val ds = TypedDataset.create(na +: values) val sparkResult = ds.toDF() - .select(sparkFunctions.levenshtein($"a", sparkFunctions.concat($"a",sparkFunctions.lit("Hello")))) + .select(sparkFunctions.levenshtein($"a", sparkFunctions.concat($"a", sparkFunctions.lit("Hello")))) .map(_.getAs[Int](0)) .collect() .toVector val typed = ds - .select(levenshtein(ds('a), concat(ds('a),lit("Hello")))) + .select(levenshtein(ds('a), concat(ds('a), lit("Hello")))) .collect() .run() .toVector @@ -2207,7 +2199,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { } test("Empty vararg tests") { - def prop[A : TypedEncoder, B: TypedEncoder](data: Vector[X2[A, B]]) = { + def prop[A: TypedEncoder, B: TypedEncoder](data: Vector[X2[A, B]]) = { val ds = TypedDataset.create(data) val frameless = ds.select(ds('a), concat(), ds('b), concatWs(":")).collect().run().toVector val framelessAggr = ds.agg(concat(), concatWs("x"), litAggr(2)).collect().run().toVector @@ -2220,8 +2212,7 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { check(forAll(prop[Option[Boolean], Long] _)) } - def dateTimeStringProp(typedDS: TypedDataset[X1[String]]) - (typedCol: TypedColumn[X1[String], Option[Int]], sparkFunc: Column => Column): Prop = { + def dateTimeStringProp(typedDS: TypedDataset[X1[String]])(typedCol: TypedColumn[X1[String], Option[Int]], sparkFunc: Column => Column): Prop = { val spark = session import spark.implicits._ @@ -2245,9 +2236,9 @@ class NonAggregateFunctionsTests extends TypedDatasetSuite { import spark.implicits._ def prop(data: List[X1[String]])(implicit E: Encoder[Option[Int]]): Prop = { - val ds = TypedDataset.create(data) - dateTimeStringProp(ds)(year(ds[String]('a)), sparkFunctions.year) - } + val ds = TypedDataset.create(data) + dateTimeStringProp(ds)(year(ds[String]('a)), sparkFunctions.year) + } check(forAll(dateTimeStringGen)(data => prop(data.map(X1.apply)))) check(forAll(prop _)) diff --git a/dataset/src/test/scala/frameless/functions/UdfTests.scala b/dataset/src/test/scala/frameless/functions/UdfTests.scala index 10e65180f..9a6b7932f 100644 --- a/dataset/src/test/scala/frameless/functions/UdfTests.scala +++ b/dataset/src/test/scala/frameless/functions/UdfTests.scala @@ -14,7 +14,7 @@ class UdfTests extends TypedDatasetSuite { val A = dataset.col[A]('a) // filter forces whole codegen - val codegen = dataset.deserialized.filter((_:X1[A]) => true).select(u1(A)).collect().run().toVector + val codegen = dataset.deserialized.filter((_: X1[A]) => true).select(u1(A)).collect().run().toVector // otherwise it uses local relation val local = dataset.select(u2(A)).collect().run().toVector @@ -42,8 +42,7 @@ class UdfTests extends TypedDatasetSuite { } test("multiple one argument udf") { - def prop[A: TypedEncoder, B: TypedEncoder, C: TypedEncoder] - (data: Vector[X3[A, B, C]], f1: A => A, f2: B => B, f3: C => C): Prop = { + def prop[A: TypedEncoder, B: TypedEncoder, C: TypedEncoder](data: Vector[X3[A, B, C]], f1: A => A, f2: B => B, f3: C => C): Prop = { val dataset = TypedDataset.create(data) val u11 = udf[X3[A, B, C], A, A](f1) val u21 = udf[X3[A, B, C], B, B](f2) @@ -69,8 +68,7 @@ class UdfTests extends TypedDatasetSuite { } test("two argument udf") { - def prop[A: TypedEncoder, B: TypedEncoder, C: TypedEncoder] - (data: Vector[X3[A, B, C]], f1: (A, B) => C): Prop = { + def prop[A: TypedEncoder, B: TypedEncoder, C: TypedEncoder](data: Vector[X3[A, B, C]], f1: (A, B) => C): Prop = { val dataset = TypedDataset.create(data) val u1 = udf[X3[A, B, C], A, B, C](f1) val u2 = dataset.makeUDF(f1) @@ -89,8 +87,7 @@ class UdfTests extends TypedDatasetSuite { } test("multiple two argument udf") { - def prop[A: TypedEncoder, B: TypedEncoder, C: TypedEncoder] - (data: Vector[X3[A, B, C]], f1: (A, B) => C, f2: (B, C) => A): Prop = { + def prop[A: TypedEncoder, B: TypedEncoder, C: TypedEncoder](data: Vector[X3[A, B, C]], f1: (A, B) => C, f2: (B, C) => A): Prop = { val dataset = TypedDataset.create(data) val u11 = udf[X3[A, B, C], A, B, C](f1) val u12 = dataset.makeUDF(f1) @@ -113,8 +110,7 @@ class UdfTests extends TypedDatasetSuite { } test("three argument udf") { - def prop[A: TypedEncoder, B: TypedEncoder, C: TypedEncoder] - (data: Vector[X3[A, B, C]], f: (A, B, C) => C): Prop = { + def prop[A: TypedEncoder, B: TypedEncoder, C: TypedEncoder](data: Vector[X3[A, B, C]], f: (A, B, C) => C): Prop = { val dataset = TypedDataset.create(data) val u1 = udf[X3[A, B, C], A, B, C, C](f) val u2 = dataset.makeUDF(f) @@ -135,8 +131,7 @@ class UdfTests extends TypedDatasetSuite { } test("four argument udf") { - def prop[A: TypedEncoder, B: TypedEncoder, C: TypedEncoder, D: TypedEncoder] - (data: Vector[X4[A, B, C, D]], f: (A, B, C, D) => C): Prop = { + def prop[A: TypedEncoder, B: TypedEncoder, C: TypedEncoder, D: TypedEncoder](data: Vector[X4[A, B, C, D]], f: (A, B, C, D) => C): Prop = { val dataset = TypedDataset.create(data) val u1 = udf[X4[A, B, C, D], A, B, C, D, C](f) val u2 = dataset.makeUDF(f) @@ -161,8 +156,10 @@ class UdfTests extends TypedDatasetSuite { } test("five argument udf") { - def prop[A: TypedEncoder, B: TypedEncoder, C: TypedEncoder, D: TypedEncoder, E: TypedEncoder] - (data: Vector[X5[A, B, C, D, E]], f: (A, B, C, D, E) => C): Prop = { + def prop[A: TypedEncoder, B: TypedEncoder, C: TypedEncoder, D: TypedEncoder, E: TypedEncoder]( + data: Vector[X5[A, B, C, D, E]], + f: (A, B, C, D, E) => C + ): Prop = { val dataset = TypedDataset.create(data) val u1 = udf[X5[A, B, C, D, E], A, B, C, D, E, C](f) val u2 = dataset.makeUDF(f) diff --git a/dataset/src/test/scala/frameless/functions/UnaryFunctionsTest.scala b/dataset/src/test/scala/frameless/functions/UnaryFunctionsTest.scala index 009179be6..a3ebac3ad 100644 --- a/dataset/src/test/scala/frameless/functions/UnaryFunctionsTest.scala +++ b/dataset/src/test/scala/frameless/functions/UnaryFunctionsTest.scala @@ -1,7 +1,7 @@ package frameless package functions -import org.scalacheck.{ Arbitrary, Prop } +import org.scalacheck.{Arbitrary, Prop} import org.scalacheck.Prop._ import scala.collection.SeqLike @@ -10,7 +10,10 @@ import scala.reflect.ClassTag class UnaryFunctionsTest extends TypedDatasetSuite { test("size tests") { - def prop[F[X] <: Traversable[X] : CatalystSizableCollection, A](xs: List[X1[F[A]]])(implicit arb: Arbitrary[F[A]], enc: TypedEncoder[F[A]]): Prop = { + def prop[F[X] <: Traversable[X]: CatalystSizableCollection, A](xs: List[X1[F[A]]])(implicit + arb: Arbitrary[F[A]], + enc: TypedEncoder[F[A]] + ): Prop = { val tds = TypedDataset.create(xs) val framelessResults = tds.select(size(tds('a))).collect().run().toVector @@ -58,7 +61,7 @@ class UnaryFunctionsTest extends TypedDatasetSuite { } test("sort in ascending order") { - def prop[F[X] <: SeqLike[X, F[X]] : CatalystSortableCollection, A: Ordering](xs: List[X1[F[A]]])(implicit enc: TypedEncoder[F[A]]): Prop = { + def prop[F[X] <: SeqLike[X, F[X]]: CatalystSortableCollection, A: Ordering](xs: List[X1[F[A]]])(implicit enc: TypedEncoder[F[A]]): Prop = { val tds = TypedDataset.create(xs) val framelessResults = tds.select(sortAscending(tds('a))).collect().run().toVector @@ -78,7 +81,7 @@ class UnaryFunctionsTest extends TypedDatasetSuite { } test("sort in descending order") { - def prop[F[X] <: SeqLike[X, F[X]] : CatalystSortableCollection, A: Ordering](xs: List[X1[F[A]]])(implicit enc: TypedEncoder[F[A]]): Prop = { + def prop[F[X] <: SeqLike[X, F[X]]: CatalystSortableCollection, A: Ordering](xs: List[X1[F[A]]])(implicit enc: TypedEncoder[F[A]]): Prop = { val tds = TypedDataset.create(xs) val framelessResults = tds.select(sortDescending(tds('a))).collect().run().toVector @@ -98,7 +101,7 @@ class UnaryFunctionsTest extends TypedDatasetSuite { } test("sort on array test: ascending order") { - def prop[A: TypedEncoder : Ordering : ClassTag](xs: List[X1[Array[A]]]): Prop = { + def prop[A: TypedEncoder: Ordering: ClassTag](xs: List[X1[Array[A]]]): Prop = { val tds = TypedDataset.create(xs) val framelessResults = tds.select(sortAscending(tds('a))).collect().run().toVector @@ -119,7 +122,7 @@ class UnaryFunctionsTest extends TypedDatasetSuite { } test("sort on array test: descending order") { - def prop[A: TypedEncoder : Ordering : ClassTag](xs: List[X1[Array[A]]]): Prop = { + def prop[A: TypedEncoder: Ordering: ClassTag](xs: List[X1[Array[A]]]): Prop = { val tds = TypedDataset.create(xs) val framelessResults = tds.select(sortDescending(tds('a))).collect().run().toVector diff --git a/dataset/src/test/scala/frameless/ops/ColumnTypesTest.scala b/dataset/src/test/scala/frameless/ops/ColumnTypesTest.scala index 303eb2cbd..0bf1595bc 100644 --- a/dataset/src/test/scala/frameless/ops/ColumnTypesTest.scala +++ b/dataset/src/test/scala/frameless/ops/ColumnTypesTest.scala @@ -12,12 +12,12 @@ class ColumnTypesTest extends TypedDatasetSuite { val d: TypedDataset[X4[A, B, C, D]] = TypedDataset.create(data) val hlist = d('a) :: d('b) :: d('c) :: d('d) :: HNil - type TC[N] = TypedColumn[X4[A,B,C,D], N] + type TC[N] = TypedColumn[X4[A, B, C, D], N] type IN = TC[A] :: TC[B] :: TC[C] :: TC[D] :: HNil type OUT = A :: B :: C :: D :: HNil - implicitly[ColumnTypes.Aux[X4[A,B,C,D], IN, OUT]] + implicitly[ColumnTypes.Aux[X4[A, B, C, D], IN, OUT]] Prop.passed // successful compilation implies test correctness } diff --git a/dataset/src/test/scala/frameless/ops/CubeTests.scala b/dataset/src/test/scala/frameless/ops/CubeTests.scala index 7a06822b9..4fe448b2a 100644 --- a/dataset/src/test/scala/frameless/ops/CubeTests.scala +++ b/dataset/src/test/scala/frameless/ops/CubeTests.scala @@ -8,8 +8,7 @@ import org.scalacheck.Prop._ class CubeTests extends TypedDatasetSuite { test("cube('a).agg(count())") { - def prop[A: TypedEncoder : Ordering, Out: TypedEncoder : Numeric] - (data: List[X1[A]])(implicit summable: CatalystSummable[A, Out]): Prop = { + def prop[A: TypedEncoder: Ordering, Out: TypedEncoder: Numeric](data: List[X1[A]])(implicit summable: CatalystSummable[A, Out]): Prop = { val dataset = TypedDataset.create(data) val A = dataset.col[A]('a) @@ -24,8 +23,9 @@ class CubeTests extends TypedDatasetSuite { } test("cube('a, 'b).agg(count())") { - def prop[A: TypedEncoder : Ordering, B: TypedEncoder, Out: TypedEncoder : Numeric] - (data: List[X2[A, B]])(implicit summable: CatalystSummable[B, Out]): Prop = { + def prop[A: TypedEncoder: Ordering, B: TypedEncoder, Out: TypedEncoder: Numeric](data: List[X2[A, B]])(implicit + summable: CatalystSummable[B, Out] + ): Prop = { val dataset = TypedDataset.create(data) val A = dataset.col[A]('a) val B = dataset.col[B]('b) @@ -41,8 +41,9 @@ class CubeTests extends TypedDatasetSuite { } test("cube('a).agg(sum('b)") { - def prop[A: TypedEncoder : Ordering, B: TypedEncoder, Out: TypedEncoder : Numeric] - (data: List[X2[A, B]])(implicit summable: CatalystSummable[B, Out]): Prop = { + def prop[A: TypedEncoder: Ordering, B: TypedEncoder, Out: TypedEncoder: Numeric](data: List[X2[A, B]])(implicit + summable: CatalystSummable[B, Out] + ): Prop = { val dataset = TypedDataset.create(data) val A = dataset.col[A]('a) val B = dataset.col[B]('b) @@ -58,8 +59,7 @@ class CubeTests extends TypedDatasetSuite { } test("cube('a).mapGroups('a, sum('b))") { - def prop[A: TypedEncoder : Ordering, B: TypedEncoder : Numeric] - (data: List[X2[A, B]]): Prop = { + def prop[A: TypedEncoder: Ordering, B: TypedEncoder: Numeric](data: List[X2[A, B]]): Prop = { val dataset = TypedDataset.create(data) val A = dataset.col[A]('a) @@ -76,11 +76,11 @@ class CubeTests extends TypedDatasetSuite { test("cube('a).agg(sum('b), sum('c)) to cube('a).agg(sum('a), sum('b), sum('a), sum('b), sum('a))") { def prop[ - A: TypedEncoder : Ordering, - B: TypedEncoder, - C: TypedEncoder, - OutB: TypedEncoder : Numeric, - OutC: TypedEncoder : Numeric + A: TypedEncoder: Ordering, + B: TypedEncoder, + C: TypedEncoder, + OutB: TypedEncoder: Numeric, + OutC: TypedEncoder: Numeric ](data: List[X3[A, B, C]])( implicit summableB: CatalystSummable[B, OutB], @@ -138,12 +138,12 @@ class CubeTests extends TypedDatasetSuite { test("cube('a, 'b).agg(sum('c), sum('d))") { def prop[ - A: TypedEncoder : Ordering, - B: TypedEncoder : Ordering, - C: TypedEncoder, - D: TypedEncoder, - OutC: TypedEncoder : Numeric, - OutD: TypedEncoder : Numeric + A: TypedEncoder: Ordering, + B: TypedEncoder: Ordering, + C: TypedEncoder, + D: TypedEncoder, + OutC: TypedEncoder: Numeric, + OutD: TypedEncoder: Numeric ](data: List[X4[A, B, C, D]])( implicit summableC: CatalystSummable[C, OutC], @@ -173,10 +173,10 @@ class CubeTests extends TypedDatasetSuite { test("cube('a, 'b).agg(sum('c)) to cube('a, 'b).agg(sum('c),sum('c),sum('c),sum('c),sum('c))") { def prop[ - A: TypedEncoder : Ordering, - B: TypedEncoder : Ordering, - C: TypedEncoder, - OutC: TypedEncoder: Numeric + A: TypedEncoder: Ordering, + B: TypedEncoder: Ordering, + C: TypedEncoder, + OutC: TypedEncoder: Numeric ](data: List[X3[A, B, C]])(implicit summableC: CatalystSummable[C, OutC]): Prop = { val dataset = TypedDataset.create(data) val A = dataset.col[A]('a) @@ -224,7 +224,9 @@ class CubeTests extends TypedDatasetSuite { val sparkSumCCCC = dataset.dataset .cube("a", "b").sum("c", "c", "c", "c").collect().toVector - .map(row => (Option(row.getAs[A](0)), Option(row.getAs[B](1)), row.getAs[OutC](2), row.getAs[OutC](3), row.getAs[OutC](4), row.getAs[OutC](5))) + .map(row => + (Option(row.getAs[A](0)), Option(row.getAs[B](1)), row.getAs[OutC](2), row.getAs[OutC](3), row.getAs[OutC](4), row.getAs[OutC](5)) + ) .sortBy(_._2) val framelessSumCCCCC = dataset @@ -235,14 +237,24 @@ class CubeTests extends TypedDatasetSuite { val sparkSumCCCCC = dataset.dataset .cube("a", "b").sum("c", "c", "c", "c", "c").collect().toVector - .map(row => (Option(row.getAs[A](0)), Option(row.getAs[B](1)), row.getAs[OutC](2), row.getAs[OutC](3), row.getAs[OutC](4), row.getAs[OutC](5), row.getAs[OutC](6))) + .map(row => + ( + Option(row.getAs[A](0)), + Option(row.getAs[B](1)), + row.getAs[OutC](2), + row.getAs[OutC](3), + row.getAs[OutC](4), + row.getAs[OutC](5), + row.getAs[OutC](6) + ) + ) .sortBy(_._2) (framelessSumC ?= sparkSumC) && - (framelessSumCC ?= sparkSumCC) && - (framelessSumCCC ?= sparkSumCCC) && - (framelessSumCCCC ?= sparkSumCCCC) && - (framelessSumCCCCC ?= sparkSumCCCCC) + (framelessSumCC ?= sparkSumCC) && + (framelessSumCCC ?= sparkSumCCC) && + (framelessSumCCCC ?= sparkSumCCCC) && + (framelessSumCCCCC ?= sparkSumCCCCC) } check(forAll(prop[String, Long, Double, Double] _)) @@ -250,9 +262,9 @@ class CubeTests extends TypedDatasetSuite { test("cube('a, 'b).mapGroups('a, 'b, sum('c))") { def prop[ - A: TypedEncoder : Ordering, - B: TypedEncoder : Ordering, - C: TypedEncoder : Numeric + A: TypedEncoder: Ordering, + B: TypedEncoder: Ordering, + C: TypedEncoder: Numeric ](data: List[X3[A, B, C]]): Prop = { val dataset = TypedDataset.create(data) val A = dataset.col[A]('a) @@ -275,8 +287,8 @@ class CubeTests extends TypedDatasetSuite { test("cube('a).mapGroups(('a, toVector(('a, 'b))") { def prop[ - A: TypedEncoder: Ordering, - B: TypedEncoder: Ordering, + A: TypedEncoder: Ordering, + B: TypedEncoder: Ordering ](data: Vector[X2[A, B]]): Prop = { val dataset = TypedDataset.create(data) val A = dataset.col[A]('a) @@ -298,8 +310,8 @@ class CubeTests extends TypedDatasetSuite { test("cube('a).flatMapGroups(('a, toVector(('a, 'b))") { def prop[ - A: TypedEncoder : Ordering, - B: TypedEncoder : Ordering + A: TypedEncoder: Ordering, + B: TypedEncoder: Ordering ](data: Vector[X2[A, B]]): Prop = { val dataset = TypedDataset.create(data) val A = dataset.col[A]('a) @@ -325,9 +337,9 @@ class CubeTests extends TypedDatasetSuite { test("cube('a, 'b).flatMapGroups((('a,'b) toVector((('a,'b), 'c))") { def prop[ - A: TypedEncoder : Ordering, - B: TypedEncoder : Ordering, - C: TypedEncoder : Ordering + A: TypedEncoder: Ordering, + B: TypedEncoder: Ordering, + C: TypedEncoder: Ordering ](data: Vector[X3[A, B, C]]): Prop = { val dataset = TypedDataset.create(data) val cA = dataset.col[A]('a) @@ -353,8 +365,7 @@ class CubeTests extends TypedDatasetSuite { } test("cubeMany('a).agg(sum('b))") { - def prop[A: TypedEncoder : Ordering, Out: TypedEncoder : Numeric] - (data: List[X1[A]])(implicit summable: CatalystSummable[A, Out]): Prop = { + def prop[A: TypedEncoder: Ordering, Out: TypedEncoder: Numeric](data: List[X1[A]])(implicit summable: CatalystSummable[A, Out]): Prop = { val dataset = TypedDataset.create(data) val A = dataset.col[A]('a) @@ -367,4 +378,4 @@ class CubeTests extends TypedDatasetSuite { check(forAll(prop[Int, Long] _)) } -} \ No newline at end of file +} diff --git a/dataset/src/test/scala/frameless/ops/PivotTest.scala b/dataset/src/test/scala/frameless/ops/PivotTest.scala index dd9bf5e61..66b820a68 100644 --- a/dataset/src/test/scala/frameless/ops/PivotTest.scala +++ b/dataset/src/test/scala/frameless/ops/PivotTest.scala @@ -31,12 +31,18 @@ class PivotTest extends TypedDatasetSuite { .agg(sparkFunctions.sum("c"), sparkFunctions.first("d")).collect().toVector (frameless.map(_._1) ?= spark.map(x => x.getAs[String](0))).&&( - frameless.map(_._2) ?= spark.map(x => Option(x.getAs[Long](1)))).&&( - frameless.map(_._3) ?= spark.map(x => Option(x.getAs[Boolean](2)))).&&( - frameless.map(_._4) ?= spark.map(x => Option(x.getAs[Long](3)))).&&( - frameless.map(_._5) ?= spark.map(x => Option(x.getAs[Boolean](4)))).&&( - frameless.map(_._6) ?= spark.map(x => Option(x.getAs[Long](5)))).&&( - frameless.map(_._7) ?= spark.map(x => Option(x.getAs[Boolean](6)))) + frameless.map(_._2) ?= spark.map(x => Option(x.getAs[Long](1))) + ).&&( + frameless.map(_._3) ?= spark.map(x => Option(x.getAs[Boolean](2))) + ).&&( + frameless.map(_._4) ?= spark.map(x => Option(x.getAs[Long](3))) + ).&&( + frameless.map(_._5) ?= spark.map(x => Option(x.getAs[Boolean](4))) + ).&&( + frameless.map(_._6) ?= spark.map(x => Option(x.getAs[Long](5))) + ).&&( + frameless.map(_._7) ?= spark.map(x => Option(x.getAs[Boolean](6))) + ) } check(forAll(withCustomGenX4)(prop)) @@ -95,4 +101,4 @@ class PivotTest extends TypedDatasetSuite { agg(count[X3[String, Boolean, Boolean]]()). collect().run().toVector ?= Vector(("a", Some(2L), Some(1L))) } -} \ No newline at end of file +} diff --git a/dataset/src/test/scala/frameless/ops/RepeatTest.scala b/dataset/src/test/scala/frameless/ops/RepeatTest.scala index 78dfc6410..f92f827b3 100644 --- a/dataset/src/test/scala/frameless/ops/RepeatTest.scala +++ b/dataset/src/test/scala/frameless/ops/RepeatTest.scala @@ -6,13 +6,13 @@ import shapeless.{::, HNil, Nat} class RepeatTest extends TypedDatasetSuite { test("summoning with implicitly") { - implicitly[Repeat.Aux[Int::Boolean::HNil, Nat._1, Int::Boolean::HNil]] - implicitly[Repeat.Aux[Int::Boolean::HNil, Nat._2, Int::Boolean::Int::Boolean::HNil]] - implicitly[Repeat.Aux[Int::Boolean::HNil, Nat._3, Int::Boolean::Int::Boolean::Int::Boolean::HNil]] - implicitly[Repeat.Aux[String::HNil, Nat._5, String::String::String::String::String::HNil]] + implicitly[Repeat.Aux[Int :: Boolean :: HNil, Nat._1, Int :: Boolean :: HNil]] + implicitly[Repeat.Aux[Int :: Boolean :: HNil, Nat._2, Int :: Boolean :: Int :: Boolean :: HNil]] + implicitly[Repeat.Aux[Int :: Boolean :: HNil, Nat._3, Int :: Boolean :: Int :: Boolean :: Int :: Boolean :: HNil]] + implicitly[Repeat.Aux[String :: HNil, Nat._5, String :: String :: String :: String :: String :: HNil]] } test("ill typed") { illTyped("""implicitly[Repeat.Aux[String::HNil, Nat._5, String::String::String::String::HNil]]""") } -} \ No newline at end of file +} diff --git a/dataset/src/test/scala/frameless/ops/RollupTests.scala b/dataset/src/test/scala/frameless/ops/RollupTests.scala index da73ef8d0..7fd5a4584 100644 --- a/dataset/src/test/scala/frameless/ops/RollupTests.scala +++ b/dataset/src/test/scala/frameless/ops/RollupTests.scala @@ -8,8 +8,7 @@ import org.scalacheck.Prop._ class RollupTests extends TypedDatasetSuite { test("rollup('a).agg(count())") { - def prop[A: TypedEncoder : Ordering, Out: TypedEncoder : Numeric] - (data: List[X1[A]])(implicit summable: CatalystSummable[A, Out]): Prop = { + def prop[A: TypedEncoder: Ordering, Out: TypedEncoder: Numeric](data: List[X1[A]])(implicit summable: CatalystSummable[A, Out]): Prop = { val dataset = TypedDataset.create(data) val A = dataset.col[A]('a) @@ -24,8 +23,9 @@ class RollupTests extends TypedDatasetSuite { } test("rollup('a, 'b).agg(count())") { - def prop[A: TypedEncoder : Ordering, B: TypedEncoder, Out: TypedEncoder : Numeric] - (data: List[X2[A, B]])(implicit summable: CatalystSummable[B, Out]): Prop = { + def prop[A: TypedEncoder: Ordering, B: TypedEncoder, Out: TypedEncoder: Numeric](data: List[X2[A, B]])(implicit + summable: CatalystSummable[B, Out] + ): Prop = { val dataset = TypedDataset.create(data) val A = dataset.col[A]('a) val B = dataset.col[B]('b) @@ -41,8 +41,9 @@ class RollupTests extends TypedDatasetSuite { } test("rollup('a).agg(sum('b)") { - def prop[A: TypedEncoder : Ordering, B: TypedEncoder, Out: TypedEncoder : Numeric] - (data: List[X2[A, B]])(implicit summable: CatalystSummable[B, Out]): Prop = { + def prop[A: TypedEncoder: Ordering, B: TypedEncoder, Out: TypedEncoder: Numeric](data: List[X2[A, B]])(implicit + summable: CatalystSummable[B, Out] + ): Prop = { val dataset = TypedDataset.create(data) val A = dataset.col[A]('a) val B = dataset.col[B]('b) @@ -58,8 +59,7 @@ class RollupTests extends TypedDatasetSuite { } test("rollup('a).mapGroups('a, sum('b))") { - def prop[A: TypedEncoder : Ordering, B: TypedEncoder : Numeric] - (data: List[X2[A, B]]): Prop = { + def prop[A: TypedEncoder: Ordering, B: TypedEncoder: Numeric](data: List[X2[A, B]]): Prop = { val dataset = TypedDataset.create(data) val A = dataset.col[A]('a) @@ -76,11 +76,11 @@ class RollupTests extends TypedDatasetSuite { test("rollup('a).agg(sum('b), sum('c)) to rollup('a).agg(sum('a), sum('b), sum('a), sum('b), sum('a))") { def prop[ - A: TypedEncoder : Ordering, - B: TypedEncoder, - C: TypedEncoder, - OutB: TypedEncoder : Numeric, - OutC: TypedEncoder : Numeric + A: TypedEncoder: Ordering, + B: TypedEncoder, + C: TypedEncoder, + OutB: TypedEncoder: Numeric, + OutC: TypedEncoder: Numeric ](data: List[X3[A, B, C]])( implicit summableB: CatalystSummable[B, OutB], @@ -138,12 +138,12 @@ class RollupTests extends TypedDatasetSuite { test("rollup('a, 'b).agg(sum('c), sum('d))") { def prop[ - A: TypedEncoder : Ordering, - B: TypedEncoder : Ordering, - C: TypedEncoder, - D: TypedEncoder, - OutC: TypedEncoder : Numeric, - OutD: TypedEncoder : Numeric + A: TypedEncoder: Ordering, + B: TypedEncoder: Ordering, + C: TypedEncoder, + D: TypedEncoder, + OutC: TypedEncoder: Numeric, + OutD: TypedEncoder: Numeric ](data: List[X4[A, B, C, D]])( implicit summableC: CatalystSummable[C, OutC], @@ -173,10 +173,10 @@ class RollupTests extends TypedDatasetSuite { test("rollup('a, 'b).agg(sum('c)) to rollup('a, 'b).agg(sum('c),sum('c),sum('c),sum('c),sum('c))") { def prop[ - A: TypedEncoder : Ordering, - B: TypedEncoder : Ordering, - C: TypedEncoder, - OutC: TypedEncoder: Numeric + A: TypedEncoder: Ordering, + B: TypedEncoder: Ordering, + C: TypedEncoder, + OutC: TypedEncoder: Numeric ](data: List[X3[A, B, C]])(implicit summableC: CatalystSummable[C, OutC]): Prop = { val dataset = TypedDataset.create(data) val A = dataset.col[A]('a) @@ -224,7 +224,9 @@ class RollupTests extends TypedDatasetSuite { val sparkSumCCCC = dataset.dataset .rollup("a", "b").sum("c", "c", "c", "c").collect().toVector - .map(row => (Option(row.getAs[A](0)), Option(row.getAs[B](1)), row.getAs[OutC](2), row.getAs[OutC](3), row.getAs[OutC](4), row.getAs[OutC](5))) + .map(row => + (Option(row.getAs[A](0)), Option(row.getAs[B](1)), row.getAs[OutC](2), row.getAs[OutC](3), row.getAs[OutC](4), row.getAs[OutC](5)) + ) .sortBy(_._2) val framelessSumCCCCC = dataset @@ -235,14 +237,24 @@ class RollupTests extends TypedDatasetSuite { val sparkSumCCCCC = dataset.dataset .rollup("a", "b").sum("c", "c", "c", "c", "c").collect().toVector - .map(row => (Option(row.getAs[A](0)), Option(row.getAs[B](1)), row.getAs[OutC](2), row.getAs[OutC](3), row.getAs[OutC](4), row.getAs[OutC](5), row.getAs[OutC](6))) + .map(row => + ( + Option(row.getAs[A](0)), + Option(row.getAs[B](1)), + row.getAs[OutC](2), + row.getAs[OutC](3), + row.getAs[OutC](4), + row.getAs[OutC](5), + row.getAs[OutC](6) + ) + ) .sortBy(_._2) (framelessSumC ?= sparkSumC) && - (framelessSumCC ?= sparkSumCC) && - (framelessSumCCC ?= sparkSumCCC) && - (framelessSumCCCC ?= sparkSumCCCC) && - (framelessSumCCCCC ?= sparkSumCCCCC) + (framelessSumCC ?= sparkSumCC) && + (framelessSumCCC ?= sparkSumCCC) && + (framelessSumCCCC ?= sparkSumCCCC) && + (framelessSumCCCCC ?= sparkSumCCCCC) } check(forAll(prop[String, Long, Double, Double] _)) @@ -250,9 +262,9 @@ class RollupTests extends TypedDatasetSuite { test("rollup('a, 'b).mapGroups('a, 'b, sum('c))") { def prop[ - A: TypedEncoder : Ordering, - B: TypedEncoder : Ordering, - C: TypedEncoder : Numeric + A: TypedEncoder: Ordering, + B: TypedEncoder: Ordering, + C: TypedEncoder: Numeric ](data: List[X3[A, B, C]]): Prop = { val dataset = TypedDataset.create(data) val A = dataset.col[A]('a) @@ -275,8 +287,8 @@ class RollupTests extends TypedDatasetSuite { test("rollup('a).mapGroups(('a, toVector(('a, 'b))") { def prop[ - A: TypedEncoder: Ordering, - B: TypedEncoder: Ordering + A: TypedEncoder: Ordering, + B: TypedEncoder: Ordering ](data: Vector[X2[A, B]]): Prop = { val dataset = TypedDataset.create(data) val A = dataset.col[A]('a) @@ -298,8 +310,8 @@ class RollupTests extends TypedDatasetSuite { test("rollup('a).flatMapGroups(('a, toVector(('a, 'b))") { def prop[ - A: TypedEncoder : Ordering, - B: TypedEncoder : Ordering + A: TypedEncoder: Ordering, + B: TypedEncoder: Ordering ](data: Vector[X2[A, B]]): Prop = { val dataset = TypedDataset.create(data) val A = dataset.col[A]('a) @@ -325,9 +337,9 @@ class RollupTests extends TypedDatasetSuite { test("rollup('a, 'b).flatMapGroups((('a,'b) toVector((('a,'b), 'c))") { def prop[ - A: TypedEncoder : Ordering, - B: TypedEncoder : Ordering, - C: TypedEncoder : Ordering + A: TypedEncoder: Ordering, + B: TypedEncoder: Ordering, + C: TypedEncoder: Ordering ](data: Vector[X3[A, B, C]]): Prop = { val dataset = TypedDataset.create(data) val cA = dataset.col[A]('a) @@ -353,8 +365,7 @@ class RollupTests extends TypedDatasetSuite { } test("rollupMany('a).agg(sum('b))") { - def prop[A: TypedEncoder : Ordering, Out: TypedEncoder : Numeric] - (data: List[X1[A]])(implicit summable: CatalystSummable[A, Out]): Prop = { + def prop[A: TypedEncoder: Ordering, Out: TypedEncoder: Numeric](data: List[X1[A]])(implicit summable: CatalystSummable[A, Out]): Prop = { val dataset = TypedDataset.create(data) val A = dataset.col[A]('a) @@ -367,4 +378,4 @@ class RollupTests extends TypedDatasetSuite { check(forAll(prop[Int, Long] _)) } -} \ No newline at end of file +} diff --git a/dataset/src/test/scala/frameless/ops/SmartProjectTest.scala b/dataset/src/test/scala/frameless/ops/SmartProjectTest.scala index 233a42aec..1324e45dd 100644 --- a/dataset/src/test/scala/frameless/ops/SmartProjectTest.scala +++ b/dataset/src/test/scala/frameless/ops/SmartProjectTest.scala @@ -5,7 +5,6 @@ import org.scalacheck.Prop import org.scalacheck.Prop._ import shapeless.test.illTyped - case class Foo(i: Int, j: Int, x: String) case class Bar(i: Int, x: String) case class InvalidFooProjectionType(i: Int, x: Boolean) diff --git a/dataset/src/test/scala/frameless/package.scala b/dataset/src/test/scala/frameless/package.scala index 82ff375c9..a012a7436 100644 --- a/dataset/src/test/scala/frameless/package.scala +++ b/dataset/src/test/scala/frameless/package.scala @@ -4,6 +4,7 @@ import java.time.{LocalDateTime => JavaLocalDateTime} import org.scalacheck.{Arbitrary, Gen} package object frameless { + /** Fixed decimal point to avoid precision problems specific to Spark */ implicit val arbBigDecimal: Arbitrary[BigDecimal] = Arbitrary { for { @@ -72,11 +73,10 @@ package object frameless { def anyCauseHas(t: Throwable, f: Throwable => Boolean): Boolean = if (f(t)) true + else if (t.getCause ne null) + anyCauseHas(t.getCause, f) else - if (t.getCause ne null) - anyCauseHas(t.getCause, f) - else - false + false /** * Runs up to maxRuns and outputs the number of failures (times thrown) @@ -85,11 +85,11 @@ package object frameless { * @tparam T * @return the last passing thunk, or null */ - def runLoads[T](maxRuns: Int = 1000)(thunk: => T): T ={ + def runLoads[T](maxRuns: Int = 1000)(thunk: => T): T = { var i = 0 var r = null.asInstanceOf[T] var passed = 0 - while(i < maxRuns){ + while (i < maxRuns) { i += 1 try { r = thunk @@ -107,20 +107,20 @@ package object frameless { r } - /** + /** * Runs a given thunk up to maxRuns times, restarting the thunk if tolerantOf the thrown Throwable is true * @param tolerantOf * @param maxRuns default of 20 * @param thunk * @return either a successful run result or the last error will be thrown */ - def tolerantRun[T](tolerantOf: Throwable => Boolean, maxRuns: Int = 20)(thunk: => T): T ={ + def tolerantRun[T](tolerantOf: Throwable => Boolean, maxRuns: Int = 20)(thunk: => T): T = { var passed = false var i = 0 var res: T = null.asInstanceOf[T] var thrown: Throwable = null - while((i < maxRuns) && !passed) { + while ((i < maxRuns) && !passed) { try { i += 1 res = thunk diff --git a/dataset/src/test/scala/frameless/sql/package.scala b/dataset/src/test/scala/frameless/sql/package.scala index fcb45b03d..35b9b4aaa 100644 --- a/dataset/src/test/scala/frameless/sql/package.scala +++ b/dataset/src/test/scala/frameless/sql/package.scala @@ -9,8 +9,8 @@ package object sql { def rec(expr: Expression, acc: List[Expression]): List[Expression] = { expr match { case And(left, right) => rec(left, rec(right, acc)) - case Or(left, right) => rec(left, rec(right, acc)) - case e => e +: acc + case Or(left, right) => rec(left, rec(right, acc)) + case e => e +: acc } } diff --git a/dataset/src/test/scala/frameless/sql/rules/SQLRulesSuite.scala b/dataset/src/test/scala/frameless/sql/rules/SQLRulesSuite.scala index 8555d1809..86a4a3798 100644 --- a/dataset/src/test/scala/frameless/sql/rules/SQLRulesSuite.scala +++ b/dataset/src/test/scala/frameless/sql/rules/SQLRulesSuite.scala @@ -34,7 +34,7 @@ trait SQLRulesSuite extends TypedDatasetSuite with Matchers { self => val optimizedPlan = ds.queryExecution.optimizedPlan.collect { case logical.Filter(condition, _) => condition }.flatMap(_.toList) // check the optimized plan - optimizedPlan.collectFirst(planShouldNotContain) should be (empty) + optimizedPlan.collectFirst(planShouldNotContain) should be(empty) // compare filters actualPushDownFilters shouldBe expectedPushDownFilters @@ -53,7 +53,7 @@ trait SQLRulesSuite extends TypedDatasetSuite with Matchers { self => if (sparkPlan.children.isEmpty) // assume it's AQE sparkPlan match { case aq: AdaptiveSparkPlanExec => aq.initialPlan - case _ => sparkPlan + case _ => sparkPlan } else sparkPlan diff --git a/dataset/src/test/scala/org/apache/hadoop/fs/local/StreamingFS.scala b/dataset/src/test/scala/org/apache/hadoop/fs/local/StreamingFS.scala index a28ad0820..c45be11ad 100644 --- a/dataset/src/test/scala/org/apache/hadoop/fs/local/StreamingFS.scala +++ b/dataset/src/test/scala/org/apache/hadoop/fs/local/StreamingFS.scala @@ -3,5 +3,5 @@ package org.apache.hadoop.fs.local import com.globalmentor.apache.hadoop.fs.BareLocalFileSystem import org.apache.hadoop.fs.DelegateToFileSystem -class StreamingFS(uri: java.net.URI, conf: org.apache.hadoop.conf.Configuration) extends - DelegateToFileSystem(uri, new BareLocalFileSystem(), conf, "file", false) {} +class StreamingFS(uri: java.net.URI, conf: org.apache.hadoop.conf.Configuration) + extends DelegateToFileSystem(uri, new BareLocalFileSystem(), conf, "file", false) {} diff --git a/dataset/src/test/spark-3.3+/frameless/sql/rules/FramelessLitPushDownTests.scala b/dataset/src/test/spark-3.3+/frameless/sql/rules/FramelessLitPushDownTests.scala index 1df361b9b..ab6ab2cc0 100644 --- a/dataset/src/test/spark-3.3+/frameless/sql/rules/FramelessLitPushDownTests.scala +++ b/dataset/src/test/spark-3.3+/frameless/sql/rules/FramelessLitPushDownTests.scala @@ -3,7 +3,7 @@ package frameless.sql.rules import frameless._ import frameless.functions.Lit import org.apache.spark.sql.catalyst.util.DateTimeUtils.microsToInstant -import org.apache.spark.sql.sources.{ EqualTo, GreaterThanOrEqual, IsNotNull } +import org.apache.spark.sql.sources.{EqualTo, GreaterThanOrEqual, IsNotNull} import org.apache.spark.sql.catalyst.expressions import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema import java.time.Instant diff --git a/ml/src/main/scala/frameless/ml/classification/TypedRandomForestClassifier.scala b/ml/src/main/scala/frameless/ml/classification/TypedRandomForestClassifier.scala index f6efcceaf..c6565e097 100644 --- a/ml/src/main/scala/frameless/ml/classification/TypedRandomForestClassifier.scala +++ b/ml/src/main/scala/frameless/ml/classification/TypedRandomForestClassifier.scala @@ -13,7 +13,7 @@ import org.apache.spark.ml.linalg.Vector * It supports both binary and multiclass labels, as well as both continuous and categorical * features. */ -final class TypedRandomForestClassifier[Inputs] private[ml]( +final class TypedRandomForestClassifier[Inputs] private[ml] ( rf: RandomForestClassifier, labelCol: String, featuresCol: String @@ -48,4 +48,3 @@ object TypedRandomForestClassifier { new TypedRandomForestClassifier(new RandomForestClassifier(), inputsChecker.labelCol, inputsChecker.featuresCol) } } - diff --git a/ml/src/main/scala/frameless/ml/clustering/TypedBisectingKMeans.scala b/ml/src/main/scala/frameless/ml/clustering/TypedBisectingKMeans.scala index 4a8c974b4..be7836d3e 100644 --- a/ml/src/main/scala/frameless/ml/clustering/TypedBisectingKMeans.scala +++ b/ml/src/main/scala/frameless/ml/clustering/TypedBisectingKMeans.scala @@ -22,19 +22,19 @@ import org.apache.spark.ml.clustering.{BisectingKMeans, BisectingKMeansModel} class TypedBisectingKMeans[Inputs] private[ml] ( bkm: BisectingKMeans, featuresCol: String -) extends TypedEstimator[Inputs,TypedBisectingKMeans.Output, BisectingKMeansModel]{ +) extends TypedEstimator[Inputs, TypedBisectingKMeans.Output, BisectingKMeansModel] { val estimator: BisectingKMeans = bkm - .setFeaturesCol(featuresCol) - .setPredictionCol(AppendTransformer.tempColumnName) - + .setFeaturesCol(featuresCol) + .setPredictionCol(AppendTransformer.tempColumnName) + def setK(value: Int): TypedBisectingKMeans[Inputs] = copy(bkm.setK(value)) - + def setMaxIter(value: Int): TypedBisectingKMeans[Inputs] = copy(bkm.setMaxIter(value)) def setMinDivisibleClusterSize(value: Double): TypedBisectingKMeans[Inputs] = copy(bkm.setMinDivisibleClusterSize(value)) - + def setSeed(value: Long): TypedBisectingKMeans[Inputs] = copy(bkm.setSeed(value)) private def copy(newBkm: BisectingKMeans): TypedBisectingKMeans[Inputs] = @@ -46,4 +46,4 @@ object TypedBisectingKMeans { def apply[Inputs]()(implicit inputsChecker: VectorInputsChecker[Inputs]): TypedBisectingKMeans[Inputs] = new TypedBisectingKMeans(new BisectingKMeans(), inputsChecker.featuresCol) -} \ No newline at end of file +} diff --git a/ml/src/main/scala/frameless/ml/clustering/TypedKMeans.scala b/ml/src/main/scala/frameless/ml/clustering/TypedKMeans.scala index 1a32076a5..6732f0133 100644 --- a/ml/src/main/scala/frameless/ml/clustering/TypedKMeans.scala +++ b/ml/src/main/scala/frameless/ml/clustering/TypedKMeans.scala @@ -14,7 +14,7 @@ import org.apache.spark.ml.clustering.{KMeans, KMeansModel} class TypedKMeans[Inputs] private[ml] ( km: KMeans, featuresCol: String -) extends TypedEstimator[Inputs,TypedKMeans.Output,KMeansModel] { +) extends TypedEstimator[Inputs, TypedKMeans.Output, KMeansModel] { val estimator: KMeans = km .setFeaturesCol(featuresCol) @@ -36,7 +36,7 @@ class TypedKMeans[Inputs] private[ml] ( } -object TypedKMeans{ +object TypedKMeans { case class Output(prediction: Int) def apply[Inputs](implicit inputsChecker: VectorInputsChecker[Inputs]): TypedKMeans[Inputs] = { diff --git a/ml/src/main/scala/frameless/ml/feature/TypedIndexToString.scala b/ml/src/main/scala/frameless/ml/feature/TypedIndexToString.scala index af2e9684a..ebb11e380 100644 --- a/ml/src/main/scala/frameless/ml/feature/TypedIndexToString.scala +++ b/ml/src/main/scala/frameless/ml/feature/TypedIndexToString.scala @@ -12,8 +12,8 @@ import org.apache.spark.ml.feature.IndexToString * * @see `TypedStringIndexer` for converting strings into indices */ -final class TypedIndexToString[Inputs] private[ml](indexToString: IndexToString, inputCol: String) - extends AppendTransformer[Inputs, TypedIndexToString.Outputs, IndexToString] { +final class TypedIndexToString[Inputs] private[ml] (indexToString: IndexToString, inputCol: String) + extends AppendTransformer[Inputs, TypedIndexToString.Outputs, IndexToString] { val transformer: IndexToString = indexToString @@ -25,8 +25,7 @@ final class TypedIndexToString[Inputs] private[ml](indexToString: IndexToString, object TypedIndexToString { case class Outputs(originalOutput: String) - def apply[Inputs](labels: Array[String]) - (implicit inputsChecker: UnaryInputsChecker[Inputs, Double]): TypedIndexToString[Inputs] = { + def apply[Inputs](labels: Array[String])(implicit inputsChecker: UnaryInputsChecker[Inputs, Double]): TypedIndexToString[Inputs] = { new TypedIndexToString[Inputs](new IndexToString().setLabels(labels), inputsChecker.inputCol) } -} \ No newline at end of file +} diff --git a/ml/src/main/scala/frameless/ml/feature/TypedStringIndexer.scala b/ml/src/main/scala/frameless/ml/feature/TypedStringIndexer.scala index 7eba8e306..fe897f3c4 100644 --- a/ml/src/main/scala/frameless/ml/feature/TypedStringIndexer.scala +++ b/ml/src/main/scala/frameless/ml/feature/TypedStringIndexer.scala @@ -13,8 +13,8 @@ import org.apache.spark.ml.feature.{StringIndexer, StringIndexerModel} * * @see `TypedIndexToString` for the inverse transformation */ -final class TypedStringIndexer[Inputs] private[ml](stringIndexer: StringIndexer, inputCol: String) - extends TypedEstimator[Inputs, TypedStringIndexer.Outputs, StringIndexerModel] { +final class TypedStringIndexer[Inputs] private[ml] (stringIndexer: StringIndexer, inputCol: String) + extends TypedEstimator[Inputs, TypedStringIndexer.Outputs, StringIndexerModel] { val estimator: StringIndexer = stringIndexer .setInputCol(inputCol) @@ -39,4 +39,4 @@ object TypedStringIndexer { def apply[Inputs](implicit inputsChecker: UnaryInputsChecker[Inputs, String]): TypedStringIndexer[Inputs] = { new TypedStringIndexer[Inputs](new StringIndexer(), inputsChecker.inputCol) } -} \ No newline at end of file +} diff --git a/ml/src/main/scala/frameless/ml/feature/TypedVectorAssembler.scala b/ml/src/main/scala/frameless/ml/feature/TypedVectorAssembler.scala index d599011b3..ac2f84faa 100644 --- a/ml/src/main/scala/frameless/ml/feature/TypedVectorAssembler.scala +++ b/ml/src/main/scala/frameless/ml/feature/TypedVectorAssembler.scala @@ -13,8 +13,8 @@ import scala.annotation.implicitNotFound /** * A feature transformer that merges multiple columns into a vector column. */ -final class TypedVectorAssembler[Inputs] private[ml](vectorAssembler: VectorAssembler, inputCols: Array[String]) - extends AppendTransformer[Inputs, TypedVectorAssembler.Output, VectorAssembler] { +final class TypedVectorAssembler[Inputs] private[ml] (vectorAssembler: VectorAssembler, inputCols: Array[String]) + extends AppendTransformer[Inputs, TypedVectorAssembler.Output, VectorAssembler] { val transformer: VectorAssembler = vectorAssembler .setInputCols(inputCols) @@ -57,7 +57,8 @@ private[ml] object TypedVectorAssemblerInputsValueChecker { new TypedVectorAssemblerInputsValueChecker[HNil] {} implicit def hlistCheckInputsValueNumeric[H, T <: HList]( - implicit ch: CatalystNumeric[H], + implicit + ch: CatalystNumeric[H], tt: TypedVectorAssemblerInputsValueChecker[T] ): TypedVectorAssemblerInputsValueChecker[H :: T] = new TypedVectorAssemblerInputsValueChecker[H :: T] {} @@ -65,5 +66,3 @@ private[ml] object TypedVectorAssemblerInputsValueChecker { implicit tt: TypedVectorAssemblerInputsValueChecker[T] ): TypedVectorAssemblerInputsValueChecker[Boolean :: T] = new TypedVectorAssemblerInputsValueChecker[Boolean :: T] {} } - - diff --git a/ml/src/main/scala/frameless/ml/internals/LinearInputsChecker.scala b/ml/src/main/scala/frameless/ml/internals/LinearInputsChecker.scala index 995a3f961..5552cbae7 100644 --- a/ml/src/main/scala/frameless/ml/internals/LinearInputsChecker.scala +++ b/ml/src/main/scala/frameless/ml/internals/LinearInputsChecker.scala @@ -25,10 +25,11 @@ trait LinearInputsChecker[Inputs] { object LinearInputsChecker { implicit def checkLinearInputs[ - Inputs, - InputsRec <: HList, - LabelK <: Symbol, - FeaturesK <: Symbol]( + Inputs, + InputsRec <: HList, + LabelK <: Symbol, + FeaturesK <: Symbol + ]( implicit i0: LabelledGeneric.Aux[Inputs, InputsRec], i1: Length.Aux[InputsRec, Nat._2], @@ -45,11 +46,12 @@ object LinearInputsChecker { } implicit def checkLinearInputs2[ - Inputs, - InputsRec <: HList, - LabelK <: Symbol, - FeaturesK <: Symbol, - WeightK <: Symbol]( + Inputs, + InputsRec <: HList, + LabelK <: Symbol, + FeaturesK <: Symbol, + WeightK <: Symbol + ]( implicit i0: LabelledGeneric.Aux[Inputs, InputsRec], i1: Length.Aux[InputsRec, Nat._3], diff --git a/ml/src/main/scala/frameless/ml/internals/TreesInputsChecker.scala b/ml/src/main/scala/frameless/ml/internals/TreesInputsChecker.scala index 0fe157654..e735e44be 100644 --- a/ml/src/main/scala/frameless/ml/internals/TreesInputsChecker.scala +++ b/ml/src/main/scala/frameless/ml/internals/TreesInputsChecker.scala @@ -24,10 +24,11 @@ trait TreesInputsChecker[Inputs] { object TreesInputsChecker { implicit def checkTreesInputs[ - Inputs, - InputsRec <: HList, - LabelK <: Symbol, - FeaturesK <: Symbol]( + Inputs, + InputsRec <: HList, + LabelK <: Symbol, + FeaturesK <: Symbol + ]( implicit i0: LabelledGeneric.Aux[Inputs, InputsRec], i1: Length.Aux[InputsRec, Nat._2], diff --git a/ml/src/main/scala/frameless/ml/internals/UnaryInputsChecker.scala b/ml/src/main/scala/frameless/ml/internals/UnaryInputsChecker.scala index 56dfc9a57..4be27370d 100644 --- a/ml/src/main/scala/frameless/ml/internals/UnaryInputsChecker.scala +++ b/ml/src/main/scala/frameless/ml/internals/UnaryInputsChecker.scala @@ -30,4 +30,3 @@ object UnaryInputsChecker { } } - diff --git a/ml/src/main/scala/frameless/ml/internals/VectorInputsChecker.scala b/ml/src/main/scala/frameless/ml/internals/VectorInputsChecker.scala index e993d9a55..033a7d9e2 100644 --- a/ml/src/main/scala/frameless/ml/internals/VectorInputsChecker.scala +++ b/ml/src/main/scala/frameless/ml/internals/VectorInputsChecker.scala @@ -20,13 +20,13 @@ trait VectorInputsChecker[Inputs] { object VectorInputsChecker { implicit def checkVectorInput[Inputs, InputsRec <: HList, FeaturesK <: Symbol]( implicit - i0: LabelledGeneric.Aux[Inputs, InputsRec], - i1: Length.Aux[InputsRec, Nat._1], - i2: SelectorByValue.Aux[InputsRec, Vector, FeaturesK], - i3: Witness.Aux[FeaturesK] - ): VectorInputsChecker[Inputs] = { - new VectorInputsChecker[Inputs] { - val featuresCol: String = i3.value.name - } + i0: LabelledGeneric.Aux[Inputs, InputsRec], + i1: Length.Aux[InputsRec, Nat._1], + i2: SelectorByValue.Aux[InputsRec, Vector, FeaturesK], + i3: Witness.Aux[FeaturesK] + ): VectorInputsChecker[Inputs] = { + new VectorInputsChecker[Inputs] { + val featuresCol: String = i3.value.name } + } } diff --git a/ml/src/main/scala/frameless/ml/params/kmeans/KMeansInitMode.scala b/ml/src/main/scala/frameless/ml/params/kmeans/KMeansInitMode.scala index b3c023735..95a265ca8 100644 --- a/ml/src/main/scala/frameless/ml/params/kmeans/KMeansInitMode.scala +++ b/ml/src/main/scala/frameless/ml/params/kmeans/KMeansInitMode.scala @@ -11,7 +11,7 @@ package kmeans * Default: k-means||. */ -sealed abstract class KMeansInitMode private[ml](val sparkValue: String) +sealed abstract class KMeansInitMode private[ml] (val sparkValue: String) object KMeansInitMode { case object Random extends KMeansInitMode("random") diff --git a/ml/src/main/scala/frameless/ml/params/linears/LossStrategy.scala b/ml/src/main/scala/frameless/ml/params/linears/LossStrategy.scala index 4b9ca6d4e..00420b24e 100644 --- a/ml/src/main/scala/frameless/ml/params/linears/LossStrategy.scala +++ b/ml/src/main/scala/frameless/ml/params/linears/LossStrategy.scala @@ -2,6 +2,7 @@ package frameless package ml package params package linears + /** * SquaredError measures the average of the squares of the errors—that is, * the average squared difference between the estimated values and what is estimated. @@ -9,8 +10,8 @@ package linears * Huber Loss loss function less sensitive to outliers in data than the * squared error loss */ -sealed abstract class LossStrategy private[ml](val sparkValue: String) +sealed abstract class LossStrategy private[ml] (val sparkValue: String) object LossStrategy { case object SquaredError extends LossStrategy("squaredError") - case object Huber extends LossStrategy("huber") + case object Huber extends LossStrategy("huber") } diff --git a/ml/src/main/scala/frameless/ml/params/linears/Solver.scala b/ml/src/main/scala/frameless/ml/params/linears/Solver.scala index 277e06e7a..555da751f 100644 --- a/ml/src/main/scala/frameless/ml/params/linears/Solver.scala +++ b/ml/src/main/scala/frameless/ml/params/linears/Solver.scala @@ -16,10 +16,9 @@ package linears * spark */ -sealed abstract class Solver private[ml](val sparkValue: String) +sealed abstract class Solver private[ml] (val sparkValue: String) object Solver { - case object LBFGS extends Solver("l-bfgs") - case object Auto extends Solver("auto") - case object Normal extends Solver("normal") + case object LBFGS extends Solver("l-bfgs") + case object Auto extends Solver("auto") + case object Normal extends Solver("normal") } - diff --git a/ml/src/main/scala/frameless/ml/params/trees/FeatureSubsetStrategy.scala b/ml/src/main/scala/frameless/ml/params/trees/FeatureSubsetStrategy.scala index f2167f983..c5a9a94c0 100644 --- a/ml/src/main/scala/frameless/ml/params/trees/FeatureSubsetStrategy.scala +++ b/ml/src/main/scala/frameless/ml/params/trees/FeatureSubsetStrategy.scala @@ -2,6 +2,7 @@ package frameless package ml package params package trees + /** * The number of features to consider for splits at each tree node. * Supported options: @@ -27,7 +28,7 @@ package trees * @see * Breiman manual for random forests */ -sealed abstract class FeatureSubsetStrategy private[ml](val sparkValue: String) +sealed abstract class FeatureSubsetStrategy private[ml] (val sparkValue: String) object FeatureSubsetStrategy { case object Auto extends FeatureSubsetStrategy("auto") case object All extends FeatureSubsetStrategy("all") @@ -36,4 +37,4 @@ object FeatureSubsetStrategy { case object Log2 extends FeatureSubsetStrategy("log2") case class Ratio(value: Double) extends FeatureSubsetStrategy(value.toString) case class NumberOfFeatures(value: Int) extends FeatureSubsetStrategy(value.toString) -} \ No newline at end of file +} diff --git a/ml/src/main/scala/frameless/ml/regression/TypedLinearRegression.scala b/ml/src/main/scala/frameless/ml/regression/TypedLinearRegression.scala index 3b3208623..f578b986f 100644 --- a/ml/src/main/scala/frameless/ml/regression/TypedLinearRegression.scala +++ b/ml/src/main/scala/frameless/ml/regression/TypedLinearRegression.scala @@ -11,30 +11,30 @@ import org.apache.spark.ml.regression.{LinearRegression, LinearRegressionModel} * Linear Regression linear approach to modelling the relationship * between a scalar response (or dependent variable) and one or more explanatory variables */ -final class TypedLinearRegression [Inputs] private[ml]( +final class TypedLinearRegression[Inputs] private[ml] ( lr: LinearRegression, labelCol: String, featuresCol: String, weightCol: Option[String] ) extends TypedEstimator[Inputs, TypedLinearRegression.Outputs, LinearRegressionModel] { - val estimatorWithoutWeight : LinearRegression = lr + val estimatorWithoutWeight: LinearRegression = lr .setLabelCol(labelCol) .setFeaturesCol(featuresCol) .setPredictionCol(AppendTransformer.tempColumnName) val estimator = if (weightCol.isDefined) estimatorWithoutWeight.setWeightCol(weightCol.get) else estimatorWithoutWeight - def setRegParam(value: Double): TypedLinearRegression[Inputs] = copy(lr.setRegParam(value)) - def setFitIntercept(value: Boolean): TypedLinearRegression[Inputs] = copy(lr.setFitIntercept(value)) - def setStandardization(value: Boolean): TypedLinearRegression[Inputs] = copy(lr.setStandardization(value)) - def setElasticNetParam(value: Double): TypedLinearRegression[Inputs] = copy(lr.setElasticNetParam(value)) - def setMaxIter(value: Int): TypedLinearRegression[Inputs] = copy(lr.setMaxIter(value)) - def setTol(value: Double): TypedLinearRegression[Inputs] = copy(lr.setTol(value)) - def setSolver(value: Solver): TypedLinearRegression[Inputs] = copy(lr.setSolver(value.sparkValue)) - def setAggregationDepth(value: Int): TypedLinearRegression[Inputs] = copy(lr.setAggregationDepth(value)) - def setLoss(value: LossStrategy): TypedLinearRegression[Inputs] = copy(lr.setLoss(value.sparkValue)) - def setEpsilon(value: Double): TypedLinearRegression[Inputs] = copy(lr.setEpsilon(value)) + def setRegParam(value: Double): TypedLinearRegression[Inputs] = copy(lr.setRegParam(value)) + def setFitIntercept(value: Boolean): TypedLinearRegression[Inputs] = copy(lr.setFitIntercept(value)) + def setStandardization(value: Boolean): TypedLinearRegression[Inputs] = copy(lr.setStandardization(value)) + def setElasticNetParam(value: Double): TypedLinearRegression[Inputs] = copy(lr.setElasticNetParam(value)) + def setMaxIter(value: Int): TypedLinearRegression[Inputs] = copy(lr.setMaxIter(value)) + def setTol(value: Double): TypedLinearRegression[Inputs] = copy(lr.setTol(value)) + def setSolver(value: Solver): TypedLinearRegression[Inputs] = copy(lr.setSolver(value.sparkValue)) + def setAggregationDepth(value: Int): TypedLinearRegression[Inputs] = copy(lr.setAggregationDepth(value)) + def setLoss(value: LossStrategy): TypedLinearRegression[Inputs] = copy(lr.setLoss(value.sparkValue)) + def setEpsilon(value: Double): TypedLinearRegression[Inputs] = copy(lr.setEpsilon(value)) private def copy(newLr: LinearRegression): TypedLinearRegression[Inputs] = new TypedLinearRegression[Inputs](newLr, labelCol, featuresCol, weightCol) @@ -45,8 +45,7 @@ object TypedLinearRegression { case class Outputs(prediction: Double) case class Weight(weight: Double) - def apply[Inputs](implicit inputsChecker: LinearInputsChecker[Inputs]): TypedLinearRegression[Inputs] = { new TypedLinearRegression(new LinearRegression(), inputsChecker.labelCol, inputsChecker.featuresCol, inputsChecker.weightCol) } -} \ No newline at end of file +} diff --git a/ml/src/main/scala/frameless/ml/regression/TypedRandomForestRegressor.scala b/ml/src/main/scala/frameless/ml/regression/TypedRandomForestRegressor.scala index 69c1ad68c..84f8fccfb 100644 --- a/ml/src/main/scala/frameless/ml/regression/TypedRandomForestRegressor.scala +++ b/ml/src/main/scala/frameless/ml/regression/TypedRandomForestRegressor.scala @@ -11,7 +11,7 @@ import org.apache.spark.ml.regression.{RandomForestRegressionModel, RandomForest * learning algorithm for regression. * It supports both continuous and categorical features. */ -final class TypedRandomForestRegressor[Inputs] private[ml]( +final class TypedRandomForestRegressor[Inputs] private[ml] ( rf: RandomForestRegressor, labelCol: String, featuresCol: String @@ -40,8 +40,10 @@ final class TypedRandomForestRegressor[Inputs] private[ml]( object TypedRandomForestRegressor { case class Outputs(prediction: Double) - def apply[Inputs](implicit inputsChecker: TreesInputsChecker[Inputs]) - : TypedRandomForestRegressor[Inputs] = { + def apply[Inputs](implicit + inputsChecker: TreesInputsChecker[Inputs] + ) + : TypedRandomForestRegressor[Inputs] = { new TypedRandomForestRegressor(new RandomForestRegressor(), inputsChecker.labelCol, inputsChecker.featuresCol) } -} \ No newline at end of file +} diff --git a/ml/src/test/scala/frameless/ml/Generators.scala b/ml/src/test/scala/frameless/ml/Generators.scala index f7dde986c..51f5413da 100644 --- a/ml/src/test/scala/frameless/ml/Generators.scala +++ b/ml/src/test/scala/frameless/ml/Generators.scala @@ -32,7 +32,8 @@ object Generators { val genRatio = Gen.choose(0D, 1D).suchThat(_ > 0D).map(FeatureSubsetStrategy.Ratio) val genNumberOfFeatures = Gen.choose(1, Int.MaxValue).map(FeatureSubsetStrategy.NumberOfFeatures) - Gen.oneOf(Gen.const(FeatureSubsetStrategy.All), + Gen.oneOf( + Gen.const(FeatureSubsetStrategy.All), Gen.const(FeatureSubsetStrategy.All), Gen.const(FeatureSubsetStrategy.Log2), Gen.const(FeatureSubsetStrategy.OneThird), @@ -43,7 +44,7 @@ object Generators { } implicit val arbLossStrategy: Arbitrary[LossStrategy] = Arbitrary { - Gen.const(LossStrategy.SquaredError) + Gen.const(LossStrategy.SquaredError) } implicit val arbSolver: Arbitrary[Solver] = Arbitrary { diff --git a/ml/src/test/scala/frameless/ml/classification/TypedRandomForestClassifierTests.scala b/ml/src/test/scala/frameless/ml/classification/TypedRandomForestClassifierTests.scala index ab03f1aad..e922bac87 100644 --- a/ml/src/test/scala/frameless/ml/classification/TypedRandomForestClassifierTests.scala +++ b/ml/src/test/scala/frameless/ml/classification/TypedRandomForestClassifierTests.scala @@ -66,13 +66,13 @@ class TypedRandomForestClassifierTests extends FramelessMlSuite with Matchers { val model = rf.fit(ds).run() model.transformer.getNumTrees == 10 && - model.transformer.getMaxBins == 100 && - model.transformer.getFeatureSubsetStrategy == featureSubsetStrategy.sparkValue && - model.transformer.getMaxDepth == 10 && - model.transformer.getMaxMemoryInMB == 100 && - model.transformer.getMinInfoGain == 0.1D && - model.transformer.getMinInstancesPerNode == 2 && - model.transformer.getSubsamplingRate == 0.9D + model.transformer.getMaxBins == 100 && + model.transformer.getFeatureSubsetStrategy == featureSubsetStrategy.sparkValue && + model.transformer.getMaxDepth == 10 && + model.transformer.getMaxMemoryInMB == 100 && + model.transformer.getMinInfoGain == 0.1D && + model.transformer.getMinInstancesPerNode == 2 && + model.transformer.getSubsamplingRate == 0.9D } check(prop) @@ -86,4 +86,4 @@ class TypedRandomForestClassifierTests extends FramelessMlSuite with Matchers { illTyped("TypedRandomForestClassifier.create[X2[Vector, String]]()") } -} \ No newline at end of file +} diff --git a/ml/src/test/scala/frameless/ml/clustering/BisectingKMeansTests.scala b/ml/src/test/scala/frameless/ml/clustering/BisectingKMeansTests.scala index 976df39b2..a1ef44dd3 100644 --- a/ml/src/test/scala/frameless/ml/clustering/BisectingKMeansTests.scala +++ b/ml/src/test/scala/frameless/ml/clustering/BisectingKMeansTests.scala @@ -24,7 +24,7 @@ class BisectingKMeansTests extends FramelessMlSuite with Matchers { pDs.select(pDs.col('a)).collect().run().toList == Seq(x1.a) } - def prop3[A: TypedEncoder : Arbitrary] = forAll { x2: X2[Vector, A] => + def prop3[A: TypedEncoder: Arbitrary] = forAll { x2: X2[Vector, A] => val km = TypedBisectingKMeans[X1[Vector]]() val ds = TypedDataset.create(Seq(x2)) val model = km.fit(ds).run() @@ -44,12 +44,12 @@ class BisectingKMeansTests extends FramelessMlSuite with Matchers { .setMinDivisibleClusterSize(1) .setSeed(123332) - val ds = TypedDataset.create(Seq(X2(Vectors.dense(Array(0D)),0))) + val ds = TypedDataset.create(Seq(X2(Vectors.dense(Array(0D)), 0))) val model = rf.fit(ds).run() - model.transformer.getK == 10 && - model.transformer.getMaxIter == 10 && - model.transformer.getMinDivisibleClusterSize == 1 && - model.transformer.getSeed == 123332 + model.transformer.getK == 10 && + model.transformer.getMaxIter == 10 && + model.transformer.getMinDivisibleClusterSize == 1 && + model.transformer.getSeed == 123332 } } diff --git a/ml/src/test/scala/frameless/ml/clustering/ClusteringIntegrationTests.scala b/ml/src/test/scala/frameless/ml/clustering/ClusteringIntegrationTests.scala index 398a0963d..9a963a345 100644 --- a/ml/src/test/scala/frameless/ml/clustering/ClusteringIntegrationTests.scala +++ b/ml/src/test/scala/frameless/ml/clustering/ClusteringIntegrationTests.scala @@ -14,11 +14,11 @@ class ClusteringIntegrationTests extends FramelessMlSuite with Matchers { test("predict field2 from field1 using a K-means clustering") { // Training - val trainingDataDs = TypedDataset.create(Seq.fill(5)(X2(10D, 0)) :+ X2(100D,0)) + val trainingDataDs = TypedDataset.create(Seq.fill(5)(X2(10D, 0)) :+ X2(100D, 0)) val vectorAssembler = TypedVectorAssembler[X1[Double]] - val dataWithFeatures = vectorAssembler.transform(trainingDataDs).as[X3[Double,Int,Vector]]() + val dataWithFeatures = vectorAssembler.transform(trainingDataDs).as[X3[Double, Int, Vector]]() case class Input(c: Vector) val km = TypedKMeans[Input].setK(2) @@ -32,9 +32,9 @@ class ClusteringIntegrationTests extends FramelessMlSuite with Matchers { ) val testData = TypedDataset.create(testSeq) - val testDataWithFeatures = vectorAssembler.transform(testData).as[X3[Double,Int,Vector]]() + val testDataWithFeatures = vectorAssembler.transform(testData).as[X3[Double, Int, Vector]]() - val predictionDs = model.transform(testDataWithFeatures).as[X4[Double,Int,Vector,Int]]() + val predictionDs = model.transform(testDataWithFeatures).as[X4[Double, Int, Vector, Int]]() val prediction = predictionDs.select(predictionDs.col[Int]('d)).collect().run().toList @@ -43,7 +43,7 @@ class ClusteringIntegrationTests extends FramelessMlSuite with Matchers { test("predict field2 from field1 using a bisecting K-means clustering") { // Training - val trainingDataDs = TypedDataset.create(Seq.fill(5)(X2(10D, 0)) :+ X2(100D,0)) + val trainingDataDs = TypedDataset.create(Seq.fill(5)(X2(10D, 0)) :+ X2(100D, 0)) val vectorAssembler = TypedVectorAssembler[X1[Double]] @@ -63,7 +63,7 @@ class ClusteringIntegrationTests extends FramelessMlSuite with Matchers { val testData = TypedDataset.create(testSeq) val testDataWithFeatures = vectorAssembler.transform(testData).as[X3[Double, Int, Vector]]() - val predictionDs = model.transform(testDataWithFeatures).as[X4[Double,Int,Vector,Int]]() + val predictionDs = model.transform(testDataWithFeatures).as[X4[Double, Int, Vector, Int]]() val prediction = predictionDs.select(predictionDs.col[Int]('d)).collect().run().toList diff --git a/ml/src/test/scala/frameless/ml/clustering/KMeansTests.scala b/ml/src/test/scala/frameless/ml/clustering/KMeansTests.scala index a41c1b703..745cf1826 100644 --- a/ml/src/test/scala/frameless/ml/clustering/KMeansTests.scala +++ b/ml/src/test/scala/frameless/ml/clustering/KMeansTests.scala @@ -30,7 +30,7 @@ class KMeansTests extends FramelessMlSuite with Matchers { val dense = Vectors.dense(dubs) vect match { case _: SparseVector => dense.toSparse - case _ => dense + case _ => dense } } @@ -46,7 +46,7 @@ class KMeansTests extends FramelessMlSuite with Matchers { pDs.select(pDs.col('a)).collect().run().toList == Seq(x1.a, x1a.a) } - def prop3[A: TypedEncoder : Arbitrary] = forAll { x2: X2[Vector, A] => + def prop3[A: TypedEncoder: Arbitrary] = forAll { x2: X2[Vector, A] => val x2a = x2.copy(a = newRowWithSameDimension(x2.a)) val km = TypedKMeans[X1[Vector]] val ds = TypedDataset.create(Seq(x2, x2a)) @@ -56,7 +56,7 @@ class KMeansTests extends FramelessMlSuite with Matchers { pDs.select(pDs.col('a), pDs.col('b)).collect().run().toList == Seq((x2.a, x2.b), (x2a.a, x2a.b)) } - tolerantRun( _.isInstanceOf[ArrayIndexOutOfBoundsException] ) { + tolerantRun(_.isInstanceOf[ArrayIndexOutOfBoundsException]) { check(prop) check(prop3[Double]) } @@ -76,11 +76,11 @@ class KMeansTests extends FramelessMlSuite with Matchers { val model = rf.fit(ds).run() model.transformer.getInitMode == KMeansInitMode.Random.sparkValue && - model.transformer.getInitSteps == 2 && - model.transformer.getK == 10 && - model.transformer.getMaxIter == 15 && - model.transformer.getSeed == 123223L && - model.transformer.getTol == 12D + model.transformer.getInitSteps == 2 && + model.transformer.getK == 10 && + model.transformer.getMaxIter == 15 && + model.transformer.getSeed == 123223L && + model.transformer.getTol == 12D } check(prop) diff --git a/ml/src/test/scala/frameless/ml/feature/TypedStringIndexerTests.scala b/ml/src/test/scala/frameless/ml/feature/TypedStringIndexerTests.scala index 18d490758..385c61b8a 100644 --- a/ml/src/test/scala/frameless/ml/feature/TypedStringIndexerTests.scala +++ b/ml/src/test/scala/frameless/ml/feature/TypedStringIndexerTests.scala @@ -11,7 +11,7 @@ import org.scalatest.matchers.must.Matchers class TypedStringIndexerTests extends FramelessMlSuite with Matchers { test(".fit() returns a correct TypedTransformer") { - def prop[A: TypedEncoder : Arbitrary] = forAll { x2: X2[String, A] => + def prop[A: TypedEncoder: Arbitrary] = forAll { x2: X2[String, A] => val indexer = TypedStringIndexer[X1[String]] val ds = TypedDataset.create(Seq(x2)) val model = indexer.fit(ds).run() diff --git a/ml/src/test/scala/frameless/ml/regression/TypedLinearRegressionTests.scala b/ml/src/test/scala/frameless/ml/regression/TypedLinearRegressionTests.scala index b864b1533..51d7dce62 100644 --- a/ml/src/test/scala/frameless/ml/regression/TypedLinearRegressionTests.scala +++ b/ml/src/test/scala/frameless/ml/regression/TypedLinearRegressionTests.scala @@ -66,12 +66,12 @@ class TypedLinearRegressionTests extends FramelessMlSuite with Matchers { val model = lr.fit(ds).run() model.transformer.getAggregationDepth == 10 && - model.transformer.getEpsilon == 4.0 && - model.transformer.getLoss == lossStrategy.sparkValue && - model.transformer.getMaxIter == 23 && - model.transformer.getRegParam == 1.2 && - model.transformer.getTol == 2.3 && - model.transformer.getSolver == solver.sparkValue + model.transformer.getEpsilon == 4.0 && + model.transformer.getLoss == lossStrategy.sparkValue && + model.transformer.getMaxIter == 23 && + model.transformer.getRegParam == 1.2 && + model.transformer.getTol == 2.3 && + model.transformer.getSolver == solver.sparkValue } check(prop) @@ -98,12 +98,12 @@ class TypedLinearRegressionTests extends FramelessMlSuite with Matchers { ) val ds2 = Seq( - X3(new DenseVector(Array(1.0)): Vector,2F, 1.0), - X3(new DenseVector(Array(2.0)): Vector,2F, 2.0), - X3(new DenseVector(Array(3.0)): Vector,2F, 3.0), - X3(new DenseVector(Array(4.0)): Vector,2F, 4.0), - X3(new DenseVector(Array(5.0)): Vector,2F, 5.0), - X3(new DenseVector(Array(6.0)): Vector,2F, 6.0) + X3(new DenseVector(Array(1.0)): Vector, 2F, 1.0), + X3(new DenseVector(Array(2.0)): Vector, 2F, 2.0), + X3(new DenseVector(Array(3.0)): Vector, 2F, 3.0), + X3(new DenseVector(Array(4.0)): Vector, 2F, 4.0), + X3(new DenseVector(Array(5.0)): Vector, 2F, 5.0), + X3(new DenseVector(Array(6.0)): Vector, 2F, 6.0) ) val tds = TypedDataset.create(ds) diff --git a/ml/src/test/scala/frameless/ml/regression/TypedRandomForestRegressorTests.scala b/ml/src/test/scala/frameless/ml/regression/TypedRandomForestRegressorTests.scala index 4a6cd37d2..989777ac6 100644 --- a/ml/src/test/scala/frameless/ml/regression/TypedRandomForestRegressorTests.scala +++ b/ml/src/test/scala/frameless/ml/regression/TypedRandomForestRegressorTests.scala @@ -64,13 +64,13 @@ class TypedRandomForestRegressorTests extends FramelessMlSuite with Matchers { val model = rf.fit(ds).run() model.transformer.getNumTrees == 10 && - model.transformer.getMaxBins == 100 && - model.transformer.getFeatureSubsetStrategy == featureSubsetStrategy.sparkValue && - model.transformer.getMaxDepth == 10 && - model.transformer.getMaxMemoryInMB == 100 && - model.transformer.getMinInfoGain == 0.1D && - model.transformer.getMinInstancesPerNode == 2 && - model.transformer.getSubsamplingRate == 0.9D + model.transformer.getMaxBins == 100 && + model.transformer.getFeatureSubsetStrategy == featureSubsetStrategy.sparkValue && + model.transformer.getMaxDepth == 10 && + model.transformer.getMaxMemoryInMB == 100 && + model.transformer.getMinInfoGain == 0.1D && + model.transformer.getMinInstancesPerNode == 2 && + model.transformer.getSubsamplingRate == 0.9D } check(prop) diff --git a/project/Common.scala b/project/Common.scala deleted file mode 100644 index 94bc73ff3..000000000 --- a/project/Common.scala +++ /dev/null @@ -1,14 +0,0 @@ -import sbt.Keys._ -import sbt._ -import sbt.plugins.JvmPlugin - -import org.scalafmt.sbt.ScalafmtPlugin.autoImport._ - -object Common extends AutoPlugin { - override def trigger = allRequirements - override def requires = JvmPlugin - - override def projectSettings = Seq( - scalafmtFilter := "diff-ref=78f708d" - ) -} diff --git a/refined/src/main/scala/frameless/refined/RefinedFieldEncoders.scala b/refined/src/main/scala/frameless/refined/RefinedFieldEncoders.scala index dba59454c..9803f67ca 100644 --- a/refined/src/main/scala/frameless/refined/RefinedFieldEncoders.scala +++ b/refined/src/main/scala/frameless/refined/RefinedFieldEncoders.scala @@ -3,24 +3,23 @@ package frameless.refined import scala.reflect.ClassTag import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.expressions.objects.{ - Invoke, NewInstance, UnwrapOption, WrapOption -} +import org.apache.spark.sql.catalyst.expressions.objects.{Invoke, NewInstance, UnwrapOption, WrapOption} import org.apache.spark.sql.types._ import eu.timepit.refined.api.RefType -import frameless.{ TypedEncoder, RecordFieldEncoder } +import frameless.{RecordFieldEncoder, TypedEncoder} private[refined] trait RefinedFieldEncoders { + /** * @tparam T the refined type (e.g. `String`) */ implicit def optionRefined[F[_, _], T, R]( implicit - i0: RefType[F], - i1: TypedEncoder[T], - i2: ClassTag[F[T, R]], + i0: RefType[F], + i1: TypedEncoder[T], + i2: ClassTag[F[T, R]] ): RecordFieldEncoder[Option[F[T, R]]] = RecordFieldEncoder[Option[F[T, R]]](new TypedEncoder[Option[F[T, R]]] { def nullable = true @@ -55,9 +54,9 @@ private[refined] trait RefinedFieldEncoders { */ implicit def refined[F[_, _], T, R]( implicit - i0: RefType[F], - i1: TypedEncoder[T], - i2: ClassTag[F[T, R]], + i0: RefType[F], + i1: TypedEncoder[T], + i2: ClassTag[F[T, R]] ): RecordFieldEncoder[F[T, R]] = RecordFieldEncoder[F[T, R]](new TypedEncoder[F[T, R]] { def nullable = i1.nullable @@ -76,4 +75,3 @@ private[refined] trait RefinedFieldEncoders { override def toString = s"refined[${i2.runtimeClass.getName}]" }) } - diff --git a/refined/src/main/scala/frameless/refined/package.scala b/refined/src/main/scala/frameless/refined/package.scala index 8819be2bf..96b214272 100644 --- a/refined/src/main/scala/frameless/refined/package.scala +++ b/refined/src/main/scala/frameless/refined/package.scala @@ -2,32 +2,36 @@ package frameless import scala.reflect.ClassTag -import eu.timepit.refined.api.{ RefType, Validate } +import eu.timepit.refined.api.{RefType, Validate} package object refined extends RefinedFieldEncoders { implicit def refinedInjection[F[_, _], T, R]( implicit - refType: RefType[F], - validate: Validate[T, R] - ): Injection[F[T, R], T] = Injection( + refType: RefType[F], + validate: Validate[T, R] + ): Injection[F[T, R], T] = Injection( refType.unwrap, { value => refType.refine[R](value) match { case Left(errMsg) => throw new IllegalArgumentException( - s"Value $value does not satisfy refinement predicate: $errMsg") + s"Value $value does not satisfy refinement predicate: $errMsg" + ) case Right(res) => res } - }) + } + ) implicit def refinedEncoder[F[_, _], T, R]( implicit - i0: RefType[F], - i1: Validate[T, R], - i2: TypedEncoder[T], - i3: ClassTag[F[T, R]] - ): TypedEncoder[F[T, R]] = TypedEncoder.usingInjection( - i3, refinedInjection, i2) + i0: RefType[F], + i1: Validate[T, R], + i2: TypedEncoder[T], + i3: ClassTag[F[T, R]] + ): TypedEncoder[F[T, R]] = TypedEncoder.usingInjection( + i3, + refinedInjection, + i2 + ) } - diff --git a/refined/src/test/scala/frameless/RefinedFieldEncoderTests.scala b/refined/src/test/scala/frameless/RefinedFieldEncoderTests.scala index 5476284ea..fd0e82048 100644 --- a/refined/src/test/scala/frameless/RefinedFieldEncoderTests.scala +++ b/refined/src/test/scala/frameless/RefinedFieldEncoderTests.scala @@ -1,9 +1,7 @@ package frameless import org.apache.spark.sql.Row -import org.apache.spark.sql.types.{ - IntegerType, ObjectType, StringType, StructField, StructType -} +import org.apache.spark.sql.types.{IntegerType, ObjectType, StringType, StructField, StructType} import org.scalatest.matchers.should.Matchers @@ -42,7 +40,8 @@ class RefinedFieldEncoderTests extends TypedDatasetSuite with Matchers { // Check catalystRepr val expectedAStructType = StructType(Seq( StructField("a", IntegerType, false), - StructField("s", StringType, false))) + StructField("s", StringType, false) + )) encoderA.catalystRepr shouldBe expectedAStructType @@ -73,7 +72,8 @@ class RefinedFieldEncoderTests extends TypedDatasetSuite with Matchers { // Check catalystRepr val expectedBStructType = StructType(Seq( StructField("a", IntegerType, false), - StructField("s", StringType, true))) + StructField("s", StringType, true) + )) encoderB.catalystRepr shouldBe expectedBStructType @@ -81,7 +81,7 @@ class RefinedFieldEncoderTests extends TypedDatasetSuite with Matchers { val unsafeDs: TypedDataset[B] = { val rdd = sc.parallelize(Seq( Row(bs.a, bs.s.mkString), - Row(2, null.asInstanceOf[String]), + Row(2, null.asInstanceOf[String]) )) val df = session.createDataFrame(rdd, expectedBStructType)