Skip to content

Commit 4d47a5f

Browse files
authored
Merge pull request #29 from phpisciuneri/polish
Polish
2 parents 0379057 + 028f979 commit 4d47a5f

4 files changed

Lines changed: 53 additions & 53 deletions

File tree

src/main/scala/com/target/data_validator/validator/StringLengthCheck.scala

Lines changed: 26 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -12,28 +12,28 @@ import org.apache.spark.sql.catalyst.expressions._
1212
import org.apache.spark.sql.types.{IntegerType, StringType, StructType}
1313

1414
case class StringLengthCheck(
15-
column: String,
16-
minLength: Option[Json],
17-
maxLength: Option[Json],
18-
threshold: Option[String]
19-
) extends RowBased {
15+
column: String,
16+
minLength: Option[Json],
17+
maxLength: Option[Json],
18+
threshold: Option[String]
19+
) extends RowBased {
2020

2121
override def substituteVariables(dict: VarSubstitution): ValidatorBase = {
2222

2323
val ret = StringLengthCheck(
24-
getVarSub(column, "column", dict),
25-
minLength.map(getVarSubJson(_, "minLength", dict)),
26-
maxLength.map(getVarSubJson(_, "maxLength", dict)),
27-
threshold.map(getVarSub(_, "threshold", dict))
28-
)
24+
getVarSub(column, "column", dict),
25+
minLength.map(getVarSubJson(_, "minLength", dict)),
26+
maxLength.map(getVarSubJson(_, "maxLength", dict)),
27+
threshold.map(getVarSub(_, "threshold", dict))
28+
)
2929
getEvents.foreach(ret.addEvent)
3030
ret
3131
}
3232

3333
private def cmpExpr(colExpr: Expression,
34-
value: Option[Json],
35-
cmp: (Expression, Expression) => Expression
36-
): Option[Expression] = {
34+
value: Option[Json],
35+
cmp: (Expression, Expression) => Expression
36+
): Option[Expression] = {
3737
value.map { v => cmp(colExpr, createLiteralOrUnresolvedAttribute(IntegerType, v)) }
3838
}
3939

@@ -57,20 +57,20 @@ case class StringLengthCheck(
5757
private def checkMinLessThanOrEqualToMax(values: List[Json]): Unit = {
5858

5959
if (values.forall(_.isNumber)) {
60-
values.flatMap(_.asNumber) match {
61-
case mv :: xv :: Nil if mv.toDouble > xv.toDouble =>
62-
addEvent(ValidatorError(s"min: ${minLength.get} must be less than or equal to max: ${maxLength.get}"))
63-
case _ =>
64-
}
60+
values.flatMap(_.asNumber) match {
61+
case mv :: xv :: Nil if mv.toDouble > xv.toDouble =>
62+
addEvent(ValidatorError(s"min: ${minLength.get} must be less than or equal to max: ${maxLength.get}"))
63+
case _ =>
64+
}
6565
} else if (values.forall(_.isString)) {
66-
values.flatMap(_.asString) match {
67-
case mv :: xv :: Nil if mv == xv =>
68-
addEvent(ValidatorError(s"Min[String]: $mv must be less than max[String]: $xv"))
69-
case _ =>
70-
}
66+
values.flatMap(_.asString) match {
67+
case mv :: xv :: Nil if mv == xv =>
68+
addEvent(ValidatorError(s"Min[String]: $mv must be less than max[String]: $xv"))
69+
case _ =>
70+
}
7171
} else {
72-
// Not Strings or Numbers
73-
addEvent(ValidatorError(s"Unsupported type in ${values.map(debugJson).mkString(", ")}"))
72+
// Not Strings or Numbers
73+
addEvent(ValidatorError(s"Unsupported type in ${values.map(debugJson).mkString(", ")}"))
7474
}
7575
}
7676

@@ -89,7 +89,7 @@ case class StringLengthCheck(
8989
val colType = findColumnInDataFrame(df, column)
9090
if (colType.isDefined) {
9191
val dataType = colType.get.dataType
92-
if (!(dataType.isInstanceOf[StringType])) {
92+
if (!dataType.isInstanceOf[StringType]) {
9393
addEvent(ValidatorError(s"Data type of column '$column' must be String, but was found to be $dataType"))
9494
}
9595
}
@@ -123,8 +123,6 @@ object StringLengthCheck extends LazyLogging {
123123
logger.debug(s"minLength: $minLengthJ type: ${minLengthJ.getClass.getCanonicalName}")
124124
logger.debug(s"maxLength: $maxLengthJ type: ${maxLengthJ.getClass.getCanonicalName}")
125125
logger.debug(s"threshold: $threshold type: ${threshold.getClass.getCanonicalName}")
126-
127-
c.focus.foreach {f => logger.info(s"StringLengthCheckJson: ${f.spaces2}")}
128126
scala.util.Right(StringLengthCheck(column, minLengthJ, maxLengthJ, threshold))
129127
}
130128
}

src/main/scala/com/target/data_validator/validator/StringRegexCheck.scala

Lines changed: 12 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
package com.target.data_validator.validator
22

33
import com.target.data_validator.{JsonEncoders, ValidatorError, VarSubstitution}
4-
import com.target.data_validator.JsonUtils.debugJson
54
import com.target.data_validator.validator.ValidatorBase._
65
import com.typesafe.scalalogging.LazyLogging
76
import io.circe.{DecodingFailure, HCursor, Json}
@@ -12,10 +11,10 @@ import org.apache.spark.sql.catalyst.expressions._
1211
import org.apache.spark.sql.types.{StringType, StructType}
1312

1413
case class StringRegexCheck(
15-
column: String,
16-
regex: Option[Json],
17-
threshold: Option[String]
18-
) extends RowBased {
14+
column: String,
15+
regex: Option[Json],
16+
threshold: Option[String]
17+
) extends RowBased {
1918

2019
override def substituteVariables(dict: VarSubstitution): ValidatorBase = {
2120

@@ -35,13 +34,13 @@ case class StringRegexCheck(
3534
val regexExpression = regex.map { r => RLike(colExp, createLiteralOrUnresolvedAttribute(StringType, r)) }
3635

3736
val ret = regexExpression match {
38-
/*
39-
RLike returns false if the column value is null.
40-
To avoid counting null values as validation failures (like other validations),
41-
an explicit non null check on the column value is required.
42-
*/
43-
case Some(x) => And(Not(x), IsNotNull(colExp))
44-
case _ => throw new RuntimeException("Must define a regex.")
37+
/*
38+
RLike returns false if the column value is null.
39+
To avoid counting null values as validation failures (like other validations),
40+
an explicit non null check on the column value is required.
41+
*/
42+
case Some(x) => And(Not(x), IsNotNull(colExp))
43+
case _ => throw new RuntimeException("Must define a regex.")
4544
}
4645
logger.debug(s"Expr: $ret")
4746
ret
@@ -59,7 +58,7 @@ case class StringRegexCheck(
5958
val colType = findColumnInDataFrame(df, column)
6059
if (colType.isDefined) {
6160
val dataType = colType.get.dataType
62-
if (!(dataType.isInstanceOf[StringType])) {
61+
if (!dataType.isInstanceOf[StringType]) {
6362
addEvent(ValidatorError(s"Data type of column '$column' must be String, but was found to be $dataType"))
6463
}
6564
}
@@ -90,8 +89,6 @@ object StringRegexCheck extends LazyLogging {
9089
logger.debug(s"column: $column")
9190
logger.debug(s"regex: $regex type: ${regex.getClass.getCanonicalName}")
9291
logger.debug(s"threshold: $threshold type: ${threshold.getClass.getCanonicalName}")
93-
94-
c.focus.foreach {f => logger.info(s"StringRegexCheckJson: ${f.spaces2}")}
9592
scala.util.Right(StringRegexCheck(column, regex, threshold))
9693
}
9794
}

src/test/scala/com/target/data_validator/validator/StringLengthCheckSpec.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -364,7 +364,7 @@ class StringLengthCheckSpec extends FunSpec with Matchers with TestingSparkSessi
364364
assert(config.quickChecks(spark, dict))
365365
assert(sut.failed)
366366
assert(sut.getEvents contains
367-
ValidatorCheckEvent(failure = true, "StringLengthCheck on column 'item'", 4, 2))
367+
ValidatorCheckEvent(failure = true, "StringLengthCheck on column 'item'", 4, 2)) // scalastyle:ignore
368368

369369
assert(sut.getEvents contains
370370
ValidatorQuickCheckError(("item", "Item1") :: Nil, "Item1",
@@ -428,4 +428,4 @@ class StringLengthCheckSpec extends FunSpec with Matchers with TestingSparkSessi
428428
}
429429
}
430430
}
431-
}
431+
}

src/test/scala/com/target/data_validator/validator/StringRegexCheckSpec.scala

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ package com.target.data_validator.validator
33
import com.target.TestingSparkSession
44
import com.target.data_validator._
55
import io.circe.Json
6-
import org.apache.spark.sql.{DataFrame, Row, SparkSession}
6+
import org.apache.spark.sql.Row
77
import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
88
import org.apache.spark.sql.catalyst.expressions._
99
import org.apache.spark.sql.types._
@@ -22,8 +22,8 @@ class StringRegexCheckSpec extends FunSpec with Matchers with TestingSparkSessio
2222
Row("Item1", 2.99),
2323
Row("Item23", 5.35),
2424
Row("I", 1.00),
25-
Row(null, 1.00),
26-
Row(null, 2.00)
25+
Row(null, 1.00), // scalastyle:ignore
26+
Row(null, 2.00) // scalastyle:ignore
2727
)
2828

2929
describe("StringRegexCheck") {
@@ -40,7 +40,7 @@ class StringRegexCheckSpec extends FunSpec with Matchers with TestingSparkSessio
4040

4141
it("error if column is not found in df") {
4242
val df = mkDataFrame(spark, defData, schema)
43-
val sut = StringRegexCheck( "bad_column_name", Some(Json.fromString("I%")), None)
43+
val sut = StringRegexCheck("bad_column_name", Some(Json.fromString("I%")), None)
4444
assert(sut.configCheck(df))
4545
assert(sut.getEvents contains ValidatorError("Column: 'bad_column_name' not found in schema."))
4646
assert(sut.failed)
@@ -50,7 +50,10 @@ class StringRegexCheckSpec extends FunSpec with Matchers with TestingSparkSessio
5050
val df = mkDataFrame(spark, defData, schema)
5151
val sut = StringRegexCheck("baseprice", Some(Json.fromString("I%")), None)
5252
assert(sut.configCheck(df))
53-
assert(sut.getEvents contains ValidatorError("Data type of column 'baseprice' must be String, but was found to be DoubleType"))
53+
assert(
54+
sut.getEvents contains
55+
ValidatorError("Data type of column 'baseprice' must be String, but was found to be DoubleType")
56+
)
5457
assert(sut.failed)
5558
}
5659
}
@@ -71,7 +74,7 @@ class StringRegexCheckSpec extends FunSpec with Matchers with TestingSparkSessio
7174
}
7275

7376
it("substitute with threshold") {
74-
val dict = mkParams(List(("column", "item"), ("regex", "I%"), ("threshold", Json.fromInt(100))))
77+
val dict = mkParams(List(("column", "item"), ("regex", "I%"), ("threshold", Json.fromInt(100)))) // scalastyle:ignore
7578
val sut = StringRegexCheck("$column", Some(Json.fromString("${regex}")), Some("${threshold}"))
7679
assert(sut.substituteVariables(dict) == StringRegexCheck("item", Some(Json.fromString("I%")), Some("100")))
7780
assert(!sut.failed)
@@ -82,8 +85,10 @@ class StringRegexCheckSpec extends FunSpec with Matchers with TestingSparkSessio
8285

8386
it("regex pattern ab%") {
8487
val sut = StringRegexCheck("item", Some(Json.fromString("ab%")), None)
85-
assert(sut.colTest(schema, mkParams()).sql ==
86-
And(Not(RLike(UnresolvedAttribute("item"), Literal.create("ab%", StringType))), IsNotNull(UnresolvedAttribute("item"))).sql)
88+
assert(sut.colTest(schema, mkParams()).sql == And(
89+
Not(RLike(UnresolvedAttribute("item"), Literal.create("ab%", StringType))),
90+
IsNotNull(UnresolvedAttribute("item"))).sql
91+
)
8792
}
8893
}
8994

0 commit comments

Comments
 (0)