Skip to content

Commit 2779df9

Browse files
authored
test(workflow-operator): add unit test coverage for Sklearn linear classifier descriptors (#5941)
### What changes were proposed in this PR?

Pin behavior of four previously-untested Sklearn linear classifier descriptors in `common/workflow-operator`. No production-code changes.

| Spec | Source class | Tests |
| --- | --- | --- |
| `SklearnLogisticRegressionOpDescSpec` | `SklearnLogisticRegressionOpDesc` | 5 |
| `SklearnLogisticRegressionCVOpDescSpec` | `SklearnLogisticRegressionCVOpDesc` | 5 |
| `SklearnPerceptronOpDescSpec` | `SklearnPerceptronOpDesc` | 5 |
| `SklearnPassiveAggressiveOpDescSpec` | `SklearnPassiveAggressiveOpDesc` | 5 |

**Behavior pinned**

| Surface | Contract |
| --- | --- |
| `operatorInfo` | exact model name + `Sklearn <name> Operator` description; Sklearn group; training/testing input ports + one blocking output |
| field defaults | `countVectorizer`/`tfidfTransformer` `false`; `target`/`text` `null` |
| `getOutputSchemas` | `model_name` (STRING) + `model` (BINARY) keyed by the declared output port |
| `generatePythonCode` | imports the matching sklearn estimator and builds the `make_pipeline` model |
| Round-trip | config fields preserved through the polymorphic `LogicalOp` base, with the correct `operatorType` discriminator |

### Any related issues, documentation, discussions?

Part of the ongoing `workflow-operator` unit-test coverage effort (follow-up to the Sklearn Naive Bayes coverage in #5925).

### How was this PR tested?

- `sbt "WorkflowOperator/testOnly *SklearnLogisticRegressionOpDescSpec *SklearnLogisticRegressionCVOpDescSpec *SklearnPerceptronOpDescSpec *SklearnPassiveAggressiveOpDescSpec"` — 20 tests, all green
- `sbt "WorkflowOperator/Test/scalafmtCheck"` and `sbt "WorkflowOperator/scalafixAll --check"` — clean
- CI to confirm

### Was this PR authored or co-authored using generative AI tooling?

Generated-by: Claude Code (Opus 4.8 [1M context])
1 parent 43ca4b2 commit 2779df9

4 files changed

Lines changed: 316 additions & 0 deletions

File tree

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.apache.texera.amber.operator.sklearn
21+
22+
import org.apache.texera.amber.core.tuple.AttributeType
23+
import org.apache.texera.amber.operator.LogicalOp
24+
import org.apache.texera.amber.operator.metadata.OperatorGroupConstants
25+
import org.apache.texera.amber.util.JSONUtils.objectMapper
26+
import org.scalatest.flatspec.AnyFlatSpec
27+
import org.scalatest.matchers.should.Matchers
28+
29+
/**
  * Behavior tests for [[SklearnLogisticRegressionCVOpDesc]].
  *
  * Covers the operator metadata, the initial values of the config fields, the
  * output schema, the generated Python source, and a serialize/deserialize
  * round-trip through the [[LogicalOp]] base type.
  */
class SklearnLogisticRegressionCVOpDescSpec extends AnyFlatSpec with Matchers {

  "SklearnLogisticRegressionCVOpDesc.operatorInfo" should
    "advertise the model name, Sklearn group, and the training/testing port shape" in {
    val opInfo = new SklearnLogisticRegressionCVOpDesc().operatorInfo
    val inputPortNames = opInfo.inputPorts.map(port => port.displayName)
    val outputs = opInfo.outputPorts

    opInfo.userFriendlyName shouldBe "Logistic Regression Cross Validation"
    opInfo.operatorDescription shouldBe "Sklearn Logistic Regression Cross Validation Operator"
    opInfo.operatorGroupName shouldBe OperatorGroupConstants.SKLEARN_GROUP
    inputPortNames shouldBe List("training", "testing")
    outputs should have length 1
    outputs.head.blocking shouldBe true
  }

  "SklearnLogisticRegressionCVOpDesc" should "default its config fields" in {
    val freshDesc = new SklearnLogisticRegressionCVOpDesc

    // Both text-preprocessing switches start disabled.
    freshDesc.countVectorizer shouldBe false
    freshDesc.tfidfTransformer shouldBe false
    // Column selections are unset until configured.
    freshDesc.target shouldBe null
    freshDesc.text shouldBe null
  }

  "SklearnLogisticRegressionCVOpDesc.getOutputSchemas" should
    "emit the model_name/model schema keyed by the declared output port" in {
    val desc = new SklearnLogisticRegressionCVOpDesc
    val schemasByPort = desc.getOutputSchemas(Map.empty)
    val declaredPortId = desc.operatorInfo.outputPorts.head.id
    val outputSchema = schemasByPort(declaredPortId)

    outputSchema.getAttribute("model_name").getType shouldBe AttributeType.STRING
    outputSchema.getAttribute("model").getType shouldBe AttributeType.BINARY
  }

  "SklearnLogisticRegressionCVOpDesc.generatePythonCode" should
    "import the configured sklearn estimator" in {
    val desc = new SklearnLogisticRegressionCVOpDesc
    desc.target = "y"

    val pythonSource = desc.generatePythonCode()

    pythonSource should include("from sklearn.linear_model import LogisticRegressionCV")
    pythonSource should include("make_pipeline")
    pythonSource should include("Logistic Regression Cross Validation")
  }

  "SklearnLogisticRegressionCVOpDesc" should
    "round-trip its config fields through the polymorphic base" in {
    val original = new SklearnLogisticRegressionCVOpDesc
    original.target = "label"
    original.countVectorizer = true

    // Serialize, and check the type discriminator written into the JSON.
    val serialized = objectMapper.writeValueAsString(original)
    serialized should include("\"operatorType\":\"SklearnLogisticRegressionCV\"")

    // Deserialize against the base type only; the concrete class must be recovered.
    val deserialized = objectMapper.readValue(serialized, classOf[LogicalOp])
    deserialized shouldBe a[SklearnLogisticRegressionCVOpDesc]

    val typed = deserialized.asInstanceOf[SklearnLogisticRegressionCVOpDesc]
    typed.target shouldBe "label"
    typed.countVectorizer shouldBe true
  }
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.apache.texera.amber.operator.sklearn
21+
22+
import org.apache.texera.amber.core.tuple.AttributeType
23+
import org.apache.texera.amber.operator.LogicalOp
24+
import org.apache.texera.amber.operator.metadata.OperatorGroupConstants
25+
import org.apache.texera.amber.util.JSONUtils.objectMapper
26+
import org.scalatest.flatspec.AnyFlatSpec
27+
import org.scalatest.matchers.should.Matchers
28+
29+
/**
  * Behavior tests for [[SklearnLogisticRegressionOpDesc]].
  *
  * Covers the operator metadata, the initial values of the config fields, the
  * output schema, the generated Python source, and a serialize/deserialize
  * round-trip through the [[LogicalOp]] base type.
  */
class SklearnLogisticRegressionOpDescSpec extends AnyFlatSpec with Matchers {

  "SklearnLogisticRegressionOpDesc.operatorInfo" should
    "advertise the model name, Sklearn group, and the training/testing port shape" in {
    val info = (new SklearnLogisticRegressionOpDesc).operatorInfo
    info.userFriendlyName shouldBe "Logistic Regression"
    info.operatorDescription shouldBe "Sklearn Logistic Regression Operator"
    info.operatorGroupName shouldBe OperatorGroupConstants.SKLEARN_GROUP
    info.inputPorts.map(_.displayName) shouldBe List("training", "testing")
    info.outputPorts should have length 1
    info.outputPorts.head.blocking shouldBe true
  }

  "SklearnLogisticRegressionOpDesc" should "default its config fields" in {
    val d = new SklearnLogisticRegressionOpDesc
    d.countVectorizer shouldBe false
    d.tfidfTransformer shouldBe false
    d.target shouldBe null
    d.text shouldBe null
  }

  "SklearnLogisticRegressionOpDesc.getOutputSchemas" should
    "emit the model_name/model schema keyed by the declared output port" in {
    val d = new SklearnLogisticRegressionOpDesc
    val schema = d.getOutputSchemas(Map.empty)(d.operatorInfo.outputPorts.head.id)
    schema.getAttribute("model_name").getType shouldBe AttributeType.STRING
    schema.getAttribute("model").getType shouldBe AttributeType.BINARY
  }

  "SklearnLogisticRegressionOpDesc.generatePythonCode" should
    "import the configured sklearn estimator" in {
    val d = new SklearnLogisticRegressionOpDesc
    d.target = "y"
    val code = d.generatePythonCode()
    code should include("from sklearn.linear_model import LogisticRegression")
    // The positive check above is a substring match, and "LogisticRegression" is a
    // prefix of "LogisticRegressionCV". Reject the CV import explicitly so this test
    // fails if the descriptor emits the cross-validation estimator by mistake.
    code should not include("from sklearn.linear_model import LogisticRegressionCV")
    code should include("make_pipeline")
    code should include("Logistic Regression")
  }

  "SklearnLogisticRegressionOpDesc" should
    "round-trip its config fields through the polymorphic base" in {
    val d = new SklearnLogisticRegressionOpDesc
    d.target = "label"
    d.countVectorizer = true
    val json = objectMapper.writeValueAsString(d)
    // The closing quote after the type name keeps this from matching the CV discriminator.
    json should include("\"operatorType\":\"SklearnLogisticRegression\"")
    // Deserialize against the base type only; the concrete class must be recovered.
    val restored = objectMapper.readValue(json, classOf[LogicalOp])
    restored shouldBe a[SklearnLogisticRegressionOpDesc]
    val r = restored.asInstanceOf[SklearnLogisticRegressionOpDesc]
    r.target shouldBe "label"
    r.countVectorizer shouldBe true
  }
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.apache.texera.amber.operator.sklearn
21+
22+
import org.apache.texera.amber.core.tuple.AttributeType
23+
import org.apache.texera.amber.operator.LogicalOp
24+
import org.apache.texera.amber.operator.metadata.OperatorGroupConstants
25+
import org.apache.texera.amber.util.JSONUtils.objectMapper
26+
import org.scalatest.flatspec.AnyFlatSpec
27+
import org.scalatest.matchers.should.Matchers
28+
29+
/**
  * Behavior tests for [[SklearnPassiveAggressiveOpDesc]].
  *
  * Covers the operator metadata, the initial values of the config fields, the
  * output schema, the generated Python source, and a serialize/deserialize
  * round-trip through the [[LogicalOp]] base type.
  */
class SklearnPassiveAggressiveOpDescSpec extends AnyFlatSpec with Matchers {

  "SklearnPassiveAggressiveOpDesc.operatorInfo" should
    "advertise the model name, Sklearn group, and the training/testing port shape" in {
    val metadata = new SklearnPassiveAggressiveOpDesc().operatorInfo
    val inputNames = metadata.inputPorts.map(port => port.displayName)
    val outputPorts = metadata.outputPorts

    metadata.userFriendlyName shouldBe "Passive Aggressive"
    metadata.operatorDescription shouldBe "Sklearn Passive Aggressive Operator"
    metadata.operatorGroupName shouldBe OperatorGroupConstants.SKLEARN_GROUP
    inputNames shouldBe List("training", "testing")
    outputPorts should have length 1
    outputPorts.head.blocking shouldBe true
  }

  "SklearnPassiveAggressiveOpDesc" should "default its config fields" in {
    val untouched = new SklearnPassiveAggressiveOpDesc

    // Both text-preprocessing switches start disabled.
    untouched.countVectorizer shouldBe false
    untouched.tfidfTransformer shouldBe false
    // Column selections are unset until configured.
    untouched.target shouldBe null
    untouched.text shouldBe null
  }

  "SklearnPassiveAggressiveOpDesc.getOutputSchemas" should
    "emit the model_name/model schema keyed by the declared output port" in {
    val opDesc = new SklearnPassiveAggressiveOpDesc
    val allSchemas = opDesc.getOutputSchemas(Map.empty)
    val portId = opDesc.operatorInfo.outputPorts.head.id
    val modelSchema = allSchemas(portId)

    modelSchema.getAttribute("model_name").getType shouldBe AttributeType.STRING
    modelSchema.getAttribute("model").getType shouldBe AttributeType.BINARY
  }

  "SklearnPassiveAggressiveOpDesc.generatePythonCode" should
    "import the configured sklearn estimator" in {
    val opDesc = new SklearnPassiveAggressiveOpDesc
    opDesc.target = "y"

    val generated = opDesc.generatePythonCode()

    generated should include("from sklearn.linear_model import PassiveAggressiveClassifier")
    generated should include("make_pipeline")
    generated should include("Passive Aggressive")
  }

  "SklearnPassiveAggressiveOpDesc" should
    "round-trip its config fields through the polymorphic base" in {
    val source = new SklearnPassiveAggressiveOpDesc
    source.target = "label"
    source.countVectorizer = true

    // Serialize, and check the type discriminator written into the JSON.
    val payload = objectMapper.writeValueAsString(source)
    payload should include("\"operatorType\":\"SklearnPassiveAggressive\"")

    // Deserialize against the base type only; the concrete class must be recovered.
    val decoded = objectMapper.readValue(payload, classOf[LogicalOp])
    decoded shouldBe a[SklearnPassiveAggressiveOpDesc]

    val concrete = decoded.asInstanceOf[SklearnPassiveAggressiveOpDesc]
    concrete.target shouldBe "label"
    concrete.countVectorizer shouldBe true
  }
}
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.apache.texera.amber.operator.sklearn
21+
22+
import org.apache.texera.amber.core.tuple.AttributeType
23+
import org.apache.texera.amber.operator.LogicalOp
24+
import org.apache.texera.amber.operator.metadata.OperatorGroupConstants
25+
import org.apache.texera.amber.util.JSONUtils.objectMapper
26+
import org.scalatest.flatspec.AnyFlatSpec
27+
import org.scalatest.matchers.should.Matchers
28+
29+
/**
  * Behavior tests for [[SklearnPerceptronOpDesc]].
  *
  * Covers the operator metadata, the initial values of the config fields, the
  * output schema, the generated Python source, and a serialize/deserialize
  * round-trip through the [[LogicalOp]] base type.
  */
class SklearnPerceptronOpDescSpec extends AnyFlatSpec with Matchers {

  "SklearnPerceptronOpDesc.operatorInfo" should
    "advertise the model name, Sklearn group, and the training/testing port shape" in {
    val described = new SklearnPerceptronOpDesc().operatorInfo
    val portLabels = described.inputPorts.map(port => port.displayName)
    val outs = described.outputPorts

    described.userFriendlyName shouldBe "Linear Perceptron"
    described.operatorDescription shouldBe "Sklearn Linear Perceptron Operator"
    described.operatorGroupName shouldBe OperatorGroupConstants.SKLEARN_GROUP
    portLabels shouldBe List("training", "testing")
    outs should have length 1
    outs.head.blocking shouldBe true
  }

  "SklearnPerceptronOpDesc" should "default its config fields" in {
    val pristine = new SklearnPerceptronOpDesc

    // Both text-preprocessing switches start disabled.
    pristine.countVectorizer shouldBe false
    pristine.tfidfTransformer shouldBe false
    // Column selections are unset until configured.
    pristine.target shouldBe null
    pristine.text shouldBe null
  }

  "SklearnPerceptronOpDesc.getOutputSchemas" should
    "emit the model_name/model schema keyed by the declared output port" in {
    val perceptron = new SklearnPerceptronOpDesc
    val schemaMap = perceptron.getOutputSchemas(Map.empty)
    val outPortId = perceptron.operatorInfo.outputPorts.head.id
    val emitted = schemaMap(outPortId)

    emitted.getAttribute("model_name").getType shouldBe AttributeType.STRING
    emitted.getAttribute("model").getType shouldBe AttributeType.BINARY
  }

  "SklearnPerceptronOpDesc.generatePythonCode" should
    "import the configured sklearn estimator" in {
    val perceptron = new SklearnPerceptronOpDesc
    perceptron.target = "y"

    val script = perceptron.generatePythonCode()

    script should include("from sklearn.linear_model import Perceptron")
    script should include("make_pipeline")
    script should include("Linear Perceptron")
  }

  "SklearnPerceptronOpDesc" should
    "round-trip its config fields through the polymorphic base" in {
    val before = new SklearnPerceptronOpDesc
    before.target = "label"
    before.countVectorizer = true

    // Serialize, and check the type discriminator written into the JSON.
    val wire = objectMapper.writeValueAsString(before)
    wire should include("\"operatorType\":\"SklearnPerceptron\"")

    // Deserialize against the base type only; the concrete class must be recovered.
    val after = objectMapper.readValue(wire, classOf[LogicalOp])
    after shouldBe a[SklearnPerceptronOpDesc]

    val narrowed = after.asInstanceOf[SklearnPerceptronOpDesc]
    narrowed.target shouldBe "label"
    narrowed.countVectorizer shouldBe true
  }
}

0 commit comments

Comments
 (0)