diff --git a/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/AIClassificationEMTestBase.kt b/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/AIClassificationEMTestBase.kt index ce5d590bf3..51e158eb7c 100644 --- a/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/AIClassificationEMTestBase.kt +++ b/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/AIClassificationEMTestBase.kt @@ -30,7 +30,6 @@ abstract class AIClassificationEMTestBase : SpringTestBase(){ ): EvaluatedIndividual { val sampler = injector.getInstance(AbstractRestSampler::class.java) - val ind = sampler.createIndividual(sampleT, actions.toMutableList()) val ff = injector.getInstance(AbstractRestFitness::class.java) @@ -43,30 +42,42 @@ abstract class AIClassificationEMTestBase : SpringTestBase(){ injector: Injector, ok2xx: List, fail400: List, - threshold: Double = injector.getInstance(EMConfig::class.java).classificationRepairThreshold + repairThreshold: Double = injector.getInstance(EMConfig::class.java).classificationRepairThreshold, + randomPerformanceThreshold: Double = 0.50 ) { val model = injector.getInstance(AIResponseClassifier::class.java) model.disableLearning() // no side-effects - for(ok in ok2xx){ - val resOK = evaluateAction(injector, ok) - assertTrue(resOK.getStatusCode() in 200..299) - val mOK= model.classify(ok) - assertTrue( - mOK.probabilityOf400() < threshold, - "Too high probability of 400 for OK ${ok.getName()}: ${mOK.probabilityOf400()}") + var correctPrediction = 0 + // 400 + for (fail in fail400) { + val result = evaluateAction(injector, fail) + assertEquals(400, result.getStatusCode()) + + val probability = model.classify(fail).probabilityOf400() + if (probability >= 
repairThreshold) { + correctPrediction++ + } } + // 2xx + for (ok in ok2xx) { + val result = evaluateAction(injector, ok) + assertTrue(result.getStatusCode() in 200..299) - for(fail in fail400) { - val resFail = evaluateAction(injector, fail) - assertEquals(400, resFail.getStatusCode()) - val mFail = model.classify(fail) - assertTrue( - mFail.probabilityOf400() >= threshold, - "Too low probability of 400 for Fail ${fail.getName()}: ${mFail.probabilityOf400()}" - ) + val probability = model.classify(ok).probabilityOf400() + if (probability < repairThreshold) { + correctPrediction++ + } } - } -} + val totalSize = ok2xx.size + fail400.size + val accuracy = + if (totalSize > 0) correctPrediction.toDouble() / totalSize else 0.0 + + assertTrue( + accuracy > randomPerformanceThreshold, + "Too low total accuracy: $accuracy" + ) + } +} \ No newline at end of file diff --git a/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/allornone/ACAllOrNoneEMTest.kt b/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/allornone/ACAllOrNoneEMTest.kt index f8192f790d..8133e553e9 100644 --- a/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/allornone/ACAllOrNoneEMTest.kt +++ b/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/allornone/ACAllOrNoneEMTest.kt @@ -57,7 +57,20 @@ class ACAllOrNoneEMTest : AIClassificationEMTestBase() { testRunEM(AIResponseClassifierModel.NN) } - private fun testRunEM(model: AIResponseClassifierModel) { + @Test + fun testRunEnsemble(){ + testRunEM( + AIResponseClassifierModel.GAUSSIAN, + AIResponseClassifierModel.GLM, + AIResponseClassifierModel.KDE, + AIResponseClassifierModel.KNN, + AIResponseClassifierModel.NN + ) + } + + private fun testRunEM(vararg models: 
AIResponseClassifierModel) { + + val modelString = models.joinToString(",") { it.name } runTestHandlingFlakyAndCompilation( "ACAllOrNoneEM", @@ -65,7 +78,7 @@ class ACAllOrNoneEMTest : AIClassificationEMTestBase() { ) { args: MutableList -> args.add("--aiModelForResponseClassification") - args.add("$model") + args.add(modelString) val (injector, solution) = initAndDebug(args) diff --git a/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/arithmetic/ACArithmeticEMTest.kt b/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/arithmetic/ACArithmeticEMTest.kt index 588147fff1..51cfea869b 100644 --- a/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/arithmetic/ACArithmeticEMTest.kt +++ b/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/arithmetic/ACArithmeticEMTest.kt @@ -57,7 +57,20 @@ class ACArithmeticEMTest : AIClassificationEMTestBase() { testRunEM(AIResponseClassifierModel.NN) } - private fun testRunEM(model: AIResponseClassifierModel) { + @Test + fun testRunEnsemble(){ + testRunEM( + AIResponseClassifierModel.GAUSSIAN, + AIResponseClassifierModel.GLM, + AIResponseClassifierModel.KDE, + AIResponseClassifierModel.KNN, + AIResponseClassifierModel.NN + ) + } + + private fun testRunEM(vararg models: AIResponseClassifierModel) { + + val modelString = models.joinToString(",") { it.name } runTestHandlingFlakyAndCompilation( "ACArithmeticEM", @@ -65,7 +78,7 @@ class ACArithmeticEMTest : AIClassificationEMTestBase() { ) { args: MutableList -> args.add("--aiModelForResponseClassification") - args.add("$model") + args.add(modelString) val (injector, solution) = initAndDebug(args) diff --git 
a/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/basic/ACBasicEMTest.kt b/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/basic/ACBasicEMTest.kt index 56480e4490..0fbde80f4a 100644 --- a/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/basic/ACBasicEMTest.kt +++ b/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/basic/ACBasicEMTest.kt @@ -21,6 +21,7 @@ class ACBasicEMTest : AIClassificationEMTestBase() { } } + @Disabled @Test fun testRunDeterministic(){ testRunEM(AIResponseClassifierModel.DETERMINISTIC) @@ -56,7 +57,20 @@ class ACBasicEMTest : AIClassificationEMTestBase() { testRunEM(AIResponseClassifierModel.NN) } - private fun testRunEM(model: AIResponseClassifierModel) { + @Test + fun testRunEnsemble(){ + testRunEM( + AIResponseClassifierModel.GAUSSIAN, + AIResponseClassifierModel.GLM, + AIResponseClassifierModel.KDE, + AIResponseClassifierModel.KNN, + AIResponseClassifierModel.NN + ) + } + + private fun testRunEM(vararg models: AIResponseClassifierModel) { + + val modelString = models.joinToString(",") { it.name } runTestHandlingFlakyAndCompilation( "ACBasicEM", @@ -64,7 +78,7 @@ class ACBasicEMTest : AIClassificationEMTestBase() { ) { args: MutableList -> args.add("--aiModelForResponseClassification") - args.add("$model") + args.add(modelString) val (injector, solution) = initAndDebug(args) diff --git a/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/imply/ACImplyEMTest.kt b/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/imply/ACImplyEMTest.kt index d6edcff61e..a1ac93f2be 100644 --- 
a/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/imply/ACImplyEMTest.kt +++ b/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/imply/ACImplyEMTest.kt @@ -65,7 +65,20 @@ class ACImplyEMTest : AIClassificationEMTestBase() { testRunEM(AIResponseClassifierModel.NN) } - private fun testRunEM(model: AIResponseClassifierModel) { + @Test + fun testRunEnsemble(){ + testRunEM( + AIResponseClassifierModel.GAUSSIAN, + AIResponseClassifierModel.GLM, + AIResponseClassifierModel.KDE, + AIResponseClassifierModel.KNN, + AIResponseClassifierModel.NN + ) + } + + private fun testRunEM(vararg models: AIResponseClassifierModel) { + + val modelString = models.joinToString(",") { it.name } runTestHandlingFlakyAndCompilation( "ACImplyEM", @@ -73,7 +86,7 @@ class ACImplyEMTest : AIClassificationEMTestBase() { ) { args: MutableList -> args.add("--aiModelForResponseClassification") - args.add("$model") + args.add(modelString) val (injector, solution) = initAndDebug(args) diff --git a/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/mixed/ACMixedEMTest.kt b/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/mixed/ACMixedEMTest.kt index 44c36ee82b..4048613d9d 100644 --- a/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/mixed/ACMixedEMTest.kt +++ b/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/mixed/ACMixedEMTest.kt @@ -66,7 +66,20 @@ class ACMixedEMTest : AIClassificationEMTestBase() { testRunEM(AIResponseClassifierModel.NN) } - private fun testRunEM(model: AIResponseClassifierModel) { + @Test + fun testRunEnsemble(){ + testRunEM( + 
AIResponseClassifierModel.GAUSSIAN, + AIResponseClassifierModel.GLM, + AIResponseClassifierModel.KDE, + AIResponseClassifierModel.KNN, + AIResponseClassifierModel.NN + ) + } + + private fun testRunEM(vararg models: AIResponseClassifierModel) { + + val modelString = models.joinToString(",") { it.name } runTestHandlingFlakyAndCompilation( "ACMixedEM", @@ -74,7 +87,7 @@ class ACMixedEMTest : AIClassificationEMTestBase() { ) { args: MutableList -> args.add("--aiModelForResponseClassification") - args.add("$model") + args.add(modelString) val (injector, solution) = initAndDebug(args) diff --git a/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/onlyone/ACOnlyOneEMTest.kt b/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/onlyone/ACOnlyOneEMTest.kt index 1b87a2a099..11d5cbd911 100644 --- a/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/onlyone/ACOnlyOneEMTest.kt +++ b/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/onlyone/ACOnlyOneEMTest.kt @@ -63,7 +63,20 @@ class ACOnlyOneEMTest : AIClassificationEMTestBase() { testRunEM(AIResponseClassifierModel.NN) } - private fun testRunEM(model: AIResponseClassifierModel) { + @Test + fun testRunEnsemble(){ + testRunEM( + AIResponseClassifierModel.GAUSSIAN, + AIResponseClassifierModel.GLM, + AIResponseClassifierModel.KDE, + AIResponseClassifierModel.KNN, + AIResponseClassifierModel.NN + ) + } + + private fun testRunEM(vararg models: AIResponseClassifierModel) { + + val modelString = models.joinToString(",") { it.name } runTestHandlingFlakyAndCompilation( "ACOnlyOneEM", @@ -71,7 +84,7 @@ class ACOnlyOneEMTest : AIClassificationEMTestBase() { ) { args: MutableList -> args.add("--aiModelForResponseClassification") - 
args.add("$model") + args.add(modelString) val (injector, solution) = initAndDebug(args) diff --git a/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/or/ACOrEMTest.kt b/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/or/ACOrEMTest.kt index 59e1ee7b2d..63da9e70fa 100644 --- a/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/or/ACOrEMTest.kt +++ b/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/or/ACOrEMTest.kt @@ -62,7 +62,20 @@ class ACOrEMTest : AIClassificationEMTestBase() { testRunEM(AIResponseClassifierModel.NN) } - private fun testRunEM(model: AIResponseClassifierModel) { + @Test + fun testRunEnsemble(){ + testRunEM( + AIResponseClassifierModel.GAUSSIAN, + AIResponseClassifierModel.GLM, + AIResponseClassifierModel.KDE, + AIResponseClassifierModel.KNN, + AIResponseClassifierModel.NN + ) + } + + private fun testRunEM(vararg models: AIResponseClassifierModel) { + + val modelString = models.joinToString(",") { it.name } runTestHandlingFlakyAndCompilation( "ACOrEM", @@ -70,7 +83,7 @@ class ACOrEMTest : AIClassificationEMTestBase() { ) { args: MutableList -> args.add("--aiModelForResponseClassification") - args.add("$model") + args.add(modelString) val (injector, solution) = initAndDebug(args) diff --git a/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/required/ACRequiredEMTest.kt b/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/required/ACRequiredEMTest.kt index 4ea48f3823..4b7744ff53 100644 --- 
a/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/required/ACRequiredEMTest.kt +++ b/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/required/ACRequiredEMTest.kt @@ -61,7 +61,20 @@ class ACRequiredEMTest : AIClassificationEMTestBase() { testRunEM(AIResponseClassifierModel.NN) } - private fun testRunEM(model: AIResponseClassifierModel) { + @Test + fun testRunEnsemble(){ + testRunEM( + AIResponseClassifierModel.GAUSSIAN, + AIResponseClassifierModel.GLM, + AIResponseClassifierModel.KDE, + AIResponseClassifierModel.KNN, + AIResponseClassifierModel.NN + ) + } + + private fun testRunEM(vararg models: AIResponseClassifierModel) { + + val modelString = models.joinToString(",") { it.name } runTestHandlingFlakyAndCompilation( "ACRequiredEM", @@ -69,7 +82,7 @@ class ACRequiredEMTest : AIClassificationEMTestBase() { ) { args: MutableList -> args.add("--aiModelForResponseClassification") - args.add("$model") + args.add(modelString) val (injector, solution) = initAndDebug(args) diff --git a/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/zeroorone/ACZeroOrOneEMTest.kt b/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/zeroorone/ACZeroOrOneEMTest.kt index 1d4eb762ad..eb9e234d9a 100644 --- a/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/zeroorone/ACZeroOrOneEMTest.kt +++ b/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/zeroorone/ACZeroOrOneEMTest.kt @@ -65,7 +65,20 @@ class ACZeroOrOneEMTest : AIClassificationEMTestBase() { testRunEM(AIResponseClassifierModel.NN) } - private fun testRunEM(model: AIResponseClassifierModel) { + 
@Test + fun testRunEnsemble(){ + testRunEM( + AIResponseClassifierModel.GAUSSIAN, + AIResponseClassifierModel.GLM, + AIResponseClassifierModel.KDE, + AIResponseClassifierModel.KNN, + AIResponseClassifierModel.NN + ) + } + + private fun testRunEM(vararg models: AIResponseClassifierModel) { + + val modelString = models.joinToString(",") { it.name } runTestHandlingFlakyAndCompilation( "ACZeroOrOneEM", @@ -73,7 +86,7 @@ class ACZeroOrOneEMTest : AIClassificationEMTestBase() { ) { args: MutableList -> args.add("--aiModelForResponseClassification") - args.add("$model") + args.add(modelString) val (injector, solution) = initAndDebug(args) diff --git a/core-tests/integration-tests/core-it/src/test/kotlin/org/evomaster/core/problem/rest/aiclassification/AIModelsCheck.kt b/core-tests/integration-tests/core-it/src/test/kotlin/org/evomaster/core/problem/rest/aiclassification/AIModelsCheck.kt index 2b488e4d7a..cbeae79ae3 100644 --- a/core-tests/integration-tests/core-it/src/test/kotlin/org/evomaster/core/problem/rest/aiclassification/AIModelsCheck.kt +++ b/core-tests/integration-tests/core-it/src/test/kotlin/org/evomaster/core/problem/rest/aiclassification/AIModelsCheck.kt @@ -2,14 +2,10 @@ package org.evomaster.core.problem.rest.aiclassification import bar.examples.it.spring.aiclassification.allornone.AllOrNoneController import com.google.inject.Inject +import org.evomaster.core.EMConfig import org.evomaster.core.problem.enterprise.SampleType import org.evomaster.core.problem.rest.IntegrationTestRestBase -import org.evomaster.core.problem.rest.data.RestCallAction import org.evomaster.core.problem.rest.builder.RestActionBuilderV3 -import org.evomaster.core.problem.rest.schema.RestSchema -import org.evomaster.core.EMConfig -import org.evomaster.core.problem.rest.classifier.quantifier.ModelMetricsFullHistory -import org.evomaster.core.problem.rest.classifier.quantifier.ModelMetricsWithTimeWindow import 
org.evomaster.core.problem.rest.classifier.probabilistic.InputEncoderUtilWrapper import org.evomaster.core.problem.rest.classifier.probabilistic.gaussian.Gaussian400Classifier import org.evomaster.core.problem.rest.classifier.probabilistic.gaussian.Gaussian400EndpointModel @@ -21,40 +17,33 @@ import org.evomaster.core.problem.rest.classifier.probabilistic.knn.KNN400Classi import org.evomaster.core.problem.rest.classifier.probabilistic.knn.KNN400EndpointModel import org.evomaster.core.problem.rest.classifier.probabilistic.nn.NN400Classifier import org.evomaster.core.problem.rest.classifier.probabilistic.nn.NN400EndpointModel +import org.evomaster.core.problem.rest.classifier.quantifier.ModelMetricsFullHistory +import org.evomaster.core.problem.rest.classifier.quantifier.ModelMetricsWithTimeWindow +import org.evomaster.core.problem.rest.data.RestCallAction import org.evomaster.core.problem.rest.schema.OpenApiAccess +import org.evomaster.core.problem.rest.schema.RestSchema import org.evomaster.core.problem.rest.service.AIResponseClassifier import org.evomaster.core.problem.rest.service.sampler.AbstractRestSampler import org.evomaster.core.search.action.Action import org.evomaster.core.search.service.Randomness - class AIModelsCheck : IntegrationTestRestBase() { companion object { @JvmStatic fun init() { -// initClass(MultiTypeController()) - - initClass(AllOrNoneController()) -// initClass(ArithmeticController()) -// initClass(BasicController()) -// initClass(ImplyController()) -// initClass(MixedController()) -// initClass(OnlyOneController()) -// initClass(OrController()) -// initClass(RequiredController()) -// initClass(ZeroOrOneController()) + initClass(AllOrNoneController()) } @JvmStatic fun main(args: Array) { val test = AIModelsCheck() - init() // initialize controllers - test.initializeTest() // create injector + init() + test.initializeTest() - test.injector.injectMembers(test) // injects config, classifier, etc. 
+ test.injector.injectMembers(test) - test.setup(test.modelName) // setup based on the config + test.setup(test.modelName) test.runClassifierExample() } @@ -74,26 +63,30 @@ class AIModelsCheck : IntegrationTestRestBase() { @Inject lateinit var randomness: Randomness - fun initializeTest() { - recreateInjectorForWhite(listOf("--aiModelForResponseClassification", "$modelName")) - } - @Inject lateinit var config: EMConfig + @Inject + lateinit var aiGlobalClassifier: AIResponseClassifier + + fun initializeTest() { + recreateInjectorForWhite(listOf("--aiModelForResponseClassification", modelName)) + } + fun setup(modelName: String) { - config.aiModelForResponseClassification = EMConfig.AIResponseClassifierModel.valueOf(modelName) + config.setAIModels(EMConfig.AIResponseClassifierModel.valueOf(modelName)) config.aiEncoderType = EMConfig.EncoderType.valueOf(encoderType) - config.aiClassifierRepairActivation = EMConfig.AIClassificationRepairActivation.valueOf(decisionMaking) - config.aIClassificationMetrics = EMConfig.AIClassificationMetrics.valueOf(metricType) + config.aiClassifierRepairActivation = + EMConfig.AIClassificationRepairActivation.valueOf(decisionMaking) + config.aIClassificationMetrics = + EMConfig.AIClassificationMetrics.valueOf(metricType) config.aiResponseClassifierWarmup = warmUpRep config.maxRepairAttemptsInResponseClassification = maxAttemptRepair } - @Inject - lateinit var aiGlobalClassifier: AIResponseClassifier - - fun repairAction(call: RestCallAction) {call.randomize(randomness, true) } + fun repairAction(call: RestCallAction) { + call.randomize(randomness, true) + } fun runClassifierExample() { val schema = OpenApiAccess.getOpenAPIFromLocation("$baseUrlOfSut/v3/api-docs") @@ -101,7 +94,13 @@ class AIModelsCheck : IntegrationTestRestBase() { val options = RestActionBuilderV3.Options(config) val actionCluster = mutableMapOf() - RestActionBuilderV3.addActionsFromSwagger(restSchema, actionCluster, options = options) + + 
RestActionBuilderV3.addActionsFromSwagger( + restSchema, + actionCluster, + options = options + ) + val actionList = actionCluster.values.filterIsInstance() val random = Randomness() @@ -120,19 +119,30 @@ class AIModelsCheck : IntegrationTestRestBase() { val geneValues = sampledAction.parameters .map { it.primaryGene().getValueAsRawString().replace("EVOMASTER", "") } + println("Input Genes: ${geneValues.joinToString(", ")}") println("Genes Size: ${geneValues.size}") val individual = - sampler.createIndividual(SampleType.RANDOM, listOf(sampledAction).toMutableList()) + sampler.createIndividual( + SampleType.RANDOM, + listOf(sampledAction).toMutableList() + ) + val action = individual.seeMainExecutableActions()[0] - //print gene types - val encoder = InputEncoderUtilWrapper(action, encoderType = config.aiEncoderType) - println("Expanded genes are: " + - encoder.endPointToGeneList() - .joinToString(", ") { ng -> - "${ng.gene.name}:${ng.gene::class.simpleName ?: "Unknown"}" }) + val encoder = InputEncoderUtilWrapper( + action, + encoderType = config.aiEncoderType + ) + + println( + "Expanded genes are: " + + encoder.endPointToGeneList() + .joinToString(", ") { ng -> + "${ng.gene.name}:${ng.gene::class.simpleName ?: "Unknown"}" + } + ) if (encoder.areAllGenesUnSupported()) { println("Skipping classification for $endPoint as all its genes are unsupported.") @@ -143,77 +153,108 @@ class AIModelsCheck : IntegrationTestRestBase() { println("Encoded features: ${inputVector.joinToString(", ")}") println("Input vector size: ${inputVector.size}") - // Warm-up - val innerModel = aiGlobalClassifier.viewInnerModel() - println("innerModel is ${innerModel.javaClass.simpleName ?: "Unknown"}") - val endpointModel = when(innerModel) { - is Gaussian400Classifier -> innerModel.getModel(endPoint) - is GLM400Classifier -> innerModel.getModel(endPoint) - is KDE400Classifier -> innerModel.getModel(endPoint) - is KNN400Classifier -> innerModel.getModel(endPoint) - is NN400Classifier -> 
innerModel.getModel(endPoint) - else -> throw IllegalArgumentException("Unsupported model: $modelName") - } - - endpointModel?.let { - val metrics = it.modelMetrics - when (metrics) { - is ModelMetricsWithTimeWindow -> ExtraTools.printModelMetrics("${it.javaClass.simpleName}", metrics) - is ModelMetricsFullHistory -> ExtraTools.printModelMetrics("${it.javaClass.simpleName}", metrics) - else -> throw IllegalArgumentException("Unsupported metrics type: ${metrics::class.simpleName}") - } - } + printEndpointModels(endPoint) val metrics = aiGlobalClassifier.estimateMetrics(action.endpoint) - //Execute the action if the classifier is still weak - if(!(metrics.accuracy > 0.5 && metrics.f1Score400 > 0.5)){ - + if (!(metrics.accuracy > 0.5 && metrics.f1Score400 > 0.5)) { println("The classifier is weak for $endPoint") - val result = ExtraTools.executeRestCallAction(action, "$baseUrlOfSut") + + val result = ExtraTools.executeRestCallAction(action, baseUrlOfSut) println("True Response: ${result.getStatusCode()}") println("Updating the classifier!") aiGlobalClassifier.updateModel(action, result) - }else{ - + } else { println("The classifier is good enough for $endPoint") + val n = config.maxRepairAttemptsInResponseClassification for (j in 0 until n) { val classification = aiGlobalClassifier.classify(action) val p = classification.probabilityOf400() - // Stop attempts to repair if the classifier predicts a non-400 response val predictionOfStatusCode = classification.prediction() - if (predictionOfStatusCode==400){ - val repairOrNot = when(config.aiClassifierRepairActivation){ + if (predictionOfStatusCode == 400) { + val repairOrNot = when (config.aiClassifierRepairActivation) { EMConfig.AIClassificationRepairActivation.THRESHOLD -> p >= config.classificationRepairThreshold EMConfig.AIClassificationRepairActivation.PROBABILITY -> randomness.nextBoolean(p) - } - if(repairOrNot){ - repairAction(action) // identical to create a new action based on resampling + if (repairOrNot) { + 
repairAction(action) } else { - break //break the repeat + break } - }else{ - break //break the repeat + } else { + break } } - val result = ExtraTools.executeRestCallAction(action, "$baseUrlOfSut") + val result = ExtraTools.executeRestCallAction(action, baseUrlOfSut) println("True Response: ${result.getStatusCode()}") println("Updating the classifier!") aiGlobalClassifier.updateModel(action, result) + } + printEndpointModels(endPoint) + } + + val overAllMetrics = aiGlobalClassifier.estimateOverallMetrics() + + println("Overall Accuracy: ${overAllMetrics.accuracy}") + println("Overall Precision400: ${overAllMetrics.precision400}") + println("Overall Recall400: ${overAllMetrics.sensitivity400}") + println("Overall F1Score400: ${overAllMetrics.f1Score400}") + println("Overall MCC: ${overAllMetrics.mcc}") + + if (saveReport) { + saveReports() + } + + println("The process is finished!") + } + + private fun printEndpointModels(endPoint: org.evomaster.core.problem.rest.data.Endpoint) { + val innerModels = aiGlobalClassifier.viewInnerModels() + + println("innerModels is ${innerModels.javaClass.simpleName}") + + for (innerModel in innerModels) { + println("innerModel is ${innerModel.javaClass.simpleName}") + + val endpointModel = when (innerModel) { + is Gaussian400Classifier -> innerModel.getModel(endPoint) + is GLM400Classifier -> innerModel.getModel(endPoint) + is KDE400Classifier -> innerModel.getModel(endPoint) + is KNN400Classifier -> innerModel.getModel(endPoint) + is NN400Classifier -> innerModel.getModel(endPoint) + + else -> throw IllegalArgumentException( + "Unsupported model type: ${innerModel::class.simpleName}" + ) + } + + endpointModel?.let { + val metrics = it.modelMetrics + + when (metrics) { + is ModelMetricsWithTimeWindow -> + ExtraTools.printModelMetrics("${it.javaClass.simpleName}", metrics) + + is ModelMetricsFullHistory -> + ExtraTools.printModelMetrics("${it.javaClass.simpleName}", metrics) + + else -> throw IllegalArgumentException( + "Unsupported 
metrics type: ${metrics::class.simpleName}" + ) + } } when (endpointModel) { @@ -221,52 +262,89 @@ class AIModelsCheck : IntegrationTestRestBase() { val d400 = endpointModel.density400!! val dNot400 = endpointModel.densityNot400!! - fun formatStats(name: String, mean: List, variance: List, n: Int) { + fun formatStats( + name: String, + mean: List, + variance: List, + n: Int + ) { val m = mean.map { "%.2f".format(it) } val v = variance.map { "%.2f".format(it) } - println("$name: n=$n, mean=$m, variance=$v * I_${endpointModel.dimension}") + + println( + "$name: n=$n, mean=$m, variance=$v * I_${endpointModel.dimension}" + ) } - formatStats("DensityNot400", dNot400.mean, dNot400.variance, dNot400.n) - formatStats("Density400", d400.mean, d400.variance, d400.n) + + formatStats( + "DensityNot400", + dNot400.mean, + dNot400.variance, + dNot400.n + ) + + formatStats( + "Density400", + d400.mean, + d400.variance, + d400.n + ) } + is GLM400EndpointModel -> { println("Weights and Bias = ${endpointModel.getModelParams()}") } - is KNN400EndpointModel-> { + + is KNN400EndpointModel -> { println("KNN stats: stored ${endpointModel.samples.size} samples") } - is NN400EndpointModel, is KDE400EndpointModel -> { - println("The model is $modelName classifier.") + + is NN400EndpointModel, + is KDE400EndpointModel -> { + println("The model is ${innerModel.javaClass.simpleName}.") + } + + null -> { + println("No endpoint model exists yet for $endPoint.") } } } + } - val overAllMetrics = aiGlobalClassifier.estimateOverallMetrics() - println("Overall Accuracy: ${overAllMetrics.accuracy}") - println("Overall Precision400: ${overAllMetrics.precision400}") - println("Overall Recall400: ${overAllMetrics.sensitivity400}") - println("Overall F1Score400: ${overAllMetrics.f1Score400}") - println("Overall MCC: ${overAllMetrics.mcc}") + private fun saveReports() { + val innerModels = aiGlobalClassifier.viewInnerModels() - // Save the final result as a .txt file - if (saveReport){ - val innerModel = 
aiGlobalClassifier.viewInnerModel() + for (innerModel in innerModels) { val allModels = when (innerModel) { is Gaussian400Classifier -> innerModel.getAllModels() - is GLM400Classifier -> innerModel.getAllModels() - is KDE400Classifier -> innerModel.getAllModels() - is KNN400Classifier -> innerModel.getAllModels() - is NN400Classifier -> innerModel.getAllModels() - else -> throw IllegalArgumentException("Unsupported model: $modelName") + is GLM400Classifier -> innerModel.getAllModels() + is KDE400Classifier -> innerModel.getAllModels() + is KNN400Classifier -> innerModel.getAllModels() + is NN400Classifier -> innerModel.getAllModels() + + else -> throw IllegalArgumentException( + "Unsupported model type: ${innerModel::class.simpleName}" + ) } + + val modelLabel = innerModel.javaClass.simpleName ?: modelName + + val reportPath = + if (innerModels.size == 1) { + filePathReport + } else { + filePathReport.removeSuffix(".txt") + "_$modelLabel.txt" + } + ExtraTools.saveAllMetricsToTxt( allModels, - filePathReport, + reportPath, runIterations, config.aiEncoderType, - modelName) - println("The report is saved!") + modelLabel + ) + + println("The report is saved for $modelLabel!") } - println("The process is finished!") } -} +} \ No newline at end of file diff --git a/core-tests/integration-tests/core-it/src/test/kotlin/org/evomaster/core/problem/rest/aiclassification/AIMoldelsCheckWFD.kt b/core-tests/integration-tests/core-it/src/test/kotlin/org/evomaster/core/problem/rest/aiclassification/AIMoldelsCheckWFD.kt index 92571265ca..272012499b 100644 --- a/core-tests/integration-tests/core-it/src/test/kotlin/org/evomaster/core/problem/rest/aiclassification/AIMoldelsCheckWFD.kt +++ b/core-tests/integration-tests/core-it/src/test/kotlin/org/evomaster/core/problem/rest/aiclassification/AIMoldelsCheckWFD.kt @@ -86,7 +86,7 @@ class AIModelsCheckWFD : IntegrationTestRestBase() { // Configure classifier and other parameters fun setup(modelName: String) { - 
config.aiModelForResponseClassification = EMConfig.AIResponseClassifierModel.valueOf(modelName) + config.setAIModels(EMConfig.AIResponseClassifierModel.valueOf(modelName)) config.aiEncoderType = EMConfig.EncoderType.valueOf(encoderType) config.aiClassifierRepairActivation = EMConfig.AIClassificationRepairActivation.valueOf(decisionMaking) config.aiResponseClassifierWarmup = warmUpRep @@ -148,7 +148,7 @@ class AIModelsCheckWFD : IntegrationTestRestBase() { println("Input vector size: ${inputVector.size}") // Warm-up - val innerModel = aiGlobalClassifier.viewInnerModel() + val innerModel = aiGlobalClassifier.viewInnerModels() println("innerModel is ${innerModel.javaClass.simpleName ?: "Unknown"}") val endpointModel = when(innerModel) { is Gaussian400Classifier -> innerModel.getModel(endPoint) @@ -252,7 +252,7 @@ class AIModelsCheckWFD : IntegrationTestRestBase() { // Save the final result as a .txt file if (saveReport){ - val innerModel = aiGlobalClassifier.viewInnerModel() + val innerModel = aiGlobalClassifier.viewInnerModels() val allModels = when (innerModel) { is Gaussian400Classifier -> innerModel.getAllModels() is GLM400Classifier -> innerModel.getAllModels() diff --git a/core/src/main/kotlin/org/evomaster/core/EMConfig.kt b/core/src/main/kotlin/org/evomaster/core/EMConfig.kt index 1bc461ea74..d04b90fe51 100644 --- a/core/src/main/kotlin/org/evomaster/core/EMConfig.kt +++ b/core/src/main/kotlin/org/evomaster/core/EMConfig.kt @@ -1528,11 +1528,11 @@ class EMConfig { DETERMINISTIC } - - @Experimental - @Cfg("Model used to learn input constraints and infer response status before making request.") - var aiModelForResponseClassification = AIResponseClassifierModel.NONE + @Cfg("Models used to learn input constraints and predict the response status before issuing a request. " + + "Supports both single-model and ensemble configurations. 
" + + "Ensemble model is a combination of a comma-separated list, e.g., GLM,NN,KDE.") + var aiModelForResponseClassification: String = "NONE" @Experimental @Cfg("Learning rate controlling the step size during parameter updates in classifiers. " + @@ -1578,7 +1578,7 @@ class EMConfig { @Experimental @Cfg("The encoding strategy applied to transform raw data to the encoded version.") - var aiEncoderType = EncoderType.RAW + var aiEncoderType = EncoderType.NORMAL @Experimental @@ -1598,7 +1598,7 @@ class EMConfig { @PercentageAsProbability(false) @Cfg("If using THRESHOLD for AI Classification Repair, specify its value." + " All classifications with probability equal or above such threshold value will be accepted.") - var classificationRepairThreshold = 0.8 + var classificationRepairThreshold = 0.5 @Experimental @Cfg("Specify how the classification of actions's response will be used to execute a possible repair on the action.") @@ -1637,7 +1637,7 @@ class EMConfig { @Experimental @Cfg("Minimum confidence threshold required for the AI response classifier to decide" + "whether to send a request as-is or attempt a repair.") - var aIResponseClassifierWeaknessThreshold = 0.4 + var aIResponseClassifierWeaknessThreshold = 0.8 @Cfg("Output a JSON file representing statistics of the fuzzing session, written in the WFC Report format." + " This also includes a index.html web application to visualize such data.") @@ -3242,7 +3242,8 @@ class EMConfig { fun getExcludeEndpoints() = endpointExclude?.split(",")?.map { it.trim() } ?: listOf() - fun isEnabledAIModelForResponseClassification() = aiModelForResponseClassification != AIResponseClassifierModel.NONE + fun isEnabledAIModelForResponseClassification() = getAIModelForResponseClassification().any { it != AIResponseClassifierModel.NONE } + /** * Source to build the final GA solution when evolving full test suites (not single tests). @@ -3345,4 +3346,41 @@ class EMConfig { return disabledOracleCodesList!! 
} + // Sets the AI response classification models programmatically. + fun setAIModels(vararg models: AIResponseClassifierModel) { + aiModelForResponseClassification = + models.joinToString(",") { it.name } + } + + /** + * Parses and validates the configured AI response classification models. + * The configuration may contain a single model (e.g., "GLM") or + * multiple models separated by commas for ensemble usage (e.g., "GLM, NN, KDE"). + * Use the value "NONE" to disable AI-based response classification. + */ + fun getAIModelForResponseClassification(): List<AIResponseClassifierModel> { + val models = aiModelForResponseClassification + .split(",") + .map { it.trim() } + .filter { it.isNotEmpty() } + .map { + try { + AIResponseClassifierModel.valueOf(it) + } catch (e: Exception) { + throw ConfigProblemException("Invalid AI model: $it") + } + } + .distinct() + .sorted() + + // EvoMaster accepts either NONE or a combination of AI models, but not both + if (models.contains(AIResponseClassifierModel.NONE) && models.size > 1) { + throw ConfigProblemException( + "Invalid configuration: NONE cannot be combined with other AI models" + ) + } + + return models + } + } diff --git a/core/src/main/kotlin/org/evomaster/core/problem/rest/classifier/probabilistic/glm/GLM400EndpointModel.kt b/core/src/main/kotlin/org/evomaster/core/problem/rest/classifier/probabilistic/glm/GLM400EndpointModel.kt index 3eed9fc728..b1f0f3a4e9 100644 --- a/core/src/main/kotlin/org/evomaster/core/problem/rest/classifier/probabilistic/glm/GLM400EndpointModel.kt +++ b/core/src/main/kotlin/org/evomaster/core/problem/rest/classifier/probabilistic/glm/GLM400EndpointModel.kt @@ -72,12 +72,18 @@ class GLM400EndpointModel( val prob400 = 1 - sigmoid(z) val probNot400 = 1.0 - prob400 - return AIResponseClassification( - probabilities = mapOf( - NOT_400 to probNot400, - 400 to prob400 + return if (prob400.isNaN() || probNot400.isNaN()) { + AIResponseClassification( + probabilities = mapOf(NOT_400 to 0.5, 400 to 0.5) ) - ) + }else { + 
AIResponseClassification( + probabilities = mapOf( + NOT_400 to probNot400, + 400 to prob400 + ) + ) + } } diff --git a/core/src/main/kotlin/org/evomaster/core/problem/rest/service/AIResponseClassifier.kt b/core/src/main/kotlin/org/evomaster/core/problem/rest/service/AIResponseClassifier.kt index 224767c2d4..007aeb1424 100644 --- a/core/src/main/kotlin/org/evomaster/core/problem/rest/service/AIResponseClassifier.kt +++ b/core/src/main/kotlin/org/evomaster/core/problem/rest/service/AIResponseClassifier.kt @@ -91,7 +91,7 @@ class AIResponseClassifier : AIModel { @Inject private lateinit var randomness: Randomness - private lateinit var delegate: AIModel + private lateinit var delegates: List<AIModel> private var enabledLearning : Boolean = true @@ -102,52 +102,98 @@ class AIResponseClassifier : AIModel { /** Read-only snapshot for reporting */ fun getStats(): AIResponseClassifierStats = stats.copy() + /** Determines whether the model's performance metrics indicate that the model is weak. + * A model is weak if any of its key metrics is at or below the configured weakness threshold. + */ + private fun isWeak(metrics: ModelEvaluation): Boolean { + + val weaknessThreshold = config.aIResponseClassifierWeaknessThreshold + + return metrics.precision400 <= weaknessThreshold|| + metrics.sensitivity400 <= weaknessThreshold || + metrics.specificity <= weaknessThreshold || + metrics.npv <= weaknessThreshold + } + + /** + * In the ensemble scenario, the function selects the strongest model for a given endpoint + * among all configured models, based on the average of the key metrics. + * For the single-model scenario, the function simply returns the single model. + */ + private fun selectBestModel(endpoint: Endpoint): AIModel { + + require(delegates.isNotEmpty()) { + "No AI models were initialized in AIResponseClassifier for the endpoint: ${endpoint.path}" + } + + // Return the single model if there is only one delegate. 
+ if (delegates.size == 1) return delegates.first() + + return delegates.maxBy { model -> + val m = model.estimateMetrics(endpoint) + listOf( + m.precision400, + m.sensitivity400, + m.specificity, + m.npv + ).average() + } + } + + fun viewInnerModels(): List = delegates + @PostConstruct fun initModel() { - delegate = when (config.aiModelForResponseClassification) { - EMConfig.AIResponseClassifierModel.GAUSSIAN -> - Gaussian400Classifier( - warmup = config.aiResponseClassifierWarmup, - encoderType=config.aiEncoderType, - metricType =config.aIClassificationMetrics, - randomness = randomness) - EMConfig.AIResponseClassifierModel.GLM -> - GLM400Classifier( - warmup = config.aiResponseClassifierWarmup, - encoderType=config.aiEncoderType, - metricType =config.aIClassificationMetrics, - randomness = randomness, - learningRate = config.aiResponseClassifierLearningRate) - EMConfig.AIResponseClassifierModel.NN -> - NN400Classifier( - warmup = config.aiResponseClassifierWarmup, - encoderType=config.aiEncoderType, - metricType =config.aIClassificationMetrics, - randomness = randomness, - learningRate = config.aiResponseClassifierLearningRate) - EMConfig.AIResponseClassifierModel.KNN -> - KNN400Classifier( - warmup = config.aiResponseClassifierWarmup, - encoderType=config.aiEncoderType, - metricType =config.aIClassificationMetrics, - randomness = randomness, - k = 3) - EMConfig.AIResponseClassifierModel.KDE -> - KDE400Classifier( - warmup = config.aiResponseClassifierWarmup, - encoderType=config.aiEncoderType, - metricType =config.aIClassificationMetrics, - randomness = randomness - ) - EMConfig.AIResponseClassifierModel.DETERMINISTIC -> - Deterministic400Classifier( - config.classificationRepairThreshold, - metricType = config.aIClassificationMetrics) - else -> object : AIModel { - override fun updateModel(input: RestCallAction, output: RestCallResult) {} - override fun classify(input: RestCallAction) = AIResponseClassification() - override fun estimateMetrics(endpoint: 
Endpoint): ModelEvaluation = ModelEvaluation.DEFAULT_NO_DATA - override fun estimateOverallMetrics(): ModelEvaluation = ModelEvaluation.DEFAULT_NO_DATA + + val models = config.getAIModelForResponseClassification() + + delegates = models.map { model -> + when (model) { + + EMConfig.AIResponseClassifierModel.GAUSSIAN -> + Gaussian400Classifier( + warmup = config.aiResponseClassifierWarmup, + encoderType=config.aiEncoderType, + metricType =config.aIClassificationMetrics, + randomness = randomness) + EMConfig.AIResponseClassifierModel.GLM -> + GLM400Classifier( + warmup = config.aiResponseClassifierWarmup, + encoderType=config.aiEncoderType, + metricType =config.aIClassificationMetrics, + randomness = randomness, + learningRate = config.aiResponseClassifierLearningRate) + EMConfig.AIResponseClassifierModel.NN -> + NN400Classifier( + warmup = config.aiResponseClassifierWarmup, + encoderType=config.aiEncoderType, + metricType =config.aIClassificationMetrics, + randomness = randomness, + learningRate = config.aiResponseClassifierLearningRate) + EMConfig.AIResponseClassifierModel.KNN -> + KNN400Classifier( + warmup = config.aiResponseClassifierWarmup, + encoderType=config.aiEncoderType, + metricType =config.aIClassificationMetrics, + randomness = randomness, + k = 3) + EMConfig.AIResponseClassifierModel.KDE -> + KDE400Classifier( + warmup = config.aiResponseClassifierWarmup, + encoderType=config.aiEncoderType, + metricType =config.aIClassificationMetrics, + randomness = randomness + ) + EMConfig.AIResponseClassifierModel.DETERMINISTIC -> + Deterministic400Classifier( + config.classificationRepairThreshold, + metricType = config.aIClassificationMetrics) + else -> object : AIModel { + override fun updateModel(input: RestCallAction, output: RestCallResult) {} + override fun classify(input: RestCallAction) = AIResponseClassification() + override fun estimateMetrics(endpoint: Endpoint): ModelEvaluation = ModelEvaluation.DEFAULT_NO_DATA + override fun estimateOverallMetrics(): 
ModelEvaluation = ModelEvaluation.DEFAULT_NO_DATA + } } } } @@ -190,7 +236,7 @@ class AIResponseClassifier : AIModel { // Measuring the time of update val start = System.nanoTime() - delegate.updateModel(input, output) + delegates.forEach { it.updateModel(input, output) } val t = System.nanoTime() - start // updating time stats @@ -210,26 +256,57 @@ class AIResponseClassifier : AIModel { } val start = System.nanoTime() - val result = delegate.classify(input) + + val bestModel = selectBestModel(input.endpoint) + val result = bestModel.classify(input) + + val p = result.probabilityOf400() + val invalidFields = result.invalidFields + val t = System.nanoTime() - start stats.classifyTimeNs += t stats.classifyCount++ - return result + return AIResponseClassification( + probabilities = mapOf(400 to p), + invalidFields = invalidFields + ) } + /** + * In the ensemble setting, the reported metrics correspond to the best-performing + * individual model for the given endpoint. In the single-model case, the metrics + * are naturally derived from that model alone. + * In other words, for ensembles, the metrics reflect the strongest performance + * achieved among all available models. + */ override fun estimateMetrics(endpoint: Endpoint): ModelEvaluation { - return delegate.estimateMetrics(endpoint) + val bestModel = selectBestModel(endpoint) + return bestModel.estimateMetrics(endpoint) } - + /** + * In the ensemble setting, the reported overall metrics are computed as the average + * of the corresponding metrics across all models. + * In the single-model case, the overall metrics coincide with those of + * the individual model (see [org.evomaster.core.problem.rest.classifier.probabilistic.AbstractProbabilistic400Classifier]). 
+ */ override fun estimateOverallMetrics(): ModelEvaluation { - return delegate.estimateOverallMetrics() - } + val metrics = delegates.map { it.estimateOverallMetrics() } + + return ModelEvaluation( + accuracy = metrics.map { it.accuracy }.average(), + precision400 = metrics.map { it.precision400 }.average(), + sensitivity400 = metrics.map { it.sensitivity400 }.average(), + specificity = metrics.map { it.specificity }.average(), + npv = metrics.map { it.npv }.average(), + mcc = metrics.map { it.mcc }.average() + ) + + } - fun viewInnerModel(): AIModel = delegate /** * If the model thinks this call will lead to a user error (e.g., 400), then try to repair @@ -241,19 +318,13 @@ class AIResponseClassifier : AIModel { /** * Skips repair when the classifier is still too weak to provide meaningful guidance. - * Reliability is assessed using precision, recall, and MCC (see [ModelEvaluation]) to + * Reliability is assessed using model metrics (see [ModelEvaluation]) to * ensure the model performs better than random guessing, especially important under the class imbalance. * If any of the criteria are not met, the classifier is considered unreliable * for steering repairs. In such cases, the call is executed without modification so the * classifier can gather additional informative samples and improve over time. 
*/ - val metrics = estimateMetrics(call.endpoint) - val weaknessThreshold = config.aIResponseClassifierWeaknessThreshold - if (metrics.precision400 <= weaknessThreshold - || metrics.sensitivity400 <= weaknessThreshold - || metrics.specificity <= weaknessThreshold - || metrics.npv <= weaknessThreshold) { - + if (isWeak(estimateMetrics(call.endpoint))) { //do nothing return } @@ -356,7 +427,7 @@ class AIResponseClassifier : AIModel { // skip conditions val skip5xx = - trueStatusCode !in 500..599 && config.skipAIModelUpdateWhenResponseIs5xx + trueStatusCode in 500..599 && config.skipAIModelUpdateWhenResponseIs5xx val skipNot2xxOr400 = trueStatusCode !in 200..299 diff --git a/core/src/main/kotlin/org/evomaster/core/search/service/Statistics.kt b/core/src/main/kotlin/org/evomaster/core/search/service/Statistics.kt index f5f91a8692..3b39d8cfbb 100644 --- a/core/src/main/kotlin/org/evomaster/core/search/service/Statistics.kt +++ b/core/src/main/kotlin/org/evomaster/core/search/service/Statistics.kt @@ -489,12 +489,12 @@ class Statistics : SearchListener { } // Compute metrics - val metrics = aiResponseClassifier.viewInnerModel().estimateOverallMetrics() + val metrics = aiResponseClassifier.estimateOverallMetrics() val aiStats = aiResponseClassifier.getStats() return aiMetricsAsPairs( enabled = true, - type = config.aiModelForResponseClassification.name, + type = config.aiModelForResponseClassification, accuracy = metrics.accuracy, precision = metrics.precision400, sensitivity = metrics.sensitivity400, diff --git a/core/src/test/kotlin/org/evomaster/core/output/dto/DtoWriterTest.kt b/core/src/test/kotlin/org/evomaster/core/output/dto/DtoWriterTest.kt index 790094619f..f722178a4c 100644 --- a/core/src/test/kotlin/org/evomaster/core/output/dto/DtoWriterTest.kt +++ b/core/src/test/kotlin/org/evomaster/core/output/dto/DtoWriterTest.kt @@ -33,7 +33,7 @@ class DtoWriterTest { val outputFormat = OutputFormat.JAVA_JUNIT_4 val config = EMConfig().apply { - 
aiModelForResponseClassification = EMConfig.AIResponseClassifierModel.GLM + setAIModels(EMConfig.AIResponseClassifierModel.GLM) enableSchemaConstraintHandling = true allowInvalidData = false probRestDefault = 0.0 diff --git a/docs/options.md b/docs/options.md index 819c9dc90f..8e7c40d472 100644 --- a/docs/options.md +++ b/docs/options.md @@ -253,11 +253,11 @@ There are 3 types of options: |Options|Description| |---|---| |`aIClassificationMetrics`| __Enum__. Determines which metric-tracking strategy is used by the AI response classifier. *Valid values*: `TIME_WINDOW, FULL_HISTORY`. *Default value*: `FULL_HISTORY`.| -|`aIResponseClassifierWeaknessThreshold`| __Double__. Minimum confidence threshold required for the AI response classifier to decidewhether to send a request as-is or attempt a repair. *Default value*: `0.4`.| +|`aIResponseClassifierWeaknessThreshold`| __Double__. Minimum confidence threshold required for the AI response classifier to decidewhether to send a request as-is or attempt a repair. *Default value*: `0.8`.| |`abstractInitializationGeneToMutate`| __Boolean__. During mutation, whether to abstract genes for repeated SQL actions. *Default value*: `false`.| |`aiClassifierRepairActivation`| __Enum__. Specify how the classification of actions's response will be used to execute a possible repair on the action. *Valid values*: `PROBABILITY, THRESHOLD`. *Default value*: `THRESHOLD`.| -|`aiEncoderType`| __Enum__. The encoding strategy applied to transform raw data to the encoded version. *Valid values*: `RAW, NORMAL, UNIT_NORMAL`. *Default value*: `RAW`.| -|`aiModelForResponseClassification`| __Enum__. Model used to learn input constraints and infer response status before making request. *Valid values*: `NONE, GAUSSIAN, KDE, KNN, NN, GLM, DETERMINISTIC`. *Default value*: `NONE`.| +|`aiEncoderType`| __Enum__. The encoding strategy applied to transform raw data to the encoded version. *Valid values*: `RAW, NORMAL, UNIT_NORMAL`. 
*Default value*: `NORMAL`.| +|`aiModelForResponseClassification`| __String__. Models used to learn input constraints and predict the response status before issuing a request. Supports both single-model and ensemble configurations. Ensemble model is a combination of a comma-separated list, e.g., GLM,NN,KDE. *Default value*: `NONE`.| |`aiResponseClassifierLearningRate`| __Double__. Learning rate controlling the step size during parameter updates in classifiers. Relevant for gradient-based models such as GLM and neural networks. A smaller value ensures stable but slower convergence, while a larger value speeds up training but may cause instability. *Default value*: `0.01`.| |`aiResponseClassifierMaxStoredSamples`| __Int__. Maximum number of stored samples for classifiers such as KNN and KDE models that rely on retaining encoded inputs. This value specifies the maximum number of samples stored for each endpoint. A higher value can improve classification accuracy by leveraging more historical data, but also increases memory usage. A lower value reduces memory consumption but may limit the classifier’s knowledge base. Typically, it is safe to keep this value between 10,000 and 50,000 when the encoded input vector is usually a list of doubles with a length under 20. Reservoir sampling is applied independently for each endpoint: if this maximum number is exceeded, new samples randomly replace existing ones, ensuring an unbiased selection of preserved data. As an example, for an API with 100 endpoints and an input vector of size 20, a maximum of 10,000 samples per endpoint would require roughly 200 MB of memory. *Default value*: `10000`.| |`aiResponseClassifierWarmup`| __Int__. Number of training iterations required to update classifier parameters. For example, in the Gaussian model this affects mean and variance updates. For neural network (NN) models, the warm-up should typically be larger than 1000. 
*Default value*: `100`.| @@ -267,7 +267,7 @@ There are 3 types of options: |`breederTruncationFraction`| __Double__. Breeder GA: fraction of top individuals to keep in parents pool (truncation). *Constraints*: `probability 0.0-1.0`. *Default value*: `0.5`.| |`callbackURLHostname`| __String__. HTTP callback verifier hostname. Default is set to 'localhost'. If the SUT is running inside a container (i.e., Docker), 'localhost' will refer to the container. This can be used to change the hostname. *Default value*: `localhost`.| |`cgaNeighborhoodModel`| __Enum__. Cellular GA: neighborhood model (RING, L5, C9, C13). *Valid values*: `RING, L5, C9, C13`. *Default value*: `RING`.| -|`classificationRepairThreshold`| __Double__. If using THRESHOLD for AI Classification Repair, specify its value. All classifications with probability equal or above such threshold value will be accepted. *Constraints*: `probability 0.0-1.0`. *Default value*: `0.8`.| +|`classificationRepairThreshold`| __Double__. If using THRESHOLD for AI Classification Repair, specify its value. All classifications with probability equal or above such threshold value will be accepted. *Constraints*: `probability 0.0-1.0`. *Default value*: `0.5`.| |`discoveredInfoRewardedInFitness`| __Boolean__. If there is new discovered information from a test execution, reward it in the fitness function. *Default value*: `false`.| |`dockerLocalhost`| __Boolean__. Replace references to 'localhost' to point to the actual host machine. Only needed when running EvoMaster inside Docker. *Default value*: `false`.| |`dpcTargetTestSize`| __Int__. Specify a max size of a test to be targeted when either DPC_INCREASING or DPC_DECREASING is enabled. *Default value*: `1`.|