@@ -37,32 +37,32 @@ baseline_scores = run_quality_metrics(
3737# Data with progressively higher cumulative sums
3838high_scores = run_quality_metrics(
3939 base_df_10 ,
40- c(rep( 0 .1 , 5 ), seq(2.0 , 5.0 , length.out = 5 )), # mean_increase
41- c(rep( 0 .1 , 5 ), seq(2.0 , 5.0 , length.out = 5 )), # mean_decrease
42- c(rep( 0 .1 , 5 ), seq(2.0 , 5.0 , length.out = 5 )) # dispersion_increase
40+ c(seq( 0 , 0 .1 , length.out = 5 ), seq(2.0 , 5.0 , length.out = 5 )), # mean_increase
41+ c(seq( 0 , 0 .1 , length.out = 5 ), seq(2.0 , 5.0 , length.out = 5 )), # mean_decrease
42+ c(seq( 0 , 0 .1 , length.out = 5 ), seq(2.0 , 5.0 , length.out = 5 )) # dispersion_increase
4343)
4444
4545# The last 5 rows (with high values) should have higher mean anomaly scores
46- # expect_true(mean(high_scores$AnomalyScores[6:10]) > mean(high_scores$AnomalyScores[1:5]),
47- # info = "Higher cumulative sum values should produce higher anomaly scores")
46+ expect_true(mean(high_scores $ AnomalyScores [6 : 10 ]) > mean(high_scores $ AnomalyScores [1 : 5 ]),
47+ info = " Higher cumulative sum values should produce higher anomaly scores" )
4848
4949# Test 2: Extreme Value Testing - Obvious Outliers
5050base_df_20 = create_base_df(20 )
5151
5252extreme_scores = run_quality_metrics(
5353 base_df_20 ,
54- c(rep( 0 .1 , 19 ), 10.0 ), # Last value is extreme
55- c(rep( 0 .1 , 19 ), 8.0 ), # Last value is extreme
56- c(rep( 0 .1 , 19 ), 12.0 ) # Last value is extreme
54+ c(seq( 0 , 0 .1 , length.out = 19 ), 10.0 ), # Last value is extreme
55+ c(seq( 0 , 0 .1 , length.out = 19 ), 8.0 ), # Last value is extreme
56+ c(seq( 0 , 0 .1 , length.out = 19 ), 12.0 ) # Last value is extreme
5757)
5858
5959# The extreme outlier (last row) should have the highest anomaly score
6060expect_true(extreme_scores $ AnomalyScores [20 ] == max(extreme_scores $ AnomalyScores ),
6161 info = " Extreme outlier should have highest anomaly score" )
6262
6363# The outlier should score significantly higher than the median
64- # expect_true(extreme_scores$AnomalyScores[20] > median(extreme_scores$AnomalyScores[1:19]) * 2,
65- # info = "Outlier should score significantly higher than median")
64+ expect_true(extreme_scores $ AnomalyScores [20 ] > median(extreme_scores $ AnomalyScores [1 : 19 ]) * 2 ,
65+ info = " Outlier should score significantly higher than median" )
6666
6767# Test 3: Consistency/Reproducibility Testing
6868base_df_20_orig = create_base_df(20 )
@@ -267,18 +267,18 @@ base_df_6_rank = create_base_df(6)
267267# Create data with obvious ranking: Row 6 > Row 5 > Row 4 > Rows 1,2,3
268268ranking_scores = run_quality_metrics(
269269 base_df_6_rank ,
270- c(0.1 , 0.1 , 0.1 , 1.0 , 2.0 , 5.0 ),
271- c(0.1 , 0.1 , 0.1 , 1.0 , 2.0 , 5.0 ),
272- c(0.1 , 0.1 , 0.1 , 1.0 , 2.0 , 5.0 )
270+ c(0.1 , 0.11 , 0.12 , 1.0 , 2.0 , 5.0 ),
271+ c(0.1 , 0.11 , 0.12 , 1.0 , 2.0 , 5.0 ),
272+ c(0.1 , 0.11 , 0.12 , 1.0 , 2.0 , 5.0 )
273273)
274274
275275# Row 6 should have highest score, Row 5 second highest, etc.
276276expect_true(ranking_scores $ AnomalyScores [6 ] > ranking_scores $ AnomalyScores [5 ],
277277 info = " Row 6 should score higher than Row 5" )
278278expect_true(ranking_scores $ AnomalyScores [5 ] > ranking_scores $ AnomalyScores [4 ],
279279 info = " Row 5 should score higher than Row 4" )
280- # expect_true(ranking_scores$AnomalyScores[4] > max(ranking_scores$AnomalyScores[1:3]),
281- # info = "Row 4 should score higher than Rows 1-3")
280+ expect_true(ranking_scores $ AnomalyScores [4 ] > max(ranking_scores $ AnomalyScores [1 : 3 ]),
281+ info = " Row 4 should score higher than Rows 1-3" )
282282
283283# Test 10: Original Quality Metrics Calculation Test (from the beginning of the file)
284284# Test add_increase, add_decrease, add_dispersion
0 commit comments