Skip to content

Commit 9fa359f

Browse files
committed
clean up scoring logic
Signed-off-by: Connor Tsui <connor.tsui20@gmail.com>
1 parent cbbe7b3 commit 9fa359f

2 files changed

Lines changed: 40 additions & 14 deletions

File tree

vortex-compressor/src/compressor.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1109,6 +1109,23 @@ mod tests {
11091109
Ok(())
11101110
}
11111111

1112+
#[test]
1113+
fn finite_ratio_does_not_displace_zero_byte_sample() -> VortexResult<()> {
1114+
let compressor = CascadingCompressor::new(vec![&ZeroBytesSamplingScheme, &HugeRatioScheme]);
1115+
let schemes: [&'static dyn Scheme; 2] = [&ZeroBytesSamplingScheme, &HugeRatioScheme];
1116+
let mut data = estimate_test_data();
1117+
1118+
let winner =
1119+
compressor.choose_best_scheme(&schemes, &mut data, CompressorContext::new())?;
1120+
1121+
assert!(matches!(
1122+
winner,
1123+
Some((scheme, WinnerEstimate::Score(EstimateScore::ZeroBytes)))
1124+
if scheme.id() == ZeroBytesSamplingScheme.id()
1125+
));
1126+
Ok(())
1127+
}
1128+
11121129
#[test]
11131130
fn all_null_array_compresses_to_constant() -> VortexResult<()> {
11141131
let array = PrimitiveArray::new(

vortex-compressor/src/estimate.rs

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,10 @@ pub(super) enum EstimateScore {
9292
/// A finite compression ratio.
9393
Finite(f64),
9494
/// Trial compression produced a 0-byte output.
95+
///
96+
/// This is treated as an unbounded score for scheme selection, but has no finite trace ratio.
97+
/// Final full-array compression still has to pass the normal before/after acceptance check, so
98+
/// a zero-byte sample can choose a scheme but cannot force a larger output to be returned.
9599
ZeroBytes,
96100
}
97101

@@ -112,6 +116,24 @@ impl EstimateScore {
112116
Self::ZeroBytes => None,
113117
}
114118
}
119+
120+
/// Returns whether this estimate is eligible to compete.
121+
fn is_valid(self) -> bool {
122+
match self {
123+
Self::Finite(ratio) => ratio.is_finite() && !ratio.is_subnormal() && ratio > 1.0,
124+
Self::ZeroBytes => true,
125+
}
126+
}
127+
128+
/// Returns whether this estimate beats another valid estimate.
129+
fn beats(self, other: Self) -> bool {
130+
match (self, other) {
131+
(Self::ZeroBytes, Self::Finite(_)) => true,
132+
(Self::ZeroBytes, Self::ZeroBytes) => false,
133+
(Self::Finite(_), Self::ZeroBytes) => false,
134+
(Self::Finite(ratio), Self::Finite(best_ratio)) => ratio > best_ratio,
135+
}
136+
}
115137
}
116138

117139
/// Winner estimate carried from scheme selection into result tracing.
@@ -138,20 +160,7 @@ pub(super) fn is_better_score(
138160
score: EstimateScore,
139161
best: &Option<(&'static dyn Scheme, EstimateScore)>,
140162
) -> bool {
141-
match score {
142-
EstimateScore::ZeroBytes => {
143-
best.is_none_or(|(_, best_score)| !matches!(best_score, EstimateScore::ZeroBytes))
144-
}
145-
EstimateScore::Finite(ratio) => {
146-
ratio.is_finite()
147-
&& !ratio.is_subnormal()
148-
&& ratio > 1.0
149-
&& best.is_none_or(|(_, best_score)| match best_score {
150-
EstimateScore::Finite(best_ratio) => ratio > best_ratio,
151-
EstimateScore::ZeroBytes => false,
152-
})
153-
}
154-
}
163+
score.is_valid() && best.is_none_or(|(_, best_score)| score.beats(best_score))
155164
}
156165

157166
/// Estimates compression ratio by compressing a ~1% sample of the data.

0 commit comments

Comments
 (0)