Skip to content

Commit 3c1a626

Browse files
committed
revert scoring logic
Signed-off-by: Connor Tsui <connor.tsui20@gmail.com>
1 parent e52fef5 commit 3c1a626

2 files changed

Lines changed: 34 additions & 21 deletions

File tree

vortex-compressor/src/compressor.rs

Lines changed: 27 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1093,7 +1093,7 @@ mod tests {
10931093
}
10941094

10951095
#[test]
1096-
fn zero_byte_sample_beats_any_finite_ratio() -> VortexResult<()> {
1096+
fn zero_byte_sample_loses_to_finite_ratio() -> VortexResult<()> {
10971097
let compressor = CascadingCompressor::new(vec![&HugeRatioScheme, &ZeroBytesSamplingScheme]);
10981098
let schemes: [&'static dyn Scheme; 2] = [&HugeRatioScheme, &ZeroBytesSamplingScheme];
10991099
let mut data = estimate_test_data();
@@ -1103,14 +1103,14 @@ mod tests {
11031103

11041104
assert!(matches!(
11051105
winner,
1106-
Some((scheme, WinnerEstimate::Score(EstimateScore::ZeroBytes)))
1107-
if scheme.id() == ZeroBytesSamplingScheme.id()
1106+
Some((scheme, WinnerEstimate::Score(EstimateScore::FiniteCompression(100.0))))
1107+
if scheme.id() == HugeRatioScheme.id()
11081108
));
11091109
Ok(())
11101110
}
11111111

11121112
#[test]
1113-
fn finite_ratio_does_not_displace_zero_byte_sample() -> VortexResult<()> {
1113+
fn finite_ratio_displaces_zero_byte_sample() -> VortexResult<()> {
11141114
let compressor = CascadingCompressor::new(vec![&ZeroBytesSamplingScheme, &HugeRatioScheme]);
11151115
let schemes: [&'static dyn Scheme; 2] = [&ZeroBytesSamplingScheme, &HugeRatioScheme];
11161116
let mut data = estimate_test_data();
@@ -1120,12 +1120,25 @@ mod tests {
11201120

11211121
assert!(matches!(
11221122
winner,
1123-
Some((scheme, WinnerEstimate::Score(EstimateScore::ZeroBytes)))
1124-
if scheme.id() == ZeroBytesSamplingScheme.id()
1123+
Some((scheme, WinnerEstimate::Score(EstimateScore::FiniteCompression(100.0))))
1124+
if scheme.id() == HugeRatioScheme.id()
11251125
));
11261126
Ok(())
11271127
}
11281128

1129+
#[test]
1130+
fn zero_byte_sample_alone_selects_no_scheme() -> VortexResult<()> {
1131+
let compressor = CascadingCompressor::new(vec![&ZeroBytesSamplingScheme]);
1132+
let schemes: [&'static dyn Scheme; 1] = [&ZeroBytesSamplingScheme];
1133+
let mut data = estimate_test_data();
1134+
1135+
let winner =
1136+
compressor.choose_best_scheme(&schemes, &mut data, CompressorContext::new())?;
1137+
1138+
assert!(winner.is_none());
1139+
Ok(())
1140+
}
1141+
11291142
#[test]
11301143
fn all_null_array_compresses_to_constant() -> VortexResult<()> {
11311144
let array = PrimitiveArray::new(
@@ -1220,7 +1233,7 @@ mod tests {
12201233
}
12211234

12221235
#[test]
1223-
fn zero_byte_results_omit_ratio_fields() {
1236+
fn zero_byte_sample_result_omits_ratio_fields_and_selects_no_scheme() {
12241237
let compressor = CascadingCompressor::new(vec![&ZeroBytesSamplingScheme]);
12251238
let array = test_integer_array();
12261239

@@ -1235,12 +1248,12 @@ mod tests {
12351248
);
12361249
assert!(!sample_event.fields.contains_key("sampled_ratio"));
12371250

1238-
let result_event = find_event(&events, TARGET_TRACE, "scheme.compress_result");
1239-
assert_eq!(
1240-
result_event.fields.get("after_nbytes").map(String::as_str),
1241-
Some("0")
1242-
);
1243-
assert!(!result_event.fields.contains_key("estimated_ratio"));
1244-
assert!(!result_event.fields.contains_key("actual_ratio"));
1251+
assert!(!events.iter().any(|event| {
1252+
event.target == TARGET_TRACE
1253+
&& event
1254+
.fields
1255+
.get("message")
1256+
.is_some_and(|value| value == "scheme.compress_result")
1257+
}));
12451258
}
12461259
}

vortex-compressor/src/estimate.rs

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -93,9 +93,10 @@ pub(super) enum EstimateScore {
9393
FiniteCompression(f64),
9494
/// Trial compression produced a 0-byte output.
9595
///
96-
/// This is treated as an unbounded score for scheme selection, but has no finite trace ratio.
97-
/// Final full-array compression still has to pass the normal before/after acceptance check, so
98-
/// a zero-byte sample can choose a scheme but cannot force a larger output to be returned.
96+
/// This has no finite trace ratio and is not eligible for scheme selection.
97+
///
98+
/// TODO(connor): A zero-byte sample usually means the sampler happened to hit an all-null
99+
/// sample. Improve this logic so we can distinguish real zero-byte wins from sampling artifacts.
99100
ZeroBytes,
100101
}
101102

@@ -123,16 +124,15 @@ impl EstimateScore {
123124
Self::FiniteCompression(ratio) => {
124125
ratio.is_finite() && !ratio.is_subnormal() && ratio > 1.0
125126
}
126-
Self::ZeroBytes => true,
127+
Self::ZeroBytes => false,
127128
}
128129
}
129130

130131
/// Returns whether this estimate beats another valid estimate.
131132
fn beats(self, other: Self) -> bool {
132133
match (self, other) {
133-
(Self::ZeroBytes, Self::FiniteCompression(_)) => true,
134-
(Self::ZeroBytes, Self::ZeroBytes) => false,
135-
(Self::FiniteCompression(_), Self::ZeroBytes) => false,
134+
(Self::ZeroBytes, _) => false,
135+
(Self::FiniteCompression(_), Self::ZeroBytes) => true,
136136
(Self::FiniteCompression(ratio), Self::FiniteCompression(best_ratio)) => {
137137
ratio > best_ratio
138138
}

0 commit comments

Comments
 (0)