Skip to content

Commit d5838ba

Browse files
authored
fix: possible to miss some results for phrase query (#3715)
Fixes #3707 and #3714. The bug was caused by the order in which floats are summed up. I ran this test 100 times locally to make sure it always passes. --------- Signed-off-by: BubbleCal <bubble-cal@outlook.com>
1 parent 3140bc5 commit d5838ba

1 file changed

Lines changed: 8 additions & 13 deletions

File tree

  • rust/lance-index/src/scalar/inverted

rust/lance-index/src/scalar/inverted/wand.rs

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,10 @@ impl PartialOrd for PostingIterator {
4343
impl Ord for PostingIterator {
4444
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
4545
match (self.doc(), other.doc()) {
46-
(Some(doc1), Some(doc2)) => doc1.cmp(&doc2),
46+
(Some(doc1), Some(doc2)) => doc1.cmp(&doc2).then(
47+
self.approximate_upper_bound
48+
.total_cmp(&other.approximate_upper_bound),
49+
),
4750
(Some(_), None) => std::cmp::Ordering::Less,
4851
(None, Some(_)) => std::cmp::Ordering::Greater,
4952
(None, None) => std::cmp::Ordering::Equal,
@@ -126,7 +129,8 @@ impl Wand {
126129
operator: Operator,
127130
postings: impl Iterator<Item = PostingIterator>,
128131
) -> Self {
129-
let posting_lists = postings.collect::<Vec<_>>();
132+
let mut posting_lists = postings.collect::<Vec<_>>();
133+
posting_lists.sort_unstable();
130134
let threshold = match operator {
131135
Operator::Or => 0.0,
132136
Operator::And => posting_lists
@@ -161,16 +165,8 @@ impl Wand {
161165
let mut candidates = BinaryHeap::new();
162166

163167
while let Some(doc) = self.next().await? {
164-
if is_phrase_query {
165-
if let Some(last) = self.postings.last() {
166-
if last.doc().unwrap().row_id != doc {
167-
continue;
168-
}
169-
}
170-
171-
if !self.check_positions() {
172-
continue;
173-
}
168+
if is_phrase_query && !self.check_positions() {
169+
continue;
174170
}
175171
let score = self.score(doc, &scorer);
176172
if candidates.len() < limit {
@@ -209,7 +205,6 @@ impl Wand {
209205
// find the next doc candidate
210206
#[instrument(level = "debug", name = "wand_next", skip_all)]
211207
async fn next(&mut self) -> Result<Option<u64>> {
212-
self.postings.sort_unstable();
213208
while let Some(pivot_posting) = self.find_pivot_term() {
214209
let doc = pivot_posting
215210
.doc()

0 commit comments

Comments
 (0)