diff --git a/Dockerfile.nidx b/Dockerfile.nidx index dfb672545b..79bd963bb5 100644 --- a/Dockerfile.nidx +++ b/Dockerfile.nidx @@ -3,7 +3,7 @@ # For a version that uses pre-built binaries (outside Docker) see `Dockerfile.nidx_prebuild` # -FROM rust:1.94.0-trixie AS builder +FROM rust:1.95.0-trixie AS builder RUN apt update && apt install -y protobuf-compiler COPY nucliadb_protos/*proto /app/nucliadb_protos/ COPY nidx /app/nidx diff --git a/nidx/nidx_paragraph/src/reader.rs b/nidx/nidx_paragraph/src/reader.rs index 0fe5153e11..d02d6d618a 100644 --- a/nidx/nidx_paragraph/src/reader.rs +++ b/nidx/nidx_paragraph/src/reader.rs @@ -26,8 +26,8 @@ use nidx_protos::{OrderBy, ParagraphItem, ParagraphSearchResponse, StreamRequest use nidx_types::prefilter::PrefilterResult; use tantivy::collector::{Collector, Count, FacetCollector, TopDocs}; use tantivy::query::{AllQuery, Query}; -use tantivy::{DateTime, Order, schema::*}; -use tantivy::{DocAddress, Index, IndexReader}; +use tantivy::schema::{Facet, Field, Value}; +use tantivy::{DateTime, DocAddress, Index, IndexReader, Order, TantivyDocument}; use tracing::*; use super::schema::ParagraphSchema; diff --git a/nidx/nidx_vector/src/data_store/v2.rs b/nidx/nidx_vector/src/data_store/v2.rs index 664bbbc93d..4e332fbe06 100644 --- a/nidx/nidx_vector/src/data_store/v2.rs +++ b/nidx/nidx_vector/src/data_store/v2.rs @@ -65,7 +65,7 @@ impl DataStoreV2 { None }; - for (idx, elem) in (0..).zip(entries.into_iter()) { + for (idx, elem) in (0..).zip(entries) { let (first_vector, _) = vectors.write(idx, elem.vectors.iter().map(|v| config.vector_type.encode(v)))?; if let Some(quantized) = &mut quantized { for v in &elem.vectors { diff --git a/nidx/nidx_vector/src/hnsw/search.rs b/nidx/nidx_vector/src/hnsw/search.rs index 738a6aa069..e9319bc1ea 100644 --- a/nidx/nidx_vector/src/hnsw/search.rs +++ b/nidx/nidx_vector/src/hnsw/search.rs @@ -207,11 +207,7 @@ impl<'a, DR: DataRetriever> HnswSearcher<'a, DR> { let mut preloaded = 0; - loop { - let 
Some(Cnx(candidate, candidate_similarity)) = candidates.pop() else { - break; - }; - + while let Some(Cnx(candidate, candidate_similarity)) = candidates.pop() { if candidate_similarity < self.retriever.min_score() { break; } diff --git a/nidx/nidx_vector/src/indexer.rs b/nidx/nidx_vector/src/indexer.rs index 8991576a1f..0855313f84 100644 --- a/nidx/nidx_vector/src/indexer.rs +++ b/nidx/nidx_vector/src/indexer.rs @@ -65,29 +65,26 @@ impl<'a> ResourceWrapper<'a> { pub fn fields(&self) -> impl Iterator>)> { self.resource.paragraphs.iter().map(|(field_id, paragraphs_wrapper)| { - let sentences_iterator = paragraphs_wrapper - .paragraphs - .iter() - .filter_map(|(_paragraph_id, paragraph)| { - let sentences = if let Some(vectorset) = &self.vectorset { - // indexing a vectorset, we should return only paragraphs from this vectorset. - // If vectorset is not found, we'll skip this paragraph - if let Some(vectorset_sentences) = paragraph.vectorsets_sentences.get(vectorset) { - Some(&vectorset_sentences.sentences) - } else if self.fallback_to_default_vectorset { - Some(&paragraph.sentences) - } else { - None - } - } else { - // Default vectors index (no vectorset) + let sentences_iterator = paragraphs_wrapper.paragraphs.values().filter_map(|paragraph| { + let sentences = if let Some(vectorset) = &self.vectorset { + // indexing a vectorset, we should return only paragraphs from this vectorset. 
+ // If vectorset is not found, we'll skip this paragraph + if let Some(vectorset_sentences) = paragraph.vectorsets_sentences.get(vectorset) { + Some(&vectorset_sentences.sentences) + } else if self.fallback_to_default_vectorset { Some(&paragraph.sentences) - }; - sentences.map(|s| ParagraphVectors { - vectors: s, - labels: &paragraph.labels, - }) - }); + } else { + None + } + } else { + // Default vectors index (no vectorset) + Some(&paragraph.sentences) + }; + sentences.map(|s| ParagraphVectors { + vectors: s, + labels: &paragraph.labels, + }) + }); (field_id, sentences_iterator) }) } diff --git a/nidx/nidx_vector/src/segment.rs b/nidx/nidx_vector/src/segment.rs index 04da059850..3e965b55e8 100644 --- a/nidx/nidx_vector/src/segment.rs +++ b/nidx/nidx_vector/src/segment.rs @@ -823,7 +823,7 @@ mod test { let path_merged = tempdir()?; let merged_dp = merge(path_merged.path(), work, &config)?; - for (i, (elem, mut labels)) in elems1.into_iter().chain(elems2.into_iter()).enumerate() { + for (i, (elem, mut labels)) in elems1.into_iter().chain(elems2).enumerate() { let vector = merged_dp.data_store.get_vector(VectorAddr(i as u32)); assert_eq!(config.vector_type.encode(&elem.vectors[0]), vector.vector()); diff --git a/nidx/src/searcher.rs b/nidx/src/searcher.rs index 5e9c094af4..f2c5709ae7 100644 --- a/nidx/src/searcher.rs +++ b/nidx/src/searcher.rs @@ -76,7 +76,7 @@ async fn refresher_task(mut rx: Receiver<(IndexId, bool)>, index_cache: Arc