|
1 | 1 | use bytemuck::cast_slice_mut; |
2 | 2 | use byteorder::{LittleEndian, ReadBytesExt}; |
| 3 | +use core::convert::Infallible; |
3 | 4 |
|
4 | 5 | use std::cmp::Ordering; |
| 6 | +use std::error::Error; |
5 | 7 | use std::io::{self, SeekFrom}; |
6 | 8 | use std::mem; |
| 9 | +use std::ops::RangeInclusive; |
7 | 10 |
|
8 | 11 | use crate::bitmap::container::Container; |
9 | 12 | use crate::bitmap::serialization::{ |
@@ -40,20 +43,231 @@ impl RoaringBitmap { |
40 | 43 | /// rb1 & rb2, |
41 | 44 | /// ); |
42 | 45 | /// ``` |
43 | | - pub fn intersection_with_serialized_unchecked<R>( |
| 46 | + pub fn intersection_with_serialized_unchecked<R>(&self, other: R) -> io::Result<RoaringBitmap> |
| 47 | + where |
| 48 | + R: io::Read + io::Seek, |
| 49 | + { |
| 50 | + RoaringBitmap::intersection_with_serialized_impl::<R, _, Infallible, _, Infallible>( |
| 51 | + self, |
| 52 | + other, |
| 53 | + |values| Ok(ArrayStore::from_vec_unchecked(values)), |
| 54 | + |len, values| Ok(BitmapStore::from_unchecked(len, values)), |
| 55 | + ) |
| 56 | + } |
| 57 | + |
| 58 | + fn intersection_with_serialized_impl<R, A, AErr, B, BErr>( |
44 | 59 | &self, |
45 | | - mut other: R, |
| 60 | + mut reader: R, |
| 61 | + a: A, |
| 62 | + b: B, |
46 | 63 | ) -> io::Result<RoaringBitmap> |
47 | 64 | where |
48 | 65 | R: io::Read + io::Seek, |
| 66 | + A: Fn(Vec<u16>) -> Result<ArrayStore, AErr>, |
| 67 | + AErr: Error + Send + Sync + 'static, |
| 68 | + B: Fn(u64, Box<[u64; 1024]>) -> Result<BitmapStore, BErr>, |
| 69 | + BErr: Error + Send + Sync + 'static, |
49 | 70 | { |
50 | | - let metadata = BitmapReader::decode(&mut other)?; |
51 | | - let containers = Visitor { |
52 | | - containers: &self.containers, |
53 | | - metadata: &metadata, |
54 | | - handler: &mut BitAndHandler, |
| 71 | + let (size, has_offsets, has_run_containers) = { |
| 72 | + let cookie = reader.read_u32::<LittleEndian>()?; |
| 73 | + if cookie == SERIAL_COOKIE_NO_RUNCONTAINER { |
| 74 | + (reader.read_u32::<LittleEndian>()? as usize, true, false) |
| 75 | + } else if (cookie as u16) == SERIAL_COOKIE { |
| 76 | + let size = ((cookie >> 16) + 1) as usize; |
| 77 | + (size, size >= NO_OFFSET_THRESHOLD, true) |
| 78 | + } else { |
| 79 | + return Err(io::Error::other("unknown cookie value")); |
| 80 | + } |
| 81 | + }; |
| 82 | + |
| 83 | + let run_container_bitmap = if has_run_containers { |
| 84 | + let mut bitmap = vec![0u8; size.div_ceil(8)]; |
| 85 | + reader.read_exact(&mut bitmap)?; |
| 86 | + Some(bitmap) |
| 87 | + } else { |
| 88 | + None |
| 89 | + }; |
| 90 | + |
| 91 | + if size > u16::MAX as usize + 1 { |
| 92 | + return Err(io::Error::other("size is greater than supported")); |
55 | 93 | } |
56 | | - .visit(&mut other)?; |
| 94 | + |
| 95 | + let mut descriptions = vec![[0; 2]; size]; |
| 96 | + reader.read_exact(cast_slice_mut(&mut descriptions))?; |
| 97 | + descriptions.iter_mut().for_each(|[key, len]| { |
| 98 | + *key = u16::from_le(*key); |
| 99 | + *len = u16::from_le(*len); |
| 100 | + }); |
| 101 | + |
| 102 | + if has_offsets { |
| 103 | + let mut offsets = vec![0; size]; |
| 104 | + reader.read_exact(cast_slice_mut(&mut offsets))?; |
| 105 | + offsets.iter_mut().for_each(|offset| *offset = u32::from_le(*offset)); |
| 106 | + return self.intersection_with_serialized_impl_with_offsets( |
| 107 | + reader, |
| 108 | + a, |
| 109 | + b, |
| 110 | + &descriptions, |
| 111 | + &offsets, |
| 112 | + run_container_bitmap.as_deref(), |
| 113 | + ); |
| 114 | + } |
| 115 | + |
| 116 | + let mut containers = Vec::new(); |
| 117 | + for (i, &[key, len_minus_one]) in descriptions.iter().enumerate() { |
| 118 | + let container = match self.containers.binary_search_by_key(&key, |c| c.key) { |
| 119 | + Ok(index) => self.containers.get(index), |
| 120 | + Err(_) => None, |
| 121 | + }; |
| 122 | + let cardinality = u64::from(len_minus_one) + 1; |
| 123 | + |
| 124 | + let is_run_container = |
| 125 | + run_container_bitmap.as_ref().is_some_and(|bm| bm[i / 8] & (1 << (i % 8)) != 0); |
| 126 | + |
| 127 | + let store = if is_run_container { |
| 128 | + let runs = reader.read_u16::<LittleEndian>()?; |
| 129 | + match container { |
| 130 | + Some(_) => { |
| 131 | + let mut intervals = vec![[0, 0]; runs as usize]; |
| 132 | + reader.read_exact(cast_slice_mut(&mut intervals))?; |
| 133 | + intervals.iter_mut().for_each(|[s, len]| { |
| 134 | + *s = u16::from_le(*s); |
| 135 | + *len = u16::from_le(*len); |
| 136 | + }); |
| 137 | + |
| 138 | + let cardinality = intervals.iter().map(|[_, len]| *len as usize).sum(); |
| 139 | + let mut store = Store::with_capacity(cardinality); |
| 140 | + intervals.into_iter().try_for_each( |
| 141 | + |[s, len]| -> Result<(), io::ErrorKind> { |
| 142 | + let end = s.checked_add(len).ok_or(io::ErrorKind::InvalidData)?; |
| 143 | + store.insert_range(RangeInclusive::new(s, end)); |
| 144 | + Ok(()) |
| 145 | + }, |
| 146 | + )?; |
| 147 | + store |
| 148 | + } |
| 149 | + None => { |
| 150 | + let runs_size = mem::size_of::<u16>() * 2 * runs as usize; |
| 151 | + reader.seek(SeekFrom::Current(runs_size as i64))?; |
| 152 | + continue; |
| 153 | + } |
| 154 | + } |
| 155 | + } else if cardinality <= ARRAY_LIMIT { |
| 156 | + match container { |
| 157 | + Some(_) => { |
| 158 | + let mut values = vec![0; cardinality as usize]; |
| 159 | + reader.read_exact(cast_slice_mut(&mut values))?; |
| 160 | + values.iter_mut().for_each(|n| *n = u16::from_le(*n)); |
| 161 | + let array = |
| 162 | + a(values).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; |
| 163 | + Store::Array(array) |
| 164 | + } |
| 165 | + None => { |
| 166 | + let array_size = mem::size_of::<u16>() * cardinality as usize; |
| 167 | + reader.seek(SeekFrom::Current(array_size as i64))?; |
| 168 | + continue; |
| 169 | + } |
| 170 | + } |
| 171 | + } else { |
| 172 | + match container { |
| 173 | + Some(_) => { |
| 174 | + let mut values = Box::new([0; BITMAP_LENGTH]); |
| 175 | + reader.read_exact(cast_slice_mut(&mut values[..]))?; |
| 176 | + values.iter_mut().for_each(|n| *n = u64::from_le(*n)); |
| 177 | + let bitmap = b(cardinality, values) |
| 178 | + .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; |
| 179 | + Store::Bitmap(bitmap) |
| 180 | + } |
| 181 | + None => { |
| 182 | + let bitmap_size = mem::size_of::<u64>() * BITMAP_LENGTH; |
| 183 | + reader.seek(SeekFrom::Current(bitmap_size as i64))?; |
| 184 | + continue; |
| 185 | + } |
| 186 | + } |
| 187 | + }; |
| 188 | + |
| 189 | + if let Some(container) = container { |
| 190 | + let mut other_container = Container { key, store }; |
| 191 | + other_container &= container; |
| 192 | + if !other_container.is_empty() { |
| 193 | + containers.push(other_container); |
| 194 | + } |
| 195 | + } |
| 196 | + } |
| 197 | + |
| 198 | + Ok(RoaringBitmap { containers }) |
| 199 | + } |
| 200 | + |
| 201 | + fn intersection_with_serialized_impl_with_offsets<R, A, AErr, B, BErr>( |
| 202 | + &self, |
| 203 | + mut reader: R, |
| 204 | + a: A, |
| 205 | + b: B, |
| 206 | + descriptions: &[[u16; 2]], |
| 207 | + offsets: &[u32], |
| 208 | + run_container_bitmap: Option<&[u8]>, |
| 209 | + ) -> io::Result<RoaringBitmap> |
| 210 | + where |
| 211 | + R: io::Read + io::Seek, |
| 212 | + A: Fn(Vec<u16>) -> Result<ArrayStore, AErr>, |
| 213 | + AErr: Error + Send + Sync + 'static, |
| 214 | + B: Fn(u64, Box<[u64; 1024]>) -> Result<BitmapStore, BErr>, |
| 215 | + BErr: Error + Send + Sync + 'static, |
| 216 | + { |
| 217 | + let mut containers = Vec::new(); |
| 218 | + for container in &self.containers { |
| 219 | + let i = match descriptions.binary_search_by_key(&container.key, |[k, _]| *k) { |
| 220 | + Ok(index) => index, |
| 221 | + Err(_) => continue, |
| 222 | + }; |
| 223 | + |
| 224 | + reader.seek(SeekFrom::Start(offsets[i] as u64))?; |
| 225 | + |
| 226 | + let [key, len_minus_one] = descriptions[i]; |
| 227 | + let cardinality = u64::from(len_minus_one) + 1; |
| 228 | + |
| 229 | + let is_run_container = |
| 230 | + run_container_bitmap.as_ref().is_some_and(|bm| bm[i / 8] & (1 << (i % 8)) != 0); |
| 231 | + |
| 232 | + let store = if is_run_container { |
| 233 | + let runs = reader.read_u16::<LittleEndian>().unwrap(); |
| 234 | + let mut intervals = vec![[0, 0]; runs as usize]; |
| 235 | + reader.read_exact(cast_slice_mut(&mut intervals)).unwrap(); |
| 236 | + intervals.iter_mut().for_each(|[s, len]| { |
| 237 | + *s = u16::from_le(*s); |
| 238 | + *len = u16::from_le(*len); |
| 239 | + }); |
| 240 | + |
| 241 | + let cardinality = intervals.iter().map(|[_, len]| *len as usize).sum(); |
| 242 | + let mut store = Store::with_capacity(cardinality); |
| 243 | + intervals.into_iter().try_for_each(|[s, len]| -> Result<(), io::ErrorKind> { |
| 244 | + let end = s.checked_add(len).ok_or(io::ErrorKind::InvalidData)?; |
| 245 | + store.insert_range(RangeInclusive::new(s, end)); |
| 246 | + Ok(()) |
| 247 | + })?; |
| 248 | + store |
| 249 | + } else if cardinality <= ARRAY_LIMIT { |
| 250 | + let mut values = vec![0; cardinality as usize]; |
| 251 | + reader.read_exact(cast_slice_mut(&mut values)).unwrap(); |
| 252 | + values.iter_mut().for_each(|n| *n = u16::from_le(*n)); |
| 253 | + let array = a(values).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; |
| 254 | + Store::Array(array) |
| 255 | + } else { |
| 256 | + let mut values = Box::new([0; BITMAP_LENGTH]); |
| 257 | + reader.read_exact(cast_slice_mut(&mut values[..])).unwrap(); |
| 258 | + values.iter_mut().for_each(|n| *n = u64::from_le(*n)); |
| 259 | + let bitmap = b(cardinality, values) |
| 260 | + .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; |
| 261 | + Store::Bitmap(bitmap) |
| 262 | + }; |
| 263 | + |
| 264 | + let mut other_container = Container { key, store }; |
| 265 | + other_container &= container; |
| 266 | + if !other_container.is_empty() { |
| 267 | + containers.push(other_container); |
| 268 | + } |
| 269 | + } |
| 270 | + |
57 | 271 | Ok(RoaringBitmap { containers }) |
58 | 272 | } |
59 | 273 |
|
@@ -280,27 +494,6 @@ trait VisitorHandler { |
280 | 494 | ) -> io::Result<Option<Container>>; |
281 | 495 | } |
282 | 496 |
|
283 | | -struct BitAndHandler; |
284 | | - |
285 | | -impl VisitorHandler for BitAndHandler { |
286 | | - fn handle_left_only(&mut self, _container: &Container) -> io::Result<Option<Container>> { |
287 | | - Ok(None) |
288 | | - } |
289 | | - |
290 | | - fn handel_matched( |
291 | | - &mut self, |
292 | | - left: &Container, |
293 | | - mut right: Container, |
294 | | - ) -> io::Result<Option<Container>> { |
295 | | - right &= left; |
296 | | - if right.is_empty() { |
297 | | - Ok(None) |
298 | | - } else { |
299 | | - Ok(Some(right)) |
300 | | - } |
301 | | - } |
302 | | -} |
303 | | - |
304 | 497 | struct BitOrHandler; |
305 | 498 |
|
306 | 499 | impl VisitorHandler for BitOrHandler { |
|
0 commit comments