Skip to content

Commit 78b48cd

Browse files
author
birchkwok
committed
Refactor mmap cache handling for Windows compatibility in NumPack
- Updated the mmap cache management to use platform-specific logic on Windows, ensuring that mmap caching does not interfere with file modifications.
- Enhanced comments to clarify that mmap caches must be cleared before file operations on Windows, while noting that Unix platforms do not require this step.
- Improved the organization of the mmap cache cleanup functions to better reflect their platform-specific usage.
1 parent 44c4c58 commit 78b48cd

2 files changed

Lines changed: 36 additions & 18 deletions

File tree

src/lib.rs

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ use pyo3::exceptions::PyKeyError;
2525
use pyo3::prelude::*;
2626
use pyo3::types::{PyDict, PyList, PyTuple};
2727
use pyo3::types::PySlice;
28-
use std::fs::OpenOptions;
28+
use std::fs::{File, OpenOptions};
2929
use std::io::Write;
3030
use num_complex::{Complex32, Complex64};
3131
use ndarray::ArrayD;
@@ -42,7 +42,11 @@ use crate::io::ParallelIO;
4242
use crate::core::DataType;
4343
use crate::storage::{BinaryMetadataStore, BinaryArrayMetadata, BinaryDataType, BinaryCachedStore};
4444
use crate::lazy_array::{OptimizedLazyArray, FastTypeConversion};
45+
46+
// Windows 平台专用:mmap 清理函数
47+
#[cfg(windows)]
4548
use crate::numpack::core::clear_mmap_cache_for_array;
49+
4650
use rayon::prelude::*;
4751
use crate::storage::DeletionBitmap;
4852
use crate::lazy_array::LogicalRowMap;
@@ -3188,15 +3192,25 @@ impl NumPack {
31883192
(dtype, shape, itemsize, modify_time)
31893193
};
31903194

3191-
// 2. 复用mmap缓存,避免重复映射
3195+
// 2. 平台特定的数据加载策略
31923196
let mut filename = String::with_capacity(5 + array_name.len() + 5); // "data_" + name + ".npkd"
31933197
filename.push_str("data_");
31943198
filename.push_str(array_name);
31953199
filename.push_str(".npkd");
31963200
let data_path = self.base_dir.join(&filename);
31973201
let array_path_string = data_path.to_string_lossy().to_string();
31983202

3199-
let mmap_arc = {
3203+
// Windows 平台专用逻辑:
3204+
// 在 Windows 上,eager load 不使用 mmap 缓存,以避免错误 1224
3205+
// (文件被 mmap 打开时无法执行 save/drop/append 等修改操作)
3206+
#[cfg(windows)]
3207+
let use_mmap_cache = false;
3208+
3209+
#[cfg(not(windows))]
3210+
let use_mmap_cache = true;
3211+
3212+
let mmap_arc = if use_mmap_cache {
3213+
// Unix 平台(macOS、Linux):使用 mmap 缓存优化性能
32003214
let mut mmap_cache = MMAP_CACHE.lock().unwrap();
32013215
if let Some((cached_mmap, cached_time)) = mmap_cache.get(&array_path_string) {
32023216
if *cached_time == modify_time {
@@ -3207,6 +3221,12 @@ impl NumPack {
32073221
} else {
32083222
create_optimized_mmap(&data_path, modify_time, &mut mmap_cache)?
32093223
}
3224+
} else {
3225+
// Windows 平台:直接创建 mmap,不缓存
3226+
// 这样在 save/drop/append 时不会有遗留的 mmap 句柄
3227+
let file = File::open(&data_path)?;
3228+
let mmap = unsafe { memmap2::MmapOptions::new().map(&file)? };
3229+
Arc::new(mmap)
32103230
};
32113231

32123232
let mmap_bytes = mmap_arc.as_ref().as_ref();
@@ -3734,8 +3754,11 @@ impl NumPack {
37343754
));
37353755
};
37363756

3737-
// 在Windows上修改文件前,清理mmap缓存以避免错误1224
3757+
// Windows 平台:修改文件前清理 mmap 缓存(lazy load 可能创建的)
3758+
// Unix 平台:系统允许同时 mmap 和修改文件,不需要清理
3759+
#[cfg(windows)]
37383760
clear_mmap_cache_for_array(&self.base_dir, name);
3761+
37393762
self.io.drop_arrays(name, Some(&deleted_indices))?;
37403763

37413764
// 清除元数据缓存
@@ -3749,7 +3772,9 @@ impl NumPack {
37493772

37503773
Ok(())
37513774
} else {
3752-
// 批量删除数组前,清理所有相关的mmap缓存
3775+
// Windows 平台:批量删除数组前清理 mmap 缓存
3776+
// Unix 平台:系统允许同时 mmap 和修改文件,不需要清理
3777+
#[cfg(windows)]
37533778
for name in &names {
37543779
clear_mmap_cache_for_array(&self.base_dir, name);
37553780
}

src/numpack/core.rs

Lines changed: 6 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,18 @@ lazy_static::lazy_static! {
2424
static ref MMAP_CACHE: Mutex<HashMap<String, (Arc<Mmap>, i64)>> = Mutex::new(HashMap::new());
2525
}
2626

27-
/// Clears the mmap cache entry for the given file (must be called before modifying the file on Windows)
27+
/// Clears the mmap cache entry for the given file (Windows only).
28+
/// Must be called before modifying a file on Windows to avoid error 1224.
29+
/// Not needed on Unix platforms (the system allows a file to be mmapped and modified at the same time).
/// If `MMAP_CACHE.lock()` returns an error, the cache is left untouched and no error is reported.
30+
#[cfg(windows)]
2831
pub(crate) fn clear_mmap_cache_for_file(file_path: &str) {
2932
if let Ok(mut cache) = MMAP_CACHE.lock() {
3033
cache.remove(file_path);
3134
}
3235
}
3336

34-
/// 清理指定数组的所有相关文件的mmap缓存
37+
/// 清理指定数组的所有相关文件的mmap缓存(Windows平台专用)
38+
#[cfg(windows)]
3539
pub(crate) fn clear_mmap_cache_for_array(base_dir: &Path, array_name: &str) {
3640
let data_path = base_dir.join(format!("data_{}.npkd", array_name));
3741
clear_mmap_cache_for_file(&data_path.to_string_lossy());
@@ -74,17 +78,6 @@ impl NumPack {
7478
}
7579

7680
fn save(&self, arrays: &Bound<'_, PyDict>, array_name: Option<String>) -> PyResult<()> {
77-
// Windows 修复:在保存之前清理所有要写入的数组的 mmap 缓存
78-
// 这样可以避免 Windows 错误 1224(文件被 mmap 打开时无法写入)
79-
for (i, (key, _value)) in arrays.iter().enumerate() {
80-
let name = if let Some(prefix) = &array_name {
81-
format!("{}{}", prefix, i)
82-
} else {
83-
key.extract::<String>().unwrap_or_default()
84-
};
85-
clear_mmap_cache_for_array(&self.base_dir, &name);
86-
}
87-
8881
let mut bool_arrays = Vec::new();
8982
let mut u8_arrays = Vec::new();
9083
let mut u16_arrays = Vec::new();

0 commit comments

Comments
 (0)