Skip to content

Commit 287a330

Browse files
authored
llama : Extend fallback, fix fileno for dio file, exclude case that mmap uses dio file (ggml-org#18887)
1 parent 293a156 commit 287a330

2 files changed

Lines changed: 17 additions & 7 deletions

File tree

src/llama-mmap.cpp

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -265,7 +265,8 @@ struct llama_file::impl {
265265
continue; // Interrupted by signal, retry
266266
}
267267
// Fallback to std::fread in case the DMA controller cannot access the buffer
268-
if (errno == EFAULT) {
268+
if (errno == EFAULT || errno == EINVAL) {
269+
LLAMA_LOG_WARN("%s: Falling back to buffered IO due to %s\n", __func__, strerror(errno));
269270
auto curr_off = tell();
270271
close(fd);
271272
fd = -1;
@@ -384,6 +385,9 @@ int llama_file::file_id() const {
384385
#ifdef _WIN32
385386
return _fileno(pimpl->fp);
386387
#else
388+
if (pimpl->fd != -1) {
389+
return pimpl->fd;
390+
}
387391
#if defined(fileno)
388392
return fileno(pimpl->fp);
389393
#else

src/llama-model-loader.cpp

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -539,12 +539,18 @@ llama_model_loader::llama_model_loader(
539539
files.emplace_back(new llama_file(fname.c_str(), "rb", use_direct_io));
540540
contexts.emplace_back(ctx);
541541

542-
use_direct_io = use_direct_io && files.back()->has_direct_io();
543-
544-
// Disable mmap in case Direct I/O is enabled and available
545-
if (use_direct_io && use_mmap) {
546-
use_mmap = false;
547-
LLAMA_LOG_WARN("%s: direct I/O is enabled, disabling mmap\n", __func__);
542+
if (use_mmap && use_direct_io) {
543+
if (files.back()->has_direct_io()) {
544+
// Disable mmap, as DirectIO is available
545+
use_mmap = false;
546+
LLAMA_LOG_WARN("%s: direct I/O is enabled, disabling mmap\n", __func__);
547+
} else {
548+
// Disable DirectIO and reopen file using std::fopen for mmap
549+
use_direct_io = false;
550+
files.pop_back();
551+
files.emplace_back(new llama_file(fname.c_str(), "rb", false));
552+
LLAMA_LOG_WARN("%s: direct I/O is not available, using mmap\n", __func__);
553+
}
548554
}
549555

550556
// Save tensors data offset of the main file.

0 commit comments

Comments
 (0)