@@ -22,6 +22,7 @@ import {documentationPageUrls} from "../../../../config.js";
2222import { Llama } from "../../../../bindings/Llama.js" ;
2323import { toBytes } from "../../../utils/toBytes.js" ;
2424import { padSafeContextSize } from "../../../../evaluator/LlamaContext/utils/padSafeContextSize.js" ;
25+ import { getPlatform } from "../../../../bindings/utils/getPlatform.js" ;
2526
2627type InspectMeasureCommand = {
2728 modelPath ?: string ,
@@ -37,6 +38,7 @@ type InspectMeasureCommand = {
3738 measures : number ,
3839 memory : "vram" | "ram" | "all" ,
3940 noMmap : boolean ,
41+ noDirectIo : boolean ,
4042 printHeaderBeforeEachLayer ?: boolean ,
4143 evaluateText ?: string ,
4244 repeatEvaluateText ?: number
@@ -135,6 +137,11 @@ export const InspectMeasureCommand: CommandModule<object, InspectMeasureCommand>
135137 default : false ,
136138 description : "Disable mmap (memory-mapped file) usage"
137139 } )
140+ . option ( "noDirectIo" , {
141+ type : "boolean" ,
142+ default : false ,
143+ description : "Disable Direct I/O usage when available"
144+ } )
138145 . option ( "printHeaderBeforeEachLayer" , {
139146 alias : "ph" ,
140147 type : "boolean" ,
@@ -155,7 +162,8 @@ export const InspectMeasureCommand: CommandModule<object, InspectMeasureCommand>
155162 } ,
156163 async handler ( {
157164 modelPath : ggufPath , header : headerArg , gpu, minLayers, maxLayers, minContextSize, maxContextSize, flashAttention, swaFullCache,
158- batchSize, measures = 10 , memory : measureMemoryType , noMmap, printHeaderBeforeEachLayer = true , evaluateText, repeatEvaluateText
165+ batchSize, measures = 10 , memory : measureMemoryType , noMmap, noDirectIo, printHeaderBeforeEachLayer = true , evaluateText,
166+ repeatEvaluateText
159167 } : InspectMeasureCommand ) {
160168 if ( maxLayers === - 1 ) maxLayers = undefined ;
161169 if ( maxContextSize === - 1 ) maxContextSize = undefined ;
@@ -174,7 +182,9 @@ export const InspectMeasureCommand: CommandModule<object, InspectMeasureCommand>
174182 logLevel : LlamaLogLevel . error
175183 } ) ;
176184
185+ const platform = getPlatform ( ) ;
177186 const useMmap = ! noMmap && llama . supportsMmap ;
187+ const useDirectIo = ! noDirectIo ;
178188 const resolvedGgufPath = await resolveCommandGgufPath ( ggufPath , llama , headers , {
179189 flashAttention, swaFullCache, useMmap
180190 } ) ;
@@ -188,6 +198,14 @@ export const InspectMeasureCommand: CommandModule<object, InspectMeasureCommand>
188198 ? "enabled"
189199 : "disabled"
190200 ) ) ;
201+
202+ if ( platform !== "mac" ) // Direct I/O is not supported on macOS
203+ console . info ( chalk . yellow ( "Direct I/O:" ) + " " + (
204+ useDirectIo
205+ ? "enabled"
206+ : "disabled"
207+ ) ) ;
208+
191209 if ( measureMemoryType === "ram" || measureMemoryType === "all" )
192210 console . warn ( chalk . yellow ( "RAM measurements are greatly inaccurate due to OS optimizations that prevent released memory from being immediately available" ) ) ;
193211
@@ -221,6 +239,7 @@ export const InspectMeasureCommand: CommandModule<object, InspectMeasureCommand>
221239 const done = await measureModel ( {
222240 modelPath : resolvedGgufPath ,
223241 useMmap,
242+ useDirectIo,
224243 gpu : gpu == null
225244 ? undefined
226245 : llama . gpu ,
@@ -513,11 +532,12 @@ const detectedFileName = path.basename(__filename);
513532const expectedFileName = "InspectMeasureCommand" ;
514533
515534async function measureModel ( {
516- modelPath, useMmap, gpu, tests, initialMaxContextSize, maxContextSize, minContextSize, maxGpuLayers, minGpuLayers, flashAttention ,
517- swaFullCache, batchSize, evaluateText, exitAfterMeasurement = false , onInfo
535+ modelPath, useMmap, useDirectIo , gpu, tests, initialMaxContextSize, maxContextSize, minContextSize, maxGpuLayers, minGpuLayers,
536+ flashAttention , swaFullCache, batchSize, evaluateText, exitAfterMeasurement = false , onInfo
518537} : {
519538 modelPath : string ,
520539 useMmap ?: boolean ,
540+ useDirectIo ?: boolean ,
521541 gpu ?: BuildGpu | "auto" ,
522542 tests : number ,
523543 initialMaxContextSize ?: number ,
@@ -628,6 +648,7 @@ async function measureModel({
628648 type : "start" ,
629649 modelPath,
630650 useMmap,
651+ useDirectIo,
631652 tests,
632653 initialMaxContextSize,
633654 maxContextSize,
@@ -828,19 +849,20 @@ async function runTestWorkerLogic() {
828849 }
829850
830851 async function testWithGpuLayers ( {
831- modelPath, useMmap, gpuLayers, tests, startContextSize, maxContextSize, minContextSize, flashAttention, swaFullCache, batchSize ,
832- evaluateText, exitAfterMeasurement = false
852+ modelPath, useMmap, useDirectIo , gpuLayers, tests, startContextSize, maxContextSize, minContextSize, flashAttention, swaFullCache,
853+ batchSize , evaluateText, exitAfterMeasurement = false
833854 } : {
834- modelPath : string , useMmap ?: boolean , gpuLayers : number , tests : number , startContextSize ? : number , maxContextSize ?: number ,
835- minContextSize ?: number , flashAttention ?: boolean , swaFullCache ?: boolean , batchSize ?: number , evaluateText ?: string ,
836- exitAfterMeasurement ?: boolean
855+ modelPath : string , useMmap ?: boolean , useDirectIo ?: boolean , gpuLayers : number , tests : number , startContextSize ?: number ,
856+ maxContextSize ?: number , minContextSize ?: number , flashAttention ?: boolean , swaFullCache ?: boolean , batchSize ?: number ,
857+ evaluateText ?: string , exitAfterMeasurement ?: boolean
837858 } ) {
838859 try {
839860 const preModelVramUsage = ( await llama . getVramState ( ) ) . used ;
840861 const preModelRamUsage = getMemoryUsage ( llama ) ;
841862 const model = await llama . loadModel ( {
842863 modelPath,
843864 useMmap,
865+ useDirectIo,
844866 gpuLayers,
845867 defaultContextFlashAttention : flashAttention ,
846868 defaultContextSwaFullCache : swaFullCache ,
@@ -908,6 +930,7 @@ async function runTestWorkerLogic() {
908930 const measurementsDone = await testWithGpuLayers ( {
909931 modelPath : message . modelPath ,
910932 useMmap : message . useMmap ,
933+ useDirectIo : message . useDirectIo ,
911934 gpuLayers,
912935 tests : message . tests ,
913936 startContextSize : gpuLayers == message . maxGpuLayers
@@ -1005,6 +1028,7 @@ type ParentToChildMessage = {
10051028 type : "start" ,
10061029 modelPath : string ,
10071030 useMmap ?: boolean ,
1031+ useDirectIo ?: boolean ,
10081032 tests : number ,
10091033 maxGpuLayers : number ,
10101034 minGpuLayers ?: number ,
0 commit comments