Skip to content

Commit 8f7209e

Browse files
committed
feat: initial disk cache dir option for future optimizations (disabled for now)
1 parent 07fa753 commit 8f7209e

5 files changed

Lines changed: 310 additions & 12 deletions

File tree

src/bindings/Llama.ts

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import {LlamaJsonSchemaGrammar} from "../evaluator/LlamaJsonSchemaGrammar.js";
1010
import {LlamaGrammar, LlamaGrammarOptions} from "../evaluator/LlamaGrammar.js";
1111
import {ThreadsSplitter} from "../utils/ThreadsSplitter.js";
1212
import {getLlamaClasses, LlamaClasses} from "../utils/getLlamaClasses.js";
13+
import {getTempDir, FsPathHandle} from "../utils/getTempDir.js";
1314
import {BindingModule} from "./AddonTypes.js";
1415
import {
1516
BuildGpu, BuildMetadataFile, LlamaGpuType, LlamaLocks, LlamaLogLevel,
@@ -46,6 +47,8 @@ export class Llama {
4647
/** @internal */ public readonly _swapOrchestrator: MemoryOrchestrator;
4748
/** @internal */ public readonly _debug: boolean;
4849
/** @internal */ public readonly _threadsSplitter: ThreadsSplitter;
50+
/** @internal */ public readonly _tempDir?: FsPathHandle;
51+
/** @internal */ private _tempDirNextId: number = 0;
4952
/** @internal */ public _hadErrorLogs: boolean = false;
5053
/** @internal */ private readonly _gpu: LlamaGpuType;
5154
/** @internal */ private readonly _numa: LlamaNuma;
@@ -74,7 +77,7 @@ export class Llama {
7477
public readonly onDispose = new EventRelay<void>();
7578

7679
private constructor({
77-
bindings, bindingPath, extBackendsPath, logLevel, logger, buildType, cmakeOptions, llamaCppRelease, debug, numa, buildGpu,
80+
bindings, bindingPath, extBackendsPath, logLevel, logger, buildType, cmakeOptions, llamaCppRelease, debug, tempDir, numa, buildGpu,
7881
maxThreads, vramOrchestrator, vramPadding, ramOrchestrator, ramPadding, swapOrchestrator, skipLlamaInit
7982
}: {
8083
bindings: BindingModule,
@@ -89,6 +92,7 @@ export class Llama {
8992
release: string
9093
},
9194
debug: boolean,
95+
tempDir?: FsPathHandle,
9296
numa?: LlamaNuma,
9397
buildGpu: BuildGpu,
9498
maxThreads?: number,
@@ -104,6 +108,7 @@ export class Llama {
104108

105109
this._bindings = bindings;
106110
this._debug = debug;
111+
this._tempDir = tempDir;
107112
this._numa = numa ?? false;
108113
this._logLevel = this._debug
109114
? LlamaLogLevel.debug
@@ -175,6 +180,7 @@ export class Llama {
175180
this.onDispose.dispatchEvent();
176181
await this._backendDisposeGuard.acquireDisposeLock();
177182
await this._bindings.dispose();
183+
await this._tempDir?.dispose();
178184

179185
process.off("beforeExit", this._onBeforeExit);
180186
unregisterDisposeBeforeExit(this._selfWeakRef);
@@ -413,6 +419,15 @@ export class Llama {
413419
this._onAddonLog(LlamaLogLevelToAddonLogLevel.get(level) ?? defaultLogLevel, message + "\n");
414420
}
415421

422+
/** @internal */
423+
public _createTempFilePath() {
424+
if (this._tempDir == null)
425+
return undefined;
426+
427+
const fileId = this._tempDirNextId++;
428+
return new FsPathHandle(path.join(this._tempDir.path, fileId + ".nlc"));
429+
}
430+
416431
/** @internal */
417432
private _onAddonLog(level: number, message: string) {
418433
const llamaLogLevel = addonLogLevelToLlamaLogLevel.get(level) ?? LlamaLogLevel.fatal;
@@ -507,7 +522,7 @@ export class Llama {
507522
/** @internal */
508523
public static async _create({
509524
bindings, bindingPath, extBackendsPath, buildType, buildMetadata, logLevel, logger, vramPadding, ramPadding, maxThreads,
510-
skipLlamaInit = false, debug, numa
525+
skipLlamaInit = false, debug, numa, tempDir
511526
}: {
512527
bindings: BindingModule,
513528
bindingPath: string,
@@ -521,7 +536,8 @@ export class Llama {
521536
ramPadding: number | ((totalRam: number) => number),
522537
skipLlamaInit?: boolean,
523538
debug: boolean,
524-
numa?: LlamaNuma
539+
numa?: LlamaNuma,
540+
tempDir?: string | string[] | false
525541
}) {
526542
const vramOrchestrator = new MemoryOrchestrator(() => {
527543
const {total, used, unifiedSize} = bindings.getGpuVramInfo();
@@ -566,6 +582,14 @@ export class Llama {
566582
else
567583
resolvedRamPadding = ramOrchestrator.reserveMemory(ramPadding);
568584

585+
const resolvedTempDir = tempDir === false
586+
? undefined
587+
: await getTempDir(
588+
typeof tempDir === "string"
589+
? [tempDir]
590+
: tempDir
591+
);
592+
569593
const llama = new Llama({
570594
bindings,
571595
bindingPath,
@@ -579,6 +603,7 @@ export class Llama {
579603
logLevel,
580604
logger,
581605
debug,
606+
tempDir: resolvedTempDir,
582607
numa,
583608
buildGpu: buildMetadata.buildOptions.gpu,
584609
vramOrchestrator,

src/bindings/getLlama.ts

Lines changed: 65 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,27 @@ export type LlamaOptions = {
208208
*
209209
* Defaults to `false` (no NUMA policy).
210210
*/
211-
numa?: LlamaNuma
211+
numa?: LlamaNuma,
212+
213+
/**
214+
* A list of directories to use for internal cache to speed up processes and reduce memory usage.
215+
* Cache created by this process will be automatically cleaned up when it's no longer needed,
216+
* on process exit, and on the disposal of the Llama instance.
217+
*
218+
* The first directory that is writable from this list will be used.
219+
*
220+
     * If no directories are provided or none are writable, these fallbacks will be used instead:
221+
* - The directory of where `node-llama-cpp` is installed inside `node_modules`.
222+
* - A `nlc.` prefixed directory under the OS's temp dir (e.g. `/tmp` on Linux).
223+
* - A `.node-llama-cpp/.temp` directory under the home directory of the current user.
224+
*
225+
* To disable the usage of any temporary directory and caching, set this option to `false`.
226+
*
227+
*
228+
* Hidden since currently unused - defaults to `false` for now
229+
* @internal
230+
*/
231+
tempDir?: string | string[] | false
212232
};
213233

214234
export type LastBuildOptions = {
@@ -318,7 +338,27 @@ export type LastBuildOptions = {
318338
*
319339
* Defaults to `false` (no NUMA policy).
320340
*/
321-
numa?: LlamaNuma
341+
numa?: LlamaNuma,
342+
343+
/**
344+
* A list of directories to use for internal cache to speed up processes and reduce memory usage.
345+
* Cache created by this process will be automatically cleaned up when it's no longer needed,
346+
* on process exit, and on the disposal of the Llama instance.
347+
*
348+
* The first directory that is writable from this list will be used.
349+
*
350+
     * If no directories are provided or none are writable, these fallbacks will be used instead:
351+
* - The directory of where `node-llama-cpp` is installed inside `node_modules`.
352+
* - A `nlc.` prefixed directory under the OS's temp dir (e.g. `/tmp` on Linux).
353+
* - A `.node-llama-cpp/.temp` directory under the home directory of the current user.
354+
*
355+
* To disable the usage of any temporary directory and caching, set this option to `false`.
356+
*
357+
*
358+
* Hidden since currently unused - defaults to `false` for now
359+
* @internal
360+
*/
361+
tempDir?: string | string[] | false
322362
};
323363

324364
export const getLlamaFunctionName = "getLlama";
@@ -335,6 +375,7 @@ export const defaultLlamaRamPadding = (totalRam: number) => {
335375
const defaultBuildOption: Exclude<LlamaOptions["build"], undefined> = runningInElectron
336376
? "never"
337377
: "auto";
378+
const defaultTempDir: LlamaOptions["tempDir"] = false;
338379

339380
/**
340381
* Get a `llama.cpp` binding.
@@ -377,6 +418,7 @@ export async function getLlama(options?: LlamaOptions | "lastBuild", lastBuildOp
377418
ramPadding: lastBuildOptions?.ramPadding ?? defaultLlamaRamPadding,
378419
debug: lastBuildOptions?.debug ?? defaultLlamaCppDebugMode,
379420
numa: lastBuildOptions?.numa,
421+
tempDir: lastBuildOptions?.tempDir ?? defaultTempDir,
380422
dryRun
381423
};
382424

@@ -405,6 +447,7 @@ export async function getLlama(options?: LlamaOptions | "lastBuild", lastBuildOp
405447
ramPadding: lastBuildOptions?.ramPadding ?? defaultLlamaRamPadding,
406448
debug: lastBuildOptions?.debug ?? defaultLlamaCppDebugMode,
407449
numa: lastBuildOptions?.numa,
450+
tempDir: lastBuildOptions?.tempDir ?? defaultTempDir,
408451
skipLlamaInit: dryRun
409452
});
410453

@@ -440,6 +483,7 @@ export async function getLlamaForOptions({
440483
ramPadding = defaultLlamaRamPadding,
441484
debug = defaultLlamaCppDebugMode,
442485
numa = false,
486+
tempDir = defaultTempDir,
443487
dryRun = false
444488
}: LlamaOptions, {
445489
updateLastBuildInfoOnCompile = false,
@@ -514,6 +558,7 @@ export async function getLlamaForOptions({
514558
ramPadding,
515559
debug,
516560
numa,
561+
tempDir,
517562
dryRun
518563
});
519564
} catch (err) {
@@ -532,6 +577,7 @@ export async function getLlamaForOptions({
532577
ramPadding,
533578
debug,
534579
numa,
580+
tempDir,
535581
dryRun
536582
});
537583
}
@@ -579,6 +625,7 @@ export async function getLlamaForOptions({
579625
),
580626
debug,
581627
numa,
628+
tempDir,
582629
pipeBinaryTestErrorLogs
583630
});
584631

@@ -614,7 +661,8 @@ export async function getLlamaForOptions({
614661
ramPadding,
615662
skipLlamaInit,
616663
debug,
617-
numa
664+
numa,
665+
tempDir
618666
});
619667
} catch (err) {
620668
console.error(
@@ -691,7 +739,8 @@ export async function getLlamaForOptions({
691739
ramPadding,
692740
skipLlamaInit,
693741
debug,
694-
numa
742+
numa,
743+
tempDir
695744
});
696745
} catch (err) {
697746
console.error(
@@ -737,6 +786,7 @@ async function loadExistingLlamaBinary({
737786
fallbackMessage,
738787
debug,
739788
numa,
789+
tempDir,
740790
pipeBinaryTestErrorLogs
741791
}: {
742792
buildOptions: BuildOptions,
@@ -754,6 +804,7 @@ async function loadExistingLlamaBinary({
754804
fallbackMessage: string | null,
755805
debug: boolean,
756806
numa?: LlamaNuma,
807+
tempDir: LlamaOptions["tempDir"],
757808
pipeBinaryTestErrorLogs: boolean
758809
}) {
759810
const buildFolderName = await getBuildFolderNameForBuildOptions(buildOptions);
@@ -791,7 +842,8 @@ async function loadExistingLlamaBinary({
791842
ramPadding,
792843
skipLlamaInit,
793844
debug,
794-
numa
845+
numa,
846+
tempDir
795847
});
796848
} else if (progressLogs) {
797849
console.warn(
@@ -857,7 +909,8 @@ async function loadExistingLlamaBinary({
857909
ramPadding,
858910
skipLlamaInit,
859911
debug,
860-
numa
912+
numa,
913+
tempDir
861914
});
862915
} else if (progressLogs) {
863916
const binaryDescription = describeBinary({
@@ -913,7 +966,8 @@ async function buildAndLoadLlamaBinary({
913966
ramPadding,
914967
skipLlamaInit,
915968
debug,
916-
numa
969+
numa,
970+
tempDir
917971
}: {
918972
buildOptions: BuildOptions,
919973
skipDownload: boolean,
@@ -925,7 +979,8 @@ async function buildAndLoadLlamaBinary({
925979
ramPadding: Required<LlamaOptions>["ramPadding"],
926980
skipLlamaInit: boolean,
927981
debug: boolean,
928-
numa?: LlamaNuma
982+
numa?: LlamaNuma,
983+
tempDir: LlamaOptions["tempDir"]
929984
}) {
930985
const buildFolderName = await getBuildFolderNameForBuildOptions(buildOptions);
931986

@@ -960,7 +1015,8 @@ async function buildAndLoadLlamaBinary({
9601015
ramPadding,
9611016
skipLlamaInit,
9621017
debug,
963-
numa
1018+
numa,
1019+
tempDir
9641020
});
9651021
}
9661022

src/config.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,13 @@ export const llamaPrebuiltBinsDirectory = path.join(__dirname, "..", "bins");
2020
export const llamaLocalBuildBinsDirectory = path.join(llamaDirectory, "localBuilds");
2121
export const llamaBinsGrammarsDirectory = path.join(__dirname, "..", "llama", "grammars");
2222
export const projectTemplatesDirectory = path.join(__dirname, "..", "templates");
23+
export const localTempDirectory = path.join(__dirname, "..", ".temp");
2324
export const packedProjectTemplatesDirectory = path.join(projectTemplatesDirectory, "packed");
2425
export const llamaCppDirectory = path.join(llamaDirectory, "llama.cpp");
2526
export const llamaCppGrammarsDirectory = path.join(llamaDirectory, "llama.cpp", "grammars");
2627
export const tempDownloadDirectory = path.join(os.tmpdir(), "node-llama-cpp", nanoid());
2728
export const cliHomedirDirectory = path.join(os.homedir(), ".node-llama-cpp");
29+
export const cliHomedirTempDirectory = path.join(os.homedir(), ".node-llama-cpp", ".temp");
2830
export const chatCommandHistoryFilePath = path.join(cliHomedirDirectory, ".chat_repl_history");
2931
export const cliModelsDirectory = path.join(cliHomedirDirectory, "models");
3032
export const lastBuildInfoJsonPath = path.join(llamaDirectory, "lastBuild.json");

src/utils/getFirstWritableDir.ts

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
import path from "path";
2+
import fs from "fs-extra";
3+
import {runningInElectron} from "./runtime.js";
4+
import {LruCache} from "./LruCache.js";
5+
6+
export async function getFirstWritableDir(dirPaths: string[]): Promise<string | null> {
7+
for (const dirPath of dirPaths) {
8+
if (isPathInsideAsar(dirPath))
9+
continue;
10+
11+
if (await isPathWritable(dirPath))
12+
return dirPath;
13+
}
14+
15+
return null;
16+
}
17+
18+
const writableCheckCache = new LruCache<string, boolean>(20);
19+
export async function isPathWritableWithCache(dirPath: string) {
20+
const isWritable = writableCheckCache.get(dirPath) ?? await isPathWritable(dirPath);
21+
writableCheckCache.set(dirPath, isWritable);
22+
return isWritable;
23+
}
24+
25+
export async function isPathWritable(dirPath: string) {
26+
let checkPath = path.resolve(dirPath);
27+
28+
while (true) {
29+
try {
30+
const stat = await fs.lstat(checkPath);
31+
if (!stat.isDirectory())
32+
return false;
33+
34+
break;
35+
} catch (error: any) {
36+
if (error?.code !== "ENOENT")
37+
return false;
38+
39+
const nextCheckPath = path.dirname(checkPath);
40+
if (nextCheckPath === checkPath)
41+
return false;
42+
43+
checkPath = nextCheckPath;
44+
}
45+
}
46+
47+
try {
48+
await fs.access(path.dirname(checkPath), fs.constants.W_OK | fs.constants.X_OK);
49+
} catch {
50+
return false;
51+
}
52+
53+
return true;
54+
}
55+
56+
/**
57+
* Check whether a path is inside an asar when running in Electron,
58+
* which means that the path is not writable and inaccessible outside the Electron app.
59+
*/
60+
export function isPathInsideAsar(dirPath: string, excludeUnpacked: boolean = false) {
61+
if (!runningInElectron)
62+
return false;
63+
64+
const normalizedPath = dirPath.toLowerCase();
65+
if (normalizedPath.endsWith(".asar") ||
66+
(!excludeUnpacked && normalizedPath.endsWith(".asar.unpacked"))
67+
)
68+
return true;
69+
70+
return normalizedPath.includes(".asar" + path.sep) ||
71+
(!excludeUnpacked && normalizedPath.includes(".asar.unpacked" + path.sep));
72+
}

0 commit comments

Comments
 (0)