Skip to content

Commit 5faaad6

Browse files
committed
chore: moved base-embedding to abstract class and added init method
1 parent b9c5eb1 commit 5faaad6

14 files changed

Lines changed: 102 additions & 59 deletions

File tree

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
1-
export interface BaseEmbeddings {
2-
embedDocuments(texts: string[]): Promise<number[][]>;
3-
embedQuery(text: string): Promise<number[]>;
4-
getDimensions(): Promise<number>;
1+
export abstract class BaseEmbeddings {
2+
// eslint-disable-next-line @typescript-eslint/no-empty-function
3+
public async init(): Promise<void> {}
4+
5+
public abstract embedDocuments(texts: string[]): Promise<number[][]>;
6+
public abstract embedQuery(text: string): Promise<number[]>;
7+
public abstract getDimensions(): Promise<number>;
58
}

core/embedjs/src/core/rag-application.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,6 @@ export class RAGApplication {
4444

4545
this.searchResultCount = llmBuilder.getSearchResultCount();
4646
this.embeddingRelevanceCutOff = llmBuilder.getEmbeddingRelevanceCutOff();
47-
48-
RAGEmbedding.init(llmBuilder.getEmbeddingModel());
4947
}
5048

5149
/**
@@ -57,6 +55,8 @@ export class RAGApplication {
5755
* LLM based on the configuration provided
5856
*/
5957
public async init(llmBuilder: RAGApplicationBuilder) {
58+
await RAGEmbedding.init(llmBuilder.getEmbeddingModel());
59+
6060
this.model = await this.getModel(llmBuilder.getModel());
6161
if (!this.model) this.debug('No base model set; query function unavailable!');
6262
else BaseModel.setDefaultTemperature(llmBuilder.getTemperature());

core/embedjs/src/core/rag-embedding.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,9 @@ import { Document } from 'langchain/document';
44
export class RAGEmbedding {
55
private static singleton: RAGEmbedding;
66

7-
public static init(embeddingModel: BaseEmbeddings) {
7+
public static async init(embeddingModel: BaseEmbeddings) {
88
if (!this.singleton) {
9+
await embeddingModel.init();
910
this.singleton = new RAGEmbedding(embeddingModel);
1011
}
1112
}

databases/embedjs-libsql/src/libsql-store.ts

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -116,13 +116,17 @@ export class LibSqlStore implements BaseStore {
116116
}
117117

118118
async loaderCustomSet<T extends Record<string, unknown>>(loaderId: string, key: string, value: T): Promise<void> {
119+
this.debug(`LibSQL custom set '${key}' with values`, value);
119120
await this.loaderCustomDelete(key);
120121

121-
await this.client.execute({
122+
this.debug(`LibSQL custom set '${key}' insert started`);
123+
const results = await this.client.execute({
122124
sql: `INSERT INTO ${this.loadersCustomDataTableName} (key, loaderId, value)
123125
VALUES (?, ?, ?)`,
124126
args: [key, loaderId, JSON.stringify(value)],
125127
});
128+
129+
this.debug(`LibSQL custom set for key '${key}' resulted in`, results.rows);
126130
}
127131

128132
async loaderCustomGet<T extends Record<string, unknown>>(key: string): Promise<T> {
@@ -144,10 +148,15 @@ export class LibSqlStore implements BaseStore {
144148
}
145149

146150
async loaderCustomDelete(key: string): Promise<void> {
147-
await this.client.execute(`DELETE FROM ${this.loadersCustomDataTableName} WHERE key = '${key}';`);
151+
this.debug(`LibSQL custom delete '${key}'`);
152+
const results = await this.client.execute(
153+
`DELETE FROM ${this.loadersCustomDataTableName} WHERE key = '${key}';`,
154+
);
155+
this.debug(`LibSQL custom delete for key '${key}' resulted in`, results.rowsAffected);
148156
}
149157

150158
async deleteLoaderMetadataAndCustomValues(loaderId: string): Promise<void> {
159+
this.debug(`LibSQL deleteLoaderMetadataAndCustomValues for loader '${loaderId}'`);
151160
await this.client.execute(`DELETE FROM ${this.loadersTableName} WHERE id = '${loaderId}';`);
152161
await this.client.execute(`DELETE FROM ${this.loadersCustomDataTableName} WHERE loaderId = '${loaderId}';`);
153162
}

examples/confluence/src/main.ts

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,16 @@
11
import 'dotenv/config';
2+
import path from 'node:path';
23
import { RAGApplicationBuilder } from '@llm-tools/embedjs';
4+
import { LibSqlDb, LibSqlStore } from '@llm-tools/embedjs-libsql';
35
import { OpenAi, OpenAiEmbeddings } from '@llm-tools/embedjs-openai';
46
import { ConfluenceLoader } from '@llm-tools/embedjs-loader-confluence';
5-
import { HNSWDb } from '@llm-tools/embedjs-hnswlib';
67

8+
const databasePath = path.resolve('./examples/confluence/data.db');
79
const llmApplication = await new RAGApplicationBuilder()
10+
.setStore(new LibSqlStore({ path: databasePath }))
11+
.setVectorDatabase(new LibSqlDb({ path: databasePath }))
812
.setModel(new OpenAi({ modelName: 'gpt-4o' }))
913
.setEmbeddingModel(new OpenAiEmbeddings())
10-
.setVectorDatabase(new HNSWDb())
1114
.build();
1215

1316
await llmApplication.addLoader(new ConfluenceLoader({ spaceNames: ['DEMO'] }));

loaders/embedjs-loader-confluence/src/confluence-loader.ts

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -63,12 +63,13 @@ export class ConfluenceLoader extends BaseLoader<{ type: 'ConfluenceLoader' }, {
6363

6464
private async *processSpace(spaceKey: string) {
6565
this.debug('Processing space', spaceKey);
66+
6667
try {
6768
const spaceContent = await this.confluence.space.getContentForSpace({ spaceKey });
6869
this.debug(`Confluence space '${spaceKey}' has '${spaceContent['page'].results.length}' root pages`);
6970

70-
for (const { id } of spaceContent['page'].results) {
71-
for await (const result of this.processPage(id)) {
71+
for (const { id, title } of spaceContent['page'].results) {
72+
for await (const result of this.processPage(id, title)) {
7273
yield result;
7374
}
7475
}
@@ -78,8 +79,10 @@ export class ConfluenceLoader extends BaseLoader<{ type: 'ConfluenceLoader' }, {
7879
}
7980
}
8081

81-
private async *processPage(pageId: string) {
82+
private async *processPage(pageId: string, title: string) {
83+
this.debug('Processing page', title);
8284
let confluenceVersion = 0;
85+
8386
try {
8487
const spaceProperties = await this.confluence.content.getContentById({
8588
id: pageId,
@@ -89,29 +92,29 @@ export class ConfluenceLoader extends BaseLoader<{ type: 'ConfluenceLoader' }, {
8992
if (!spaceProperties.version.number) throw new Error('Version number not found in space properties...');
9093
confluenceVersion = spaceProperties.version.number;
9194
} catch (e) {
92-
this.debug('Could not get page properties. Page will be SKIPPED!', pageId, e.response);
95+
this.debug('Could not get page properties. Page will be SKIPPED!', title, e.response);
9396
return;
9497
}
9598

9699
let doProcess = false;
97100
if (!(await this.checkInCache(pageId))) {
98-
this.debug(`Processing '${pageId}' for the FIRST time...`);
101+
this.debug(`Processing '${title}' for the FIRST time...`);
99102
doProcess = true;
100103
} else {
101104
const cacheVersion = (await this.getFromCache(pageId)).version;
102105
if (cacheVersion !== confluenceVersion) {
103106
this.debug(
104-
`For page '${pageId}' - version in cache is ${cacheVersion} and confluence version is ${confluenceVersion}. This page will be PROCESSED.`,
107+
`For page '${title}' - version in cache is ${cacheVersion} and confluence version is ${confluenceVersion}. This page will be PROCESSED.`,
105108
);
106109
doProcess = true;
107110
} else
108111
this.debug(
109-
`For page '${pageId}' - version in cache and confluence are the same ${confluenceVersion}. This page will be SKIPPED.`,
112+
`For page '${title}' - version in cache and confluence are the same ${confluenceVersion}. This page will be SKIPPED.`,
110113
);
111114
}
112115

113116
if (!doProcess) {
114-
this.debug(`Skipping page '${pageId}'`);
117+
this.debug(`Skipping page '${title}'`);
115118
return;
116119
}
117120

@@ -126,21 +129,27 @@ export class ConfluenceLoader extends BaseLoader<{ type: 'ConfluenceLoader' }, {
126129
return;
127130
}
128131

132+
this.debug(`Processing content for page '${title}'...`);
129133
for await (const result of this.getContentChunks(content.body.view.value, content._links.webui)) {
130134
yield result;
131135
}
132136

133137
await this.saveToCache(pageId, { version: confluenceVersion });
134138

135139
if (content.children) {
136-
for (const { id } of content.children.page.results) {
137-
for await (const result of this.processPage(id)) {
138-
yield result;
140+
for (const { id, title } of content.children.page.results) {
141+
try {
142+
for await (const result of this.processPage(id, title)) {
143+
yield result;
144+
}
145+
} catch (e) {
146+
this.debug(`Error! Could not process page child '${title}'`, pageId, e);
147+
return;
139148
}
140149
}
141150
}
142151
} catch (e) {
143-
this.debug('Error! Could not process page content or children', pageId, e);
152+
this.debug('Error! Could not process page content', pageId, e);
144153
return;
145154
}
146155
}
Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,28 @@
11
import { CohereEmbeddings as LangChainCohereEmbeddings } from '@langchain/cohere';
22
import { BaseEmbeddings } from '@llm-tools/embedjs-interfaces';
33

4-
export class CohereEmbeddings implements BaseEmbeddings {
4+
export class CohereEmbeddings extends BaseEmbeddings {
55
private model: LangChainCohereEmbeddings;
66

77
constructor() {
8+
super();
9+
810
this.model = new LangChainCohereEmbeddings({
911
model: 'embed-english-v2.0',
1012
maxConcurrency: 3,
1113
maxRetries: 5,
1214
});
1315
}
1416

15-
async getDimensions(): Promise<number> {
17+
override async getDimensions(): Promise<number> {
1618
return 4096;
1719
}
1820

19-
async embedDocuments(texts: string[]): Promise<number[][]> {
21+
override async embedDocuments(texts: string[]): Promise<number[][]> {
2022
return this.model.embedDocuments(texts);
2123
}
2224

23-
async embedQuery(text: string): Promise<number[]> {
25+
override async embedQuery(text: string): Promise<number[]> {
2426
return this.model.embedQuery(text);
2527
}
2628
}
Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,33 @@
11
import { HuggingFaceInferenceEmbeddings } from '@langchain/community/embeddings/hf';
22
import { BaseEmbeddings } from '@llm-tools/embedjs-interfaces';
33

4-
export class HuggingFaceEmbeddings implements BaseEmbeddings {
4+
export class HuggingFaceEmbeddings extends BaseEmbeddings {
55
private model: HuggingFaceInferenceEmbeddings;
66
private dimensions: number | null;
77

88
constructor({ apiKey, model, dimensions }: { apiKey?: string; model?: string; dimensions?: number }) {
9+
super();
10+
911
this.dimensions = dimensions ?? null;
1012
this.model = new HuggingFaceInferenceEmbeddings({
1113
apiKey, //Or set process.env.HUGGINGFACEHUB_API_KEY
1214
model,
1315
});
1416
}
1517

16-
async getDimensions(): Promise<number> {
18+
override async getDimensions(): Promise<number> {
1719
if (this.dimensions === null) {
1820
this.dimensions = (await this.embedQuery('Test')).length;
1921
}
2022

2123
return this.dimensions;
2224
}
2325

24-
async embedDocuments(texts: string[]): Promise<number[][]> {
26+
override async embedDocuments(texts: string[]): Promise<number[][]> {
2527
return this.model.embedDocuments(texts);
2628
}
2729

28-
async embedQuery(text: string): Promise<number[]> {
30+
override async embedQuery(text: string): Promise<number[]> {
2931
return this.model.embedQuery(text);
3032
}
3133
}
Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,42 @@
11
import { BaseEmbeddings } from '@llm-tools/embedjs-interfaces';
22
import { getLlama, Llama, LlamaEmbedding, LlamaEmbeddingContext, LlamaModel } from 'node-llama-cpp';
33

4-
export class LlamaCppEmbeddings implements BaseEmbeddings {
4+
export class LlamaCppEmbeddings extends BaseEmbeddings {
5+
private readonly modelPath: string;
56
private context: LlamaEmbeddingContext;
67

7-
constructor(options: { modelPath: string; }) {
8-
getLlama().then((llama: Llama) => {
9-
llama.loadModel({ modelPath: options.modelPath }).then((model: LlamaModel) => {
8+
constructor({ modelPath }: { modelPath: string }) {
9+
super();
10+
this.modelPath = modelPath;
11+
}
12+
13+
override async init(): Promise<void> {
14+
await getLlama().then((llama: Llama) => {
15+
llama.loadModel({ modelPath: this.modelPath }).then((model: LlamaModel) => {
1016
model.createEmbeddingContext().then((context: LlamaEmbeddingContext) => {
1117
this.context = context;
1218
});
1319
});
1420
});
1521
}
16-
async getDimensions(): Promise<number> {
22+
23+
override async getDimensions(): Promise<number> {
1724
const sample = await this.embedDocuments(['sample']);
1825
return sample[0].length;
1926
}
2027

21-
async embedDocuments(texts: string[]): Promise<number[][]> {
28+
override async embedDocuments(texts: string[]): Promise<number[][]> {
2229
const embeddings = new Map<string, LlamaEmbedding>();
23-
await Promise.all(texts.map(async (document) => {
24-
const embedding = await this.context.getEmbeddingFor(document);
25-
embeddings.set(document, embedding);
26-
}));
30+
await Promise.all(
31+
texts.map(async (document) => {
32+
const embedding = await this.context.getEmbeddingFor(document);
33+
embeddings.set(document, embedding);
34+
}),
35+
);
2736
return Array.from(embeddings).map(([_, embedding]) => embedding.vector as number[]);
2837
}
2938

30-
async embedQuery(text: string): Promise<number[]> {
39+
override async embedQuery(text: string): Promise<number[]> {
3140
return (await this.context.getEmbeddingFor(text)).vector as number[];
3241
}
3342
}

models/embedjs-llama-cpp/src/llama-cpp-model.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import createDebugMessages from 'debug';
22
import { AIMessage, HumanMessage, SystemMessage } from '@langchain/core/messages';
3-
import { BaseModel, ModelResponse } from '@llm-tools/embedjs-interfaces';
43
import { LlamaCpp as ChatLlamaCpp } from '@langchain/community/llms/llama_cpp';
4+
import { BaseModel, ModelResponse } from '@llm-tools/embedjs-interfaces';
55

66
export class LlamaCpp extends BaseModel {
77
private readonly debug = createDebugMessages('embedjs:model:LlamaCpp');

0 commit comments

Comments (0)