Azure-Samples
diff --git a/‎ai/vector-search-typescript/.env.example‎
Lines changed: 31 additions & 0 deletions b/‎ai/vector-search-typescript/.env.example‎
Lines changed: 31 additions & 0 deletions
diff --git a/‎ai/vector-search-typescript/package.json‎
Lines changed: 25 additions & 0 deletions b/‎ai/vector-search-typescript/package.json‎
Lines changed: 25 additions & 0 deletions
diff --git a/‎ai/vector-search-typescript/src/create-embeddings.ts‎
Lines changed: 132 additions & 0 deletions b/‎ai/vector-search-typescript/src/create-embeddings.ts‎
Lines changed: 132 additions & 0 deletions
diff --git a/‎ai/vector-search-typescript/src/diskann.ts‎
Lines changed: 108 additions & 0 deletions b/‎ai/vector-search-typescript/src/diskann.ts‎
Lines changed: 108 additions & 0 deletions
@@ -0,0 +1,31 @@
+DEBUG=true
+
+# ========================================
+# Azure OpenAI Embedding Settings
+# ========================================
+AZURE_OPENAI_EMBEDDING_MODEL=text-embedding-ada-002
+AZURE_OPENAI_EMBEDDING_API_VERSION=2023-05-15
+AZURE_OPENAI_EMBEDDING_KEY=
+AZURE_OPENAI_EMBEDDING_ENDPOINT=https://<RESOURCE-NAME>.openai.azure.com
+EMBEDDING_SIZE_BATCH=16
+
+# ========================================
+# Data File Paths and Vector Configuration
+# ========================================
+DATA_FILE_WITHOUT_VECTORS=../data/HotelsData_toCosmosDB.JSON
+DATA_FILE_WITH_VECTORS=../data/HotelsData_toCosmosDB_Vector.json
+DATA_FILE_WITH_SIMILARITY=../data/HotelsData_toCosmosDB_Vector_Similarity.json
+QUERY_FILE_WITH_VECTORS=../data/HotelsData_Query_Vector.json
+DATA_FOLDER=../data/
+FIELD_TO_EMBED=Description
+EMBEDDED_FIELD=text_embedding_ada_002
+EMBEDDING_DIMENSIONS=1536
+LOAD_SIZE_BATCH=100
+
+# ========================================
+# MongoDB/Cosmos DB Connection Settings
+# ========================================
+MONGO_CONNECTION_STRING=mongodb+srv://<USERNAME>:<PASSWORD>@<CLUSTER-NAME>.global.mongocluster.cosmos.azure.com/?tls=true&authMechanism=SCRAM-SHA-256&retrywrites=false&maxIdleTimeMS=120000
+MONGO_CLUSTER_NAME=<CLUSTER-NAME>
+
+
@@ -0,0 +1,25 @@
+{
+  "name": "ts-cosmos-nodejs-vector-samples",
+  "version": "1.0.0",
+  "description": "Samples for MongoDB vCore vector search with Cosmos DB",
+  "main": "index.js",
+  "type": "module",
+  "scripts": {
+    "build": "tsc",
+    "start:one-insert": "node --env-file .env dist/insert-one-document.js",
+    "start:embed": "node --env-file .env dist/create-embeddings.js",
+    "start:show-indexes": "node --env-file .env dist/showIndexes.js",
+    "start:ivf": "node --env-file .env dist/ivf.js",
+    "start:hnsw": "node --env-file .env dist/hnsw.js",
+    "start:diskann": "node --env-file .env dist/diskann.js"
+  },
+  "dependencies": {
+    "@azure/identity": "^4.11.1",
+    "mongodb": "^6.18.0",
+    "openai": "^5.16.0"
+  },
+  "devDependencies": {
+    "@types/node": "^24.3.0",
+    "typescript": "^5.9.2"
+  }
+}
@@ -0,0 +1,132 @@
+/**
+ * Module for creating embedding vectors using OpenAI API
+ * Supports text embedding models for generating embeddings
+ * that can be used with Cosmos DB MongoDB vCore vector search
+ */
+import * as path from "node:path";
+import { AzureOpenAI } from "openai";
+import { Embedding } from "openai/resources";
+import { readFileReturnJson, writeFileJson, JsonData } from "./utils.js";
+
+// ESM specific features - create __dirname equivalent
+import { fileURLToPath } from "node:url";
+import { dirname } from "node:path";
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = dirname(__filename);
+
+// Azure OpenAI connection settings, read from the environment.
+// Nothing is validated here: a missing value stays undefined and only
+// surfaces when the client is created or first used.
+const apiKey = process.env.AZURE_OPENAI_EMBEDDING_KEY;
+const apiVersion = process.env.AZURE_OPENAI_EMBEDDING_API_VERSION;
+const endpoint = process.env.AZURE_OPENAI_EMBEDDING_ENDPOINT;
+console.log(`Using OpenAI endpoint: ${endpoint}`);
+const deployment = process.env.AZURE_OPENAI_EMBEDDING_MODEL!;
+
+// Input/output data files and the field names used for embedding.
+const dataWithVectors = process.env.DATA_FILE_WITH_VECTORS!;
+const dataWithoutVectors = process.env.DATA_FILE_WITHOUT_VECTORS!;
+const fieldToEmbed = process.env.FIELD_TO_EMBED || "description";
+const newEmbeddedField = process.env.EMBEDDED_FIELD || deployment;
+
+// The sample .env.example names this setting EMBEDDING_SIZE_BATCH (matching
+// LOAD_SIZE_BATCH); EMBEDDING_BATCH_SIZE is still honored for existing setups.
+const requestedBatchSize = parseInt(
+    process.env.EMBEDDING_SIZE_BATCH || process.env.EMBEDDING_BATCH_SIZE || '16',
+    10
+);
+// Fall back to 16 when the value is not a positive integer, so the batching
+// loop always advances.
+const batchSize = Number.isInteger(requestedBatchSize) && requestedBatchSize > 0
+    ? requestedBatchSize
+    : 16;
+
+/**
+ * Pauses the caller for `ms` milliseconds (200 when omitted).
+ */
+async function delay(ms: number = 200): Promise<void> {
+    await new Promise((wake) => {
+        setTimeout(wake, ms);
+    });
+}
+
+/**
+ * Requests embeddings for a list of input strings.
+ *
+ * @param client - Azure OpenAI client used for the request.
+ * @param model - Model name sent with the request.
+ * @param inputItems - Texts to embed, sent as a single request.
+ * @returns The `data` array of the embeddings response.
+ * @throws Error when the response contains no embedding data.
+ */
+export async function createEmbeddings(client: AzureOpenAI, model: string, inputItems: string[]): Promise<Embedding[]> {
+    const { data } = await client.embeddings.create({ model, input: inputItems });
+
+    if (!data?.length) {
+        throw new Error(`No embedding data returned`);
+    }
+
+    return data;
+}
+
+/**
+ * Generates embeddings for `items` in batches of the module-level `batchSize`
+ * and returns shallow copies of the items with the vector stored under
+ * `newEmbeddedField`. The input items are not mutated.
+ *
+ * @param client - Azure OpenAI client used to request embeddings.
+ * @param model - Model name passed to the embeddings request.
+ * @param fieldToEmbed - Name of the item property whose value is embedded.
+ * @param newEmbeddedField - Name of the property that receives the vector.
+ * @param maxEmbeddings - Maximum number of items to embed. A falsy value means
+ *   "all items"; values above `items.length` are clamped to `items.length`.
+ * @param items - Source items.
+ * @returns Copies of the first `maxEmbeddings` items, each with its embedding.
+ * @throws Error when `items` is not a non-empty array, `fieldToEmbed` is empty,
+ *   the batch size is invalid, the embeddings call fails, or it returns a
+ *   different number of embeddings than inputs.
+ */
+export async function processEmbeddingBatch<T>(
+    client: AzureOpenAI,
+    model: string,
+    fieldToEmbed: string,
+    newEmbeddedField: string,
+    maxEmbeddings: number,
+    items: T[]
+
+): Promise<T[]> {
+    if (!Array.isArray(items) || items.length === 0) {
+        throw new Error("Items must be a non-empty array");
+    }
+
+    if (!fieldToEmbed) {
+        throw new Error("Field to embed must be specified");
+    }
+
+    // A zero, negative or NaN step would make the loop below spin forever or
+    // send empty batches.
+    if (!Number.isInteger(batchSize) || batchSize <= 0) {
+        throw new Error("Embedding batch size must be a positive integer");
+    }
+
+    const itemsWithEmbeddings: T[] = [];
+    // Never embed more than requested and never read past the end of `items`.
+    const limit = Math.min(maxEmbeddings || items.length, items.length);
+
+    // Process in batches to avoid rate limits and memory issues
+    for (let i = 0; i < limit; i += batchSize) {
+        const batchEnd = Math.min(i + batchSize, limit);
+        console.log(`Processing batch: ${i} to ${batchEnd - 1} (of ${limit} items)`);
+
+        const batchItems = items.slice(i, batchEnd);
+        const textsToEmbed = batchItems.map(item => {
+            // T is unconstrained, so view the item as a plain record to read the field.
+            const value = (item as Record<string, unknown>)[fieldToEmbed];
+            if (!value) {
+                console.warn(`Item is missing the field to embed: ${fieldToEmbed}`);
+                return ""; // Provide a fallback value to prevent API errors
+            }
+            return typeof value === "string" ? value : String(value);
+        });
+
+        try {
+            const embeddings = await createEmbeddings(client, model, textsToEmbed);
+
+            // Results are paired with inputs by position; a short response
+            // would otherwise silently drop items from the output.
+            if (embeddings.length !== batchItems.length) {
+                throw new Error(
+                    `Expected ${batchItems.length} embeddings but received ${embeddings.length}`
+                );
+            }
+
+            embeddings.forEach((embeddingData, index) => {
+                const originalItem = batchItems[index];
+                const newItem = {
+                    ...originalItem,
+                    [newEmbeddedField]: embeddingData.embedding
+                };
+                itemsWithEmbeddings.push(newItem);
+            });
+
+            // Add a small delay between batches to avoid rate limiting
+            if (batchEnd < limit) {
+                await delay();
+            }
+        } catch (error) {
+            console.error(`Error generating embeddings for batch ${i}:`, error);
+            throw error;
+        }
+    }
+
+    return itemsWithEmbeddings;
+}
+
+
+// Script body: read the source documents, add an embedding to each one and
+// write the result to the output file. Runs whenever this module is loaded.
+// `currentStep` records what was in progress so a failure is reported
+// against the step that actually failed.
+let currentStep = "create the Azure OpenAI client";
+try {
+
+    const client =  new AzureOpenAI( {
+        apiKey,
+        apiVersion,
+        endpoint,
+        deployment
+    });
+
+    currentStep = "read the source data file";
+    const data = await readFileReturnJson(path.join(__dirname, "..", dataWithoutVectors!));
+    const model = deployment;
+    const maxEmbeddings = data.length;
+
+    currentStep = "create embeddings";
+    const embeddings = await processEmbeddingBatch<JsonData>(
+        client,
+        model,
+        fieldToEmbed,
+        newEmbeddedField,
+        maxEmbeddings,
+        data
+    );
+
+    currentStep = "save embeddings to file";
+    await writeFileJson(path.join(__dirname, "..", dataWithVectors!), embeddings);
+
+} catch (error) {
+    const reason = error instanceof Error ? error.message : String(error);
+    console.error(`Failed to ${currentStep}: ${reason}`);
+    // Signal the failure to the shell / npm script instead of exiting with 0.
+    process.exitCode = 1;
+}
@@ -0,0 +1,108 @@
+import path from 'path';
+import { readFileReturnJson, getClientsPasswordless, insertData, printSearchResults } from './utils.js';
+
+// ESM specific features - create __dirname equivalent
+import { fileURLToPath } from "node:url";
+import { dirname } from "node:path";
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = dirname(__filename);
+
+// Environment-provided settings for this sample. Nothing is validated here:
+// a missing variable yields undefined (or NaN for the parsed dimensions).
+const {
+    DATA_FILE_WITH_VECTORS,
+    LOAD_SIZE_BATCH,
+    EMBEDDED_FIELD,
+    EMBEDDING_DIMENSIONS,
+    AZURE_OPENAI_EMBEDDING_MODEL,
+} = process.env;
+
+// Fixed query, database, collection and index names plus the values above.
+const config = {
+    query: "quintessential lodging near running trails, eateries, retail",
+    dbName: "Hotels",
+    collectionName: "hotels_diskann",
+    indexName: "vectorIndex_diskann",
+    dataFile: DATA_FILE_WITH_VECTORS!,
+    batchSize: parseInt(LOAD_SIZE_BATCH || '100', 10), // defaults to 100
+    embeddedField: EMBEDDED_FIELD!,
+    embeddingDimensions: parseInt(EMBEDDING_DIMENSIONS!, 10),
+    deployment: AZURE_OPENAI_EMBEDDING_MODEL!,
+};
+
+/**
+ * Runs the DiskANN sample end to end: connects to the database, creates the
+ * collection, loads the pre-vectorized documents through `insertData`,
+ * creates the DiskANN vector index, embeds the configured query and prints
+ * the five nearest documents.
+ *
+ * Failures are logged and reported through `process.exitCode`; the database
+ * connection is closed in all cases.
+ */
+async function main() {
+
+    const { aiClient, dbClient } = getClientsPasswordless();
+
+    try {
+
+        if (!aiClient) {
+            throw new Error('AI client is not configured. Please check your environment variables.');
+        }
+        if (!dbClient) {
+            throw new Error('Database client is not configured. Please check your environment variables.');
+        }
+
+        // Fail fast on missing settings; otherwise the index would be created
+        // on a field literally named "undefined" or with NaN dimensions.
+        if (!config.dataFile) {
+            throw new Error('DATA_FILE_WITH_VECTORS is not set. Please check your environment variables.');
+        }
+        if (!config.embeddedField) {
+            throw new Error('EMBEDDED_FIELD is not set. Please check your environment variables.');
+        }
+        if (!Number.isInteger(config.embeddingDimensions) || config.embeddingDimensions <= 0) {
+            throw new Error('EMBEDDING_DIMENSIONS must be a positive integer. Please check your environment variables.');
+        }
+
+        await dbClient.connect();
+        const db = dbClient.db(config.dbName);
+        const collection = await db.createCollection(config.collectionName);
+        console.log('Created collection:', config.collectionName);
+        const data = await readFileReturnJson(path.join(__dirname, "..", config.dataFile));
+        const insertSummary = await insertData(config, collection, data);
+
+        // Create the vector index
+        const indexOptions = {
+            createIndexes: config.collectionName,
+            indexes: [
+                {
+                    name: config.indexName,
+                    key: {
+                        [config.embeddedField]: 'cosmosSearch'
+                    },
+                    cosmosSearchOptions: {
+                        kind: 'vector-diskann',
+                        dimensions: config.embeddingDimensions,
+                        similarity: 'COS', // 'COS', 'L2', 'IP'
+                        maxDegree: 20, // 20 - 2048,  edges per node
+                        lBuild: 10 // 10 - 500, candidate neighbors evaluated
+                    }
+                }
+            ]
+        };
+        const vectorIndexSummary = await db.command(indexOptions);
+        // Logged only after the command has completed.
+        console.log('Created vector index:', config.indexName);
+
+        // Create embedding for the query
+        const createEmbeddedForQueryResponse = await aiClient.embeddings.create({
+            model: config.deployment,
+            input: [config.query]
+        });
+        const queryVector = createEmbeddedForQueryResponse.data?.[0]?.embedding;
+        if (!queryVector) {
+            throw new Error('No embedding was returned for the query.');
+        }
+
+        // Perform the vector similarity search
+        const searchResults = await collection.aggregate([
+            {
+                $search: {
+                    cosmosSearch: {
+                        vector: queryVector,
+                        path: config.embeddedField,
+                        k: 5
+                    }
+                }
+            },
+            {
+                $project: {
+                    score: {
+                        $meta: "searchScore"
+                    },
+                    document: "$$ROOT"
+                }
+            }
+        ]).toArray();
+
+        // Print the results
+        printSearchResults(insertSummary, vectorIndexSummary, searchResults);
+
+    } catch (error) {
+        console.error('App failed:', error);
+        process.exitCode = 1;
+    } finally {
+        console.log('Closing database connection...');
+        if (dbClient) await dbClient.close();
+        console.log('Database connection closed');
+    }
+}
+
+// Start the sample. main() handles its own errors, so this handler only
+// fires for a rejection that escapes it; mark the process as failed then.
+main().catch((unhandled) => {
+    console.error('Unhandled error:', unhandled);
+    process.exitCode = 1;
+});