diff --git a/README.md b/README.md index d5d1320..b7b6a7c 100644 --- a/README.md +++ b/README.md @@ -54,8 +54,54 @@ The LLMWhisperer provides the following methods: - `whisper(options)`: Performs a whisper operation. - `whisperStatus(whisperHash)`: Retrieves the status of a whisper operation. - `whisperRetrieve(whisperHash)`: Retrieves the result of a whisper operation. +- `whisperDetail(whisperHash)`: Retrieves metadata/details about a completed extraction job. - `highlightData(whisperHash, searchText)`: Highlights the specified text in the result of a whisper operation. +## Running Tests + +### Environment Setup + +Copy the sample environment file and fill in your API key: + +```bash +cp sample.env .env +``` + +Then edit `.env` and set the required values: + +``` +LLMWHISPERER_API_KEY= +LLMWHISPERER_BASE_URL_V2=https://llmwhisperer-api.us-central.unstract.com/api/v2 +LLMWHISPERER_LOGGING_LEVEL=DEBUG +``` + +`LLMWHISPERER_API_KEY` is required for integration tests. The other variables are optional and have sensible defaults. + +### Run All Tests + +```bash +npm install +npm test +``` + +### Run a Specific Test File + +```bash +npx jest --runInBand test/retry.test.js +npx jest --runInBand test/test.js +``` + +### Run a Specific Test by Name + +```bash +npx jest --runInBand -t "test name pattern" +``` + +The test suite includes: + +- **`test/test.js`** — Integration tests that call the live LLMWhisperer API (requires a valid `LLMWHISPERER_API_KEY`) +- **`test/retry.test.js`** — Unit tests for retry behavior (mocked, no API key needed) + ## Error Handling Errors are handled by the LLMWhispererClientException class. This class extends the built-in Error class and adds a `statusCode` property. diff --git a/index.js b/index.js index aad0ac2..9f6f0f9 100644 --- a/index.js +++ b/index.js @@ -1,14 +1,14 @@ /** - * @fileoverview This file contains the LLMWhispererClient and LLMWhispererClientException classes. - * LLMWhispererClient is used to interact with the LLMWhisperer API. + * @fileoverview This file contains the LLMWhispererClientV2 and LLMWhispererClientException classes. + * LLMWhispererClientV2 is used to interact with the LLMWhisperer API v2. * LLMWhispererClientException is used to handle exceptions that occur while interacting with the API. - * + * * @requires axios * @requires winston * @requires fs - * - * @const {string} BASE_URL - The base URL for the LLMWhisperer API. - * + * + * @const {string} BASE_URL_V2 - The base URL for the LLMWhisperer API v2. + * */ require("dotenv").config(); @@ -17,7 +17,6 @@ const axiosRetryModule = require("axios-retry"); const axiosRetry = axiosRetryModule.default; const winston = require("winston"); const fs = require("fs"); -const BASE_URL = "https://llmwhisperer-api.unstract.com/v1"; const BASE_URL_V2 = "https://llmwhisperer-api.us-central.unstract.com/api/v2"; class LLMWhispererClientException extends Error { @@ -31,367 +30,6 @@ class LLMWhispererClientException extends Error { } } -/** - * @class LLMWhispererClient - * @classdesc Represents a client for the LLMWhisperer API. - * @constructor - * @param {Object} [config={}] - The configuration object. - * @param {string} [config.baseUrl=''] - The base URL for the API. - * @param {string} [config.apiKey=''] - The API key for authentication. - * @param {number} [config.apiTimeout=120] - The timeout duration for API requests, in seconds. - * @param {string} [config.loggingLevel=''] - The logging level (e.g., 'debug','info', 'warn', 'error'). - * @param {number} [config.maxRetries=4] - Maximum number of retry attempts (0 to disable retries). - * @param {number} [config.initialDelay=2.0] - Initial delay in seconds before the first retry. - * @param {number} [config.maxDelay=60.0] - Maximum delay cap in seconds between retries. - * @param {number} [config.backoffFactor=2.0] - Exponential multiplier for retry delay. - * @param {number} [config.jitter=1.0] - Maximum random additive jitter in seconds. - - * @property {string} baseUrl - The base URL for the API. - * @property {string} apiKey - The API key used for authentication. - * @property {number} apiTimeout - The timeout for API requests. - * @property {string} loggingLevel - The logging level for the client. - * @property {Object} logger - The logger used by the client. Initialized in the constructor. - */ - -class LLMWhispererClient { - constructor({ - baseUrl = "", - apiKey = "", - apiTimeout = 120, - loggingLevel = "", - maxRetries = 4, - initialDelay = 2.0, - maxDelay = 60.0, - backoffFactor = 2.0, - jitter = 1.0, - } = {}) { - const level = - loggingLevel || process.env.LLMWHISPERER_LOGGING_LEVEL || "debug"; - - this.logger = winston.createLogger({ - level: level, - format: winston.format.combine( - winston.format.timestamp(), - winston.format.printf(({ timestamp, level, message }) => { - return `${timestamp} - ${level}: ${message}`; - }), - ), - transports: [new winston.transports.Console()], - }); - - this.logger.debug(`logging_level set to ${level}`); - - this.baseUrl = baseUrl || process.env.LLMWHISPERER_BASE_URL || BASE_URL; - this.logger.debug(`base_url set to ${this.baseUrl}`); - - this.apiKey = apiKey || process.env.LLMWHISPERER_API_KEY || ""; - this.apiTimeout = apiTimeout; - - this.retryMaxRetries = maxRetries; - this.retryInitialDelay = initialDelay; - this.retryMaxDelay = maxDelay; - this.retryBackoffFactor = backoffFactor; - this.retryJitter = jitter; - - this.client = axios.create(); - axiosRetry(this.client, { - retries: this.retryMaxRetries, - retryCondition: (error) => { - return ( - axiosRetryModule.isNetworkError(error) || - (error.response && - (error.response.status >= 500 || error.response.status === 429)) - ); - }, - retryDelay: (retryCount, error) => { - const calculated = Math.min( - this.retryInitialDelay * - Math.pow(this.retryBackoffFactor, retryCount - 1), - this.retryMaxDelay, - ); - const retryAfterSec = axiosRetryModule.retryAfter(error) || 0; - const base = Math.max(calculated, retryAfterSec / 1000); - const jitterVal = Math.random() * this.retryJitter; - return (base + jitterVal) * 1000; - }, - onRetry: (retryCount, error, requestConfig) => { - const status = error.response - ? error.response.status - : error.code || error.message; - this.logger.warn( - `Retry ${retryCount}/${this.retryMaxRetries} for ${requestConfig.url} (${status}). ` + - `Waiting before next attempt.`, - ); - if (requestConfig._filePath) { - requestConfig.data = fs.createReadStream(requestConfig._filePath); - } - }, - }); - } - - /** - * @function - * @name getUsageInfo - * @description This function retrieves usage information. Refer to the API documentation for more information. - * @async - * @returns {Object} Returns an object containing usage information. - */ - async getUsageInfo() { - this.logger.debug("get_usage_info called"); - const url = `${this.baseUrl}/get-usage-info`; - this.logger.debug(`url: ${url}`); - - try { - const response = await this.client.get(url, { - headers: { "unstract-key": this.apiKey }, - timeout: this.apiTimeout * 1000, - }); - return response.data; - } catch (error) { - const err = error.response - ? error.response.data - : { message: error.message }; - err.statusCode = error.response ? error.response.status : -1; - throw new LLMWhispererClientException(err.message, err.statusCode); - } - } - - /** - * @function - * @name whisper - * @description This function processes a file using the whisper API. Refer to the API documentation for more information. - * @async - * @param {Object} options - The options for processing. - * @param {string} [options.filePath=''] - The path to the file to be processed. - * @param {string} [options.url=''] - The URL of the file to be processed. - * @param {string} [options.processingMode='ocr'] - The mode of processing, e.g., 'ocr'. - * @param {string} [options.outputMode='line-printer'] - The mode of output, e.g., 'line-printer'. - * @param {string} [options.pageSeparator='<<<'] - The separator for pages in the output. - * @param {boolean} [options.forceTextProcessing=false] - Whether to force text processing. - * @param {string} [options.pagesToExtract=''] - The specific pages to extract. - * @param {number} [options.timeout=200] - The timeout for the request, in seconds. - * @param {boolean} [options.storeMetadataForHighlighting=false] - Whether to store metadata for highlighting. - * @param {number} [options.medianFilterSize=0] - The size of the median filter. - * @param {number} [options.gaussianBlurRadius=0] - The radius of the Gaussian blur. - * @param {string} [options.ocrProvider='advanced'] - The OCR provider to use. - * @param {number} [options.lineSplitterTolerance=0.4] - The tolerance for splitting lines. - * @param {number} [options.horizontalStretchFactor=1.0] - The horizontal stretch factor. - * @returns {Promise} The response from the whisper API. - * @throws {LLMWhispererClientException} If there is an error in the request. - */ - async whisper({ - filePath = "", - url = "", - processingMode = "ocr", - outputMode = "line-printer", - pageSeparator = "<<<", - forceTextProcessing = false, - pagesToExtract = "", - timeout = 200, - storeMetadataForHighlighting = false, - medianFilterSize = 0, - gaussianBlurRadius = 0, - ocrProvider = "advanced", - lineSplitterTolerance = 0.4, - horizontalStretchFactor = 1.0, - } = {}) { - this.logger.debug("whisper called"); - const apiUrl = `${this.baseUrl}/whisper`; - const params = { - url, - processing_mode: processingMode, - output_mode: outputMode, - page_seperator: pageSeparator, - force_text_processing: forceTextProcessing, - pages_to_extract: pagesToExtract, - timeout, - store_metadata_for_highlighting: storeMetadataForHighlighting, - median_filter_size: medianFilterSize, - gaussian_blur_radius: gaussianBlurRadius, - ocr_provider: ocrProvider, - line_splitter_tolerance: lineSplitterTolerance, - horizontal_stretch_factor: horizontalStretchFactor, - }; - - this.logger.debug(`api_url: ${apiUrl}`); - this.logger.debug(`params: ${JSON.stringify(params)}`); - - if (!url && !filePath) { - throw new LLMWhispererClientException( - "Either url or filePath must be provided", - -1, - ); - } - - if (timeout < 0 || timeout > 200) { - throw new LLMWhispererClientException( - "timeout must be between 0 and 200", - -1, - ); - } - - try { - const options = { - method: "post", - url: apiUrl, - headers: { - "unstract-key": this.apiKey, - }, - params, - timeout: this.apiTimeout * 1000, - }; - - // Disable retry for synchronous whisper (timeout > 0) since the - // server may have already started processing the document. - if (timeout > 0) { - options["axios-retry"] = { retries: 0 }; - } - - if (!url) { - const file = fs.createReadStream(filePath); - const fileStats = fs.statSync(filePath); - options.data = file; - options._filePath = filePath; - options.headers["Content-Type"] = "application/octet-stream"; - options.headers["Content-Length"] = fileStats.size; - } - - const response = await this.client(options); - - if (response.status !== 200 && response.status !== 202) { - const message = response.data; - message.statusCode = response.status; - throw new LLMWhispererClientException(message.message, response.status); - } - - if (response.status === 202) { - const message = response.data; - message.statusCode = response.status; - return message; - } - - return { - statusCode: response.status, - extracted_text: response.data, - whisper_hash: response.headers["whisper-hash"], - }; - } catch (error) { - const err = error.response - ? error.response.data - : { message: error.message }; - err.statusCode = error.response ? error.response.status : -1; - throw new LLMWhispererClientException(err.message, err.statusCode); - } - } - - /** - * @function - * @name whisperStatus - * @description This function retrieves the status of a whisper operation using the provided whisper hash. - * @async - * @param {string} whisperHash - The hash of the whisper operation whose status is to be retrieved. - * @returns {Promise} Returns a promise that resolves with an object containing the status of the whisper operation. The object includes the status code and the response data. - * @throws {LLMWhispererClientException} Throws an LLMWhispererClientException if an error occurs during the operation. - */ - async whisperStatus(whisperHash) { - this.logger.debug("whisper_status called"); - const url = `${this.baseUrl}/whisper-status`; - const params = { "whisper-hash": whisperHash }; - this.logger.debug(`url: ${url}`); - - try { - const response = await this.client.get(url, { - headers: { "unstract-key": this.apiKey }, - params, - timeout: this.apiTimeout * 1000, - }); - - const message = response.data; - message.statusCode = response.status; - return message; - } catch (error) { - const err = error.response - ? error.response.data - : { message: error.message }; - err.statusCode = error.response ? error.response.status : -1; - throw new LLMWhispererClientException(err.message, err.statusCode); - } - } - - /** - * @function - * @name whisperRetrieve - * @description This function retrieves the result of a whisper operation using the provided whisper hash. - * @async - * @param {string} whisperHash - The hash of the whisper operation whose result is to be retrieved. - * @returns {Promise} Returns a promise that resolves with an object containing the result of the whisper operation. The object includes the status code and the response data. - * @throws {LLMWhispererClientException} Throws an LLMWhispererClientException if an error occurs during the operation. - */ - async whisperRetrieve(whisperHash) { - this.logger.debug("whisper_retrieve called"); - const url = `${this.baseUrl}/whisper-retrieve`; - const params = { "whisper-hash": whisperHash }; - this.logger.debug(`url: ${url}`); - - try { - const response = await this.client.get(url, { - headers: { "unstract-key": this.apiKey }, - params, - timeout: this.apiTimeout * 1000, - }); - - return { - statusCode: response.status, - extracted_text: response.data, - }; - } catch (error) { - const err = error.response - ? error.response.data - : { message: error.message }; - err.statusCode = error.response ? error.response.status : -1; - throw new LLMWhispererClientException(err.message, err.statusCode); - } - } - - /** - * @function - * @name highlightData - * @description This function highlights the specified text in the result of a whisper operation using the provided whisper hash. - * @async - * @param {string} whisperHash - The hash of the whisper operation whose result is to be highlighted. - * @param {string} searchText - The text to be highlighted. - * @returns {Promise} Returns a promise that resolves with an object containing the response from the highlight operation. The object includes the status code and the response data. - * @throws {LLMWhispererClientException} Throws an LLMWhispererClientException if an error occurs during the operation. - */ - async highlightData(whisperHash, searchText) { - this.logger.debug("highlight_data called"); - const url = `${this.baseUrl}/highlight-data`; - const params = { "whisper-hash": whisperHash }; - this.logger.debug(`url: ${url}`); - - try { - const response = await this.client.post(url, searchText, { - headers: { - "unstract-key": this.apiKey, - "Content-Type": "text/plain", - }, - params, - timeout: this.apiTimeout * 1000, - }); - - const result = response.data; - result.statusCode = response.status; - return result; - } catch (error) { - const err = error.response - ? error.response.data - : { message: error.message }; - err.statusCode = error.response ? error.response.status : -1; - throw new LLMWhispererClientException(err.message, err.statusCode); - } - } -} - /** * @class LLMWhispererClientV2 * @classdesc Represents a client for the LLMWhisperer API. @@ -612,7 +250,7 @@ class LLMWhispererClientV2 { const options = { method: "post", url: apiUrl, - headers: this.headers, + headers: { ...this.headers }, params, timeout: 200 * 1000, }; @@ -721,8 +359,6 @@ class LLMWhispererClientV2 { const params = { whisper_hash: whisperHash }; this.logger.debug(`url: ${url}`); this.logger.debug(`params: ${JSON.stringify(params)}`); - delete this.headers["Content-Length"]; - this.logger.debug(`headers: ${JSON.stringify(this.headers)}`); try { const response = await this.client.get(url, { @@ -971,6 +607,44 @@ class LLMWhispererClientV2 { } } + /** + * Retrieves the details of a text extraction process. + * + * This method sends a GET request to the '/whisper-detail' endpoint of the LLMWhisperer API. + * The response is a JSON object containing metadata about the extraction job. + * Refer to https://docs.unstract.com/llmwhisperer/llm_whisperer/apis/llm_whisperer_text_extraction_detail_api + * + * @param {string} whisperHash - The hash returned when starting the extraction process. + * @returns {Promise} A promise that resolves with the extraction details including + * completed_at, mode, processed_pages, processing_started_at, + * processing_time_in_seconds, requested_pages, tag, total_pages, + * upload_file_size_in_kb, and whisper_hash. + * @throws {LLMWhispererClientException} If the API request fails. + */ + async whisperDetail(whisperHash) { + this.logger.debug("whisper_detail called"); + const url = `${this.baseUrl}/whisper-detail`; + const params = { whisper_hash: whisperHash }; + this.logger.debug(`url: ${url}`); + this.logger.debug(`params: ${JSON.stringify(params)}`); + + try { + const response = await this.client.get(url, { + headers: this.headers, + params, + timeout: this.apiTimeout * 1000, + }); + + return response.data; + } catch (error) { + const err = error.response + ? error.response.data + : { message: error.message }; + err.statusCode = error.response ? error.response.status : -1; + throw new LLMWhispererClientException(err.message, err.statusCode); + } + } + /** * Retrieves the highlight information of the LLMWhisperer API. * @@ -1022,7 +696,6 @@ class LLMWhispererClientV2 { } module.exports = { - LLMWhispererClient, LLMWhispererClientV2, LLMWhispererClientException, }; diff --git a/test/retry.test.js b/test/retry.test.js index 09ef9aa..89d2497 100644 --- a/test/retry.test.js +++ b/test/retry.test.js @@ -2,7 +2,6 @@ const axios = require("axios"); const fs = require("fs"); const path = require("path"); const { - LLMWhispererClient, LLMWhispererClientV2, LLMWhispererClientException, } = require("../index"); @@ -74,15 +73,6 @@ function successResponse(data = {}, status = 200, headers = {}) { return { status, data, headers, statusText: "OK" }; } -function createV1Client(opts = {}) { - return new LLMWhispererClient({ - baseUrl: "https://test.example.com/v1", - apiKey: "test-key", - loggingLevel: "error", - ...opts, - }); -} - function createV2Client(opts = {}) { return new LLMWhispererClientV2({ baseUrl: "https://test.example.com/v2", @@ -93,15 +83,6 @@ function createV2Client(opts = {}) { } describe("Retry Configuration", () => { - test("V1 client stores retry configuration defaults", () => { - const client = createV1Client(); - expect(client.retryMaxRetries).toBe(4); - expect(client.retryInitialDelay).toBe(2.0); - expect(client.retryMaxDelay).toBe(60.0); - expect(client.retryBackoffFactor).toBe(2.0); - expect(client.retryJitter).toBe(1.0); - }); - test("V2 client stores retry configuration defaults", () => { const client = createV2Client(); expect(client.retryMaxRetries).toBe(4); @@ -111,21 +92,6 @@ describe("Retry Configuration", () => { expect(client.retryJitter).toBe(1.0); }); - test("V1 client accepts custom retry configuration", () => { - const client = createV1Client({ - maxRetries: 10, - initialDelay: 5.0, - maxDelay: 120.0, - backoffFactor: 3.0, - jitter: 2.0, - }); - expect(client.retryMaxRetries).toBe(10); - expect(client.retryInitialDelay).toBe(5.0); - expect(client.retryMaxDelay).toBe(120.0); - expect(client.retryBackoffFactor).toBe(3.0); - expect(client.retryJitter).toBe(2.0); - }); - test("V2 client accepts custom retry configuration", () => { const client = createV2Client({ maxRetries: 0, @@ -141,12 +107,6 @@ describe("Retry Configuration", () => { expect(client.retryJitter).toBe(0.5); }); - test("V1 client creates its own axios instance", () => { - const client = createV1Client(); - expect(client.client).toBeDefined(); - expect(client.client).not.toBe(axios); - }); - test("V2 client creates its own axios instance", () => { const client = createV2Client(); expect(client.client).toBeDefined(); @@ -154,207 +114,6 @@ describe("Retry Configuration", () => { }); }); -describe("V1 Retry on server errors", () => { - test("getUsageInfo retries on 503 then succeeds", async () => { - const client = createV1Client({ maxRetries: 2, jitter: 0 }); - const adapter = mockAdapter([ - errorResponse(503, "Service Unavailable"), - successResponse({ usage: "100" }), - ]); - client.client.defaults.adapter = adapter; - - const result = await client.getUsageInfo(); - expect(result).toEqual({ usage: "100" }); - }); - - test("getUsageInfo retries on 429 then succeeds", async () => { - const client = createV1Client({ maxRetries: 2, jitter: 0 }); - const adapter = mockAdapter([ - errorResponse(429, "Rate limited"), - successResponse({ usage: "100" }), - ]); - client.client.defaults.adapter = adapter; - - const result = await client.getUsageInfo(); - expect(result).toEqual({ usage: "100" }); - }); - - test("getUsageInfo retries on network error then succeeds", async () => { - const client = createV1Client({ maxRetries: 2, jitter: 0 }); - const adapter = mockAdapter([ - networkError("ECONNRESET"), - successResponse({ usage: "100" }), - ]); - client.client.defaults.adapter = adapter; - - const result = await client.getUsageInfo(); - expect(result).toEqual({ usage: "100" }); - }); - - test("whisperStatus retries on 500 then succeeds", async () => { - const client = createV1Client({ maxRetries: 2, jitter: 0 }); - const adapter = mockAdapter([ - errorResponse(500, "Internal Server Error"), - successResponse({ status: "processed" }), - ]); - client.client.defaults.adapter = adapter; - - const result = await client.whisperStatus("test-hash"); - expect(result.status).toBe("processed"); - }); - - test("whisperRetrieve retries on 502 then succeeds", async () => { - const client = createV1Client({ maxRetries: 2, jitter: 0 }); - const adapter = mockAdapter([ - errorResponse(502, "Bad Gateway"), - successResponse("extracted text here"), - ]); - client.client.defaults.adapter = adapter; - - const result = await client.whisperRetrieve("test-hash"); - expect(result.extracted_text).toBe("extracted text here"); - }); - - test("highlightData retries on 503 then succeeds", async () => { - const client = createV1Client({ maxRetries: 2, jitter: 0 }); - const adapter = mockAdapter([ - errorResponse(503, "Service Unavailable"), - successResponse({ highlights: [] }), - ]); - client.client.defaults.adapter = adapter; - - const result = await client.highlightData("hash", "search text"); - expect(result).toEqual({ highlights: [], statusCode: 200 }); - }); -}); - -describe("V1 No retry on client errors", () => { - test("getUsageInfo does NOT retry on 400", async () => { - const client = createV1Client({ maxRetries: 3, jitter: 0 }); - let callCount = 0; - client.client.defaults.adapter = (config) => { - callCount++; - const err = new Error("Bad Request"); - err.response = { status: 400, data: { message: "Bad Request" }, headers: {} }; - err.config = config; - err.isAxiosError = true; - return Promise.reject(err); - }; - - await expect(client.getUsageInfo()).rejects.toThrow(LLMWhispererClientException); - expect(callCount).toBe(1); - }); - - test("getUsageInfo does NOT retry on 401", async () => { - const client = createV1Client({ maxRetries: 3, jitter: 0 }); - let callCount = 0; - client.client.defaults.adapter = (config) => { - callCount++; - const err = new Error("Unauthorized"); - err.response = { status: 401, data: { message: "Unauthorized" }, headers: {} }; - err.config = config; - err.isAxiosError = true; - return Promise.reject(err); - }; - - await expect(client.getUsageInfo()).rejects.toThrow(LLMWhispererClientException); - expect(callCount).toBe(1); - }); - - test("getUsageInfo does NOT retry on 404", async () => { - const client = createV1Client({ maxRetries: 3, jitter: 0 }); - let callCount = 0; - client.client.defaults.adapter = (config) => { - callCount++; - const err = new Error("Not Found"); - err.response = { status: 404, data: { message: "Not Found" }, headers: {} }; - err.config = config; - err.isAxiosError = true; - return Promise.reject(err); - }; - - await expect(client.getUsageInfo()).rejects.toThrow(LLMWhispererClientException); - expect(callCount).toBe(1); - }); -}); - -describe("V1 Retry exhaustion and disable", () => { - test("retry exhaustion throws after maxRetries attempts", async () => { - const client = createV1Client({ maxRetries: 2, jitter: 0, initialDelay: 0.1 }); - let callCount = 0; - client.client.defaults.adapter = (config) => { - callCount++; - const err = new Error("Service Unavailable"); - err.response = { status: 503, data: { message: "Service Unavailable" }, headers: {} }; - err.config = config; - err.isAxiosError = true; - return Promise.reject(err); - }; - - await expect(client.getUsageInfo()).rejects.toThrow(LLMWhispererClientException); - // 1 initial + 2 retries = 3 total - expect(callCount).toBe(3); - }); - - test("maxRetries=0 disables retries", async () => { - const client = createV1Client({ maxRetries: 0, jitter: 0 }); - let callCount = 0; - client.client.defaults.adapter = (config) => { - callCount++; - const err = new Error("Service Unavailable"); - err.response = { status: 503, data: { message: "Service Unavailable" }, headers: {} }; - err.config = config; - err.isAxiosError = true; - return Promise.reject(err); - }; - - await expect(client.getUsageInfo()).rejects.toThrow(LLMWhispererClientException); - expect(callCount).toBe(1); - }); -}); - -describe("V1 whisper retry control", () => { - test("whisper with timeout=0 (async) retries on 503", async () => { - const client = createV1Client({ maxRetries: 2, jitter: 0 }); - const adapter = mockAdapter([ - errorResponse(503, "Service Unavailable"), - successResponse( - { whisper_hash: "abc123", statusCode: 202 }, - 202, - { "whisper-hash": "abc123" }, - ), - ]); - client.client.defaults.adapter = adapter; - - const result = await client.whisper({ - url: "https://example.com/doc.pdf", - timeout: 0, - }); - expect(result.whisper_hash).toBe("abc123"); - }); - - test("whisper with timeout>0 (sync) does NOT retry on 503", async () => { - const client = createV1Client({ maxRetries: 3, jitter: 0 }); - let callCount = 0; - client.client.defaults.adapter = (config) => { - callCount++; - const err = new Error("Service Unavailable"); - err.response = { status: 503, data: { message: "Service Unavailable" }, headers: {} }; - err.config = config; - err.isAxiosError = true; - return Promise.reject(err); - }; - - await expect( - client.whisper({ - url: "https://example.com/doc.pdf", - timeout: 60, - }), - ).rejects.toThrow(LLMWhispererClientException); - expect(callCount).toBe(1); - }); -}); - describe("V2 Retry Behavior", () => { test("getUsageInfo retries on 503 then succeeds", async () => { const client = createV2Client({ maxRetries: 2, jitter: 0 }); @@ -440,6 +199,18 @@ describe("V2 Retry Behavior", () => { expect(result.status_code).toBe(200); }); + test("whisperDetail retries on 503 then succeeds", async () => { + const client = createV2Client({ maxRetries: 2, jitter: 0 }); + const adapter = mockAdapter([ + errorResponse(503, "Service Unavailable"), + successResponse({ whisper_hash: "abc", mode: "ocr" }), + ]); + client.client.defaults.adapter = adapter; + + const result = await client.whisperDetail("abc"); + expect(result).toEqual({ whisper_hash: "abc", mode: "ocr" }); + }); + test("getHighlightData retries on 503 then succeeds", async () => { const client = createV2Client({ maxRetries: 2, jitter: 0 }); const adapter = mockAdapter([ @@ -455,7 +226,7 @@ describe("V2 Retry Behavior", () => { describe("Retry-After header", () => { test("429 with Retry-After header is respected", async () => { - const client = createV1Client({ maxRetries: 1, jitter: 0, initialDelay: 0.1 }); + const client = createV2Client({ maxRetries: 1, jitter: 0, initialDelay: 0.1 }); const adapter = mockAdapter([ // 429 with Retry-After of 1 second (() => { @@ -492,24 +263,6 @@ describe("Backoff delay calculation", () => { }); describe("File stream re-creation", () => { - test("V1 whisper with filePath attaches _filePath to config for retry", async () => { - const testFilePath = path.join(__dirname, "data", "credit_card.pdf"); - const client = createV1Client({ maxRetries: 1, jitter: 0 }); - let capturedConfig; - client.client.defaults.adapter = (config) => { - capturedConfig = config; - return Promise.resolve({ - status: 200, - data: "extracted text", - headers: { "whisper-hash": "hash123" }, - config, - }); - }; - - await client.whisper({ filePath: testFilePath, timeout: 0 }); - expect(capturedConfig._filePath).toBe(testFilePath); - }); - test("V2 whisper with filePath attaches _filePath to config for retry", async () => { const testFilePath = path.join(__dirname, "data", "credit_card.pdf"); const client = createV2Client({ maxRetries: 1, jitter: 0 }); @@ -530,7 +283,7 @@ describe("File stream re-creation", () => { test("onRetry re-creates file stream when _filePath is set", async () => { const testFilePath = path.join(__dirname, "data", "credit_card.pdf"); - const client = createV1Client({ maxRetries: 1, jitter: 0 }); + const client = createV2Client({ maxRetries: 1, jitter: 0 }); let callCount = 0; let secondCallData; client.client.defaults.adapter = (config) => { @@ -544,14 +297,14 @@ describe("File stream re-creation", () => { } secondCallData = config.data; return Promise.resolve({ - status: 200, - data: "extracted text", - headers: { "whisper-hash": "hash123" }, + status: 202, + data: { whisper_hash: "v2hash" }, + headers: {}, config, }); }; - await client.whisper({ filePath: testFilePath, timeout: 0 }); + await client.whisper({ filePath: testFilePath }); expect(callCount).toBe(2); // The data should be a fresh ReadStream (re-created by onRetry) expect(secondCallData).toBeDefined(); @@ -561,7 +314,7 @@ describe("File stream re-creation", () => { describe("Logging on retries", () => { test("onRetry logs a warning message", async () => { - const client = createV1Client({ maxRetries: 1, jitter: 0 }); + const client = createV2Client({ maxRetries: 1, jitter: 0 }); const adapter = mockAdapter([ errorResponse(503, "Service Unavailable"), successResponse({ usage: "100" }), diff --git a/test/v1test.js b/test/v1test.js deleted file mode 100644 index 86b83a4..0000000 --- a/test/v1test.js +++ /dev/null @@ -1,95 +0,0 @@ -const fs = require("fs"); -const path = require("path"); -const LLMWhispererClient = require("../index").LLMWhispererClient; -const client = new LLMWhispererClient({ - apiKey: process.env.LLMWHISPERER_API_KEY, -}); -describe("LLMWhispererClient", () => { - test.skip("get_usage_info", async () => { - const usage_info = await client.getUsageInfo(); - console.info(usage_info); - expect(typeof usage_info).toBe("object"); - const expected_keys = [ - "current_page_count", - "daily_quota", - "monthly_quota", - "overage_page_count", - "subscription_plan", - "today_page_count", - ]; - expect(Object.keys(usage_info)).toEqual( - expect.arrayContaining(expected_keys), - ); - }); - - const test_cases = [ - ["ocr", "line-printer", "restaurant_invoice_photo.pdf"], - ["ocr", "line-printer", "credit_card.pdf"], - ["ocr", "line-printer", "handwritten-form.pdf"], - ["ocr", "text", "restaurant_invoice_photo.pdf"], - ["text", "line-printer", "restaurant_invoice_photo.pdf"], - ["text", "text", "handwritten-form.pdf"], - ]; - - test.skip.each(test_cases)( - "whisper(%s, %s, %s)", - async (processing_mode, output_mode, input_file) => { - const data_dir = path.join(__dirname, "data"); - const file_path = path.join(data_dir, input_file); - const response = await client.whisper({ - processingMode: processing_mode, - outputMode: output_mode, - filePath: file_path, - timeout: 200, - }); - console.debug(response); - - const exp_basename = `${path.parse(input_file).name}.${processing_mode}.${output_mode}.txt`; - const exp_file = path.join(data_dir, "expected", exp_basename); - const exp = await fs.promises.readFile(exp_file, "utf-8"); - - expect(typeof response).toBe("object"); - expect(response.statusCode).toBe(200); - // expect(response.extracted_text).toBe(exp); - }, - 200000, - ); - - // TODO: Review and port to Jest based tests - test.skip("whisper", () => { - // response = client.whisper( - // 'https://storage.googleapis.com/pandora-static/samples/bill.jpg.pdf' - // ); - const response = client.whisper("test_files/restaurant_invoice_photo.pdf", { - timeout: 200, - store_metadata_for_highlighting: true, - }); - console.info(response); - // expect(typeof response).toBe('object'); - }); - - test.skip("whisper_status", () => { - const response = client.whisper_status( - "7cfa5cbb|5f1d285a7cf18d203de7af1a1abb0a3a", - ); - console.info(response); - expect(typeof response).toBe("object"); - }); - - test.skip("whisper_retrieve", () => { - const response = client.whisper_retrieve( - "7cfa5cbb|5f1d285a7cf18d203de7af1a1abb0a3a", - ); - console.info(response); - expect(typeof response).toBe("object"); - }); - - test.skip("whisper_highlight_data", () => { - const response = client.highlight_data( - "9924d865|5f1d285a7cf18d203de7af1a1abb0a3a", - "Indiranagar", - ); - console.info(response); - expect(typeof response).toBe("object"); - }); -});