From 3645eddb69ef2c9aaba77413afa356a9978d54d1 Mon Sep 17 00:00:00 2001 From: Benjie Gillam Date: Thu, 16 Apr 2026 16:54:24 +0100 Subject: [PATCH 1/3] Move maxTokens check to lexer --- src/index.ts | 1 + src/language/__tests__/parser-test.ts | 7 ++++ src/language/index.ts | 1 + src/language/lexer.ts | 43 +++++++++++++++++++++++-- src/language/parser.ts | 46 +++++++++++++++------------ src/language/schemaCoordinateLexer.ts | 15 +++++++-- 6 files changed, 88 insertions(+), 25 deletions(-) diff --git a/src/index.ts b/src/index.ts index 54ab38437f..54fe8e3d87 100644 --- a/src/index.ts +++ b/src/index.ts @@ -242,6 +242,7 @@ export { export type { ParseOptions, + LexerOptions, SourceLocation, TokenKindEnum, KindEnum, diff --git a/src/language/__tests__/parser-test.ts b/src/language/__tests__/parser-test.ts index d3c249a9fd..39b2d8f0f9 100644 --- a/src/language/__tests__/parser-test.ts +++ b/src/language/__tests__/parser-test.ts @@ -106,6 +106,13 @@ describe('Parser', () => { expect(() => parse('{ foo(bar: "baz") }', { maxTokens: 7 })).to.throw( 'Syntax Error: Document contains more that 7 tokens. Parsing aborted.', ); + + expect(() => + parse('#\n{\n#\na\n#\na\n#\n}\n#', { maxTokens: 9 }), + ).to.not.throw(); + expect(() => parse('#\n{\n#\na\n#\na\n#\n}\n#', { maxTokens: 8 })).to.throw( + 'Syntax Error: Document contains more that 8 tokens. 
Parsing aborted.', + ); }); it('parses variable inline values', () => { diff --git a/src/language/index.ts b/src/language/index.ts index 28d6400bc4..84ece789f1 100644 --- a/src/language/index.ts +++ b/src/language/index.ts @@ -12,6 +12,7 @@ export { TokenKind } from './tokenKind'; export type { TokenKindEnum } from './tokenKind'; export { Lexer } from './lexer'; +export type { LexerOptions } from './lexer'; export { parse, diff --git a/src/language/lexer.ts b/src/language/lexer.ts index e62ffd70d7..a580c257bc 100644 --- a/src/language/lexer.ts +++ b/src/language/lexer.ts @@ -6,6 +6,20 @@ import { isDigit, isNameContinue, isNameStart } from './characterClasses'; import type { Source } from './source'; import { TokenKind } from './tokenKind'; +/** + * Configuration options to control lexer behavior + */ +export interface LexerOptions { + /** + * Parser CPU and memory usage is linear to the number of tokens in a document + * however in extreme cases it becomes quadratic due to memory exhaustion. + * Parsing happens before validation so even invalid queries can burn lots of + * CPU time and memory. + * To prevent this you can set a maximum number of tokens allowed within a document. + */ + maxTokens?: number | undefined; +} + /** * A Lexer interface which provides common properties and methods required for * lexing GraphQL source. @@ -13,7 +27,10 @@ import { TokenKind } from './tokenKind'; * @internal */ export interface LexerInterface { - source: Source; + readonly _options: Readonly<LexerOptions>; + _tokenCounter: number; + readonly source: Source; + tokenCount: number; lastToken: Token; token: Token; line: number; @@ -31,6 +48,11 @@ export interface LexerInterface { * whenever called.
*/ export class Lexer implements LexerInterface { + /** @internal */ + readonly _options: Readonly<LexerOptions>; + /** @internal */ + _tokenCounter: number; + source: Source; /** @@ -53,9 +75,11 @@ export class Lexer implements LexerInterface { */ lineStart: number; - constructor(source: Source) { + constructor(source: Source, options: LexerOptions = {}) { const startOfFileToken = new Token(TokenKind.SOF, 0, 0, 0, 0); + this._options = options; + this._tokenCounter = 0; this.source = source; this.lastToken = startOfFileToken; this.token = startOfFileToken; @@ -67,6 +91,10 @@ export class Lexer implements LexerInterface { return 'Lexer'; } + get tokenCount(): number { + return this._tokenCounter; + } + /** * Advances the token stream to the next non-ignored token. */ @@ -200,8 +228,19 @@ export function createToken( end: number, value?: string, ): Token { + const { maxTokens } = lexer._options; const line = lexer.line; const col = 1 + start - lexer.lineStart; + if (kind !== TokenKind.EOF) { + ++lexer._tokenCounter; + if (maxTokens !== undefined && lexer._tokenCounter > maxTokens) { + throw syntaxError( + lexer.source, + start, + `Document contains more that ${maxTokens} tokens. Parsing aborted.`, + ); + } + } return new Token(kind, start, end, line, col, value); } diff --git a/src/language/parser.ts b/src/language/parser.ts index f489027b6b..93a707df2b 100644 --- a/src/language/parser.ts +++ b/src/language/parser.ts @@ -111,10 +111,17 @@ export interface ParseOptions { * ``` */ allowLegacyFragmentVariables?: boolean; +} +/** + * @internal + */ +export interface ParseOptionsInternal extends ParseOptions { /** * You may override the Lexer class used to lex the source; this is used by * schema coordinates to introduce a lexer with a restricted syntax. + * + * Cannot be set if `maxTokens` is set.
*/ lexer?: LexerInterface | undefined; } @@ -204,10 +211,15 @@ export function parseType( */ export function parseSchemaCoordinate( source: string | Source, + options?: ParseOptions | undefined, ): SchemaCoordinateNode { const sourceObj = isSource(source) ? source : new Source(source); - const lexer = new SchemaCoordinateLexer(sourceObj); - const parser = new Parser(source, { lexer }); + const lexer = new SchemaCoordinateLexer(sourceObj, options); + const parser = new Parser(source, { + ...options, + maxTokens: undefined, // Handled by SchemaCoordinateLexer + lexer, + }); parser.expectToken(TokenKind.SOF); const coordinate = parser.parseSchemaCoordinate(); parser.expectToken(TokenKind.EOF); @@ -226,26 +238,30 @@ export function parseSchemaCoordinate( * @internal */ export class Parser { - protected _options: Omit<ParseOptions, 'lexer'>; + protected _options: ParseOptions; protected _lexer: LexerInterface; - protected _tokenCounter: number; - constructor(source: string | Source, options: ParseOptions = {}) { + constructor(source: string | Source, options: ParseOptionsInternal = {}) { const { lexer, ..._options } = options; if (lexer) { + if (options.maxTokens != null) { + throw new Error( + 'Setting maxTokens has no effect when a custom lexer is passed', + ); + } this._lexer = lexer; } else { const sourceObj = isSource(source) ?
source : new Source(source); - this._lexer = new Lexer(sourceObj); + const { maxTokens } = options; + this._lexer = new Lexer(sourceObj, { maxTokens }); } this._options = _options; - this._tokenCounter = 0; } get tokenCount(): number { - return this._tokenCounter; + return this._lexer.tokenCount; } /** @@ -1690,19 +1706,7 @@ export class Parser { } advanceLexer(): void { - const { maxTokens } = this._options; - const token = this._lexer.advance(); - - if (token.kind !== TokenKind.EOF) { - ++this._tokenCounter; - if (maxTokens !== undefined && this._tokenCounter > maxTokens) { - throw syntaxError( - this._lexer.source, - token.start, - `Document contains more that ${maxTokens} tokens. Parsing aborted.`, - ); - } - } + this._lexer.advance(); } } diff --git a/src/language/schemaCoordinateLexer.ts b/src/language/schemaCoordinateLexer.ts index 4a65f5e556..2c462d3be9 100644 --- a/src/language/schemaCoordinateLexer.ts +++ b/src/language/schemaCoordinateLexer.ts @@ -2,7 +2,7 @@ import { syntaxError } from '../error/syntaxError'; import { Token } from './ast'; import { isNameStart } from './characterClasses'; -import type { LexerInterface } from './lexer'; +import type { LexerInterface, LexerOptions } from './lexer'; import { createToken, printCodePointAt, readName } from './lexer'; import type { Source } from './source'; import { TokenKind } from './tokenKind'; @@ -16,6 +16,11 @@ import { TokenKind } from './tokenKind'; * whenever called. 
*/ export class SchemaCoordinateLexer implements LexerInterface { + /** @internal */ + public readonly _options: Readonly<LexerOptions>; + /** @internal */ + public _tokenCounter: number; + source: Source; /** @@ -40,9 +45,11 @@ export class SchemaCoordinateLexer implements LexerInterface { */ lineStart: 0 = 0 as const; - constructor(source: Source) { + constructor(source: Source, options: LexerOptions = {}) { const startOfFileToken = new Token(TokenKind.SOF, 0, 0, 0, 0); + this._options = options; + this._tokenCounter = 0; this.source = source; this.lastToken = startOfFileToken; this.token = startOfFileToken; @@ -52,6 +59,10 @@ export class SchemaCoordinateLexer implements LexerInterface { return 'SchemaCoordinateLexer'; } + get tokenCount(): number { + return this._tokenCounter; + } + /** * Advances the token stream to the next non-ignored token. */ From 7324a506c069bc5295298045ad3d3aa900ea9332 Mon Sep 17 00:00:00 2001 From: Benjie Gillam Date: Thu, 16 Apr 2026 17:00:40 +0100 Subject: [PATCH 2/3] Mark SchemaCoordinateLexer as internal --- src/language/schemaCoordinateLexer.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/language/schemaCoordinateLexer.ts b/src/language/schemaCoordinateLexer.ts index 2c462d3be9..7da97212ec 100644 --- a/src/language/schemaCoordinateLexer.ts +++ b/src/language/schemaCoordinateLexer.ts @@ -14,6 +14,8 @@ import { TokenKind } from './tokenKind'; * source lexes, the final Token emitted by the lexer will be of kind * EOF, after which the lexer will repeatedly return the same EOF token * whenever called.
+ * + * @internal */ export class SchemaCoordinateLexer implements LexerInterface { /** @internal */ From 5aa6166826f0ac3d4d63d64ce433055275a48c6a Mon Sep 17 00:00:00 2001 From: Benjie Gillam Date: Thu, 16 Apr 2026 17:10:59 +0100 Subject: [PATCH 3/3] Test coverage --- src/language/__tests__/parser-test.ts | 14 ++++++++++++++ .../__tests__/schemaCoordinateLexer-test.ts | 10 ++++++++++ src/language/parser.ts | 2 +- 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/src/language/__tests__/parser-test.ts b/src/language/__tests__/parser-test.ts index 39b2d8f0f9..e6f66a5fb4 100644 --- a/src/language/__tests__/parser-test.ts +++ b/src/language/__tests__/parser-test.ts @@ -8,9 +8,11 @@ import { kitchenSinkQuery } from '../../__testUtils__/kitchenSinkQuery'; import { inspect } from '../../jsutils/inspect'; import { Kind } from '../kinds'; +import { Lexer } from '../lexer'; import { parse, parseConstValue, + Parser, parseSchemaCoordinate, parseType, parseValue, @@ -115,6 +117,18 @@ describe('Parser', () => { ); }); + it('forbids maxTokens and lexer together', () => { + expect( + () => + new Parser('{a}', { + maxTokens: 10, + lexer: new Lexer(new Source('{a}')), + }), + ).to.throw( + 'Setting maxTokens has no effect when a custom lexer is supplied', + ); + }); + it('parses variable inline values', () => { expect(() => parse('{ field(complex: { a: { b: [ $var ] } }) }'), diff --git a/src/language/__tests__/schemaCoordinateLexer-test.ts b/src/language/__tests__/schemaCoordinateLexer-test.ts index 09a349fa25..a7003b680d 100644 --- a/src/language/__tests__/schemaCoordinateLexer-test.ts +++ b/src/language/__tests__/schemaCoordinateLexer-test.ts @@ -3,6 +3,7 @@ import { describe, it } from 'mocha'; import { expectToThrowJSON } from '../../__testUtils__/expectJSON'; +import type { Token } from '../ast'; import { SchemaCoordinateLexer } from '../schemaCoordinateLexer'; import { Source } from '../source'; import { TokenKind } from '../tokenKind'; @@ -49,4 +50,13 @@ 
describe('SchemaCoordinateLexer', () => { locations: [{ line: 1, column: 4 }], }); }); + + it('counts tokens', () => { + const lexer = new SchemaCoordinateLexer(new Source('Name.field')); + let token: Token; + do { + token = lexer.advance(); + } while (token.kind !== TokenKind.EOF); + expect(lexer.tokenCount).to.eq(3); + }); }); diff --git a/src/language/parser.ts b/src/language/parser.ts index 93a707df2b..e9fbb484f9 100644 --- a/src/language/parser.ts +++ b/src/language/parser.ts @@ -247,7 +247,7 @@ export class Parser { if (lexer) { if (options.maxTokens != null) { throw new Error( - 'Setting maxTokens has no effect when a custom lexer is passed', + 'Setting maxTokens has no effect when a custom lexer is supplied', ); } this._lexer = lexer;