diff --git a/src/generators/metadata/constants.mjs b/src/generators/metadata/constants.mjs index e8ac140b..aa2edf27 100644 --- a/src/generators/metadata/constants.mjs +++ b/src/generators/metadata/constants.mjs @@ -56,8 +56,5 @@ export const DOC_API_HEADING_TYPES = [ }, ]; -// This regex is used to match basic TypeScript generic types (e.g., Promise) -export const TYPE_GENERIC_REGEX = /^([^<]+)<([^>]+)>$/; - // This is the base URL of the Man7 documentation export const DOC_MAN_BASE_URL = 'http://man7.org/linux/man-pages/man'; diff --git a/src/generators/metadata/utils/__tests__/transformers.test.mjs b/src/generators/metadata/utils/__tests__/transformers.test.mjs index ef625d4c..cbb5836e 100644 --- a/src/generators/metadata/utils/__tests__/transformers.test.mjs +++ b/src/generators/metadata/utils/__tests__/transformers.test.mjs @@ -75,4 +75,41 @@ describe('transformTypeToReferenceLink', () => { '[``](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Map)<[``](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#string_type), [``](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#number_type)> & [``](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Array)<[``](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#string_type)>' ); }); + + it('should transform a function returning a Generic type', () => { + strictEqual( + transformTypeToReferenceLink('(err: Error) => Promise', {}), + '(err: [``](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Error)) => [``](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Promise)<[``](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#boolean_type)>' + ); + }); + + it('should respect precedence: Unions (|) are weaker than Intersections (&)', () => { + strictEqual( + transformTypeToReferenceLink('string | number & boolean', {}), + 
'[``](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#string_type) | [``](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#number_type) & [``](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#boolean_type)' + ); + }); + + it('should handle extreme nested combinations of functions, arrays, generics, unions, and intersections', () => { + const input = + '(str: string[]) => Promise, Map>'; + + const expected = + '(str: [``](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#string_type)[]) => [``](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Promise)<[``](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Map)<[``](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#string_type), [``](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#number_type) & [``](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#string_type)>, [``](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Map)<[``](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#string_type) | [``](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#number_type)>>'; + + strictEqual(transformTypeToReferenceLink(input, {}), expected); + }); + + it('should parse functions with array destructuring in callbacks returning functions with object destructuring', () => { + const input = + '(cb: ([first, second]: string[]) => void) => ({ id, name }: User) => boolean'; + + const expected = + '(cb: ([first, second]: [``](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#string_type)[]) => ``) => ({ id, name }: [``](userLink)) => [``](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#boolean_type)'; + + strictEqual( + transformTypeToReferenceLink(input, { User: 'userLink' }), + expected + ); + }); }); diff --git a/src/generators/metadata/utils/transformers.mjs 
b/src/generators/metadata/utils/transformers.mjs index 7fac4e85..dd6de8b9 100644 --- a/src/generators/metadata/utils/transformers.mjs +++ b/src/generators/metadata/utils/transformers.mjs @@ -1,9 +1,6 @@ -import { - DOC_MAN_BASE_URL, - DOC_API_HEADING_TYPES, - TYPE_GENERIC_REGEX, -} from '../constants.mjs'; +import { DOC_MAN_BASE_URL, DOC_API_HEADING_TYPES } from '../constants.mjs'; import { slug } from './slugger.mjs'; +import { parseType } from './typeParser.mjs'; import { transformNodesToString } from '../../../utils/unist.mjs'; import BUILTIN_TYPE_MAP from '../maps/builtin.json' with { type: 'json' }; import MDN_TYPE_MAP from '../maps/mdn.json' with { type: 'json' }; @@ -22,84 +19,7 @@ export const transformUnixManualToLink = ( ) => { return `[\`${text}\`](${DOC_MAN_BASE_URL}${sectionNumber}/${command}.${sectionNumber}${sectionLetter}.html)`; }; -/** - * Safely splits the string by `|` or `&` at the top level (ignoring those - * inside `< >`), and returns both the pieces and the separator used. - * - * @param {string} str The type string to split - * @returns {{ pieces: string[], separator: string }} The split pieces and the separator string used to join them (` | ` or ` & `) - */ -const splitByOuterSeparator = str => { - const pieces = []; - let current = ''; - let depth = 0; - let separator; - - for (const char of str) { - if (char === '<') { - depth++; - } else if (char === '>') { - depth--; - } else if ((char === '|' || char === '&') && depth === 0) { - pieces.push(current); - current = ''; - separator ??= ` ${char} `; - continue; - } - current += char; - } - - pieces.push(current); - return { pieces, separator }; -}; - -/** - * Attempts to parse and format a basic Generic type (e.g., Promise). - * It also supports union and multi-parameter types within the generic brackets. 
- * - * @param {string} typePiece The plain type piece to be evaluated - * @param {Function} transformType The function used to resolve individual types into links - * @returns {string|null} The formatted Markdown link, or null if no match is found - */ -const formatBasicGeneric = (typePiece, transformType) => { - const genericMatch = typePiece.match(TYPE_GENERIC_REGEX); - - if (genericMatch) { - const baseType = genericMatch[1].trim(); - const innerType = genericMatch[2].trim(); - - const baseResult = transformType(baseType.replace(/\[\]$/, '')); - const baseFormatted = baseResult - ? `[\`<${baseType}>\`](${baseResult})` - : `\`<${baseType}>\``; - // Split while capturing delimiters (| or ,) to preserve original syntax - const parts = innerType.split(/([|,])/); - - const innerFormatted = parts - .map(part => { - const trimmed = part.trim(); - // If it is a delimiter, return it as is - if (trimmed === '|') { - return ' | '; - } - - if (trimmed === ',') { - return ', '; - } - - const innerRes = transformType(trimmed.replace(/\[\]$/, '')); - return innerRes - ? 
`[\`<${trimmed}>\`](${innerRes})` - : `\`<${trimmed}>\``; - }) - .join(''); - - return `${baseFormatted}<${innerFormatted}>`; - } - - return null; -}; /** * This method replaces plain text Types within the Markdown content into Markdown links * that link to the actual relevant reference for such type (either internal or external link) @@ -111,7 +31,10 @@ const formatBasicGeneric = (typePiece, transformType) => { export const transformTypeToReferenceLink = (type, record) => { // Removes the wrapping curly braces that wrap the type references // We keep the angle brackets `<>` intact here to parse Generics later - const typeInput = type.replace(/[{}]/g, ''); + const typeInput = type + .trim() + .replace(/^\{(.*)\}$/, '$1') + .trim(); /** * Handles the mapping (if there's a match) of the input text @@ -150,32 +73,7 @@ export const transformTypeToReferenceLink = (type, record) => { return ''; }; - const { pieces: outerPieces, separator } = splitByOuterSeparator(typeInput); - - const typePieces = outerPieces.map(piece => { - // This is the content to render as the text of the Markdown link - const trimmedPiece = piece.trim(); - - // 1. Attempt to format as a basic Generic type first - const genericMarkdown = formatBasicGeneric(trimmedPiece, transformType); - if (genericMarkdown) { - return genericMarkdown; - } - - // 2. 
Fallback to the logic for plain types - // This is what we will compare against the API types mappings - // The ReGeX below is used to remove `[]` from the end of the type - const result = transformType(trimmedPiece.replace(/\[\]$/, '')); - - // If we have a valid result and the piece is not empty, we return the Markdown link - if (trimmedPiece.length && result.length) { - return `[\`<${trimmedPiece}>\`](${result})`; - } - }); - - // Filter out pieces that we failed to map and then join the valid ones - // using the same separator that appeared in the original type string - const markdownLinks = typePieces.filter(Boolean).join(separator); + const markdownLinks = parseType(typeInput, transformType); // Return the replaced links or the original content if they all failed to be replaced // Note that if some failed to get replaced, only the valid ones will be returned diff --git a/src/generators/metadata/utils/typeParser.mjs b/src/generators/metadata/utils/typeParser.mjs new file mode 100644 index 00000000..27f80f7c --- /dev/null +++ b/src/generators/metadata/utils/typeParser.mjs @@ -0,0 +1,249 @@ +const openParentheses = ['<', '(', '{', '[']; +const closeParentheses = ['>', ')', '}', ']']; + +/** + * Safely splits a string by a given set of separators at depth 0 + * (ignoring those inside < >, ( ), { }, or [ ]). 
/**
 * Splits a string on a separator, but only where the separator occurs at
 * nesting depth 0, i.e. outside of any `< >`, `( )`, `{ }` or `[ ]` pair.
 * The `>` of an arrow (`=>`) is never counted as a closing bracket.
 *
 * @param {string} str The string to split
 * @param {string} separator The separator to split by (e.g., '|', '&', ',', '=>');
 *   it may be longer than one character
 * @returns {string[]} The trimmed pieces; always contains at least one entry
 */
const splitByOuterSeparator = (str, separator) => {
  // An empty separator can never match; returning early also protects the
  // index arithmetic below from moving backwards
  if (!separator) {
    return [str.trim()];
  }

  const pieces = [];
  let current = '';
  let depth = 0;

  for (let i = 0; i < str.length; i++) {
    const char = str[i];

    if (openParentheses.includes(char)) {
      depth++;
    } else if (
      closeParentheses.includes(char) &&
      // Small exception: the '>' in '=>' does not close anything
      !(char === '>' && str[i - 1] === '=')
    ) {
      // Clamp at 0 so a stray closing bracket cannot push the depth negative,
      // which would make separators *inside* a later bracket pair look
      // like top-level ones
      depth = Math.max(0, depth - 1);
    }

    if (depth === 0 && str.startsWith(separator, i)) {
      pieces.push(current.trim());
      current = '';
      // Skip the remaining characters of a multi-character separator
      i += separator.length - 1;
      continue;
    }

    current += char;
  }

  pieces.push(current.trim());
  return pieces;
};
/**
 * Removes redundant grouping parentheses that wrap the *entire* type string.
 * Layers are peeled one at a time, so `(((string | number)))` becomes
 * `string | number`, while disconnected groups such as `(A) | (B)` and
 * function types such as `(a: string) => void` are returned untouched
 * (apart from trimming).
 *
 * @param {string} typeString The type string to evaluate and potentially unwrap.
 * @returns {string} The trimmed, unwrapped type string, or the trimmed original if it is not fully wrapped.
 */
const stripOuterParentheses = typeString => {
  /**
   * Tells whether the first `(` of `text` is closed by its very last character.
   * `text` is expected to start with `(` and end with `)`.
   */
  const isFullyWrapped = text => {
    let depth = 0;

    // The last character is deliberately skipped: if depth returns to 0 before
    // reaching it, the leading '(' was closed somewhere in the middle
    for (let i = 0; i < text.length - 1; i++) {
      const char = text[i];

      if (openParentheses.includes(char)) {
        depth++;
      } else if (
        closeParentheses.includes(char) &&
        // The '>' in '=>' is not a closing bracket
        !(char === '>' && text[i - 1] === '=')
      ) {
        depth--;
      }

      if (depth === 0) {
        return false;
      }
    }

    return true;
  };

  let candidate = typeString.trim();

  // Peel layers iteratively rather than recursing once per layer
  while (
    candidate.startsWith('(') &&
    candidate.endsWith(')') &&
    isFullyWrapped(candidate)
  ) {
    candidate = candidate.slice(1, -1).trim();
  }

  return candidate;
};
/**
 * Parses the left side of an arrow function (everything before `=>`),
 * turning every parameter type into Markdown links.
 *
 * Anything in front of the parameter list is kept verbatim as a prefix.
 * A parameter without a `:` is treated as a bare type and is left as-is when
 * it cannot be resolved. Empty parameters (e.g. produced by a trailing comma)
 * are dropped.
 *
 * @param {string} signature The left side of the arrow function
 * @param {Function} transformType The resolver function
 * @returns {string} The parsed signature with markdown links, or the input
 *   unchanged when no parenthesised parameter list can be isolated
 */
const parseFunctionSignature = (signature, transformType) => {
  const trimmed = signature.trim();

  // Safety fallback: without a closing ')' there is no parameter list to parse
  if (!trimmed.endsWith(')')) {
    return signature;
  }

  let depth = 0;
  let openParenIndex = -1;

  // Walk backwards from the final ')' to find the '(' that opens the
  // parameter list; only round parentheses are counted here
  for (let i = trimmed.length - 1; i >= 0; i--) {
    const char = trimmed[i];

    if (char === ')') {
      depth++;
    } else if (char === '(') {
      depth--;
    }

    if (depth === 0) {
      openParenIndex = i;
      break;
    }
  }

  // Unbalanced parentheses: give the input back untouched
  if (openParenIndex === -1) {
    return signature;
  }

  const prefix = trimmed.slice(0, openParenIndex);
  const paramsString = trimmed.slice(openParenIndex + 1, -1);

  const parsedArgs = splitByOuterSeparator(paramsString, ',')
    // Drops the empty entries produced by `()` and by trailing commas, which
    // would otherwise be rendered as a dangling `, `
    .filter(arg => arg !== '')
    .map(arg => {
      const [paramName, ...typeParts] = splitByOuterSeparator(arg, ':');

      if (typeParts.length > 0) {
        const paramType = typeParts.join(':');
        const parsedType =
          parseType(paramType, transformType) || `\`<${paramType}>\``;

        return `${paramName}: ${parsedType}`;
      }

      return parseType(arg, transformType) || arg;
    });

  return `${prefix}(${parsedArgs.join(', ')})`;
};
/**
 * Recursively parses advanced TypeScript types, including Unions,
 * Intersections, Functions, Nested Generics and Arrays, replacing every type
 * name that `transformType` can resolve with a Markdown link.
 *
 * Parentheses wrapping the whole input are dropped as redundant. Parentheses
 * around a union/intersection operand or around an array element are kept in
 * the output, because removing them would change the meaning of the type
 * (e.g. `(A | B) & C` or `(A | B)[]`).
 *
 * @param {string} typeString The plain type string to evaluate
 * @param {Function} transformType The function used to resolve individual types into links
 * @returns {string|null} The formatted Markdown link(s), or null if the base type doesn't map
 */
export const parseType = (typeString, transformType) => {
  // Clean the string and strip unnecessary outer parentheses to prevent depth blindness
  const trimmed = stripOuterParentheses(typeString);
  if (!trimmed) {
    return null;
  }

  // Rendering used for any piece that could not be resolved to a link
  const formatUnresolved = text => `\`<${text}>\``;

  // Resolves one operand of a union/intersection, re-adding the grouping
  // parentheses that the recursive call strips from it
  const resolveOperand = operand => {
    const parsed = parseType(operand, transformType);
    if (!parsed) {
      return formatUnresolved(operand);
    }

    const isGrouped = stripOuterParentheses(operand) !== operand.trim();
    return isGrouped ? `(${parsed})` : parsed;
  };

  // Handle Unions (|) first, then Intersections (&): unions bind weaker, so
  // they have to be split at the outermost level
  for (const operator of ['|', '&']) {
    if (trimmed.includes(operator)) {
      const operands = splitByOuterSeparator(trimmed, operator);
      if (operands.length > 1) {
        return operands.map(resolveOperand).join(` ${operator} `);
      }
    }
  }

  // Handle Functions (=>)
  if (trimmed.includes('=>')) {
    const [signature, ...returnParts] = splitByOuterSeparator(trimmed, '=>');
    if (returnParts.length > 0) {
      // Everything after the first arrow is the (possibly curried) return type
      const returnType = returnParts.join(' => ');

      const parsedSignature = parseFunctionSignature(signature, transformType);
      const parsedReturn =
        parseType(returnType, transformType) || formatUnresolved(returnType);

      return `${parsedSignature} => ${parsedReturn}`;
    }
  }

  // Separate any number of trailing `[]` pairs (e.g. `string[][]`) from the
  // element type so the array notation can be re-appended after linking
  const arraySuffix = trimmed.match(/(?:\[\])+$/)?.[0] ?? '';
  const elementType = arraySuffix
    ? trimmed.slice(0, -arraySuffix.length).trim()
    : trimmed;

  // Handle arrays of a parenthesised group, e.g. `(string | number)[]`
  if (arraySuffix) {
    const groupContent = stripOuterParentheses(elementType);
    if (groupContent !== elementType) {
      const parsedGroup = parseType(groupContent, transformType);
      return parsedGroup ? `(${parsedGroup})${arraySuffix}` : null;
    }
  }

  // Handle Generics, optionally wrapped in an array (e.g. `Map<string, number>[]`)
  if (elementType.includes('<') && elementType.endsWith('>')) {
    const firstBracketIndex = elementType.indexOf('<');
    const baseType = elementType
      .slice(0, firstBracketIndex)
      .trim()
      .replace(/\[\]$/, ''); // Just in case of Base[]
    const innerType = elementType.slice(firstBracketIndex + 1, -1).trim();

    const baseResult = transformType(baseType);
    const baseFormatted = baseResult
      ? `[\`<${baseType}>\`](${baseResult})`
      : formatUnresolved(baseType);

    const innerFormatted = splitByOuterSeparator(innerType, ',')
      .map(arg => parseType(arg, transformType) || formatUnresolved(arg))
      .join(', ');

    return `${baseFormatted}<${innerFormatted}>${arraySuffix}`;
  }

  // Base Case: Plain Type (e.g., string, Buffer, Function), with the array
  // notation preserved after the link
  const result = transformType(elementType);
  if (elementType.length && result) {
    return `[\`<${elementType}>\`](${result})${arraySuffix}`;
  }

  return null;
};