diff --git a/src/pseudo-selectors/filters.ts b/src/pseudo-selectors/filters.ts
index 6fe37384..42ac0678 100644
--- a/src/pseudo-selectors/filters.ts
+++ b/src/pseudo-selectors/filters.ts
@@ -4,6 +4,49 @@ import { cacheParentResults } from "../helpers/cache.js";
 import { getElementParent } from "../helpers/querying.js";
 import type { CompiledQuery, InternalOptions } from "../types.js";
 
+/**
+ * Match a language tag against a language range using RFC 4647 extended
+ * filtering, with both sides already split into subtags.
+ *
+ * The primary subtags must be equal unless the range starts with `*`. Each
+ * remaining non-wildcard range subtag must then be found in the tag, in
+ * order. Tag subtags in between are skipped, except singletons (length of
+ * at most 1), which fail the match instead.
+ *
+ * @param tag - Lowercased subtags of the element's language value.
+ * @param range - Lowercased subtags of the language range to match against.
+ * @returns Whether `range` matches `tag`.
+ */
+function extendedFilter(tag: string[], range: string[]): boolean {
+    if (range[0] !== "*" && range[0] !== tag[0]) return false;
+
+    let tagIndex = 1;
+
+    for (let rangeIndex = 1; rangeIndex < range.length; rangeIndex++) {
+        const wanted = range[rangeIndex];
+
+        // A wildcard range subtag consumes nothing from the tag.
+        if (wanted === "*") continue;
+
+        // Walk the tag until `wanted` is found; each subtag is read once
+        // into a local so no unchecked index access is needed.
+        for (;;) {
+            const candidate = tag[tagIndex];
+
+            // Ran out of tag subtags before finding `wanted`.
+            if (candidate === undefined) return false;
+            tagIndex++;
+
+            if (candidate === wanted) break;
+
+            // Singletons must not be skipped over.
+            if (candidate.length <= 1) return false;
+        }
+    }
+
+    return true;
+}
+
 type Filter = (
     next: CompiledQuery,
     text: string,
@@ -175,6 +218,62 @@ export const filters: Record = {
         return (element) => context.includes(element) && next(element);
     },
 
+    /**
+     * `:lang()`: match elements whose language is matched by one of the
+     * comma-separated language ranges in `code`.
+     *
+     * The language comes from the nearest `xml:lang` or `lang` attribute on
+     * the element or its tag ancestors; `xml:lang` is checked first. If that
+     * value is empty, or no attribute is found, the element only matches a
+     * range whose first subtag is empty (such as a quoted empty string).
+     */
+    lang(next, code, { adapter }) {
+        // Ranges are unquoted, lowercased and split into subtags up front.
+        const ranges = code
+            .split(",")
+            .map((r) => r.trim())
+            .filter((r) => r.length > 0)
+            .map((r) =>
+                r
+                    .replace(/^['"]|['"]$/g, "")
+                    .toLowerCase()
+                    .split("-"),
+            );
+
+        // Depends only on the selector, so compute it once rather than
+        // rescanning `ranges` for every element.
+        const matchesUntagged = ranges.some((r) => r[0] === "");
+
+        return function lang(element) {
+            let node: typeof element | null = element;
+
+            while (node != null) {
+                const value =
+                    adapter.getAttributeValue(node, "xml:lang") ??
+                    adapter.getAttributeValue(node, "lang");
+
+                // The nearest declaration decides, even when it is empty.
+                if (value != null) {
+                    if (!value) return matchesUntagged && next(element);
+
+                    const tag = value.toLowerCase().split("-");
+                    return (
+                        ranges.some((r) => extendedFilter(tag, r)) &&
+                        next(element)
+                    );
+                }
+
+                const parent = adapter.getParent(node);
+                node =
+                    parent != null && adapter.isTag(parent)
+                        ? (parent as typeof element)
+                        : null;
+            }
+
+            return matchesUntagged && next(element);
+        };
+    },
+
     hover: dynamicStatePseudo("isHovered"),
     visited: dynamicStatePseudo("isVisited"),
     active: dynamicStatePseudo("isActive"),
diff --git a/test/pseudo-classes.ts b/test/pseudo-classes.ts
index 316ad1de..c1e70929 100644
--- a/test/pseudo-classes.ts
+++ b/test/pseudo-classes.ts
@@ -206,6 +206,66 @@ describe(":has", () => {
     });
 });
 
+describe(":lang", () => {
+    // NOTE(review): fixture markup in this suite is reconstructed from the
+    // assertions; confirm it against the originally intended documents.
+    // Single fixture covering inheritance, override, and untagged elements.
+    const langFixture = parseDocument(
+        '<div lang="en"><p id="a">A</p><p id="b" lang="fr-BE">B</p></div><p id="c">C</p>',
+    );
+
+    it.each([
+        // [selector, expected ids]
+        [":lang(en)", ["a"]],
+        [":lang(EN)", ["a"]],
+        [":lang(fr)", ["b"]],
+        [":lang(fr-BE)", ["b"]],
+        [":lang(en, fr)", ["a", "b"]],
+        [":lang(de)", []],
+    ])("%s matches %j", (selector, expectedIds) => {
+        const matches = CSSselect.selectAll(
+            `p${selector}`,
+            langFixture,
+        );
+        expect(matches.map((element) => element.attribs["id"])).toStrictEqual(
+            expectedIds,
+        );
+    });
+
+    it("should not match untagged elements", () => {
+        expect(
+            CSSselect.selectAll("p:lang(en)", langFixture),
+        ).toHaveLength(1);
+    });
+
+    it("should use extended filtering", () => {
+        const dom = parseDocument(
+            '<p lang="de-DE">a</p><p lang="de-Latn-DE">b</p><p lang="de-DE-1996">c</p>',
+        );
+        expect(
+            CSSselect.selectAll(":lang(de-DE)", dom),
+        ).toHaveLength(3);
+    });
+
+    it("should support wildcard primary subtag", () => {
+        const dom = parseDocument(
+            '<p lang="de-CH">a</p><p lang="fr-CH">b</p><p lang="de-DE">c</p>',
+        );
+        expect(
+            CSSselect.selectAll(":lang(\\*-CH)", dom),
+        ).toHaveLength(2);
+    });
+
+    it("should support xml:lang", () => {
+        const dom = parseDocument('<root xml:lang="ja"><child>x</child></root>', {
+            xmlMode: true,
+        });
+        expect(
+            CSSselect.selectAll(":lang(ja)", dom),
+        ).toHaveLength(2);
+    });
+});
+
 describe(":read-only and :read-write", () => {
     it("should match", () => {
         const dom = parseDocument(`