def translateLengthConsume(
    self, node: source.node.LengthConsume
) -> list[IWrap[_frontend.node.Empty]]:
    """Flatten a LengthConsume node into a chain of Empty nodes.

    One wrapped ``Empty`` node is created per index ``0 .. node.length - 1``
    and named ``"<node.name>_<index>"``.  Each node's ``otherwise`` edge is
    pointed at the next node in the chain; the last node is left without an
    edge so the caller can attach the source node's own ``otherwise`` target.

    :param node: builder-side node; only ``name`` and ``length`` are read.
    :return: the wrapped Empty nodes in chain order.  The first element is
        also registered in ``self.Map`` as the translation of ``node``.
    """

    def wrap_advance_number(an: int) -> IWrap[_frontend.node.Empty]:
        # Each link gets its own unique id derived from the source node name.
        return self.implementation.node.Empty(
            _frontend.node.Empty(self.Id.id(f"{node.name}_{an}"))
        )

    results = [wrap_advance_number(0)]
    # Later lookups of the source node resolve to the head of the chain.
    self.Map[node] = results[0]

    # When node.length <= 1 the range is empty and the chain is one node long.
    for i in range(1, node.length):
        link = wrap_advance_number(i)
        # Second argument False: presumably noAdvance=False, i.e. a skipTo-style
        # edge as the original docstring describes -- confirm in setOtherwise.
        results[-1].ref.setOtherwise(link, False)
        results.append(link)
    return results
def skip_multiple(self, value: int) -> "code.LengthConsume":
    """
    Create a node that skips over several characters in a row without
    needing a field to count them.

    A ``UserWarning`` is emitted when ``value`` is 256 or more, because
    large values may lead to performance regressions in the generated
    parser.

    :param value: how many to skip; must be an integer greater than 1
    :raises TypeError: if ``value`` cannot be interpreted as an integer
    :raises ValueError: if ``value`` is less than or equal to 1
    :return: a ``LengthConsume`` node built from ``value``
    """
    import operator

    # Reject floats, strings, None, ... here instead of letting them fail
    # later, far away from the call that supplied them.
    try:
        count = operator.index(value)
    except TypeError:
        raise TypeError(
            f"skip_multiple expects an integer value, got {type(value).__name__}"
        ) from None

    if count <= 1:
        raise ValueError(f"A value that is <= 1 for skip_multiple defeats it's purpose. Got {value}")

    # TODO: Remove this warning when an alternative method such as
    # Creating a u64 dummy field with consume can be readily supplied (Code generation side of things).
    if count >= 256:
        # stacklevel=2 attributes the warning to the caller's line rather
        # than to this helper.
        warnings.warn(
            f"skipping nodes greater than 256 in this case: {value}"
            " may cause significant performance regressions with the parser "
            " being generated",
            UserWarning,
            stacklevel=2,
        )

    return code.LengthConsume(count)
class LengthConsume(Node):
    """Skip a fixed number of characters without a backing field.

    Unlike ``Consume``, which requires a field, this node is given only a
    length.  The length is stored on the instance and is also embedded in
    the node's name.
    """

    def __init__(self, length: int) -> None:
        # NOTE(review): the name suffix says "bits" although the surrounding
        # comments describe character skipping; the string is kept as-is.
        node_name = f"length_consume_{length}_bits"
        super().__init__(node_name)
        self.length = length
class LengthConsume(Node):
    """Frontend node that carries a consume length in addition to its id.

    NOTE(review): the only construction of this class visible in the patch is
    a commented-out line in ``translateLengthConsume``, which builds ``Empty``
    nodes instead -- confirm whether this class is used elsewhere.
    """

    def __init__(self, id: IUniqueName, length: int) -> None:
        # ``length`` is stored before the base initializer runs; keep this
        # order unless Node.__init__ is known not to depend on it.
        self.length = length
        super().__init__(id)
@pytest.fixture(params=list(range(2, 10)))
def amount(request: pytest.FixtureRequest) -> int:
    """Parametrize over the accepted skip counts 2 through 9."""
    return request.param


def test_length_consume(amount: int):
    """Build a parser around ``skip_multiple(amount)`` and check the C output.

    The generated source must contain the enum entry, the ``case`` label and
    a ``goto`` for each of the first ``amount - 1`` flattened nodes.
    """
    parser = LLParse("test")
    on_end = parser.span(parser.code.span("on_end"))
    end = parser.node("end")
    start = parser.node("start").skipTo(
        on_end.start(parser.skip_multiple(amount).otherwise(end))
    )
    end.otherwise(on_end.end().skipTo(start))

    generated = parser.build(start)
    # ``code`` is a list of lines, so every ``in`` below is an exact,
    # whitespace-sensitive whole-line match.
    code = generated.c.splitlines(keepends=False)
    # A dedicated loop name avoids rebinding the parser variable.
    for index in range(amount - 1):
        # should be enough to see that we have a length consumption of bits
        assert f" s_n_test__n_length_consume_{amount}_bits_{index}," in code
        assert f" case s_n_test__n_length_consume_{amount}_bits_{index}:" in code
        assert f" goto s_n_test__n_length_consume_{amount}_bits_{index};" in code


@pytest.fixture(params=[-2, 0, 1, -42069])
def invalid_amount(request: pytest.FixtureRequest) -> int:
    """Parametrize over skip counts that must be rejected (all <= 1)."""
    return request.param


def test_length_consume_fail(invalid_amount: int):
    """``skip_multiple`` must raise ``ValueError`` for counts <= 1."""
    # Construct the parser outside the ``raises`` block so that only an
    # error from ``skip_multiple`` itself can satisfy the expectation.
    s = LLParse("test")
    with pytest.raises(ValueError):
        s.skip_multiple(invalid_amount)