From 479c629e946430321e7f53bc5a648b90b1be6d96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alice=20Bevan=E2=80=93McGregor?= Date: Fri, 18 Dec 2015 23:19:12 -0500 Subject: [PATCH 1/4] Extracted and generalized expression split. --- cinje/util.py | 35 ++++++++++++++++++++++++++++++++ test/test_util/test_functions.py | 13 +++++++++++- 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/cinje/util.py b/cinje/util.py index 79db9e4..9109327 100644 --- a/cinje/util.py +++ b/cinje/util.py @@ -7,6 +7,7 @@ # ## Imports import sys +import ast from codecs import iterencode from inspect import isfunction, isclass @@ -243,6 +244,40 @@ def xmlargs(_source=None, **values): return bless(" " + ejoin(parts)) if parts else '' +def splitexpr(text): + """Split a given line of text into constituent expressions.""" + + # This is rather nasty, so we've isolated it here. + + parts = [] + + while text: + split = -1 + + try: + ast.parse(text) + except SyntaxError as e: # We expect this, and catch it. It'll have exploded after the first expr. + split = text.rfind(text[e.offset-1] if text[e.offset-1] in "'\"" else ' ', 0, e.offset-1) + + if split < 0: + parts.append(text) + break + + chunk = text[:split].rstrip() + + # Verify this is a good split. + try: + ast.parse(chunk) + except SyntaxError as e: + parts.append(text) + break + + parts.append(chunk) + text = text[split:].lstrip() + + return parts + + def chunk(text, mapping={None: 'text', '${': '_escape', '#{': '_bless', '&{': '_args', '%{': 'format', '@{': '_json'}): """Chunkify and "tag" a block of text into plain text and code sections. diff --git a/test/test_util/test_functions.py b/test/test_util/test_functions.py index 05d6923..87787ae 100644 --- a/test/test_util/test_functions.py +++ b/test/test_util/test_functions.py @@ -1,6 +1,6 @@ # encoding: utf-8 -from cinje.util import interruptable, iterate, xmlargs, chunk, ensure_buffer, Line, strip_tags +from cinje.util import interruptable, iterate, xmlargs, splitexpr, chunk, ensure_buffer, Line, strip_tags # Note: ensure_buffer is tested indirectly via template conformance testing. @@ -108,6 +108,17 @@ def test_defaults_overridden(self): )) +class TestExpressionSplit(object): + def test_object_quote_single(self): + assert splitexpr("foo 'Hello world!'") == ['foo', "'Hello world!'"] + + def test_object_quote_double(self): + assert splitexpr('bar "Farewell cruel world!"') == ['bar', '"Farewell cruel world!"'] + + def test_call_then_argspec(self): + assert splitexpr('baz(diz, "thing") 27, 42') == ['baz(diz, "thing")', '27, 42'] + + class TestChunker(object): def _do(self, value): token, kind, value = value From 03ea14a5be2907c96fa39e1db53d67a1bef52596 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alice=20Bevan=E2=80=93McGregor?= Date: Sat, 19 Dec 2015 01:10:56 -0500 Subject: [PATCH 2/4] Attempt to handle Pypy off-by-one error. --- cinje/util.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cinje/util.py b/cinje/util.py index 9109327..98bca87 100644 --- a/cinje/util.py +++ b/cinje/util.py @@ -250,6 +250,7 @@ def splitexpr(text): # This is rather nasty, so we've isolated it here. parts = [] + offset = 0 if pypy else 1 while text: split = -1 @@ -257,7 +258,7 @@ def splitexpr(text): try: ast.parse(text) except SyntaxError as e: # We expect this, and catch it. It'll have exploded after the first expr. - split = text.rfind(text[e.offset-1] if text[e.offset-1] in "'\"" else ' ', 0, e.offset-1) + split = text.rfind(text[e.offset-offset] if text[e.offset-offset] in "'\"" else ' ', 0, e.offset-offset) if split < 0: parts.append(text) From 73908efff9d90986134fb3c12af88ea41af5652e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alice=20Bevan=E2=80=93McGregor?= Date: Sat, 19 Dec 2015 01:13:54 -0500 Subject: [PATCH 3/4] Pypy actually gives us the split point, so don't search. --- cinje/util.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cinje/util.py b/cinje/util.py index 98bca87..97e677d 100644 --- a/cinje/util.py +++ b/cinje/util.py @@ -250,7 +250,6 @@ def splitexpr(text): # This is rather nasty, so we've isolated it here. parts = [] - offset = 0 if pypy else 1 while text: split = -1 @@ -258,7 +257,10 @@ def splitexpr(text): try: ast.parse(text) except SyntaxError as e: # We expect this, and catch it. It'll have exploded after the first expr. - split = text.rfind(text[e.offset-offset] if text[e.offset-offset] in "'\"" else ' ', 0, e.offset-offset) + if pypy: + split = e.offset + else: + split = text.rfind(text[e.offset - 1] if text[e.offset - 1] in "'\"" else ' ', 0, e.offset - 1) if split < 0: parts.append(text) From 55d1170a9a7d5a0ecb8b977a2e03c6868c5573a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alice=20Bevan=E2=80=93McGregor?= Date: Mon, 4 Jan 2016 11:04:11 -0500 Subject: [PATCH 4/4] WIP --- cinje/inline/text.py | 16 +++------------- cinje/util.py | 12 ++++++------ test/test_util/test_functions.py | 3 +++ 3 files changed, 12 insertions(+), 19 deletions(-) diff --git a/cinje/inline/text.py b/cinje/inline/text.py index 32c71cf..ca47d1e 100644 --- a/cinje/inline/text.py +++ b/cinje/inline/text.py @@ -5,7 +5,7 @@ from itertools import chain from pprint import pformat -from ..util import pypy, iterate, chunk, Line, ensure_buffer +from ..util import pypy, iterate, splitexpr, chunk, Line, ensure_buffer def gather(input): @@ -116,18 +116,8 @@ def inner_chain(): continue if token == 'format': - # We need to split the expression defining the format string from the values to pass when formatting. - # We want to allow any Python expression, so we'll need to piggyback on Python's own parser in order - # to exploit the currently available syntax. Apologies, this is probably the scariest thing in here. - split = -1 - - try: - ast.parse(chunk_) - except SyntaxError as e: # We expect this, and catch it. It'll have exploded after the first expr. - split = chunk_.rfind(' ', 0, e.offset) - - token = '_bless(' + chunk_[:split].rstrip() + ').format' - chunk_ = chunk_[split:].lstrip() + token, chunk_ = splitexpr(chunk_, 1) + token = '_bless(' + token + ').format' yield Line(lineno, prefix + token + '(' + chunk_ + ')' + suffix, scope) diff --git a/cinje/util.py b/cinje/util.py index 97e677d..0843d6e 100644 --- a/cinje/util.py +++ b/cinje/util.py @@ -244,7 +244,7 @@ def xmlargs(_source=None, **values): return bless(" " + ejoin(parts)) if parts else '' -def splitexpr(text): +def splitexpr(text, limit=0): """Split a given line of text into constituent expressions.""" # This is rather nasty, so we've isolated it here. @@ -269,14 +269,14 @@ def splitexpr(text): chunk = text[:split].rstrip() # Verify this is a good split. - try: - ast.parse(chunk) - except SyntaxError as e: - parts.append(text) - break + ast.parse(chunk) # We want this to explode if invalid. parts.append(chunk) text = text[split:].lstrip() + + if limit and len(parts) == limit: + parts.append(text) + break return parts diff --git a/test/test_util/test_functions.py b/test/test_util/test_functions.py index 87787ae..4384c8c 100644 --- a/test/test_util/test_functions.py +++ b/test/test_util/test_functions.py @@ -117,6 +117,9 @@ def test_object_quote_double(self): def test_call_then_argspec(self): assert splitexpr('baz(diz, "thing") 27, 42') == ['baz(diz, "thing")', '27, 42'] + + def test_partial_expression(self): + assert splitexpr('asdf 24 asdf"') == ['asdf', '24', 'asdf"'] class TestChunker(object):