Skip to content

Commit e7d4837

Browse files
committed
PEP 813: Implement !p pretty-print conversion for f-strings
Add the !p conversion flag for f-strings, which calls sys.__prettyhook__ (defaulting to pprint.pformat) on the value. When a callable is provided as a format spec (e.g. f"{obj!p:my_formatter}"), the format spec is evaluated as a Python expression and called on the value instead.

Key changes:
- Tokenizer: When !p: is detected inside an f-string expression, keep expression tokenization mode instead of switching to format-spec mode, allowing the format spec to be parsed as a Python expression.
- Grammar: Add the fstring_pretty_conversion rule and two new alternatives in fstring_replacement_field for !p with and without a callable.
- Parser: Add _PyPegen_check_pretty_conversion() to validate the !p conversion, and _PyPegen_pretty_formatted_value() to build the AST.
- Codegen: Handle the 'p' conversion - without a callable, use CONVERT_VALUE with the new FVC_PRETTY opcode arg; with a callable, emit CALL bytecode.
- Runtime: Add the _PyObject_Pretty() conversion function, which looks up sys.__prettyhook__ and calls it on the object.
- sys module: Add sys.prettyhook (default: pprint.pformat) and a sys.__prettyhook__ backup copy, following the existing hook pattern.

https://claude.ai/code/session_01M64NyGDjnkVvSmDRmJFF64
1 parent 4f9313e commit e7d4837

15 files changed

Lines changed: 696 additions & 298 deletions

File tree

Grammar/python.gram

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -953,9 +953,15 @@ fstring_middle[expr_ty]:
953953
| fstring_replacement_field
954954
| t=FSTRING_MIDDLE { _PyPegen_constant_from_token(p, t) }
955955
# A "{...}" replacement field inside an f-string.
# The two PEP 813 alternatives come first: "!p" followed by ":" and a callable
# expression, then a bare "!p". Both rely on fstring_pretty_conversion, whose
# action returns NULL without raising when the conversion is not "p", so the
# parser falls through to the general conversion/format-spec alternative.
fstring_replacement_field[expr_ty]:
956+
| '{' a=annotated_rhs debug_expr='='? conv=fstring_pretty_conversion ':' pretty_func=annotated_rhs rbrace='}' {
957+
_PyPegen_pretty_formatted_value(p, a, debug_expr, conv, pretty_func, rbrace, EXTRA) }
958+
| '{' a=annotated_rhs debug_expr='='? conv=fstring_pretty_conversion rbrace='}' {
959+
_PyPegen_pretty_formatted_value(p, a, debug_expr, conv, NULL, rbrace, EXTRA) }
956960
| '{' a=annotated_rhs debug_expr='='? conversion=[fstring_conversion] format=[fstring_full_format_spec] rbrace='}' {
957961
_PyPegen_formatted_value(p, a, debug_expr, conversion, format, rbrace, EXTRA) }
958962
| invalid_fstring_replacement_field
963+
# PEP 813: "!" followed by a NAME. The action yields the "!" token only when
# the NAME is exactly "p" and directly adjacent to the "!"; otherwise it
# returns NULL without an error so that other alternatives are still tried.
fstring_pretty_conversion[Token*]:
964+
| conv_token="!" conv=NAME { _PyPegen_check_pretty_conversion(p, conv_token, conv) }
959965
fstring_conversion[ResultTokenWithMetadata*]:
960966
| conv_token="!" conv=NAME { _PyPegen_check_fstring_conversion(p, conv_token, conv) }
961967
fstring_full_format_spec[ResultTokenWithMetadata*]:

Include/ceval.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -125,13 +125,14 @@ PyAPI_FUNC(void) PyEval_ReleaseThread(PyThreadState *tstate);
125125
}
126126

127127
/* Masks and values used by FORMAT_VALUE opcode. */
128-
#define FVC_MASK 0x3
128+
#define FVC_MASK 0x7
129129
#define FVC_NONE 0x0
130130
#define FVC_STR 0x1
131131
#define FVC_REPR 0x2
132132
#define FVC_ASCII 0x3
133-
#define FVS_MASK 0x4
134-
#define FVS_HAVE_SPEC 0x4
133+
#define FVC_PRETTY 0x4
134+
#define FVS_MASK 0x8
135+
#define FVS_HAVE_SPEC 0x8
135136

136137
#ifndef Py_LIMITED_API
137138
# define Py_CPYTHON_CEVAL_H

Lib/test/test_fstring.py

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1866,6 +1866,104 @@ def test_gh139516(self):
18661866
f.write('''def f(a): pass\nf"{f(a=lambda: 'à'\n)}"'''.encode())
18671867
assert_python_ok(script)
18681868

1869+
def test_pretty_conversion_basic(self):
1870+
"""PEP 813: Test !p conversion calls sys.__prettyhook__"""
1871+
import pprint
1872+
x = [1, 2, 3]
1873+
self.assertEqual(f'{x!p}', pprint.pformat(x))
1874+
1875+
d = {'hello': 'world', 'foo': 'bar'}
1876+
self.assertEqual(f'{d!p}', pprint.pformat(d))
1877+
1878+
# Simple values
1879+
self.assertEqual(f'{42!p}', pprint.pformat(42))
1880+
self.assertEqual(f'{"hello"!p}', pprint.pformat("hello"))
1881+
1882+
def test_pretty_conversion_with_callable(self):
1883+
"""PEP 813: Test !p with callable format spec"""
1884+
x = [1, 2, 3]
1885+
1886+
# Use repr as the callable
1887+
self.assertEqual(f'{x!p:repr}', repr(x))
1888+
1889+
# Use str as the callable
1890+
self.assertEqual(f'{x!p:str}', str(x))
1891+
1892+
# Use a custom function
1893+
def my_format(obj):
1894+
return f'custom({obj!r})'
1895+
self.assertEqual(f'{x!p:my_format}', 'custom([1, 2, 3])')
1896+
1897+
def test_pretty_conversion_with_expression_callable(self):
1898+
"""PEP 813: Test !p with expression as callable"""
1899+
import pprint
1900+
1901+
d = {'key1': 'value1', 'key2': 'value2'}
1902+
1903+
# Attribute access
1904+
pp = pprint.PrettyPrinter(width=20)
1905+
result = f'{d!p:pp.pformat}'
1906+
self.assertEqual(result, pp.pformat(d))
1907+
1908+
# Method call in callable expression
1909+
result = f'{d!p:pprint.PrettyPrinter(width=20).pformat}'
1910+
self.assertEqual(result, pprint.PrettyPrinter(width=20).pformat(d))
1911+
1912+
def test_pretty_conversion_debug(self):
1913+
"""PEP 813: Test !p with debug (=) syntax"""
1914+
import pprint
1915+
x = [1, 2, 3]
1916+
result = f'{x=!p}'
1917+
self.assertEqual(result, f'x={pprint.pformat(x)}')
1918+
1919+
def test_pretty_conversion_custom_hook(self):
1920+
"""PEP 813: Test overriding sys.__prettyhook__"""
1921+
import sys
1922+
original = sys.__prettyhook__
1923+
1924+
def custom_hook(obj):
1925+
return f'CUSTOM({obj!r})'
1926+
1927+
try:
1928+
sys.__prettyhook__ = custom_hook
1929+
x = [1, 2, 3]
1930+
self.assertEqual(f'{x!p}', 'CUSTOM([1, 2, 3])')
1931+
finally:
1932+
sys.__prettyhook__ = original
1933+
1934+
def test_pretty_hook_exists(self):
    """PEP 813: sys has callable prettyhook and __prettyhook__ attributes."""
    import sys

    hook_names = ('prettyhook', '__prettyhook__')

    # Presence is checked for both names before either is looked up.
    for name in hook_names:
        self.assertTrue(hasattr(sys, name))
    for name in hook_names:
        self.assertTrue(callable(getattr(sys, name)))
1941+
1942+
def test_pretty_conversion_p_as_variable(self):
    """PEP 813: a variable called 'p' still works inside replacement fields."""
    p = 42
    # No conversion, then !r, then !s.
    self.assertEqual('42', f'{p}')
    self.assertEqual('42', f'{p!r}')
    self.assertEqual('42', f'{p!s}')

    # 'p' as the object being subscripted.
    p = [1, 2, 3]
    self.assertEqual('1', f'{p[0]}')
1952+
1953+
def test_pretty_conversion_not_callable_error(self):
1954+
"""PEP 813: Test error when format spec is not callable"""
1955+
x = 42
1956+
with self.assertRaises(TypeError):
1957+
f'{x!p:42}'
1958+
1959+
def test_pretty_conversion_combined(self):
1960+
"""PEP 813: Test !p in combined f-strings"""
1961+
x = {'a': 1}
1962+
y = [1, 2]
1963+
result = f'{x!p} and {y!r}'
1964+
import pprint
1965+
self.assertEqual(result, f'{pprint.pformat(x)} and {y!r}')
1966+
18691967

18701968
if __name__ == '__main__':
18711969
unittest.main()

Objects/interpolationobject.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -212,9 +212,12 @@ _PyInterpolation_Build(PyObject *value, PyObject *str, int conversion, PyObject
212212
case FVC_STR:
213213
interpolation->conversion = _Py_LATIN1_CHR('s');
214214
break;
215+
case FVC_PRETTY:
216+
interpolation->conversion = _Py_LATIN1_CHR('p');
217+
break;
215218
default:
216219
PyErr_SetString(PyExc_SystemError,
217-
"Interpolation() argument 'conversion' must be one of 's', 'a' or 'r'");
220+
"Interpolation() argument 'conversion' must be one of 's', 'a', 'r', or 'p'");
218221
Py_DECREF(interpolation);
219222
return NULL;
220223
}

Parser/action_helpers.c

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -983,6 +983,25 @@ _PyPegen_check_fstring_conversion(Parser *p, Token* conv_token, expr_ty conv)
983983
return result_token_with_metadata(p, conv, conv_token->metadata);
984984
}
985985

986+
/* PEP 813: Check for !p pretty-print conversion.
987+
* Returns the conv_token on success, NULL without error on failure
988+
* (causing the PEG parser to backtrack). */
989+
Token *
990+
_PyPegen_check_pretty_conversion(Parser *p, Token *conv_token, expr_ty conv)
991+
{
992+
if (conv_token->lineno != conv->lineno || conv_token->end_col_offset != conv->col_offset) {
993+
return NULL; /* Not adjacent - let other rules handle it */
994+
}
995+
if (PyUnicode_GET_LENGTH(conv->v.Name.id) != 1) {
996+
return NULL;
997+
}
998+
Py_UCS4 first = PyUnicode_READ_CHAR(conv->v.Name.id, 0);
999+
if (first != 'p') {
1000+
return NULL; /* Not !p - backtrack */
1001+
}
1002+
return conv_token;
1003+
}
1004+
9861005
ResultTokenWithMetadata *
9871006
_PyPegen_setup_full_format_spec(Parser *p, Token *colon, asdl_expr_seq *spec, int lineno, int col_offset,
9881007
int end_lineno, int end_col_offset, PyArena *arena)
@@ -1604,6 +1623,43 @@ expr_ty _PyPegen_formatted_value(Parser *p, expr_ty expression, Token *debug, Re
16041623
return _PyAST_JoinedStr(values, lineno, col_offset, debug_end_line, debug_end_offset, p->arena);
16051624
}
16061625

1626+
/* PEP 813: build the AST for a "!p" replacement field.
 *
 *   expression    - the value expression inside the braces.
 *   debug         - the '=' token for the debug form, or NULL.
 *   conv_token    - the '!' token; its position and metadata are used for the
 *                   debug text.
 *   pretty_func   - callable expression written after ':', or NULL to use the
 *                   default (sys.__prettyhook__).
 *   closing_brace - not used by this function.
 *
 * Returns a FormattedValue with conversion 'p' (112) whose format_spec slot
 * holds pretty_func.  In the debug form the result is a JoinedStr of the
 * debug text constant followed by that FormattedValue.  Returns NULL when
 * any AST node or sequence allocation fails. */
expr_ty _PyPegen_pretty_formatted_value(Parser *p, expr_ty expression, Token *debug,
                                        Token *conv_token, expr_ty pretty_func,
                                        Token *closing_brace, int lineno, int col_offset,
                                        int end_lineno, int end_col_offset, PyArena *arena) {
    int conversion_val = (int)'p';

    expr_ty formatted_value = _PyAST_FormattedValue(
        expression, conversion_val, pretty_func,
        lineno, col_offset, end_lineno,
        end_col_offset, arena
    );
    if (!formatted_value) {
        /* Do not carry a NULL node into the debug sequence below. */
        return NULL;
    }

    if (!debug) {
        return formatted_value;
    }

    /* Handle debug expression (f"{foo=!p:callable}"): the debug text ends
     * where the '!' token starts. */
    int debug_end_line = conv_token->lineno;
    int debug_end_offset = conv_token->col_offset;
    PyObject *debug_metadata = conv_token->metadata;

    expr_ty debug_text = _PyAST_Constant(debug_metadata, NULL, lineno, col_offset + 1, debug_end_line,
                                         debug_end_offset - 1, p->arena);
    if (!debug_text) {
        return NULL;
    }

    asdl_expr_seq *values = _Py_asdl_expr_seq_new(2, arena);
    if (!values) {
        /* asdl_seq_SET would dereference a NULL sequence. */
        return NULL;
    }
    asdl_seq_SET(values, 0, debug_text);
    asdl_seq_SET(values, 1, formatted_value);
    return _PyAST_JoinedStr(values, lineno, col_offset, debug_end_line, debug_end_offset, p->arena);
}
1662+
16071663
static expr_ty
16081664
_build_concatenated_bytes(Parser *p, asdl_expr_seq *strings, int lineno,
16091665
int col_offset, int end_lineno, int end_col_offset,

Parser/lexer/lexer.c

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1096,6 +1096,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
10961096
the_current_tok->last_expr_end = -1;
10971097
the_current_tok->in_format_spec = 0;
10981098
the_current_tok->in_debug = 0;
1099+
the_current_tok->in_pretty_conversion = 0;
10991100

11001101
enum string_kind_t string_kind = FSTRING;
11011102
switch (*tok->start) {
@@ -1265,7 +1266,25 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
12651266
return MAKE_TOKEN(ERRORTOKEN);
12661267
}
12671268

1268-
if (c == ':' && cursor == current_tok->curly_bracket_expr_start_depth) {
1269+
/* When we see '!' at the top level of an f-string expression,
1270+
* peek ahead for 'p' followed by ':'. If found, set the
1271+
* in_pretty_conversion flag so that ':' does NOT trigger
1272+
* format-spec mode. This allows the text after ':' to be
1273+
* tokenized as a regular Python expression (PEP 813). */
1274+
if (c == '!' && cursor == current_tok->curly_bracket_expr_start_depth) {
1275+
int peek1 = tok_nextc(tok);
1276+
if (peek1 == 'p') {
1277+
int peek2 = tok_nextc(tok);
1278+
if (peek2 == ':') {
1279+
current_tok->in_pretty_conversion = 1;
1280+
}
1281+
tok_backup(tok, peek2);
1282+
}
1283+
tok_backup(tok, peek1);
1284+
}
1285+
1286+
if (c == ':' && cursor == current_tok->curly_bracket_expr_start_depth
1287+
&& !current_tok->in_pretty_conversion) {
12691288
current_tok->kind = TOK_FSTRING_MODE;
12701289
current_tok->in_format_spec = 1;
12711290
p_start = tok->start;
@@ -1365,6 +1384,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
13651384
current_tok->kind = TOK_FSTRING_MODE;
13661385
current_tok->in_format_spec = 0;
13671386
current_tok->in_debug = 0;
1387+
current_tok->in_pretty_conversion = 0;
13681388
}
13691389
}
13701390
break;

Parser/lexer/state.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ typedef struct _tokenizer_mode {
6666
char* last_expr_buffer;
6767
int in_debug;
6868
int in_format_spec;
69+
int in_pretty_conversion;
6970

7071
enum string_kind_t string_kind;
7172
} tokenizer_mode;

0 commit comments

Comments
 (0)