Skip to content

Commit d4bfaa8

Browse files
committed
gh-85393: Handle partial .write in TextIOWrapper
Also improves non-blocking support (gh-57531) and unbuffered support (gh-61606)
1 parent c91ad5d commit d4bfaa8

2 files changed

Lines changed: 192 additions & 13 deletions

File tree

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Improve behavior of :meth:`TextIOWrapper.write` when there is a partial write
2+
caused by underlying buffer being :class:`RawIOBase` (:gh:`91606` :gh:`85393`)
3+
or if there is a :exc:`BlockingIOError` :gh:`97531`

Modules/_io/textio.c

Lines changed: 189 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1573,17 +1573,73 @@ _io_TextIOWrapper_detach_impl(textio *self)
15731573
return buffer;
15741574
}
15751575

1576+
/* Prepend bytes to pending_bytes.
1577+
1578+
At the front because other bytes were added via TextIOWrapper.write
1579+
during buffer.write (see: gh-119506, gh-118138). */
1580+
static int
1581+
_textiowrapper_prepend(textio *self, PyObject *unwritten)
1582+
{
1583+
assert(PyBytes_CheckExact(unwritten));
1584+
1585+
/* Ensure pending_bytes and pending_bytes_count are protected. */
1586+
_Py_AssertHoldsTstate();
1587+
1588+
if (self->pending_bytes == NULL) {
1589+
self->pending_bytes = unwritten;
1590+
assert(self->pending_bytes == 0);
1591+
self->pending_bytes_count += PyBytes_GET_SIZE(unwritten);
1592+
}
1593+
else if (!PyList_CheckExact(self->pending_bytes)) {
1594+
PyObject *list = PyList_New(2);
1595+
if (list == NULL) {
1596+
Py_DECREF(unwritten);
1597+
return -1;
1598+
}
1599+
// Since Python 3.12, allocating GC object won't trigger GC and release
1600+
// GIL. See https://github.com/python/cpython/issues/97922
1601+
assert(!PyList_CheckExact(self->pending_bytes));
1602+
PyList_SET_ITEM(list, 0, unwritten);
1603+
PyList_SET_ITEM(list, 1, self->pending_bytes);
1604+
self->pending_bytes = list;
1605+
self->pending_bytes_count += PyBytes_GET_SIZE(unwritten);
1606+
}
1607+
else {
1608+
PyList_Insert(self->pending_bytes, 0, unwritten);
1609+
self->pending_bytes_count += PyBytes_GET_SIZE(unwritten);
1610+
Py_DECREF(unwritten);
1611+
}
1612+
return 0;
1613+
}
1614+
15761615
/* Flush the internal write buffer. This doesn't explicitly flush the
15771616
underlying buffered object, though. */
15781617
static int
15791618
_textiowrapper_writeflush(textio *self)
15801619
{
1620+
/* Validate have exclusive access to members.
1621+
1622+
NOTE: this code relies on GIL / @critical_section. It needs to ensure
1623+
state is "safe" before/after calls to other Python code.
1624+
1625+
In particular:
1626+
_textiowrapper_writeflush() calls buffer.write(). self->pending_bytes
1627+
can be appended during buffer->write() or other thread.
1628+
1629+
At the end of this function either pending_bytes needs to be NULL _or_
1630+
there needs to be an exception.
1631+
1632+
For more details see: gh-119506, gh-118138. */
1633+
_Py_AssertHoldsTstate();
1634+
15811635
if (self->pending_bytes == NULL)
15821636
return 0;
15831637

15841638
PyObject *pending = self->pending_bytes;
1639+
Py_ssize_t bytes_to_write = self->pending_bytes_count;
15851640
PyObject *b;
15861641

1642+
/* Make pending_bytes into a contiguous bytes. */
15871643
if (PyBytes_Check(pending)) {
15881644
b = Py_NewRef(pending);
15891645
}
@@ -1628,20 +1684,140 @@ _textiowrapper_writeflush(textio *self)
16281684
assert(pos == self->pending_bytes_count);
16291685
}
16301686

1687+
1688+
/* pending_bytes is now owned by this function. */
16311689
self->pending_bytes_count = 0;
16321690
self->pending_bytes = NULL;
16331691
Py_DECREF(pending);
16341692

16351693
PyObject *ret;
1636-
do {
1694+
/* Continue until all data written or an error occurs. */
1695+
while (1) {
1696+
/* note: gh-119506 self->pending_bytes and self->pending_bytes_count
1697+
may be altered during this call. */
16371698
ret = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(write), b);
1638-
} while (ret == NULL && _PyIO_trap_eintr());
1699+
1700+
/* Validate have exclusive access to members. */
1701+
_Py_AssertHoldsTstate();
1702+
1703+
if (ret == NULL) {
1704+
/* PEP 475 Retry on EINTR */
1705+
if (_PyIO_trap_eintr()) {
1706+
continue;
1707+
}
1708+
1709+
/* Try to find out how many bytes were written before exception. */
1710+
PyObject *exc = PyErr_GetRaisedException();
1711+
/* If it's a BlockingIOError, use the bytes written. */
1712+
if (PyErr_GivenExceptionMatches(exc, PyExc_BlockingIOError)) {
1713+
PyOSErrorObject *err = (PyOSErrorObject *)exc;
1714+
bytes_to_write -= err->written;
1715+
if (bytes_to_write) {
1716+
pending = PyBytes_FromStringAndSize(
1717+
PyBytes_AS_STRING(b) + err->written,
1718+
self->pending_bytes_count);
1719+
if (pending) {
1720+
/* _textiowrapper can raise an exception if it fails to
1721+
allocate a list, that just adds to active error
1722+
list. Already exiting as fast as possible. */
1723+
_textiowrapper_prepend(self, pending);
1724+
}
1725+
else {
1726+
PyErr_SetString(PyExc_MemoryError,
1727+
"lost unwritten bytes during partial write");
1728+
1729+
}
1730+
}
1731+
}
1732+
/* XXX: For other exceptions this assumes all data was written. */
1733+
/* Re-raise exceptions. */
1734+
PyErr_SetRaisedException(exc);
1735+
Py_DECREF(b); /* note: ret is NULL / no need to decref */
1736+
return -1;
1737+
}
1738+
1739+
/* Try to determine bytes written from return value
1740+
1741+
XXX: On unexpected return this matches previous behavior and asumes
1742+
all data was written. */
1743+
/* OPEN QUESTION: None is common in CPython, should this warn? */
1744+
if (ret == Py_None) {
1745+
Py_DECREF(b);
1746+
Py_DECREF(ret);
1747+
return 0;
1748+
}
1749+
1750+
/* Should be an integer. */
1751+
if (!_PyIndex_Check(ret)) {
1752+
PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1753+
"write returned '%s' not specified by BufferedIOBase or"
1754+
" TextIOBase, typically count of bytes written",
1755+
ret);
1756+
Py_DECREF(b);
1757+
Py_DECREF(ret);
1758+
return 0;
1759+
}
1760+
Py_ssize_t size = PyNumber_AsSsize_t(ret, PyExc_OverflowError);
1761+
1762+
/* Check for unexpected return values. */
1763+
/* Can't get out size follow return previous behavior. */
1764+
if (size == -1 && PyErr_Occurred()) {
1765+
if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
1766+
Py_DECREF(b);
1767+
Py_DECREF(ret);
1768+
return -1;
1769+
}
1770+
PyErr_Clear(); /* fall through */
1771+
PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1772+
"buffer.write returned value '%s' not specified by"
1773+
" BufferedIOBase or TextIOBase", ret);
1774+
Py_DECREF(b);
1775+
Py_DECREF(ret);
1776+
return 0;
1777+
}
1778+
/* Negative count of bytes doesn't make sense. */
1779+
else if (size < 0) {
1780+
PyErr_WarnFormat(PyExc_RuntimeWarning, 1,
1781+
"buffer.write returned negative count of bytes '%s'",
1782+
ret);
1783+
Py_DECREF(b);
1784+
Py_DECREF(ret);
1785+
return 0;
1786+
}
1787+
/* More written than passed in call. */
1788+
else if (size > bytes_to_write) {
1789+
PyErr_WarnFormat(PyExc_RuntimeWarning, 1,
1790+
"buffer.write returned '%s' bytes written but was only"
1791+
" called with '%s' bytes to write",
1792+
ret, bytes_to_write);
1793+
Py_DECREF(b);
1794+
Py_DECREF(ret);
1795+
return 0;
1796+
}
1797+
1798+
/* Wrote data! */
1799+
bytes_to_write -= size;
1800+
assert(bytes_to_write >= 0);
1801+
1802+
/* Avoid a new byte allocation if fully written. */
1803+
if (bytes_to_write == 0) {
1804+
Py_DECREF(ret);
1805+
break;
1806+
}
1807+
1808+
/* Make a new PyByte to keep type for next call to write. */
1809+
pending = PyBytes_FromStringAndSize(
1810+
PyBytes_AS_STRING(b) + size,
1811+
self->pending_bytes_count);
1812+
Py_DECREF(b);
1813+
b = pending;
1814+
pending = NULL;
1815+
}
1816+
1817+
/* All data owned by this function is written. Other bytes could have shown
1818+
up while running. */
1819+
assert(self->pending_bytes_count >= 0);
16391820
Py_DECREF(b);
1640-
// NOTE: We cleared buffer but we don't know how many bytes are actually written
1641-
// when an error occurred.
1642-
if (ret == NULL)
1643-
return -1;
1644-
Py_DECREF(ret);
16451821
return 0;
16461822
}
16471823

@@ -1733,12 +1909,12 @@ _io_TextIOWrapper_write_impl(textio *self, PyObject *text)
17331909
// Flush the buffer before adding b to the buffer if b is not small.
17341910
// https://github.com/python/cpython/issues/87426
17351911
if (bytes_len >= self->chunk_size) {
1736-
// _textiowrapper_writeflush() calls buffer.write().
1737-
// self->pending_bytes can be appended during buffer->write()
1738-
// or other thread.
1739-
// We need to loop until buffer becomes empty.
1740-
// https://github.com/python/cpython/issues/118138
1741-
// https://github.com/python/cpython/issues/119506
1912+
/* writeflush works to ensure all data written.
1913+
1914+
Additional data may be written to the TextIO when the lock is
1915+
released while calling buffer.write (and show up in
1916+
pending_bytes). When that happens, flush again to avoid copying
1917+
the current bytes. */
17421918
while (self->pending_bytes != NULL) {
17431919
if (_textiowrapper_writeflush(self) < 0) {
17441920
Py_DECREF(b);

0 commit comments

Comments
 (0)