From 4199b8807a1992ecda199af7cf8bab22a0878d84 Mon Sep 17 00:00:00 2001 From: "E. Madison Bray" Date: Tue, 4 Aug 2020 12:51:18 +0200 Subject: [PATCH 1/5] bpo-20082: fix misbehavior of buffered writes to raw files in append mode --- Lib/test/test_io/test_bufferedio.py | 24 +++++++++ Modules/_io/bufferedio.c | 82 ++++++++++++++++++++++------- 2 files changed, 87 insertions(+), 19 deletions(-) diff --git a/Lib/test/test_io/test_bufferedio.py b/Lib/test/test_io/test_bufferedio.py index e83dd0d4e28d00..51d7da0709b410 100644 --- a/Lib/test/test_io/test_bufferedio.py +++ b/Lib/test/test_io/test_bufferedio.py @@ -1473,6 +1473,30 @@ def test_interleaved_readline_write(self): f.flush() self.assertEqual(raw.getvalue(), b'1b\n2def\n3\n') + def test_append_write(self): + # Uses a real FileIO so that append behavior is reproduced accurately + with self.FileIO(os_helper.TESTFN, 'wb') as f: + f.write(b'test test') + + with self.FileIO(os_helper.TESTFN, 'ab+') as raw: + with self.tp(raw) as f: + self.assertEqual(f.tell(), 9) + f.write(b'A') + f.seek(0) + self.assertEqual(f.read(), b'test testA') + f.seek(0) + self.assertEqual(f.read(1), b't') + self.assertEqual(f.write(b'B'), 1) + f.seek(0) + + # This read previously returned b'tBst testA' but that is + # inccorect if the underlying raw file is in append mode; + # see bpo-20082 + self.assertEqual(f.read(), b'test testAB') + f.flush() + f.seek(0) + self.assertEqual(f.read(), b'test testAB') + # You can't construct a BufferedRandom over a non-seekable stream. test_unseekable = None diff --git a/Modules/_io/bufferedio.c b/Modules/_io/bufferedio.c index 0fdae7b2d21004..d7d65bf51d7c9c 100644 --- a/Modules/_io/bufferedio.c +++ b/Modules/_io/bufferedio.c @@ -228,6 +228,7 @@ typedef struct { int detached; int readable; int writable; + int appending; char finalizing; /* True if this is a vanilla Buffered object (rather than a user derived @@ -1350,6 +1351,33 @@ _io__Buffered_tell_impl(buffered *self) return PyLong_FromOff_t(pos); } +static PyObject * +_buffered_seek_unlocked(buffered *self, Py_off_t target, int whence) +{ + Py_off_t n; + PyObject *res = NULL; + + if (self->writable) { + res = _bufferedwriter_flush_unlocked(self); + if (res == NULL) + return res; + Py_CLEAR(res); + } + + /* TODO: align on block boundary and read buffer if needed? */ + if (whence == 1) + target -= RAW_OFFSET(self); + n = _buffered_raw_seek(self, target, whence); + if (n == -1) + return res; + self->raw_pos = -1; + res = PyLong_FromOff_t(n); + if (res != NULL && self->readable) + _bufferedreader_reset_buf(self); + + return res; +} + /*[clinic input] @critical_section _io._Buffered.seek @@ -1362,7 +1390,7 @@ static PyObject * _io__Buffered_seek_impl(buffered *self, PyObject *targetobj, int whence) /*[clinic end generated code: output=7ae0e8dc46efdefb input=b5a12be70e0ad07b]*/ { - Py_off_t target, n; + Py_off_t target; PyObject *res = NULL; CHECK_INITIALIZED(self) @@ -1430,25 +1458,8 @@ _io__Buffered_seek_impl(buffered *self, PyObject *targetobj, int whence) return NULL; /* Fallback: invoke raw seek() method and clear buffer */ - if (self->writable) { - res = _bufferedwriter_flush_unlocked(self); - if (res == NULL) - goto end; - Py_CLEAR(res); - } + res = _buffered_seek_unlocked(self, target, whence); - /* TODO: align on block boundary and read buffer if needed? */ - if (whence == 1) - target -= RAW_OFFSET(self); - n = _buffered_raw_seek(self, target, whence); - if (n == -1) - goto end; - self->raw_pos = -1; - res = PyLong_FromOff_t(n); - if (res != NULL && self->readable) - _bufferedreader_reset_buf(self); - -end: LEAVE_BUFFERED(self) return res; } @@ -1926,6 +1937,27 @@ _bufferedwriter_reset_buf(buffered *self) self->write_end = -1; } +static void +_bufferedwriter_set_append(buffered *self) +{ + PyObject *mode; + + mode = _PyObject_GetAttrId(self->raw, &PyId_mode); + if (mode != NULL) { + /* Raw fileobj has no mode string so as far as we can know it has + normal write behavior */ + if (PyUnicode_FindChar(mode, 'a', 0, PyUnicode_GET_LENGTH(mode), 1) != -1) { + self->appending = 1; + } else { + self->appending = 0; + } + Py_DECREF(mode); + } else { + PyErr_Clear(); + self->appending = 0; + } +} + /*[clinic input] _io.BufferedWriter.__init__ raw: object @@ -1956,6 +1988,8 @@ _io_BufferedWriter___init___impl(buffered *self, PyObject *raw, self->readable = 0; self->writable = 1; + _bufferedwriter_set_append(self); + self->buffer_size = buffer_size; if (_buffered_init(self) < 0) return -1; @@ -2111,6 +2145,14 @@ _io_BufferedWriter_write_impl(buffered *self, Py_buffer *buffer) self->pos = 0; self->raw_pos = 0; } + + if (self->appending) { + res = _buffered_seek_unlocked(self, 0, SEEK_END); + if (res == NULL) + goto error; + Py_DECREF(res); + } + avail = Py_SAFE_DOWNCAST(self->buffer_size - self->pos, Py_off_t, Py_ssize_t); if (buffer->len <= avail && buffer->len < self->buffer_size) { memcpy(self->buffer + self->pos, buffer->buf, buffer->len); @@ -2504,6 +2546,8 @@ _io_BufferedRandom___init___impl(buffered *self, PyObject *raw, self->readable = 1; self->writable = 1; + _bufferedwriter_set_append(self); + if (_buffered_init(self) < 0) return -1; _bufferedreader_reset_buf(self); From 6718c5c5782b124ee9336e0828f8e571eb34567b Mon Sep 17 00:00:00 2001 From: "E. Madison Bray" Date: Tue, 11 Aug 2020 14:21:07 +0200 Subject: [PATCH 2/5] trivial fixes addressing review comments --- Lib/test/test_io/test_bufferedio.py | 2 +- Modules/_io/bufferedio.c | 37 ++++++++++++++++------------- 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/Lib/test/test_io/test_bufferedio.py b/Lib/test/test_io/test_bufferedio.py index 51d7da0709b410..d6c8ac4d62a0da 100644 --- a/Lib/test/test_io/test_bufferedio.py +++ b/Lib/test/test_io/test_bufferedio.py @@ -1490,7 +1490,7 @@ def test_append_write(self): f.seek(0) # This read previously returned b'tBst testA' but that is - # inccorect if the underlying raw file is in append mode; + # incorect if the underlying raw file is in append mode; # see bpo-20082 self.assertEqual(f.read(), b'test testAB') f.flush() diff --git a/Modules/_io/bufferedio.c b/Modules/_io/bufferedio.c index d7d65bf51d7c9c..a2f3e590324196 100644 --- a/Modules/_io/bufferedio.c +++ b/Modules/_io/bufferedio.c @@ -1354,26 +1354,30 @@ _io__Buffered_tell_impl(buffered *self) static PyObject * _buffered_seek_unlocked(buffered *self, Py_off_t target, int whence) { - Py_off_t n; PyObject *res = NULL; if (self->writable) { res = _bufferedwriter_flush_unlocked(self); - if (res == NULL) + if (res == NULL) { return res; + } Py_CLEAR(res); } /* TODO: align on block boundary and read buffer if needed? */ - if (whence == 1) + if (whence == 1) { target -= RAW_OFFSET(self); - n = _buffered_raw_seek(self, target, whence); - if (n == -1) + } + + Py_off_t n = _buffered_raw_seek(self, target, whence); + if (n == -1) { return res; + } self->raw_pos = -1; res = PyLong_FromOff_t(n); - if (res != NULL && self->readable) + if (res != NULL && self->readable) { _bufferedreader_reset_buf(self); + } return res; } @@ -1940,20 +1944,20 @@ _bufferedwriter_reset_buf(buffered *self) static void _bufferedwriter_set_append(buffered *self) { - PyObject *mode; - - mode = _PyObject_GetAttrId(self->raw, &PyId_mode); + PyObject *mode = _PyObject_GetAttrId(self->raw, &PyId_mode); if (mode != NULL) { - /* Raw fileobj has no mode string so as far as we can know it has - normal write behavior */ - if (PyUnicode_FindChar(mode, 'a', 0, PyUnicode_GET_LENGTH(mode), 1) != -1) { + if (PyUnicode_FindChar(mode, 'a', 0, + PyUnicode_GET_LENGTH(mode), 1) != -1) { self->appending = 1; - } else { + } + else { self->appending = 0; } Py_DECREF(mode); - } else { - PyErr_Clear(); + } + else { + /* Raw fileobj has no mode string so as far as we can know it has + normal write behavior */ self->appending = 0; } } @@ -2148,8 +2152,9 @@ _io_BufferedWriter_write_impl(buffered *self, Py_buffer *buffer) if (self->appending) { res = _buffered_seek_unlocked(self, 0, SEEK_END); - if (res == NULL) + if (res == NULL) { goto error; + } Py_DECREF(res); } From e8a2d25de3c5b63c01ed2742a630e0bcc9b8c3f6 Mon Sep 17 00:00:00 2001 From: "E. Madison Bray" Date: Tue, 11 Aug 2020 14:21:14 +0200 Subject: [PATCH 3/5] fix I/O error when flushing writes to raw files in append mode if the raw file is appending there is no reason to attempt to rewind it before flushing --- Modules/_io/bufferedio.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/Modules/_io/bufferedio.c b/Modules/_io/bufferedio.c index a2f3e590324196..427cdf00891f31 100644 --- a/Modules/_io/bufferedio.c +++ b/Modules/_io/bufferedio.c @@ -2059,18 +2059,21 @@ _bufferedwriter_raw_write(buffered *self, char *start, Py_ssize_t len) static PyObject * _bufferedwriter_flush_unlocked(buffered *self) { + Py_ssize_t written = 0; Py_off_t n, rewind; if (!VALID_WRITE_BUFFER(self) || self->write_pos == self->write_end) goto end; - /* First, rewind */ - rewind = RAW_OFFSET(self) + (self->pos - self->write_pos); - if (rewind != 0) { - n = _buffered_raw_seek(self, -rewind, 1); - if (n < 0) { - goto error; + /* First, rewind unless raw file is in append mode */ + if (!self->appending) { + Py_off_t rewind = RAW_OFFSET(self) + (self->pos - self->write_pos); + if (rewind != 0) { + n = _buffered_raw_seek(self, -rewind, 1); + if (n < 0) { + goto error; + } + self->raw_pos -= rewind; } - self->raw_pos -= rewind; } while (self->write_pos < self->write_end) { n = _bufferedwriter_raw_write(self, From 176e4aec9440222b1b7506b317e58bf0db26e3ff Mon Sep 17 00:00:00 2001 From: "E. Madison Bray" Date: Tue, 11 Aug 2020 15:09:40 +0200 Subject: [PATCH 4/5] fix possible unhandled AttributeError (e.g. when the raw file does not have a mode attribute) and ensure that the mode is in fact a string --- Modules/_io/bufferedio.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Modules/_io/bufferedio.c b/Modules/_io/bufferedio.c index 427cdf00891f31..a2f9f27f240841 100644 --- a/Modules/_io/bufferedio.c +++ b/Modules/_io/bufferedio.c @@ -1945,7 +1945,7 @@ static void _bufferedwriter_set_append(buffered *self) { PyObject *mode = _PyObject_GetAttrId(self->raw, &PyId_mode); - if (mode != NULL) { + if (mode != NULL && PyUnicode_Check(mode)) { if (PyUnicode_FindChar(mode, 'a', 0, PyUnicode_GET_LENGTH(mode), 1) != -1) { self->appending = 1; @@ -1956,6 +1956,9 @@ _bufferedwriter_set_append(buffered *self) Py_DECREF(mode); } else { + if (PyErr_ExceptionMatches(PyExc_AttributeError)) { + PyErr_Clear(); + } /* Raw fileobj has no mode string so as far as we can know it has normal write behavior */ self->appending = 0; From d14f0472f055f50127885261a9cfd151279f3baf Mon Sep 17 00:00:00 2001 From: "E. Madison Bray" Date: Mon, 13 Apr 2026 13:11:29 +0200 Subject: [PATCH 5/5] use PyObject_GetOptionalAttr and fix error handling on PyUnicode_FindChar --- Modules/_io/bufferedio.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/Modules/_io/bufferedio.c b/Modules/_io/bufferedio.c index a2f9f27f240841..24bbd75a3bd65e 100644 --- a/Modules/_io/bufferedio.c +++ b/Modules/_io/bufferedio.c @@ -1944,10 +1944,14 @@ _bufferedwriter_reset_buf(buffered *self) static void _bufferedwriter_set_append(buffered *self) { - PyObject *mode = _PyObject_GetAttrId(self->raw, &PyId_mode); - if (mode != NULL && PyUnicode_Check(mode)) { + PyObject *mode = NULL; + if (PyObject_GetOptionalAttr(self->raw, &_Py_ID(mode), &mode) < 0) { + /* Raw fileobj has no mode string so as far as we can know it has + normal write behavior */ + self->appending = 0; + } else if (mode != NULL && PyUnicode_Check(mode)) { if (PyUnicode_FindChar(mode, 'a', 0, - PyUnicode_GET_LENGTH(mode), 1) != -1) { + PyUnicode_GET_LENGTH(mode), 1) >= 0) { self->appending = 1; } else { @@ -1955,14 +1959,6 @@ _bufferedwriter_set_append(buffered *self) } Py_DECREF(mode); } - else { - if (PyErr_ExceptionMatches(PyExc_AttributeError)) { - PyErr_Clear(); - } - /* Raw fileobj has no mode string so as far as we can know it has - normal write behavior */ - self->appending = 0; - } } /*[clinic input] @@ -2062,14 +2058,13 @@ _bufferedwriter_raw_write(buffered *self, char *start, Py_ssize_t len) static PyObject * _bufferedwriter_flush_unlocked(buffered *self) { - Py_ssize_t written = 0; Py_off_t n, rewind; if (!VALID_WRITE_BUFFER(self) || self->write_pos == self->write_end) goto end; /* First, rewind unless raw file is in append mode */ if (!self->appending) { - Py_off_t rewind = RAW_OFFSET(self) + (self->pos - self->write_pos); + rewind = RAW_OFFSET(self) + (self->pos - self->write_pos); if (rewind != 0) { n = _buffered_raw_seek(self, -rewind, 1); if (n < 0) {