Skip to content

Commit 44a7968

Browse files
committed
cleanup, additional tests
1 parent 431b3cb commit 44a7968

2 files changed

Lines changed: 131 additions & 23 deletions

File tree

src/methods.c

Lines changed: 30 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1209,12 +1209,27 @@ write_array_to_file(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kwargs)
12091209
Py_RETURN_NONE;
12101210
}
12111211

1212-
/* Try to get file descriptor for direct writes. If this fails,
1213-
* we'll fall back to calling Python's write() method. */
1212+
// Try to get file descriptor for direct writes. fall back to calling Python's write() method.
12141213
int fd = PyObject_AsFileDescriptor(file);
12151214
int use_fd = (fd >= 0);
1216-
1217-
/* For non-fd path, we need the write method name */
1215+
1216+
/* If we are going to write directly to the file descriptor, we must first
1217+
* flush any data buffered at the Python level (e.g. an io.BufferedWriter
1218+
* from open(path, 'wb')). Writing to the raw fd bypasses that user-space
1219+
* buffer, so without a flush our bytes would land at the kernel offset,
1220+
* ahead of any still-buffered data, corrupting the file. Raw integer fds
1221+
* (e.g. from os.open) have no flush() method; that is not an error. */
1222+
if (use_fd) {
1223+
PyObject *flush_result = PyObject_CallMethod(file, "flush", NULL);
1224+
if (flush_result == NULL) {
1225+
PyErr_Clear();
1226+
}
1227+
else {
1228+
Py_DECREF(flush_result);
1229+
}
1230+
}
1231+
1232+
// For non-fd path, we need the write method name
12181233
PyObject *write_name = NULL;
12191234
if (!use_fd) {
12201235
PyErr_Clear(); /* Clear the error from PyObject_AsFileDescriptor */
@@ -1225,7 +1240,7 @@ write_array_to_file(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kwargs)
12251240
}
12261241
}
12271242

1228-
/* Allocate a buffer for packing non-contiguous data */
1243+
// Allocate a buffer for packing non-contiguous data
12291244
char *pack_buffer = NULL;
12301245
Py_ssize_t pack_buffer_size = 0;
12311246

@@ -1239,10 +1254,10 @@ write_array_to_file(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kwargs)
12391254
goto fail;
12401255
}
12411256
Py_ssize_t chunk_size = (Py_ssize_t)inner_size * itemsize;
1242-
1257+
12431258
const char *data_to_write = *dataptr;
1244-
1245-
/* If stride is not contiguous, pack into buffer */
1259+
1260+
// If stride is not contiguous, pack into buffer
12461261
if (*strideptr != itemsize) {
12471262
if (pack_buffer_size < chunk_size) {
12481263
char *new_buffer = (char *)PyMem_Realloc(pack_buffer, chunk_size);
@@ -1261,16 +1276,14 @@ write_array_to_file(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kwargs)
12611276
data_to_write = pack_buffer;
12621277
}
12631278

1264-
/* Write the data */
1279+
// Write the data
12651280
if (use_fd) {
1266-
/* Direct file descriptor write - no Python objects needed! */
12671281
if (write_to_fd(fd, data_to_write, chunk_size) < 0) {
12681282
goto fail;
12691283
}
12701284
}
12711285
else {
1272-
/* Fall back to Python write() method.
1273-
* Note: PyMemoryView_FromMemory requires non-const char*, but we pass
1286+
/* Note: PyMemoryView_FromMemory requires non-const char*, but we pass
12741287
* PyBUF_READ flag which makes the view read-only, so the cast is safe. */
12751288
PyObject *buffer = PyMemoryView_FromMemory((char *)data_to_write, chunk_size, PyBUF_READ);
12761289
if (buffer == NULL) {
@@ -1282,7 +1295,7 @@ write_array_to_file(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kwargs)
12821295
if (write_result == NULL) {
12831296
goto fail;
12841297
}
1285-
1298+
12861299
/* Check for partial writes */
12871300
if (PyLong_Check(write_result)) {
12881301
Py_ssize_t bytes_written = PyLong_AsSsize_t(write_result);
@@ -1302,27 +1315,21 @@ write_array_to_file(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kwargs)
13021315
}
13031316
} while(iternext(iter));
13041317

1305-
/* Check if iteration stopped due to an error.
1306-
* NpyIter can return 0 for TWO reasons: end of iteration OR error.
1307-
* We MUST check PyErr_Occurred() to distinguish between the two.
1308-
* This is the standard pattern for NpyIter error handling. */
1318+
/* NpyIter can return 0 for TWO reasons: end of iteration OR error.
1319+
* We MUST check PyErr_Occurred() to distinguish between the two. */
13091320
if (PyErr_Occurred()) {
13101321
goto fail;
13111322
}
13121323

13131324
PyMem_Free(pack_buffer);
13141325
NpyIter_Deallocate(iter);
1315-
if (write_name != NULL) {
1316-
Py_DECREF(write_name);
1317-
}
1326+
Py_XDECREF(write_name);
13181327
Py_RETURN_NONE;
13191328

13201329
fail:
13211330
PyMem_Free(pack_buffer);
13221331
NpyIter_Deallocate(iter);
1323-
if (write_name != NULL) {
1324-
Py_DECREF(write_name);
1325-
}
1332+
Py_XDECREF(write_name);
13261333
return NULL;
13271334
}
13281335

test/test_util.py

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import pytest
22
import collections
33
import datetime
4+
import os
5+
import tempfile
46
import unittest
57
import warnings
68
from io import BytesIO
@@ -337,6 +339,105 @@ def test_write_array_to_file_f(self) -> None:
337339
fp = BytesIO()
338340
write_array_to_file(a1, fp)
339341
self.assertEqual(fp.getvalue(), b'')
342+
343+
#---------------------------------------------------------------------------
344+
# fd fast-path tests (real files expose fileno(), so these exercise the
345+
# direct file-descriptor write path rather than the Python write() fallback)
346+
347+
def test_write_array_to_file_fd_a(self) -> None:
348+
# basic 1d C-contiguous array to a real (buffered) file
349+
a1 = np.arange(20, dtype=np.int64)
350+
with tempfile.TemporaryDirectory() as d:
351+
fp = os.path.join(d, 'a.bin')
352+
with open(fp, 'wb') as f:
353+
write_array_to_file(a1, f)
354+
with open(fp, 'rb') as f:
355+
self.assertEqual(f.read(), a1.tobytes())
356+
357+
def test_write_array_to_file_fd_b(self) -> None:
358+
# 2d array, C order, small buffersize forces multiple chunks
359+
a1 = np.arange(12).reshape(3, 4)
360+
with tempfile.TemporaryDirectory() as d:
361+
fp = os.path.join(d, 'b.bin')
362+
with open(fp, 'wb') as f:
363+
write_array_to_file(a1, f, buffersize=2)
364+
with open(fp, 'rb') as f:
365+
self.assertEqual(f.read(), a1.tobytes('C'))
366+
367+
def test_write_array_to_file_fd_c(self) -> None:
368+
# 2d fortran-ordered array written in fortran order
369+
a1 = np.arange(12).reshape(3, 4).T
370+
with tempfile.TemporaryDirectory() as d:
371+
fp = os.path.join(d, 'c.bin')
372+
with open(fp, 'wb') as f:
373+
write_array_to_file(a1, f, fortran_order=True, buffersize=2)
374+
with open(fp, 'rb') as f:
375+
self.assertEqual(f.read(), a1.tobytes('F'))
376+
377+
def test_write_array_to_file_fd_d(self) -> None:
378+
# non-contiguous array (strided view) exercises the packing path
379+
a1 = np.arange(10, dtype=np.int16)[::2]
380+
with tempfile.TemporaryDirectory() as d:
381+
fp = os.path.join(d, 'd.bin')
382+
with open(fp, 'wb') as f:
383+
write_array_to_file(a1, f, buffersize=3)
384+
with open(fp, 'rb') as f:
385+
self.assertEqual(f.read(), a1.tobytes('C'))
386+
387+
def test_write_array_to_file_fd_flush(self) -> None:
388+
# regression: a Python-level buffered write before the fd write must be
389+
# preserved. Without an internal flush, the array bytes would be written
390+
# at the kernel offset ahead of the still-buffered header, corrupting
391+
# the file.
392+
a1 = np.arange(8, dtype=np.int64)
393+
header = b'HEADER-DATA'
394+
with tempfile.TemporaryDirectory() as d:
395+
fp = os.path.join(d, 'flush.bin')
396+
with open(fp, 'wb') as f:
397+
f.write(header)
398+
write_array_to_file(a1, f)
399+
with open(fp, 'rb') as f:
400+
self.assertEqual(f.read(), header + a1.tobytes())
401+
402+
def test_write_array_to_file_fd_interleave(self) -> None:
403+
# multiple array writes interleaved with Python writes stay ordered
404+
a1 = np.arange(4, dtype=np.int32)
405+
a2 = np.arange(4, 8, dtype=np.int32)
406+
with tempfile.TemporaryDirectory() as d:
407+
fp = os.path.join(d, 'inter.bin')
408+
with open(fp, 'wb') as f:
409+
f.write(b'<')
410+
write_array_to_file(a1, f)
411+
f.write(b'|')
412+
write_array_to_file(a2, f)
413+
f.write(b'>')
414+
with open(fp, 'rb') as f:
415+
self.assertEqual(
416+
f.read(),
417+
b'<' + a1.tobytes() + b'|' + a2.tobytes() + b'>')
418+
419+
def test_write_array_to_file_fd_raw_fd(self) -> None:
420+
# an os.open integer fd (no .flush) must still work
421+
a1 = np.arange(6, dtype=np.float64)
422+
with tempfile.TemporaryDirectory() as d:
423+
fp = os.path.join(d, 'raw.bin')
424+
fd = os.open(fp, os.O_WRONLY | os.O_CREAT | os.O_TRUNC)
425+
try:
426+
write_array_to_file(a1, fd)
427+
finally:
428+
os.close(fd)
429+
with open(fp, 'rb') as f:
430+
self.assertEqual(f.read(), a1.tobytes())
431+
432+
def test_write_array_to_file_fd_empty(self) -> None:
433+
# zero-size array to a real file produces an empty file
434+
a1 = np.array([], dtype=np.int64)
435+
with tempfile.TemporaryDirectory() as d:
436+
fp = os.path.join(d, 'empty.bin')
437+
with open(fp, 'wb') as f:
438+
write_array_to_file(a1, f)
439+
with open(fp, 'rb') as f:
440+
self.assertEqual(f.read(), b'')
340441
#---------------------------------------------------------------------------
341442
def test_array_to_tuple_array_1d_a(self) -> None:
342443
a1 = np.arange(10)

0 commit comments

Comments
 (0)