Document tobytes() copy behavior in NumPy array hashing (#343)

Copilot · shaypal5 · shaypal5 · commit 22a27cd17983 · 2026-03-20T13:11:53.000+02:00
* Initial plan

* Document tobytes(order="C") behavior in _hash_numpy_array docstring

Co-authored-by: shaypal5 <917954+shaypal5@users.noreply.github.com>

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: shaypal5 <917954+shaypal5@users.noreply.github.com>
diff --git a/src/cachier/config.py b/src/cachier/config.py
@@ -29,13 +29,26 @@ def _is_numpy_array(value: Any) -> bool:
 def _hash_numpy_array(hasher: "hashlib._Hash", value: Any) -> None:
     """Update hasher with NumPy array metadata and buffer content.
 
+    The array content is converted to bytes using C-order (row-major) layout
+    to ensure consistent hashing regardless of memory layout. This always
+    copies the data into a new bytes object; if the array is not C-contiguous
+    (e.g., transposed arrays, sliced views, or Fortran-ordered arrays), the
+    copy also reorders elements, which is slower for large arrays.
+
     Parameters
     ----------
     hasher : hashlib._Hash
         The hasher to update.
     value : Any
         A NumPy ndarray instance.
 
+    Notes
+    -----
+    The ``tobytes(order="C")`` call ensures deterministic hash values by
+    normalizing the memory layout. It always materializes a copy of the
+    data as ``bytes``; the copy is cheapest for C-contiguous arrays, so
+    prefer C-contiguous arrays when hashing large inputs.
+
     """
     hasher.update(b"numpy.ndarray")
     hasher.update(value.dtype.str.encode("utf-8"))