forked from NVIDIA/cuda-python
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path_layout.pyx
More file actions
1322 lines (1113 loc) · 51.8 KB
/
_layout.pyx
File metadata and controls
1322 lines (1113 loc) · 51.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0
cimport cython
from libc.stdint cimport int64_t, intptr_t
from cpython.object cimport PyObject
# CPython slice helpers from Python.h, bound under underscore-prefixed local
# names; the quoted string after each name is the C identifier being called.
cdef extern from "Python.h":
# ``except -1``: a return value of -1 signals that a Python exception is set.
int _PySlice_Unpack "PySlice_Unpack" (PyObject *slice, Py_ssize_t *start, Py_ssize_t *stop, Py_ssize_t *step) except -1
# ``noexcept nogil``: declared as never raising and callable without the GIL.
Py_ssize_t _PySlice_AdjustIndices "PySlice_AdjustIndices" (Py_ssize_t length, Py_ssize_t *start, Py_ssize_t *stop, Py_ssize_t step) noexcept nogil
@cython.final
cdef class _StridedLayout:
"""
Layout of a multi-dimensional tensor, described by a shape, strides and an itemsize.

Parameters
----------
shape : tuple
    A tuple of non-negative integers.
strides : tuple or None
    If a tuple, it must contain integers and have the same length as ``shape``.
    If None, the strides are assumed to be implicitly C-contiguous and the resulting
    layout's :attr:`strides` will be None.
itemsize : int
    The number of bytes per single element (dtype size).
divide_strides : bool, optional
    If True, the provided ``strides`` will be divided by the ``itemsize``.
    Defaults to False.

See also :meth:`dense`.

Attributes
----------
itemsize : int
    The number of bytes per single element (dtype size).
slice_offset : int
    The offset (as a number of elements, not bytes) of the element at
    index ``(0,) * ndim``. See also :attr:`slice_offset_in_bytes`.
"""
def __init__(self : _StridedLayout, shape : tuple[int, ...], strides : tuple[int, ...] | None,
             itemsize : int, divide_strides : bool = False) -> None:
    # Construction is delegated to ``init_from_tuple``, which receives the
    # arguments unchanged and in the same order.
    self.init_from_tuple(shape, strides, itemsize, divide_strides)
@classmethod
def dense(
    cls,
    shape : tuple[int],
    itemsize : int,
    stride_order : str | tuple[int] = 'C'
) -> _StridedLayout:
    """
    Build a _StridedLayout with dense strides for the given shape.

    Parameters
    ----------
    shape : tuple
        A tuple of non-negative integers.
    itemsize : int
        Size of a single tensor element, in bytes.
    stride_order : str or tuple, optional
        How the strides are ordered:

        * ``'C'`` (default) - C-order: strides increase from the rightmost
          axis to the leftmost one.
        * ``'F'`` - F-order: strides increase from the leftmost axis to the
          rightmost one.
        * A tuple - a permutation of ``tuple(range(len(shape)))`` whose last
          element is the axis with stride 1.

        See also :attr:`stride_order`.

    .. highlight:: python
    .. code-block:: python

        assert _StridedLayout.dense((5, 3, 7), 1, "C") == _StridedLayout((5, 3, 7), (21, 7, 1), 1)
        assert _StridedLayout.dense((5, 3, 7), 1, "F") == _StridedLayout((5, 3, 7), (1, 5, 15), 1)
        assert _StridedLayout.dense((5, 3, 7), 1, (2, 0, 1)) == _StridedLayout((5, 3, 7), (3, 1, 15), 1)
    """
    cdef _StridedLayout layout = _StridedLayout.__new__(cls)
    layout.init_dense_from_tuple(shape, itemsize, stride_order)
    return layout
@classmethod
def dense_like(
    cls,
    other : _StridedLayout,
    stride_order : str | tuple[int] = "K"
) -> _StridedLayout:
    """
    Build a dense counterpart of ``other``: same :attr:`shape` and
    :attr:`itemsize`, contiguous strides in the requested order and
    no slice offset. See also :attr:`is_dense`.

    When ``other`` is already dense in the requested order, ``other``
    itself is returned.

    Parameters
    ----------
    other : _StridedLayout
        The layout whose :attr:`shape` and :attr:`itemsize` are reused.
    stride_order : str or tuple, optional
        How the strides are ordered:

        * ``'K'`` (default) - keep the order of the strides of ``other``.
        * ``'C'`` - C-order: strides increase from the rightmost axis to
          the leftmost one.
        * ``'F'`` - F-order: strides increase from the leftmost axis to
          the rightmost one.
        * A tuple - a permutation of ``tuple(range(len(shape)))`` whose last
          element is the axis with stride 1.

        See also :attr:`stride_order`.

    Raises
    ------
    ValueError
        If ``stride_order`` is none of the accepted values.

    .. highlight:: python
    .. code-block:: python

        layout = _StridedLayout.dense((5, 3, 7), 1).permuted((2, 0, 1))
        assert layout == _StridedLayout((7, 5, 3), (1, 21, 7), 1)

        # dense_like with the default "K" stride_order
        # keeps the same order of strides as in the original layout
        assert _StridedLayout.dense_like(layout) == layout

        # "C", "F" recompute the strides accordingly
        assert _StridedLayout.dense_like(layout, "C") == _StridedLayout((7, 5, 3), (15, 3, 1), 1)
        assert _StridedLayout.dense_like(layout, "F") == _StridedLayout((7, 5, 3), (1, 7, 35), 1)
    """
    cdef OrderFlag flag
    cdef axis_vec_t order_vec
    cdef bint already_dense = other.get_is_dense()

    if stride_order == "K":
        # Keeping the order of an already dense layout is a no-op.
        if already_dense:
            return other
        other.get_stride_order(order_vec)
        flag = ORDER_PERM
    else:
        flag = _stride_order2vec(order_vec, stride_order)
        if flag == ORDER_NONE:
            raise ValueError(
                f"The stride_order must be 'K', 'C', 'F', "
                f"or a permutation tuple. Got: {stride_order}"
            )
        if already_dense:
            # A dense layout that is already in the requested C/F order
            # can be handed back as is.
            if flag == ORDER_C and other.get_is_contiguous_c():
                return other
            if flag == ORDER_F and other.get_is_contiguous_f():
                return other

    cdef _StridedLayout layout = _StridedLayout.__new__(cls)
    layout.init_dense_from_ptr(
        other.base.ndim,
        other.base.shape,
        other.itemsize,
        flag,
        &order_vec
    )
    return layout
def __repr__(self : _StridedLayout) -> str:
    # The slice offset is only part of the representation when it is non-zero.
    if self.slice_offset != 0:
        return (
            f"_StridedLayout(shape={self.shape}, strides={self.strides}, itemsize={self.itemsize}, _slice_offset={self.slice_offset})"
        )
    return (
        f"_StridedLayout(shape={self.shape}, strides={self.strides}, itemsize={self.itemsize})"
    )
def __eq__(self : _StridedLayout, other):
    # Two layouts are equal when their itemsize, slice offset and base layout
    # all match.
    #
    # ``other`` is deliberately left untyped: comparing against an object of
    # another type (e.g. ``layout == None``) must hand the decision back to
    # Python by returning NotImplemented instead of failing on the argument
    # type, so that ``==`` evaluates to False and ``!=`` to True.
    if not isinstance(other, _StridedLayout):
        return NotImplemented
    cdef _StridedLayout rhs = <_StridedLayout>other
    return (
        self.itemsize == rhs.itemsize
        and self.slice_offset == rhs.slice_offset
        and _base_layout_equal(self.base, rhs.base)
    )
@property
def ndim(self : _StridedLayout):
    """
    Number of dimensions of the layout, i.e. the length of the :attr:`shape` tuple.

    :type: int
    """
    return self.base.ndim
@property
def shape(self : _StridedLayout):
    """
    The extents of the tensor, one per dimension.

    :type: tuple[int]
    """
    return self.get_shape_tuple()
@property
def strides(self : _StridedLayout):
    """
    The strides of the tensor, expressed in **element counts**, not bytes.

    None is returned when the _StridedLayout was created with ``strides=None``;
    such a layout is implicitly C-contiguous.

    :type: tuple[int] | None
    """
    return self.get_strides_tuple()
@property
def strides_in_bytes(self : _StridedLayout):
    """
    The strides of the tensor, expressed in bytes.

    :type: tuple[int] | None
    """
    return self.get_strides_in_bytes_tuple()
@property
def stride_order(self : _StridedLayout):
    """
    The relative order of the strides, given as a permutation
    of ``tuple(range(ndim))``.

    .. highlight:: python
    .. code-block:: python

        # C-contiguous layout
        assert _StridedLayout.dense((5, 3, 7), 1).stride_order == (0, 1, 2)
        # F-contiguous layout
        assert _StridedLayout.dense((5, 3, 7), 1, stride_order="F").stride_order == (2, 1, 0)
        # Permuted layout
        assert _StridedLayout.dense((5, 3, 7), 1, stride_order=(2, 0, 1)).stride_order == (2, 0, 1)

    :type: tuple[int]
    """
    return self.get_stride_order_tuple()
@property
def volume(self : _StridedLayout):
    """
    Total number of elements in the tensor (the product of the extents in :attr:`shape`).

    :type: int
    """
    return self.get_volume()
@property
def is_unique(self : _StridedLayout):
    """
    Whether every element of a tensor with this layout is mapped to
    its own, distinct memory offset.

    Contiguous layouts are always unique, and so is any layout obtained
    from a contiguous one by permuting, slicing, flattening, squeezing,
    repacking, or reshaping.

    Broadcast layouts, i.e. layouts with a 0 stride for some extent
    greater than 1, are not unique.

    For layouts produced by manual stride manipulation (for example with
    ``numpy.lib.stride_tricks``) the result may inaccurately be False,
    because an exact uniqueness check may be expensive.

    :type: bool
    """
    return self.get_is_unique()
@property
def is_contiguous_c(self : _StridedLayout):
    """
    True iff the layout is contiguous in C-order: the rightmost stride
    is 1, and every stride further to the left equals the product of the
    extent and the stride immediately to its right.

    .. highlight:: python
    .. code-block:: python

        layout = _StridedLayout.dense((2, 5, 3), 1, "C")
        assert layout == _StridedLayout((2, 5, 3), (15, 3, 1), 1)
        assert layout.is_contiguous_c

    See also :attr:`is_contiguous_any`.

    :type: bool
    """
    return self.get_is_contiguous_c()
@property
def is_contiguous_f(self : _StridedLayout):
    """
    True iff the layout is contiguous in F-order: the leftmost stride
    is 1, and every stride further to the right equals the product of the
    stride and the extent immediately to its left.

    .. highlight:: python
    .. code-block:: python

        layout = _StridedLayout.dense((2, 5, 3), 1, "F")
        assert layout == _StridedLayout((2, 5, 3), (1, 2, 10), 1)
        assert layout.is_contiguous_f

    See also :attr:`is_contiguous_any`.

    :type: bool
    """
    return self.get_is_contiguous_f()
@property
def is_contiguous_any(self : _StridedLayout):
    """
    True iff the layout is contiguous for some order of its axes, i.e.
    some permutation of the axes makes the layout C-contiguous.

    A contiguous layout has non-negative strides and maps its elements
    1-to-1 onto the memory offset range ``[min_offset, max_offset]``.

    .. highlight:: python
    .. code-block:: python

        # dense defaults to C-contiguous
        layout = _StridedLayout.dense((5, 3, 7), 1)
        assert layout.is_contiguous_c and not layout.is_contiguous_f
        assert layout.is_contiguous_any

        # reversing the order of axes gives F-contiguous layout
        permuted = layout.permuted((2, 1, 0))
        assert not permuted.is_contiguous_c and permuted.is_contiguous_f
        assert permuted.is_contiguous_any

        # neither C- nor F-order but still contiguous
        permuted = layout.permuted((2, 0, 1))
        assert not permuted.is_contiguous_c and not permuted.is_contiguous_f
        assert permuted.is_contiguous_any

        # slicing the right-most extent creates a gap in the
        # offset_bounds range that is not reachable with any
        # element in the sliced layout
        sliced = layout[:, :, :-1]
        assert not sliced.is_contiguous_c and not sliced.is_contiguous_f
        assert not sliced.is_contiguous_any

    :type: bool
    """
    return self.get_is_contiguous_any()
@property
def is_dense(self : _StridedLayout):
    """
    Whether the layout is dense: contiguous (:attr:`is_contiguous_any` is True)
    and without a slice offset (:attr:`slice_offset_in_bytes` is 0).

    The elements of a dense layout are mapped 1-to-1 onto the
    ``[0, volume - 1]`` memory offset range.

    :type: bool
    """
    return self.get_is_dense()
@property
def offset_bounds(self : _StridedLayout):
    """
    The range ``[min_offset, max_offset]`` of memory offsets (in element counts,
    not bytes) onto which the elements of a tensor with this layout are mapped.

    For an empty layout (``volume == 0``) the returned tuple is ``(0, -1)``.
    Otherwise ``min_offset <= max_offset`` and every element of the tensor
    is mapped within ``[min_offset, max_offset]``.

    .. highlight:: python
    .. code-block:: python

        # Possible implementation of the offset_bounds
        def offset_bounds(layout : _StridedLayout):
            if layout.volume == 0:
                return 0, -1
            ndim = layout.ndim
            shape = layout.shape
            strides = layout.strides
            idx_min = [shape[i] - 1 if strides[i] < 0 else 0 for i in range(ndim)]
            idx_max = [shape[i] - 1 if strides[i] > 0 else 0 for i in range(ndim)]
            min_offset = sum(strides[i] * idx_min[i] for i in range(ndim)) + layout.slice_offset
            max_offset = sum(strides[i] * idx_max[i] for i in range(ndim)) + layout.slice_offset
            return min_offset, max_offset

    :type: tuple[int, int]
    """
    cdef stride_t lower = 0
    cdef stride_t upper = 0
    # Both bounds are filled in by a single call.
    self.get_offset_bounds(lower, upper)
    return lower, upper
@property
def min_offset(self : _StridedLayout):
    """
    The lower bound of :attr:`offset_bounds`; see that attribute for details.

    :type: int
    """
    cdef stride_t lower = 0
    cdef stride_t upper = 0
    # Both bounds are computed together; only the lower one is reported.
    self.get_offset_bounds(lower, upper)
    return lower
@property
def max_offset(self : _StridedLayout):
    """
    The upper bound of :attr:`offset_bounds`; see that attribute for details.

    :type: int
    """
    cdef stride_t lower = 0
    cdef stride_t upper = 0
    # Both bounds are computed together; only the upper one is reported.
    self.get_offset_bounds(lower, upper)
    return upper
@property
def slice_offset_in_bytes(self : _StridedLayout):
    """
    The memory offset, in bytes, of the element at index ``(0,) * ndim``.
    Equal to :attr:`itemsize` ``*`` :attr:`slice_offset`.

    .. note::
        Slicing with the :meth:`sliced` method (or the ``[]`` operator) is
        the only way for the index ``(0,) * ndim`` to be mapped to a
        non-zero offset.

    :type: int
    """
    return self.get_slice_offset_in_bytes()
def required_size_in_bytes(self : _StridedLayout) -> int:
    """
    The size (in bytes) of the memory allocation needed so that every
    element of a tensor with this layout is mapped within the allocated
    memory range.

    An error is raised if ``min_offset < 0``. Otherwise the result is
    ``(max_offset + 1) * itemsize``.

    .. hint::
        For dense layouts the function always succeeds and
        ``(max_offset + 1) * itemsize`` equals ``volume * itemsize``.

    .. highlight:: python
    .. code-block:: python

        # Allocating memory on a device to copy a host tensor
        def device_tensor_like(a : numpy.ndarray, device : ccx.Device) -> StridedMemoryView:
            a_view = StridedMemoryView(a, -1)
            # get the original layout of ``a`` and convert it to a dense layout
            # to avoid overallocating memory (e.g. if the ``a`` was sliced)
            layout = a_view._layout.to_dense()
            # get the required size in bytes to fit the tensor
            required_size = layout.required_size_in_bytes()
            # allocate the memory on the device
            device.set_current()
            mem = device.allocate(required_size, stream=device.default_stream)
            # create a view on the newly allocated device memory
            b_view = StridedMemoryView.from_buffer(mem, layout, a_view.dtype)
            return b_view
    """
    return self.get_required_size_in_bytes()
def flattened_axis_mask(self : _StridedLayout) -> axes_mask_t:
    """
    Returns the mask of the axes of this layout that can be merged
    by the :meth:`flattened` method.
    """
    return self.get_flattened_axis_mask()
def to_dense(self : _StridedLayout, object stride_order="K") -> _StridedLayout:
    """
    Returns a dense layout that keeps the shape and itemsize of this layout
    and has dense strides in the order given by ``stride_order``.

    This is a shorthand for :meth:`dense_like` called on this layout;
    see that method for the accepted ``stride_order`` values.
    """
    return _StridedLayout.dense_like(self, stride_order)
def reshaped(self : _StridedLayout, shape : tuple[int]) -> _StridedLayout:
    """
    Returns a layout with the requested shape, provided that shape is
    compatible with the current layout.

    A new shape is compatible when:

    * it has the same volume as the old shape
    * the old strides can be split or flattened to match it,
      assuming indices are iterated in C-order

    One extent of ``shape`` may be given as -1, in which case it is
    inferred from the old volume and the remaining extents.

    .. highlight:: python
    .. code-block:: python

        layout = _StridedLayout.dense((5, 3, 4), 1)
        assert layout.reshaped((20, 3)) == _StridedLayout.dense((20, 3), 1)
        assert layout.reshaped((4, -1)) == _StridedLayout.dense((4, 15), 1)
        assert layout.permuted((2, 0, 1)).reshaped((4, 15,)) == _StridedLayout((4, 15), (1, 4), 1)
        # layout.permuted((2, 0, 1)).reshaped((20, 3)) -> error
    """
    cdef _StridedLayout result = _StridedLayout.__new__(_StridedLayout)
    cdef BaseLayout target
    init_base_layout(target, len(shape))
    # Copy the requested extents into the C-level layout structure.
    for axis, extent in enumerate(shape):
        target.shape[axis] = extent
    self.reshape_into(result, target)
    return result
def permuted(self : _StridedLayout, axis_order : tuple[int]) -> _StridedLayout:
    """
    Returns a new layout whose shape and strides tuples are reordered
    according to ``axis_order``, a permutation of the axes.
    """
    cdef axis_vec_t order_vec
    cdef _StridedLayout result = _StridedLayout.__new__(_StridedLayout)
    _tuple2axis_vec(order_vec, axis_order)
    self.permute_into(result, order_vec)
    return result
def flattened(self : _StridedLayout, start_axis : int = 0, end_axis : int = -1, mask : int | None = None) -> _StridedLayout:
    """
    Merges runs of consecutive extents into single extents (each equal to
    the product of the merged extents) wherever the corresponding strides
    can be replaced with a single stride, assuming indices are iterated
    in C-order, i.e. the rightmost axis is incremented first.

    .. highlight:: python
    .. code-block:: python

        # the two extents can be merged into a single extent
        # because layout.strides[0] == layout.strides[1] * layout.shape[1]
        layout = _StridedLayout((3, 2), (2, 1), 1)
        assert layout.flattened() == _StridedLayout((6,), (1,), 1)

        # the two extents cannot be merged into a single extent
        # because layout.strides[0] != layout.strides[1] * layout.shape[1]
        layout = _StridedLayout((3, 2), (1, 3), 1)
        assert layout.flattened() == layout

    When ``start_axis`` and ``end_axis`` are given, only the axes in the
    inclusive range ``[start_axis, end_axis]`` are considered for flattening.

    Alternatively, ``mask`` selects the axes to consider; when it is given,
    it is used instead of the axis range. A mask of mergeable extents can be
    obtained with :meth:`flattened_axis_mask`, and masks of layouts with the
    same number of dimensions can be combined with the bitwise AND (``&``)
    operator.

    .. highlight:: python
    .. code-block:: python

        layout = _StridedLayout.dense((4, 5, 3), 4)
        layout2 = _StridedLayout((4, 5, 3), (1, 12, 4), 4)
        # Even though the two layouts have the same shape initially,
        # their shapes differ after flattening.
        assert layout.flattened() == _StridedLayout((60,), (1,), 4)
        assert layout2.flattened() == _StridedLayout((4, 15), (1, 4), 4)
        # With the mask, only extents that are mergeable in both layouts are flattened
        # and the resulting shape is the same for both layouts.
        mask = layout.flattened_axis_mask() & layout2.flattened_axis_mask()
        assert layout.flattened(mask=mask) == _StridedLayout((4, 15), (15, 1), 4)
        assert layout2.flattened(mask=mask) == _StridedLayout((4, 15), (1, 4), 4)
    """
    cdef _StridedLayout result = _StridedLayout.__new__(_StridedLayout)
    cdef axes_mask_t selected_axes
    if mask is not None:
        # An explicit mask takes precedence over the axis range.
        selected_axes = mask
    else:
        selected_axes = axis_mask_from_range(self.ndim, start_axis, end_axis)
    self.flatten_into(result, selected_axes)
    return result
def squeezed(self : _StridedLayout) -> _StridedLayout:
    """
    Returns a new layout with every singleton dimension (extent equal to 1)
    removed.

    Additionally, a layout whose volume is 0 is reduced to a 1-dim layout
    with shape (0,) and strides (0,).
    """
    cdef _StridedLayout result = _StridedLayout.__new__(_StridedLayout)
    self.squeeze_into(result)
    return result
def unsqueezed(self : _StridedLayout, axis : int | tuple[int]) -> _StridedLayout:
    """
    Returns a new layout with singleton extents inserted at the given axis or axes.

    ``axis`` is either a single integer in range ``[0, ndim]``
    or a tuple of unique integers in range ``[0, ndim + len(axis) - 1]``.
    When no axis is requested (an empty tuple), this layout itself is returned.
    """
    cdef axis_vec_t new_axes
    if not isinstance(axis, int):
        _tuple2axis_vec(new_axes, axis)
    else:
        new_axes.push_back(axis)
    # Nothing to insert: hand back the layout unchanged.
    if new_axes.size() == 0:
        return self
    cdef _StridedLayout result = _StridedLayout.__new__(_StridedLayout)
    self.unsqueeze_into(result, new_axes)
    return result
def broadcast_to(self : _StridedLayout, shape : tuple[int]) -> _StridedLayout:
    """
    Returns a layout with the requested shape, provided the old shape can
    be broadcast to it.

    The shapes are compatible when:

    * the new shape has at least as many dimensions as the old one
    * going from the right, every extent of the old shape is either 1 or
      equal to the corresponding extent of the new shape.

    The strides of added or modified extents are set to 0; all other
    strides are left unchanged.
    A ValueError is raised if the shapes are not compatible.
    """
    cdef _StridedLayout result = _StridedLayout.__new__(_StridedLayout)
    cdef BaseLayout target
    cdef int target_ndim = len(shape)
    init_base_layout(target, target_ndim)
    # Copy the requested extents into the C-level layout structure.
    for axis, extent in enumerate(shape):
        target.shape[axis] = extent
    self.broadcast_into(result, target)
    return result
def repacked(self : _StridedLayout, itemsize : int, data_ptr : intptr_t = 0, axis : int = -1, keep_dim : bool = True) -> _StridedLayout:
    """
    Converts the layout to the requested itemsize.

    Below, ``new_itemsize`` denotes the ``itemsize`` argument and
    ``itemsize`` denotes the current :attr:`itemsize` of this layout.
    If both are equal, this layout itself is returned.

    If ``new_itemsize < itemsize``, each element of the tensor is **unpacked**
    into multiple elements, i.e. the extent at ``axis`` grows by the factor
    ``itemsize // new_itemsize``.

    If ``new_itemsize > itemsize``, consecutive elements of the tensor are
    **packed** into a single element, i.e. the extent at ``axis`` shrinks by
    the factor ``new_itemsize // itemsize``.

    Either way, the ``volume * itemsize`` of the layout stays the same.

    The conversion is subject to the following constraints:

    * The extent at ``axis`` must be a positive integer.
    * The stride at ``axis`` must be 1.

    Moreover, if ``new_itemsize > itemsize``:

    * The extent at ``axis`` must be divisible by ``new_itemsize // itemsize``.
    * All other strides must be divisible by ``new_itemsize // itemsize``.
    * The ``slice_offset`` must be divisible by ``new_itemsize // itemsize``.
    * If ``data_ptr`` is provided, it must be aligned to the new itemsize.

    The largest itemsize satisfying all the constraints can be obtained
    with the :meth:`max_compatible_itemsize` method.

    If ``keep_dim`` is False and the extent at ``axis`` would be reduced to 1,
    that extent is omitted from the returned layout.

    .. highlight:: python
    .. code-block:: python

        # Repacking the layout with itemsize = 4 bytes as 2, 8, and 16 sized layouts.
        layout = _StridedLayout.dense((5, 4), 4)
        assert layout.repacked(2) == _StridedLayout.dense((5, 8), 2)
        assert layout.repacked(8) == _StridedLayout.dense((5, 2), 8)
        assert layout.repacked(16) == _StridedLayout.dense((5, 1), 16)
        assert layout.repacked(16, keep_dim=False) == _StridedLayout.dense((5,), 16)

    .. highlight:: python
    .. code-block:: python

        # Viewing (5, 6) float array as (5, 3) complex64 array.
        a = numpy.ones((5, 6), dtype=numpy.float32)
        float_view = StridedMemoryView(a, -1)
        layout = float_view._layout
        assert layout.shape == (5, 6)
        assert layout.itemsize == 4
        complex_view = float_view.view(layout.repacked(8), numpy.complex64)
        assert complex_view._layout.shape == (5, 3)
        assert complex_view._layout.itemsize == 8
        b = numpy.from_dlpack(complex_view)
        assert b.shape == (5, 3)
    """
    # Same itemsize: nothing to convert.
    if itemsize == self.itemsize:
        return self
    cdef _StridedLayout result = _StridedLayout.__new__(_StridedLayout)
    if itemsize < self.itemsize:
        self.unpack_into(result, itemsize, axis)
    else:
        self.pack_into(result, itemsize, data_ptr, keep_dim, axis)
    return result
def max_compatible_itemsize(self : _StridedLayout, max_itemsize : int = 16, data_ptr : intptr_t = 0, axis : int = -1) -> int:
    """
    Returns the largest itemsize, capped at ``max_itemsize``, that the
    :meth:`repacked` method can be used with for the current layout.
    """
    return self.get_max_compatible_itemsize(max_itemsize, data_ptr, axis)
def sliced(self : _StridedLayout, slices : int | slice | tuple[int | slice]) -> _StridedLayout:
    """
    Returns a sliced layout.

    ``slices`` may be a single integer, a single :py:class:`slice` object,
    or a tuple of integers/slices.

    .. hint::
        For convenience, prefer the :py:meth:`~object.__getitem__` operator
        (i.e. bracket syntax) over calling this method directly, e.g.:
        ``layout[:, start:end:step]``.

    .. note::
        Slicing is purely a layout transformation and does not involve
        any data access.
    """
    # A lone index or slice is treated as a 1-tuple.
    slices = slices if isinstance(slices, tuple) else (slices,)
    cdef _StridedLayout result = _StridedLayout.__new__(_StridedLayout)
    self.slice_into(result, slices)
    return result
def __getitem__(self : _StridedLayout, slices : int | slice | tuple[int | slice]) -> _StridedLayout:
    # Bracket syntax is sugar for ``sliced``; the key is forwarded untouched.
    sliced_layout = self.sliced(slices)
    return sliced_layout
cdef axes_mask_t get_flattened_axis_mask(_StridedLayout self) except? -1 nogil:
    # Delegates to the module-level helper, handing it this layout's ``base``.
    cdef axes_mask_t mask = flattened_strides_in_c_index_order_mask(self.base)
    return mask
cdef int get_max_compatible_itemsize(_StridedLayout self, int max_itemsize, intptr_t data_ptr, int axis=-1) except -1 nogil:
    # C-level counterpart of the ``max_compatible_itemsize`` method: supplies
    # this layout's base, slice offset and itemsize to the module-level helper
    # and passes the caller's arguments through unchanged.
    cdef int best_itemsize = max_compatible_itemsize(
        self.base,
        self.slice_offset,
        self.itemsize,
        max_itemsize,
        data_ptr,
        axis,
    )
    return best_itemsize
cdef int reshape_into(_StridedLayout self, _StridedLayout out_layout, BaseLayout& new_shape) except -1 nogil:
    """
    Writes a reshaped version of this layout into ``out_layout``.

    ``new_shape`` is validated against the current volume (a single ``-1``
    extent is resolved by ``validate_reshaped_shape``), and is then swapped
    into ``out_layout.base``. The itemsize and slice offset are carried over.

    Raises ValueError when the current strides cannot be split to match
    ``new_shape``. Returns 0 on success.
    """
    cdef BaseLayout merged
    cdef int64_t volume = self.get_volume()
    validate_reshaped_shape(new_shape, volume)
    _zero_strides(new_shape)
    # For an empty layout (volume 0) the stride flattening/splitting is skipped.
    if volume != 0:
        flatten_strides_in_c_index_order(merged, self.base, axis_mask_from_range(self.base.ndim, 0, -1))
        if not split_strides_in_c_index_order(new_shape, merged):
            raise ValueError("Layout strides are incompatible with the new shape")
    # Invalidate every memoized property of the destination first ...
    out_layout._prop_mask = 0
    # ... then carry over what a reshape leaves unchanged.
    out_layout.itemsize = self.itemsize
    out_layout.slice_offset = self.slice_offset
    maybe_copy_volume(out_layout, self)
    # Finally install the new extents.
    _swap_layout(out_layout.base, new_shape)
    return 0
cdef int permute_into(_StridedLayout self, _StridedLayout out_layout, axis_vec_t& axis_order) except -1 nogil:
    """
    Writes this layout with its axes reordered by ``axis_order`` into ``out_layout``.

    Raises ValueError when ``axis_order`` does not have exactly one entry per
    dimension. The itemsize and slice offset are carried over, and so is the
    volume when it is already memoized. Returns 0 on success.
    """
    cdef BaseLayout reordered
    if axis_order.size() != <size_t>self.base.ndim:
        raise ValueError(f"Permutation must have the same length as the number of dimensions, got {axis_order.size()} for {self.ndim}D tensor.")
    permute_extents(reordered, self.base, axis_order)
    # Invalidate every memoized property of the destination.
    out_layout._prop_mask = 0
    # Attributes that a permutation does not change.
    out_layout.slice_offset = self.slice_offset
    out_layout.itemsize = self.itemsize
    maybe_copy_volume(out_layout, self)
    # Install the permuted extents.
    _swap_layout(out_layout.base, reordered)
    return 0
cdef int flatten_into(_StridedLayout self, _StridedLayout out_layout, axes_mask_t axis_mask) except -1 nogil:
    """
    Writes this layout, with the axes selected by ``axis_mask`` merged where
    the strides allow it, into ``out_layout``.

    When the operation is in place (``out_layout is self``) and no axes were
    merged, the layout is left untouched. Returns 0 on success.
    """
    cdef BaseLayout merged
    cdef int merged_ndim = flatten_strides_in_c_index_order(merged, self.base, axis_mask)
    # In-place no-op: the number of dimensions did not change.
    if merged_ndim == self.base.ndim and out_layout is self:
        return 0
    # Invalidate every memoized property of the destination.
    out_layout._prop_mask = 0
    # Attributes that flattening does not change.
    out_layout.slice_offset = self.slice_offset
    out_layout.itemsize = self.itemsize
    maybe_copy_volume(out_layout, self)
    # Install the flattened extents.
    _swap_layout(out_layout.base, merged)
    return 0
cdef int squeeze_into(_StridedLayout self, _StridedLayout out_layout) except -1 nogil:
    """
    Writes the result of ``squeeze_extents`` applied to this layout into
    ``out_layout``.

    When the operation is in place (``out_layout is self``) and the number of
    dimensions is unchanged, the layout is left untouched. Returns 0 on success.
    """
    cdef BaseLayout compact
    squeeze_extents(compact, self.base)
    # In-place no-op: nothing was removed.
    if compact.ndim == self.base.ndim and out_layout is self:
        return 0
    # Invalidate every memoized property of the destination.
    out_layout._prop_mask = 0
    # Attributes that squeezing does not change.
    out_layout.slice_offset = self.slice_offset
    out_layout.itemsize = self.itemsize
    maybe_copy_volume(out_layout, self)
    # Install the squeezed extents.
    _swap_layout(out_layout.base, compact)
    return 0
cdef int unsqueeze_into(_StridedLayout self, _StridedLayout out_layout, axis_vec_t& axis_vec) except -1 nogil:
    """
    Writes the result of ``unsqueeze_extents`` applied to this layout with
    ``axis_vec`` into ``out_layout``.

    An in-place call (``out_layout is self``) with an empty ``axis_vec``
    returns immediately without touching the layout. Returns 0 on success.
    """
    cdef BaseLayout expanded
    # In-place no-op: no axes requested.
    if self is out_layout and axis_vec.size() == 0:
        return 0
    unsqueeze_extents(expanded, self.base, axis_vec)
    # Invalidate every memoized property of the destination.
    out_layout._prop_mask = 0
    # Attributes that unsqueezing does not change.
    out_layout.slice_offset = self.slice_offset
    out_layout.itemsize = self.itemsize
    maybe_copy_volume(out_layout, self)
    # Install the unsqueezed extents.
    _swap_layout(out_layout.base, expanded)
    return 0
cdef int broadcast_into(_StridedLayout self, _StridedLayout out_layout, BaseLayout& broadcast) except -1 nogil:
    """
    Writes this layout broadcast to the shape held in ``broadcast`` into
    ``out_layout``.

    ``broadcast`` is validated with ``_validate_shape``, processed by
    ``broadcast_extents`` against this layout's base, and then swapped into
    ``out_layout.base``. Unlike the other ``*_into`` transformations here, the
    memoized volume is not carried over. Returns 0 on success.
    """
    _validate_shape(broadcast)
    broadcast_extents(broadcast, self.base)
    # Invalidate every memoized property of the destination.
    out_layout._prop_mask = 0
    # Attributes that broadcasting does not change.
    out_layout.slice_offset = self.slice_offset
    out_layout.itemsize = self.itemsize
    # Install the broadcast extents.
    _swap_layout(out_layout.base, broadcast)
    return 0
cdef int pack_into(_StridedLayout self, _StridedLayout out_layout, int itemsize, intptr_t data_ptr, bint keep_dim, int axis=-1) except -1 nogil:
    """
    Writes this layout repacked to the larger ``itemsize`` into ``out_layout``.

    The extents and the slice offset of the result are produced by
    ``pack_extents``. Returns the vector size reported by ``pack_extents``,
    except for an in-place call (``out_layout is self``) with a vector size
    of 1, which leaves the layout untouched and returns 0.
    """
    cdef BaseLayout packed_base
    cdef stride_t packed_offset = 0
    cdef int n_packed = pack_extents(
        packed_base, packed_offset,
        self.base, self.slice_offset, self.itemsize,
        itemsize, data_ptr, keep_dim, axis,
    )
    # In-place no-op: the itemsize effectively did not change.
    if out_layout is self and n_packed == 1:
        return 0
    # Invalidate every memoized property of the destination.
    out_layout._prop_mask = 0
    # Everything below is new for the packed layout.
    out_layout.slice_offset = packed_offset
    out_layout.itemsize = itemsize
    _swap_layout(out_layout.base, packed_base)
    return n_packed
cdef int unpack_into(_StridedLayout self, _StridedLayout out_layout, int itemsize, int axis=-1) except -1 nogil:
    """
    Writes this layout repacked to the smaller ``itemsize`` into ``out_layout``.

    The extents are produced by ``unpack_extents``; the slice offset is the
    current one multiplied by the resulting vector size (overflow-checked).
    Returns that vector size, except for an in-place call
    (``out_layout is self``) with a vector size of 1, which leaves the layout
    untouched and returns 0.
    """
    cdef BaseLayout unpacked_base
    cdef int64_t scaled_offset
    cdef int n_unpacked = unpack_extents(unpacked_base, self.base, self.itemsize, itemsize, axis)
    # In-place no-op: the itemsize effectively did not change.
    if out_layout is self and n_unpacked == 1:
        return 0
    # Computed before any attribute is written, so an overflow error leaves
    # the destination unmodified.
    scaled_offset = _overflow_checked_mul(self.slice_offset, n_unpacked)
    # Invalidate every memoized property of the destination.
    out_layout._prop_mask = 0
    # Everything below is new for the unpacked layout.
    out_layout.slice_offset = scaled_offset
    out_layout.itemsize = itemsize
    _swap_layout(out_layout.base, unpacked_base)
    return n_unpacked
cdef int slice_into(_StridedLayout self, _StridedLayout out_layout, tuple slices) except -1:
    """
    Writes this layout sliced by ``slices`` into ``out_layout``.

    ``slice_extents`` produces the sliced extents together with an offset,
    which is added (overflow-checked) to the current slice offset. The
    itemsize is carried over. Returns 0 on success.
    """
    cdef BaseLayout sliced_base
    cdef stride_t extra_offset = slice_extents(sliced_base, self.base, slices)
    # Accumulate on top of any offset from earlier slicing.
    cdef int64_t total_offset = _overflow_checked_sum(self.slice_offset, extra_offset)
    # Invalidate every memoized property of the destination.
    out_layout._prop_mask = 0
    # Slicing keeps the itemsize ...
    out_layout.itemsize = self.itemsize
    # ... but changes the offset and the extents.
    out_layout.slice_offset = total_offset
    _swap_layout(out_layout.base, sliced_base)
    return 0
cdef inline int maybe_copy_volume(_StridedLayout out_layout, _StridedLayout in_layout) except -1 nogil:
    """
    Copies the memoized volume from ``in_layout`` to ``out_layout`` if, and
    only if, ``in_layout`` currently has it marked valid. Always returns 0.
    """
    # Nothing memoized on the input: leave the output's volume unset.
    if not _has_valid_property(in_layout, PROP_VOLUME):
        return 0
    out_layout._volume = in_layout.get_volume()
    _mark_property_valid(out_layout, PROP_VOLUME)
    return 0
cdef inline int validate_reshaped_shape(BaseLayout& new_shape, int64_t old_volume) except -1 nogil:
    """
    Checks that ``new_shape`` is a valid reshape target for a layout of
    ``old_volume`` elements, resolving an optional single ``-1`` extent in place.

    Raises ValueError when an extent is below -1, when more than one extent
    is -1, when the volumes do not match, or when the ``-1`` extent cannot be
    inferred (zero sub-volume or non-divisible volume). Returns 0 on success.
    """
    cdef int n_axes = new_shape.ndim
    cdef int inferred_axis = -1
    cdef int i
    cdef extent_t extent
    cdef int64_t new_volume
    for i in range(n_axes):
        extent = new_shape.shape[i]
        if extent >= 0:
            continue
        if extent != -1:
            raise ValueError("Extents must be non-negative")
        if inferred_axis != -1:
            raise ValueError("There can be at most one -1 extent in a shape")
        inferred_axis = i
    # With a -1 placeholder the raw product is negative; its absolute value is
    # the volume of the explicitly specified extents.
    new_volume = _c_abs(_volume(new_shape))
    if inferred_axis == -1:
        # Fully specified shape: the volumes must agree exactly.
        if new_volume != old_volume:
            raise ValueError(f"The original volume {old_volume} and the new volume {new_volume} must be equal.")
        return 0
    if new_volume == 0:
        raise ValueError("The -1 extent is ambiguous when the specified sub-volume is 0")
    extent = old_volume // new_volume
    if extent * new_volume != old_volume:
        raise ValueError(f"The original volume {old_volume} must be divisible by the specified sub-volume {new_volume}.")
    # Replace the -1 placeholder with the inferred extent.
    new_shape.shape[inferred_axis] = extent
    return 0
cdef inline axes_mask_t axis_mask_from_range(int ndim, int start_axis, int end_axis) except? -1 nogil:
    """
    Builds an axes mask for the axis range ``start_axis``..``end_axis`` of an
    ``ndim``-dimensional layout.

    The default range (``start_axis == 0`` and ``end_axis == -1``) yields
    ``flatten_all_axes_mask(ndim)`` unchanged. Otherwise both axes are passed
    through ``_normalize_axis`` (ValueError if it rejects either one) and the
    full mask is narrowed with shifted copies of ``AXES_MASK_ALL``.
    NOTE(review): the exact bit-to-axis convention is defined by helpers
    outside this function -- confirm against ``_axis2mask``.
    """
    cdef axes_mask_t mask = flatten_all_axes_mask(ndim)
    # Fast path: the whole range was requested.
    if start_axis == 0 and end_axis == -1:
        return mask
    if not _normalize_axis(start_axis, ndim):
        raise ValueError(f"Invalid start axis: {start_axis} out of range for {ndim}D tensor")
    if not _normalize_axis(end_axis, ndim):
        raise ValueError(f"Invalid end axis: {end_axis} out of range for {ndim}D tensor")
    # Clear the bits below ``start_axis``.
    if start_axis > 0:
        mask = mask & (AXES_MASK_ALL << start_axis)
    # Clear the bits above ``end_axis``.
    if end_axis < ndim:
        mask = mask & (AXES_MASK_ALL >> (STRIDED_LAYOUT_MAX_NDIM - end_axis - 1))
    return mask
cdef inline int flatten_strides_in_c_index_order(BaseLayout& out_layout, BaseLayout& in_layout, axes_mask_t axis_mask) except -1 nogil:
    """
    Merges runs of adjacent axes of ``in_layout`` into single axes and writes
    the result to ``out_layout``; returns the number of output axes.

    Walking the axes front to back, axis ``k`` is folded into the current
    group when its bit is set in ``axis_mask`` and the group's stride equals
    ``stride[k] * shape[k]``. A merged group has the product of the member
    extents as its extent and the stride of its last member as its stride.
    A 0-dimensional input produces a single axis of extent 1 and stride 1.
    """
    cdef int ndim = in_layout.ndim
    cdef int head = 0
    cdef int tail = 0
    cdef int n_groups = 0
    cdef int64_t vol
    cdef int64_t stride
    cdef extent_t* shape
    cdef stride_t* strides
    if ndim == 0:
        init_base_layout(out_layout, 1)
        out_layout.shape[0] = 1
        out_layout.strides[0] = 1
        return 1
    init_base_layout(out_layout, ndim)
    shape = in_layout.shape
    strides = get_strides_ptr(in_layout)
    while head < ndim:
        # Open a new group at ``head`` ...
        vol = shape[head]
        stride = strides[head]
        tail = head + 1
        # ... and extend it for as long as the next axis may be merged.
        while tail < ndim:
            if not (axis_mask & _axis2mask(tail)):
                break
            if stride != _overflow_checked_mul(strides[tail], shape[tail]):
                break
            vol = _overflow_checked_mul(vol, shape[tail])
            stride = strides[tail]
            tail += 1
        out_layout.shape[n_groups] = vol
        out_layout.strides[n_groups] = stride
        n_groups += 1
        head = tail
    # Shrink the output if any axes were merged.
    if n_groups != ndim:
        trim_base_layout(out_layout, n_groups)
    return n_groups
cdef inline axes_mask_t flattened_strides_in_c_index_order_mask(BaseLayout& layout) except? -1 nogil:
if layout.strides == NULL:
return flatten_all_axes_mask(layout.ndim)
cdef axes_mask_t axis_mask = 0
cdef int ndim = layout.ndim
cdef int group_start = 0