Skip to content

Commit e3729ee

Browse files
committed
feat: add 2d blocked implementation
1 parent 2a17361 commit e3729ee

1 file changed

Lines changed: 255 additions & 0 deletions

File tree

Lines changed: 255 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,255 @@
1+
/**
2+
* @license Apache-2.0
3+
*
4+
* Copyright (c) 2026 The Stdlib Authors.
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
'use strict';
20+
21+
// MODULES //
22+
23+
var loopOrder = require( '@stdlib/ndarray/base/unary-loop-interchange-order' );
24+
var blockSize = require( '@stdlib/ndarray/base/ternary-tiling-block-size' );
25+
var take = require( '@stdlib/array/base/take-indexed' );
26+
27+
28+
// MAIN //
29+
/**
* Selects elements from one of two two-dimensional input ndarrays according to a two-dimensional condition ndarray and assigns the selected elements to a two-dimensional output ndarray via loop blocking.
*
* ## Notes
*
* -   For each element, if the corresponding `condition` element is truthy, the element from `x` is assigned to `out`; otherwise, the element from `y` is assigned.
* -   The loop order is resolved from the strides of `x` and `out`; the strides of `condition` and `y` are permuted to follow the same order.
*
* @private
* @param {Object} condition - object containing boolean condition ndarray meta data
* @param {*} condition.dtype - data type
* @param {Collection} condition.data - data buffer
* @param {NonNegativeIntegerArray} condition.shape - dimensions
* @param {IntegerArray} condition.strides - stride lengths
* @param {NonNegativeInteger} condition.offset - index offset
* @param {string} condition.order - specifies whether `condition` is row-major (C-style) or column-major (Fortran-style)
* @param {Object} x - object containing first input ndarray meta data
* @param {*} x.dtype - data type
* @param {Collection} x.data - data buffer
* @param {NonNegativeIntegerArray} x.shape - dimensions
* @param {IntegerArray} x.strides - stride lengths
* @param {NonNegativeInteger} x.offset - index offset
* @param {string} x.order - specifies whether `x` is row-major (C-style) or column-major (Fortran-style)
* @param {Object} y - object containing second input ndarray meta data
* @param {*} y.dtype - data type
* @param {Collection} y.data - data buffer
* @param {NonNegativeIntegerArray} y.shape - dimensions
* @param {IntegerArray} y.strides - stride lengths
* @param {NonNegativeInteger} y.offset - index offset
* @param {string} y.order - specifies whether `y` is row-major (C-style) or column-major (Fortran-style)
* @param {Object} out - object containing output ndarray meta data
* @param {*} out.dtype - data type
* @param {Collection} out.data - data buffer
* @param {NonNegativeIntegerArray} out.shape - dimensions
* @param {IntegerArray} out.strides - stride lengths
* @param {NonNegativeInteger} out.offset - index offset
* @param {string} out.order - specifies whether `out` is row-major (C-style) or column-major (Fortran-style)
* @returns {void}
*
* @example
* var Float64Array = require( '@stdlib/array/float64' );
* var Uint8Array = require( '@stdlib/array/uint8' );
*
* // Create data buffers:
* var cbuf = new Uint8Array( [ 1, 0, 0, 1 ] );
* var xbuf = new Float64Array( [ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0 ] );
* var ybuf = new Float64Array( [ 10.0, 20.0, 30.0, 40.0 ] );
* var obuf = new Float64Array( 4 );
*
* // Define the shape of the arrays:
* var shape = [ 2, 2 ];
*
* // Define the array strides:
* var sc = [ 2, 1 ];
* var sx = [ 4, 1 ];
* var sy = [ 2, 1 ];
* var so = [ 2, 1 ];
*
* // Define the index offsets:
* var oc = 0;
* var ox = 1;
* var oy = 0;
* var oo = 0;
*
* // Create the ndarray-like objects:
* var condition = {
*     'dtype': 'uint8',
*     'data': cbuf,
*     'shape': shape,
*     'strides': sc,
*     'offset': oc,
*     'order': 'row-major'
* };
* var x = {
*     'dtype': 'float64',
*     'data': xbuf,
*     'shape': shape,
*     'strides': sx,
*     'offset': ox,
*     'order': 'row-major'
* };
* var y = {
*     'dtype': 'float64',
*     'data': ybuf,
*     'shape': shape,
*     'strides': sy,
*     'offset': oy,
*     'order': 'row-major'
* };
* var out = {
*     'dtype': 'float64',
*     'data': obuf,
*     'shape': shape,
*     'strides': so,
*     'offset': oo,
*     'order': 'row-major'
* };
*
* // Apply the condition:
* blockedwhere2d( condition, x, y, out );
*
* console.log( out.data );
* // => <Float64Array>[ 2.0, 20.0, 30.0, 7.0 ]
*/
function blockedwhere2d( condition, x, y, out ) {
	var bsize;
	var cbuf;
	var xbuf;
	var ybuf;
	var obuf;
	var dc0;
	var dc1;
	var dx0;
	var dx1;
	var dy0;
	var dy1;
	var do0;
	var do1;
	var oc1;
	var ox1;
	var oy1;
	var oo1;
	var sh;
	var s0;
	var s1;
	var sc;
	var sx;
	var sy;
	var so;
	var oc;
	var ox;
	var oy;
	var oo;
	var ic;
	var ix;
	var iy;
	var io;
	var i0;
	var i1;
	var j0;
	var j1;
	var o;

	// Note on variable naming convention: s#, dc#, dx#, dy#, do#, i#, j# where # corresponds to the loop number, with `0` being the innermost loop...

	// Resolve the loop interchange order using `x` as the reference input array and `out` as the output array:
	o = loopOrder( x.shape, x.strides, out.strides );
	sh = o.sh;
	sx = o.sx;
	so = o.sy;

	// Apply the same dimension permutation to the `condition` and `y` strides so that all four arrays are traversed in the same loop order:
	sc = take( condition.strides, o.idx );
	sy = take( y.strides, o.idx );

	// Determine the block size:
	bsize = blockSize( condition.dtype, x.dtype, y.dtype, out.dtype );

	// Cache the indices of the first indexed elements in the respective ndarrays...
	oc = condition.offset;
	ox = x.offset;
	oy = y.offset;
	oo = out.offset;

	// Cache references to the input and output ndarray buffers...
	cbuf = condition.data;
	xbuf = x.data;
	ybuf = y.data;
	obuf = out.data;

	// Cache the offset increments for the innermost loop...
	dc0 = sc[ 0 ];
	dx0 = sx[ 0 ];
	dy0 = sy[ 0 ];
	do0 = so[ 0 ];

	// Iterate over blocks (from the last block to the first; `j#` is the number of elements preceding the current block along loop dimension `#`)...
	for ( j1 = sh[ 1 ]; j1 > 0; ) {
		if ( j1 < bsize ) {
			// Final (possibly partial) block along the outer dimension:
			s1 = j1;
			j1 = 0;
		} else {
			s1 = bsize;
			j1 -= bsize;
		}
		// Compute index offsets for the first elements of the current outer block...
		oc1 = oc + ( j1*sc[ 1 ] );
		ox1 = ox + ( j1*sx[ 1 ] );
		oy1 = oy + ( j1*sy[ 1 ] );
		oo1 = oo + ( j1*so[ 1 ] );
		for ( j0 = sh[ 0 ]; j0 > 0; ) {
			if ( j0 < bsize ) {
				// Final (possibly partial) block along the inner dimension:
				s0 = j0;
				j0 = 0;
			} else {
				s0 = bsize;
				j0 -= bsize;
			}
			// Compute index offsets for the first input and output ndarray elements in the current block...
			ic = oc1 + ( j0*sc[ 0 ] );
			ix = ox1 + ( j0*sx[ 0 ] );
			iy = oy1 + ( j0*sy[ 0 ] );
			io = oo1 + ( j0*so[ 0 ] );

			// Compute the loop offset increments (advance one step along the outer dimension while rewinding the `s0` steps taken along the inner dimension)...
			dc1 = sc[ 1 ] - ( s0*sc[ 0 ] );
			dx1 = sx[ 1 ] - ( s0*sx[ 0 ] );
			dy1 = sy[ 1 ] - ( s0*sy[ 0 ] );
			do1 = so[ 1 ] - ( s0*so[ 0 ] );

			// Iterate over the ndarray dimensions within the current block...
			for ( i1 = 0; i1 < s1; i1++ ) {
				for ( i0 = 0; i0 < s0; i0++ ) {
					obuf[ io ] = ( cbuf[ ic ] ) ? xbuf[ ix ] : ybuf[ iy ];
					ic += dc0;
					ix += dx0;
					iy += dy0;
					io += do0;
				}
				ic += dc1;
				ix += dx1;
				iy += dy1;
				io += do1;
			}
		}
	}
}
251+
252+
253+
// EXPORTS //
254+
255+
module.exports = blockedwhere2d;

0 commit comments

Comments
 (0)