Skip to content

Commit ade68cf

Browse files
feat: add ml/strided/dkmeans-init-plus-plus
--- type: pre_commit_static_analysis_report description: Results of running static analysis checks when committing changes. report: - task: lint_filenames status: passed - task: lint_editorconfig status: passed - task: lint_markdown_pkg_readmes status: na - task: lint_markdown_docs status: na - task: lint_markdown status: na - task: lint_package_json status: na - task: lint_repl_help status: na - task: lint_javascript_src status: passed - task: lint_javascript_cli status: na - task: lint_javascript_examples status: na - task: lint_javascript_tests status: na - task: lint_javascript_benchmarks status: na - task: lint_python status: na - task: lint_r status: na - task: lint_c_src status: na - task: lint_c_examples status: na - task: lint_c_benchmarks status: na - task: lint_c_tests_fixtures status: na - task: lint_shell status: na - task: lint_typescript_declarations status: passed - task: lint_typescript_tests status: na - task: lint_license_headers status: passed ---
1 parent 6e874c2 commit ade68cf

4 files changed

Lines changed: 524 additions & 0 deletions

File tree

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
/**
2+
* @license Apache-2.0
3+
*
4+
* Copyright (c) 2026 The Stdlib Authors.
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
'use strict';
20+
21+
// MODULES //
22+
23+
var isLayout = require( '@stdlib/blas/base/assert/is-layout' );
24+
var isRowMajor = require( '@stdlib/ndarray/base/assert/is-row-major-string' );
25+
var isColumnMajor = require( '@stdlib/ndarray/base/assert/is-column-major-string' );
26+
var max = require( '@stdlib/math/base/special/fast/max' );
27+
var format = require( '@stdlib/string/format' );
28+
var ndarray = require( './ndarray.js' );
29+
30+
31+
// MAIN //
32+
33+
/**
* Initializes centroids by performing the k-means++ initialization procedure.
*
* ## Method
*
* The k-means++ algorithm for choosing initial centroids is as follows:
*
* 1.  Select a data point uniformly at random from a data set \\( X \\). This data point is the first centroid and is denoted \\( c_0 \\).
*
* 2.  Compute the distance from each data point to \\( c_0 \\). Denote the distance between \\( c_j \\) and data point \\( m \\) as \\( d(x_m, c_j) \\).
*
* 3.  Select the next centroid, \\( c_1 \\), at random from \\( X \\) with probability
*
*     ```tex
*     \frac{d^2(x_m, c_0)}{\sum_{j=0}^{n-1} d^2(x_j, c_0)}
*     ```
*
*     where \\( n \\) is the number of data points.
*
* 4.  To choose centroid \\( j \\),
*
*     a. Compute the distances from each data point to each centroid and assign each data point to its closest centroid.
*
*     b. For \\( i = 0,\ldots,n-1 \\) and \\( p = 0,\ldots,j-1 \\), select centroid \\( j \\) at random from \\( X \\) with probability
*
*     ```tex
*     \frac{d^2(x_i, c_p)}{\sum_{\{h; x_h \in C_p\}} d^2(x_h, c_p)}
*     ```
*
*     where \\( C_p \\) is the set of all data points closest to centroid \\( c_p \\) and \\( x_i \\) belongs to \\( C_p \\).
*
*     Stated more plainly, select each subsequent centroid with a probability proportional to the squared distance from a data point to the closest centroid already chosen.
*
* 5.  Repeat step `4` until \\( k \\) centroids have been chosen.
*
* ## Notes
*
* -   This function only validates the storage layout and the leading dimensions, converts the leading dimensions to per-dimension strides (with zero offsets), and then delegates all computation to the `ndarray` implementation.
*
* ## References
*
* -   Arthur, David, and Sergei Vassilvitskii. 2007. "K-means++: The Advantages of Careful Seeding." In _Proceedings of the Eighteenth Annual Acm-Siam Symposium on Discrete Algorithms_, 1027–35. SODA '07. Philadelphia, PA, USA: Society for Industrial and Applied Mathematics. <http://dl.acm.org/citation.cfm?id=1283383.1283494>.
*
* @param {string} order - storage layout
* @param {PositiveInteger} k - number of clusters
* @param {PositiveInteger} M - number of data points
* @param {PositiveInteger} N - number of features
* @param {Float64Array} out - output array
* @param {integer} LDO - stride of the first dimension of `out` (a.k.a., leading dimension of the matrix `out`)
* @param {Float64Array} X - input array
* @param {integer} LDX - stride of the first dimension of `X` (a.k.a., leading dimension of the matrix `X`)
* @param {string} metric - distance metric
* @param {PositiveInteger} trials - number of potential centroids per iteration (>= 1)
* @param {*} seed - PRNG seed
* @throws {TypeError} first argument must be a valid order
* @throws {RangeError} sixth argument must be greater than or equal to max(1,N) (row-major) or max(1,k) (column-major)
* @throws {RangeError} eighth argument must be greater than or equal to max(1,N) (row-major) or max(1,M) (column-major)
* @returns {Float64Array} centroids
*
* @example
* var Float64Array = require( '@stdlib/array/float64' );
*
* var k = 3;
* var M = 5;
* var N = 2;
*
* var out = new Float64Array( k*N );
*
* // Specify data points:
* var xbuf = new Float64Array([
*     0.0, 0.0,
*     1.0, 1.0,
*     1.0, -1.0,
*     -1.0, -1.0,
*     -1.0, 1.0
* ]);
*
* var v = dkmeansInitPlusPlus( 'row-major', k, M, N, out, 2, xbuf, 2, 'sqeuclidean', 3, 44 );
* // returns <Float64Array>[0,0,1,-1,1,1]
*/
function dkmeansInitPlusPlus( order, k, M, N, out, LDO, X, LDX, metric, trials, seed ) { // eslint-disable-line max-len, max-params
	var so1;
	var so2;
	var sx1;
	var sx2;
	var so;
	var sx;

	if ( !isLayout( order ) ) {
		throw new TypeError( format( 'invalid argument. First argument must be a valid order. Value: `%s`.', order ) );
	}
	// Determine the minimum permissible leading dimension for each matrix. In row-major order, a row of either matrix spans `N` features; in column-major order, a column of `out` spans `k` centroids and a column of `X` spans `M` data points.
	if ( isRowMajor( order ) ) {
		so = N;
		sx = N;
	} else {
		so = k;
		sx = M;
	}
	// `LDO` is the sixth argument:
	if ( LDO < max( 1, so ) ) {
		throw new RangeError( format( 'invalid argument. Sixth argument must be greater than or equal to max(1,%d). Value: `%d`.', so, LDO ) );
	}
	// `LDX` is the eighth argument (and the reported value must be `LDX`, not `LDO`):
	if ( LDX < max( 1, sx ) ) {
		throw new RangeError( format( 'invalid argument. Eighth argument must be greater than or equal to max(1,%d). Value: `%d`.', sx, LDX ) );
	}
	// Translate the leading dimensions into per-dimension strides...
	if ( isColumnMajor( order ) ) {
		so1 = 1;
		so2 = LDO;

		sx1 = 1;
		sx2 = LDX;
	} else { // order === 'row-major'
		so1 = LDO;
		so2 = 1;

		sx1 = LDX;
		sx2 = 1;
	}
	return ndarray( k, M, N, out, so1, so2, 0, X, sx1, sx2, 0, metric, trials, seed ); // eslint-disable-line max-len
}
148+
149+
150+
// EXPORTS //
151+
152+
module.exports = dkmeansInitPlusPlus;
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
/**
2+
* @license Apache-2.0
3+
*
4+
* Copyright (c) 2026 The Stdlib Authors.
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
'use strict';
20+
21+
/**
22+
* Initialize centroids by performing the k-means++ initialization procedure.
23+
*
24+
* @module @stdlib/ml/strided/dkmeans-init-plus-plus
25+
*
26+
* @example
27+
* var Float64Array = require( '@stdlib/array/float64' );
28+
* var dkmeansInitPlusPlus = require( '@stdlib/ml/strided/dkmeans-init-plus-plus' );
29+
*
30+
* var k = 3;
31+
* var M = 5;
32+
* var N = 2;
33+
*
34+
* var out = new Float64Array( k*N );
35+
*
36+
* // Specify data points:
37+
* var xbuf = new Float64Array([
38+
* 0.0, 0.0,
39+
* 1.0, 1.0,
40+
* 1.0, -1.0,
41+
* -1.0, -1.0,
42+
* -1.0, 1.0
43+
* ]);
44+
*
45+
* var v = dkmeansInitPlusPlus( 'row-major', k, M, N, out, 2, xbuf, 2, 'sqeuclidean', 3, 44 );
46+
* // returns <Float64Array>[0,0,1,-1,1,1]
47+
*
48+
* @example
49+
* var Float64Array = require( '@stdlib/array/float64' );
50+
* var dkmeansInitPlusPlus = require( '@stdlib/ml/strided/dkmeans-init-plus-plus' );
51+
*
52+
* var k = 3;
53+
* var M = 5;
54+
* var N = 2;
55+
*
56+
* var out = new Float64Array( k*N );
57+
*
58+
* // Specify data points:
59+
* var xbuf = new Float64Array([
60+
* 0.0, 0.0,
61+
* 1.0, 1.0,
62+
* 1.0, -1.0,
63+
* -1.0, -1.0,
64+
* -1.0, 1.0
65+
* ]);
66+
*
67+
* var v = dkmeansInitPlusPlus.ndarray( k, M, N, out, 2, 1, 0, xbuf, 2, 1, 0, 'sqeuclidean', 3, 44 );
68+
* // returns <Float64Array>[0,0,1,-1,1,1]
69+
*/
70+
71+
// MODULES //
72+
73+
var join = require( 'path' ).join;
74+
var tryRequire = require( '@stdlib/utils/try-require' );
75+
var isError = require( '@stdlib/assert/is-error' );
76+
var main = require( './main.js' );
77+
78+
79+
// MAIN //
80+
81+
// Attempt to load the module at `./native.js`; `tryRequire` yields an error object when loading fails:
var tmp = tryRequire( join( __dirname, './native.js' ) );

// Fall back to the implementation from `./main.js` whenever loading `./native.js` failed:
var dkmeansInitPlusPlus = ( isError( tmp ) ) ? main : tmp;
88+
89+
90+
// EXPORTS //
91+
92+
module.exports = dkmeansInitPlusPlus;
93+
94+
// exports: { "ndarray": "dkmeansInitPlusPlus.ndarray" }
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
/**
2+
* @license Apache-2.0
3+
*
4+
* Copyright (c) 2026 The Stdlib Authors.
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
'use strict';
20+
21+
// MODULES //
22+
23+
var setReadOnly = require( '@stdlib/utils/define-nonenumerable-read-only-property' );
24+
var dkmeansInitPlusPlus = require( './dkmeans_init_plus_plus.js' );
25+
var ndarray = require( './ndarray.js' );
26+
27+
28+
// MAIN //
29+
30+
// Expose the implementation from `./ndarray.js` as the `ndarray` property of the main function (defined via the non-enumerable, read-only property helper):
setReadOnly( dkmeansInitPlusPlus, 'ndarray', ndarray );
31+
32+
33+
// EXPORTS //
34+
35+
module.exports = dkmeansInitPlusPlus;

0 commit comments

Comments
 (0)