Skip to content

Commit da16f56

Browse files
committed
Remove duplicated GGUF calls and use endianness for fromByteArray factories
Related-To: #121 #95
1 parent 5fd5950 commit da16f56

10 files changed

Lines changed: 145 additions & 189 deletions

File tree

skainet-core/skainet-tensors/src/commonMain/kotlin/sk/ainet/core/tensor/backend/CpuBackend.kt

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,40 @@ public class CpuTensorFP32(
218218
public fun full(shape: Shape, value: Float): CpuTensorFP32 {
219219
return CpuTensorFP32(shape, FloatArray(shape.volume) { value })
220220
}
221+
222+
/**
223+
* Creates a 2D tensor from a nested list structure.
224+
* Similar to numpy.array([[2.0, 1.5, 4.2, 3.1], [1.0, 2.5, 3.0, 4.5]])
225+
*
226+
* @param data The nested list where outer list represents rows and inner lists represent columns
227+
* @return A new CpuTensorFP32 instance with shape inferred from the nested structure
228+
* @throws IllegalArgumentException if the nested list structure is invalid or inconsistent
229+
*/
230+
public fun fromNestedList(data: List<List<Float>>): CpuTensorFP32 {
231+
require(data.isNotEmpty()) { "Data cannot be empty" }
232+
233+
val rows = data.size
234+
val cols = data[0].size
235+
require(cols > 0) { "Each row must have at least one element" }
236+
237+
// Validate that all rows have the same number of columns
238+
for (i in data.indices) {
239+
require(data[i].size == cols) {
240+
"All rows must have the same number of columns. Row 0 has $cols columns, but row $i has ${data[i].size} columns"
241+
}
242+
}
243+
244+
// Create the flat array in row-major order
245+
val flatArray = FloatArray(rows * cols)
246+
var index = 0
247+
for (row in data) {
248+
for (value in row) {
249+
flatArray[index++] = value
250+
}
251+
}
252+
253+
return CpuTensorFP32(Shape(rows, cols), flatArray)
254+
}
221255
}
222256
}
223257

@@ -1294,6 +1328,40 @@ public class CpuTensorInt32(
12941328
public fun full(shape: Shape, value: Int): CpuTensorInt32 {
12951329
return CpuTensorInt32(shape, IntArray(shape.volume) { value })
12961330
}
1331+
1332+
/**
1333+
* Creates a 2D tensor from a nested list structure.
1334+
* Similar to numpy.array([[2, 1, 4, 3], [1, 2, 3, 4], [4, 3, 2, 1]])
1335+
*
1336+
* @param data The nested list where outer list represents rows and inner lists represent columns
1337+
* @return A new CpuTensorInt32 instance with shape inferred from the nested structure
1338+
* @throws IllegalArgumentException if the nested list structure is invalid or inconsistent
1339+
*/
1340+
public fun fromNestedList(data: List<List<Int>>): CpuTensorInt32 {
1341+
require(data.isNotEmpty()) { "Data cannot be empty" }
1342+
1343+
val rows = data.size
1344+
val cols = data[0].size
1345+
require(cols > 0) { "Each row must have at least one element" }
1346+
1347+
// Validate that all rows have the same number of columns
1348+
for (i in data.indices) {
1349+
require(data[i].size == cols) {
1350+
"All rows must have the same number of columns. Row 0 has $cols columns, but row $i has ${data[i].size} columns"
1351+
}
1352+
}
1353+
1354+
// Create the flat array in row-major order
1355+
val flatArray = IntArray(rows * cols)
1356+
var index = 0
1357+
for (row in data) {
1358+
for (value in row) {
1359+
flatArray[index++] = value
1360+
}
1361+
}
1362+
1363+
return CpuTensorInt32(Shape(rows, cols), flatArray)
1364+
}
12971365
}
12981366
}
12991367

skainet-core/skainet-tensors/src/commonMain/kotlin/sk/ainet/core/tensor/factory/FP16TensorFactory.kt

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,30 +7,30 @@ import sk.ainet.core.tensor.Tensor
77
/**
88
* Factory for creating FP16 tensors from byte data.
99
* This is currently a placeholder implementation for future development.
10-
*
10+
*
1111
*/
1212
public object FP16TensorFactory : TensorFactoryRegistry.TensorFromBytesFactory<FP16, Float> {
13-
13+
1414
/**
15-
* Creates a tensor from GGUF-style byte data with FP16 values.
16-
*
15+
* Creates a tensor from byte data with FP16 values.
16+
*
1717
* 1. Convert bytes to FP16 values with proper endianness handling
1818
* 2. Validate input data size matches expected FP16 count (shape.volume * 2 bytes)
1919
* 3. Handle FP16 to FP32 conversion if needed
2020
* 4. Create and return proper CpuTensorFP16 instance
21-
*
21+
*
2222
* @param shape The desired shape of the tensor
2323
* @param data The byte array containing FP16 data (2 bytes per value)
2424
* @return A new FP16 tensor instance
2525
* @throws IllegalArgumentException if data size doesn't match the expected FP16 byte count (shape.volume * 2)
2626
*/
27-
override fun fromByteArray(shape: Shape, data: ByteArray): Tensor<FP16, Float> {
27+
override fun fromByteArray(shape: Shape, data: ByteArray, littleEndian: Boolean): Tensor<FP16, Float> {
2828
// Validate data size for FP16 (2 bytes per value)
2929
val expectedBytes = shape.volume * 2
3030
require(data.size == expectedBytes) {
3131
"Data size mismatch: expected $expectedBytes bytes for FP16 data, got ${data.size}"
3232
}
33-
33+
3434
// Create CpuTensorFP16 instance with FP16 byte data
3535
return sk.ainet.core.tensor.backend.CpuTensorFP16.fromFP16ByteArray(shape, data)
3636
}

skainet-core/skainet-tensors/src/commonMain/kotlin/sk/ainet/core/tensor/factory/FP32TensorFactory.kt

Lines changed: 4 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,14 @@ import sk.ainet.core.tensor.backend.CpuTensorFP32
1212
public object FP32TensorFactory : TensorFactoryRegistry.TensorFromBytesFactory<FP32, Float> {
1313

1414
/**
15-
* Creates a CpuTensorFP32 from GGUF byte data with the specified shape.
15+
* Creates a CpuTensorFP32 from byte data with the specified shape.
1616
* @param shape The desired shape of the tensor
1717
* @param data The byte array containing float data in binary format
18+
* @param littleEndian true for little-endian, false for big-endian
1819
* @return A new CpuTensorFP32 instance
1920
* @throws IllegalArgumentException if data size doesn't match expected float count
2021
*/
21-
override fun fromByteArray(shape: Shape, data: ByteArray): Tensor<FP32, Float> {
22+
override fun fromByteArray(shape: Shape, data: ByteArray, littleEndian: Boolean): Tensor<FP32, Float> {
2223
// Validate input data size matches shape requirements
2324
val expectedFloatCount = shape.volume
2425
val expectedByteSize = expectedFloatCount * 4 // 4 bytes per float
@@ -29,33 +30,9 @@ public object FP32TensorFactory : TensorFactoryRegistry.TensorFromBytesFactory<F
2930
}
3031

3132
// Convert byte array to float array using the caller-specified byte order
32-
val floatArray = ByteArrayConverter.convertBytesToFloatArray(data, littleEndian = true)
33+
val floatArray = ByteArrayConverter.convertBytesToFloatArray(data, littleEndian = littleEndian)
3334

3435
// Create and return the tensor using the existing factory method
3536
return CpuTensorFP32.fromArray(shape, floatArray)
3637
}
37-
38-
/**
39-
* Creates a tensor from byte data with specified endianness.
40-
* @param shape The desired shape of the tensor
41-
* @param data The byte array containing float data
42-
* @param littleEndian true for little-endian, false for big-endian
43-
* @return A new CpuTensorFP32 instance
44-
*/
45-
public fun fromGGUFData(shape: Shape, data: ByteArray, littleEndian: Boolean): Tensor<FP32, Float> {
46-
// Validate input data size matches shape requirements
47-
val expectedFloatCount = shape.volume
48-
val expectedByteSize = expectedFloatCount * 4
49-
50-
require(data.size == expectedByteSize) {
51-
"Input data size (${data.size} bytes) does not match expected size for shape $shape " +
52-
"(expected $expectedByteSize bytes for $expectedFloatCount floats)"
53-
}
54-
55-
// Convert byte array to float array with specified endianness
56-
val floatArray = ByteArrayConverter.convertBytesToFloatArray(data, littleEndian)
57-
58-
// Create and return the tensor
59-
return CpuTensorFP32.fromArray(shape, floatArray)
60-
}
6138
}

skainet-core/skainet-tensors/src/commonMain/kotlin/sk/ainet/core/tensor/factory/Int32TensorFactory.kt

Lines changed: 7 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -10,74 +10,29 @@ import sk.ainet.core.tensor.backend.CpuTensorInt32
1010
* Handles byte-to-int conversion with proper endianness support.
1111
*/
1212
public object Int32TensorFactory : TensorFactoryRegistry.TensorFromBytesFactory<Int32, Int> {
13-
13+
1414
/**
1515
* Creates a CpuTensorInt32 from GGUF byte data with the specified shape.
1616
* @param shape The desired shape of the tensor
1717
* @param data The byte array containing int data in binary format
18+
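* @param littleEndian true for little-endian, false for big-endian (NOTE(review): the conversion below still hard-codes `littleEndian = true`, so this argument is currently ignored)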
1819
* @return A new CpuTensorInt32 instance
1920
* @throws IllegalArgumentException if data size doesn't match expected int count
2021
*/
21-
override fun fromByteArray(shape: Shape, data: ByteArray): Tensor<Int32, Int> {
22+
override fun fromByteArray(shape: Shape, data: ByteArray, littleEndian: Boolean): Tensor<Int32, Int> {
2223
// Validate input data size matches shape requirements
2324
val expectedIntCount = shape.volume
2425
val expectedByteSize = expectedIntCount * 4 // 4 bytes per int
25-
26+
2627
require(data.size == expectedByteSize) {
2728
"Input data size (${data.size} bytes) does not match expected size for shape $shape " +
28-
"(expected $expectedByteSize bytes for $expectedIntCount ints)"
29+
"(expected $expectedByteSize bytes for $expectedIntCount ints)"
2930
}
30-
31+
3132
// Convert byte array to int array using little-endian (GGUF standard)
3233
val intArray = ByteArrayConverter.convertBytesToIntArray(data, littleEndian = true)
33-
34+
3435
// Create and return the tensor using the existing factory method
3536
return CpuTensorInt32.fromArray(shape, intArray)
3637
}
37-
38-
/**
39-
* Creates a tensor from byte data with specified endianness.
40-
* @param shape The desired shape of the tensor
41-
* @param data The byte array containing int data
42-
* @param littleEndian true for little-endian, false for big-endian
43-
* @return A new CpuTensorInt32 instance
44-
*/
45-
public fun fromGGUFData(shape: Shape, data: ByteArray, littleEndian: Boolean): Tensor<Int32, Int> {
46-
// Validate input data size matches shape requirements
47-
val expectedIntCount = shape.volume
48-
val expectedByteSize = expectedIntCount * 4
49-
50-
require(data.size == expectedByteSize) {
51-
"Input data size (${data.size} bytes) does not match expected size for shape $shape " +
52-
"(expected $expectedByteSize bytes for $expectedIntCount ints)"
53-
}
54-
55-
// Convert byte array to int array with specified endianness
56-
val intArray = ByteArrayConverter.convertBytesToIntArray(data, littleEndian)
57-
58-
// Create and return the tensor
59-
return CpuTensorInt32.fromArray(shape, intArray)
60-
}
61-
62-
/**
63-
* Validates that the input data is properly aligned for int conversion.
64-
* @param shape The tensor shape
65-
* @param data The input data
66-
* @throws IllegalArgumentException if validation fails
67-
*/
68-
private fun validateInput(shape: Shape, data: ByteArray) {
69-
val expectedIntCount = shape.volume
70-
val expectedByteSize = expectedIntCount * 4
71-
72-
require(data.size == expectedByteSize) {
73-
"Input data size (${data.size} bytes) does not match expected size for shape $shape " +
74-
"(expected $expectedByteSize bytes for $expectedIntCount ints)"
75-
}
76-
require(data.isNotEmpty()) {
77-
"Input data cannot be empty"
78-
}
79-
require(data.size % 4 == 0) {
80-
"Input data size (${data.size} bytes) must be a multiple of 4 for int conversion"
81-
}
82-
}
8338
}

skainet-core/skainet-tensors/src/commonMain/kotlin/sk/ainet/core/tensor/factory/Int4TensorFactory.kt

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,40 +7,41 @@ import sk.ainet.core.tensor.Tensor
77
/**
88
* Factory for creating Int4 tensors from byte data.
99
* This is currently a placeholder implementation for future development.
10-
*
10+
*
1111
* Int4 tensors use packed 4-bit integer values, storing 2 values per byte.
12-
*
12+
*
1313
*/
1414
public object Int4TensorFactory : TensorFactoryRegistry.TensorFromBytesFactory<Int4, Byte> {
15-
15+
1616
/**
1717
* Creates a tensor from GGUF-style byte data with packed Int4 values.
18-
*
18+
*
1919
* When CpuTensorInt4 becomes available, this should:
2020
* 1. Implement bit unpacking for 4-bit values (2 values per byte)
2121
* 2. Handle signed 4-bit integers (range -8 to 7)
2222
* 3. Validate input data size matches expected packed size (ceil(shape.volume / 2) bytes)
2323
* 4. Implement unpacking strategy for accessing individual 4-bit values
2424
* 5. Create and return proper CpuTensorInt4 instance
25-
*
25+
*
2626
* Storage strategy considerations:
2727
* - Each byte contains 2 Int4 values
2828
* - High nibble (bits 4-7): first value
2929
* - Low nibble (bits 0-3): second value
3030
* - Handle odd tensor volumes (last byte may only use high nibble)
31-
*
31+
*
3232
* @param shape The desired shape of the tensor
3333
* @param data The byte array containing packed Int4 data (2 values per byte)
34+
* @param littleEndian true for little-endian, false for big-endian
3435
* @return A new Int4 tensor instance
3536
* @throws IllegalArgumentException if data size doesn't match the expected packed byte count ((shape.volume + 1) / 2)
3637
*/
37-
override fun fromByteArray(shape: Shape, data: ByteArray): Tensor<Int4, Byte> {
38+
override fun fromByteArray(shape: Shape, data: ByteArray, littleEndian: Boolean): Tensor<Int4, Byte> {
3839
// Validate data size for packed Int4 (2 values per byte, rounded up)
3940
val expectedBytes = (shape.volume + 1) / 2 // Ceiling division for odd volumes
4041
require(data.size == expectedBytes) {
4142
"Data size mismatch: expected $expectedBytes bytes for packed Int4 data (${shape.volume} values), got ${data.size}"
4243
}
43-
44+
4445
// Create CpuTensorInt4 instance with packed data
4546
return sk.ainet.core.tensor.backend.CpuTensorInt4.fromPackedByteArray(shape, data)
4647
}

skainet-core/skainet-tensors/src/commonMain/kotlin/sk/ainet/core/tensor/factory/Int8TensorFactory.kt

Lines changed: 8 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -10,55 +10,29 @@ import sk.ainet.core.tensor.backend.CpuTensorInt8
1010
* Since Int8 tensors store byte values directly, no conversion is needed.
1111
*/
1212
public object Int8TensorFactory : TensorFactoryRegistry.TensorFromBytesFactory<Int8, Byte> {
13-
13+
1414
/**
1515
* Creates a CpuTensorInt8 from GGUF byte data with the specified shape.
1616
* @param shape The desired shape of the tensor
1717
* @param data The byte array containing the tensor data
18+
* @param littleEndian unused here: Int8 values are single bytes, so byte order does not apply
19+
*
1820
* @return A new CpuTensorInt8 instance
1921
* @throws IllegalArgumentException if data size doesn't match expected byte count
2022
*/
21-
override fun fromByteArray(shape: Shape, data: ByteArray): Tensor<Int8, Byte> {
23+
override fun fromByteArray(shape: Shape, data: ByteArray, littleEndian: Boolean): Tensor<Int8, Byte> {
2224
// Validate input data size matches shape requirements
2325
val expectedByteCount = shape.volume
24-
26+
2527
require(data.size == expectedByteCount) {
2628
"Input data size (${data.size} bytes) does not match expected size for shape $shape " +
27-
"(expected $expectedByteCount bytes)"
29+
"(expected $expectedByteCount bytes)"
2830
}
29-
31+
3032
// For Int8 tensors, we can use the byte array directly (no conversion needed)
3133
val byteArray = ByteArrayConverter.convertBytesToByteArray(data)
32-
34+
3335
// Create and return the tensor using the existing factory method
3436
return CpuTensorInt8.fromArray(shape, byteArray)
3537
}
36-
37-
/**
38-
* Creates a tensor from byte data with validation.
39-
* This method is identical to fromByteArray since Int8 doesn't need endianness conversion.
40-
* @param shape The desired shape of the tensor
41-
* @param data The byte array containing the tensor data
42-
* @return A new CpuTensorInt8 instance
43-
*/
44-
public fun fromByteData(shape: Shape, data: ByteArray): Tensor<Int8, Byte> {
45-
return fromByteArray(shape, data)
46-
}
47-
48-
/**
49-
* Validates that the input data size matches the expected tensor volume.
50-
* @param shape The tensor shape
51-
* @param data The input data
52-
* @throws IllegalArgumentException if validation fails
53-
*/
54-
private fun validateInput(shape: Shape, data: ByteArray) {
55-
val expectedByteCount = shape.volume
56-
require(data.size == expectedByteCount) {
57-
"Input data size (${data.size} bytes) does not match expected size for shape $shape " +
58-
"(expected $expectedByteCount bytes)"
59-
}
60-
require(data.isNotEmpty()) {
61-
"Input data cannot be empty"
62-
}
63-
}
6438
}

0 commit comments

Comments
 (0)