Skip to content

Commit ed357d7

Browse files
committed
data: stream source artifacts with kotlinx-io
1 parent 82156a1 commit ed357d7

7 files changed

Lines changed: 361 additions & 98 deletions

File tree

docs/modules/ROOT/pages/tutorials/data-sources-getting-started.adoc

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,9 @@ dependencies {
4040
=== Resolve one artifact
4141

4242
`JvmDataSourceResolver` materializes remote artifacts into a cache and returns
43-
a `DataSourceArtifact` that can be read as bytes. Public Hugging Face files do
44-
not need credentials. Private files can use an `Authorization` header, or the
45-
JVM resolver will read `HF_TOKEN` / `HUGGING_FACE_HUB_TOKEN` from the
43+
a `DataSourceArtifact` that opens a `kotlinx.io.Source`. Public Hugging Face
44+
files do not need credentials. Private files can use an `Authorization` header,
45+
or the JVM resolver will read `HF_TOKEN` / `HUGGING_FACE_HUB_TOKEN` from the
4646
environment when the URI provider is Hugging Face.
4747

4848
[source,kotlin]
@@ -60,6 +60,14 @@ val artifact = resolver.resolve(
6060
println(artifact.filename)
6161
println(artifact.localPath)
6262
63+
val source = artifact.openSource()
64+
try {
65+
// Pass the source to a parser/loader for model-sized artifacts.
66+
} finally {
67+
source.close()
68+
}
69+
70+
// Convenience for small sidecars and tests.
6371
val bytes = artifact.readBytes()
6472
----
6573

skainet-data/skainet-data-source/build.gradle.kts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ kotlin {
1818
sourceSets {
1919
commonMain.dependencies {
2020
implementation(libs.kotlinx.coroutines)
21+
implementation(libs.kotlinx.io.core)
2122
}
2223

2324
commonTest.dependencies {

skainet-data/skainet-data-source/src/commonMain/kotlin/sk/ainet/data/source/DataSourceModels.kt

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
package sk.ainet.data.source
22

3+
import kotlinx.io.Sink
4+
import kotlinx.io.Source
5+
import kotlinx.io.readByteArray
6+
37
/**
48
* Cache behavior requested by a caller resolving a data artifact.
59
*/
@@ -80,9 +84,38 @@ public class DataSourceArtifact(
8084
public val localPath: String?,
8185
public val sizeBytes: Long?,
8286
public val cacheHit: Boolean,
83-
private val byteReader: suspend () -> ByteArray
87+
private val sourceOpener: suspend () -> Source
8488
) {
85-
public suspend fun readBytes(): ByteArray = byteReader()
89+
/**
90+
* Opens a fresh source for this artifact. Callers own and must close it.
91+
*/
92+
public suspend fun openSource(): Source = sourceOpener()
93+
94+
/**
95+
* Convenience for small artifacts. Prefer [openSource] or [copyTo] for
96+
* model-scale data.
97+
*/
98+
public suspend fun readBytes(): ByteArray {
99+
val source = openSource()
100+
return try {
101+
source.readByteArray()
102+
} finally {
103+
source.close()
104+
}
105+
}
106+
107+
/**
108+
* Streams this artifact into [sink]. The source is closed after copying;
109+
* [sink] is left open for the caller.
110+
*/
111+
public suspend fun copyTo(sink: Sink): Long {
112+
val source = openSource()
113+
return try {
114+
source.transferTo(sink)
115+
} finally {
116+
source.close()
117+
}
118+
}
86119
}
87120

88121
/**

0 commit comments

Comments
 (0)