Skip to content

Commit 7acfc6c

Browse files
Merge pull request #30 from smartscanapp/dev
Merge dev updates for v2.0.1
2 parents d298e54 + 27d3a79 commit 7acfc6c

5 files changed

Lines changed: 177 additions & 64 deletions

File tree

CHANGELOG.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,14 @@
1+
## v2.0.1 - 01/05/2026
2+
3+
### Added
4+
* Added date filtering for queries
5+
6+
### Changed
7+
* Updated indexers to sync dates with MediaStore
8+
* Improved efficiency of large-batch handling in the `add` method of `FileEmbeddingStore`
9+
10+
___
11+
112
## v2.0.0 - 19/04/2026
213

314
### Added

core/src/main/java/com/fpf/smartscansdk/core/embeddings/EmbeddingStore.kt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ interface EmbeddingStore {
1313
embedding: FloatArray,
1414
topK: Int,
1515
threshold: Float,
16-
ids: Set<Long> = emptySet()
16+
ids: Set<Long> = emptySet(),
17+
startDate: Long? = null,
18+
endDate: Long? = null
1719
): List<Long>
1820
}

core/src/main/java/com/fpf/smartscansdk/core/embeddings/FileEmbeddingStore.kt

Lines changed: 70 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -142,13 +142,21 @@ class FileEmbeddingStore(
142142
withContext(Dispatchers.IO) {
143143
if (embeddings.isEmpty()) return@withContext 0
144144

145-
if (idToFileOffsetIndex.isEmpty() && file.exists()) {
145+
if (idToFileOffsetIndex.isEmpty()) {
146146
load()
147147
}
148148

149149
val filteredNewEmbeddings = embeddings.filterNot { it.id in idToFileOffsetIndex }
150150
if (filteredNewEmbeddings.isEmpty()) return@withContext 0
151151

152+
for (embedding in filteredNewEmbeddings) {
153+
if (embedding.embedding.size != embeddingDimension) {
154+
throw SmartScanException.InvalidEmbeddingDimension(
155+
"Embedding dimension mismatch. Expected $embeddingDimension, got ${embedding.embedding.size}"
156+
)
157+
}
158+
}
159+
152160
RandomAccessFile(file, "rw").use { raf ->
153161
val channel = raf.channel
154162

@@ -162,6 +170,44 @@ class FileEmbeddingStore(
162170

163171
val newCount = existingCount + filteredNewEmbeddings.size
164172

173+
// Move to the end (append mode) or start of data section if new file
174+
val nextOffset = if (fileExistsAndHasContent) {
175+
channel.size()
176+
} else {
177+
headerSize.toLong()
178+
}
179+
180+
channel.position(nextOffset)
181+
182+
val targetChunkBytes = 4 * 1024 * 1024
183+
val chunkCapacity = maxOf(
184+
recordSize,
185+
(targetChunkBytes / recordSize).coerceAtLeast(1) * recordSize
186+
)
187+
val writeBuffer = ByteBuffer.allocateDirect(chunkCapacity).order(ByteOrder.LITTLE_ENDIAN)
188+
189+
fun flushBuffer() {
190+
writeBuffer.flip()
191+
while (writeBuffer.hasRemaining()) {
192+
channel.write(writeBuffer)
193+
}
194+
writeBuffer.clear()
195+
}
196+
197+
for (embedding in filteredNewEmbeddings) {
198+
if (writeBuffer.remaining() < recordSize) {
199+
flushBuffer()
200+
}
201+
202+
writeBuffer.putLong(embedding.id)
203+
writeBuffer.putLong(embedding.date)
204+
for (f in embedding.embedding) writeBuffer.putFloat(f)
205+
}
206+
207+
if (writeBuffer.position() > 0) {
208+
flushBuffer()
209+
}
210+
165211
// Write updated count back as little-endian
166212
val headerBuf = ByteBuffer.allocate(headerSize).order(ByteOrder.LITTLE_ENDIAN)
167213
headerBuf.putInt(newCount)
@@ -171,45 +217,23 @@ class FileEmbeddingStore(
171217
channel.write(headerBuf)
172218
}
173219

174-
// Move to the end (append mode) or start of data section if new file
175-
var nextOffset = headerSize.toLong()
176-
if (fileExistsAndHasContent) {
177-
nextOffset = channel.size()
178-
} else {
179-
channel.position(headerSize.toLong())
180-
}
181-
182-
for (embedding in filteredNewEmbeddings) {
183-
if (embedding.embedding.size != embeddingDimension) {
184-
throw SmartScanException.InvalidEmbeddingDimension(
185-
"Embedding dimension mismatch. Expected $embeddingDimension, got ${embedding.embedding.size}"
186-
)
187-
}
188-
189-
val buf = ByteBuffer.allocate(recordSize).order(ByteOrder.LITTLE_ENDIAN)
190-
buf.putLong(embedding.id)
191-
buf.putLong(embedding.date)
192-
for (f in embedding.embedding) buf.putFloat(f)
193-
buf.flip()
194-
195-
channel.position(nextOffset)
196-
while (buf.hasRemaining()) {
197-
channel.write(buf)
198-
}
220+
channel.force(false)
199221

200-
// update in-memory file offset index for the newly appended entry and cache
201-
idToFileOffsetIndex[embedding.id] = nextOffset
222+
// update in-memory file offset index for the newly appended entry and cache
223+
filteredNewEmbeddings.forEachIndexed { index, embedding ->
224+
idToFileOffsetIndex[embedding.id] = nextOffset + (index.toLong() * recordSize)
225+
}
202226

203-
// Only add items to cache if it's not empty e.g after get() call, to keep it synchronized.
204-
// This prevents edge cases that could result in partial cache overwriting on-disk data
205-
// It also prevents unnecessarily keeping embeddings in memory
206-
if (cache.isNotEmpty()) {
227+
// Only add items to cache if it's not empty e.g after get() call, to keep it synchronized.
228+
// This prevents edge cases that could result in partial cache overwriting on-disk data
229+
// It also prevents unnecessarily keeping embeddings in memory
230+
if (cache.isNotEmpty()) {
231+
for (embedding in filteredNewEmbeddings) {
207232
cache[embedding.id] = embedding
208233
}
209-
nextOffset += recordSize.toLong()
210234
}
211-
channel.force(false)
212235
}
236+
213237
filteredNewEmbeddings.size
214238
}
215239
}
@@ -236,8 +260,18 @@ class FileEmbeddingStore(
236260
idToFileOffsetIndex.clear()
237261
}
238262

239-
override suspend fun query(embedding: FloatArray, topK: Int, threshold: Float, ids: Set<Long>): List<Long> {
240-
val storedEmbeddings = if (ids.isNotEmpty()) get().filter { it.id in ids } else get()
263+
override suspend fun query(embedding: FloatArray, topK: Int, threshold: Float, ids: Set<Long>, startDate: Long?, endDate: Long?): List<Long> {
264+
val storedEmbeddings = get().asSequence()
265+
.let { seq ->
266+
if (ids.isNotEmpty()) seq.filter { it.id in ids } else seq
267+
}
268+
.let { seq ->
269+
if (startDate != null) seq.filter { it.date >= startDate } else seq
270+
}
271+
.let { seq ->
272+
if (endDate != null) seq.filter { it.date <= endDate } else seq
273+
}
274+
.toList()
241275

242276
if (storedEmbeddings.isEmpty()) return emptyList()
243277

core/src/main/java/com/fpf/smartscansdk/core/indexers/ImageIndexer.kt

Lines changed: 47 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -23,29 +23,61 @@ class ImageIndexer(
2323
private val store: EmbeddingStore,
2424
private val maxImageSize: Int = 225,
2525
context: Context,
26-
listener: ProcessorListener<Long, StoredEmbedding>? = null,
26+
listener: ProcessorListener<Long, Pair<Long, FloatArray>>? = null,
2727
memoryOptions: MemoryOptions = MemoryOptions(),
2828
batchSize: Int = 10,
29-
): BatchProcessor<Long, StoredEmbedding>(context, listener, memoryOptions, batchSize){
29+
): BatchProcessor<Long, Pair<Long, FloatArray>>(context, listener, memoryOptions, batchSize){
3030

3131

32-
override suspend fun onBatchComplete(context: Context, batch: List<StoredEmbedding>) {
33-
store.add(batch)
32+
override suspend fun onBatchComplete(context: Context, batch: List<Pair<Long, FloatArray>>) {
33+
val imageIdToDateMap = getImageToDateMap(context, batch.map { it.first })
34+
val embedsToStore = batch.map{
35+
val date = imageIdToDateMap[it.first]?: System.currentTimeMillis()
36+
StoredEmbedding(it.first, date, it.second)
37+
}
38+
store.add(embedsToStore)
3439
listener?.onBatchComplete(context, batch)
3540
}
3641

37-
override suspend fun onProcess(context: Context, item: Long): StoredEmbedding {
38-
val contentUri = ContentUris.withAppendedId(
39-
MediaStore.Images.Media.EXTERNAL_CONTENT_URI, item
40-
)
42+
override suspend fun onProcess(context: Context, item: Long): Pair<Long, FloatArray> {
43+
val contentUri = ContentUris.withAppendedId(MediaStore.Images.Media.EXTERNAL_CONTENT_URI, item)
4144
val bitmap = getBitmapFromUri(context, contentUri, maxImageSize)
42-
val embedding = withContext(NonCancellable) {
43-
embedder.embed(bitmap)
44-
}
45-
return StoredEmbedding(
46-
id = item,
47-
date = System.currentTimeMillis(),
48-
embedding = embedding
45+
val embedding = withContext(NonCancellable) { embedder.embed(bitmap) }
46+
return Pair(item, embedding)
47+
}
48+
49+
private fun getImageToDateMap(context: Context, ids: List<Long>): Map<Long, Long> {
50+
val result = mutableMapOf<Long, Long>()
51+
val uri = MediaStore.Images.Media.EXTERNAL_CONTENT_URI
52+
val projection = arrayOf(
53+
MediaStore.Images.Media._ID,
54+
MediaStore.Images.Media.DATE_ADDED
4955
)
56+
57+
val chunkSize = 500
58+
59+
ids.chunked(chunkSize).forEach { chunk ->
60+
61+
val selection = "${MediaStore.Images.Media._ID} IN (${
62+
chunk.joinToString(",")
63+
})"
64+
65+
context.applicationContext.contentResolver.query(
66+
uri,
67+
projection,
68+
selection,
69+
null,
70+
null
71+
)?.use { cursor ->
72+
73+
val idIdx = cursor.getColumnIndexOrThrow(MediaStore.Images.Media._ID)
74+
val dateIdx = cursor.getColumnIndexOrThrow(MediaStore.Images.Media.DATE_ADDED)
75+
76+
while (cursor.moveToNext()) {
77+
result[cursor.getLong(idIdx)] = cursor.getLong(dateIdx)
78+
}
79+
}
80+
}
81+
return result
5082
}
5183
}

core/src/main/java/com/fpf/smartscansdk/core/indexers/VideoIndexer.kt

Lines changed: 46 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -25,30 +25,64 @@ class VideoIndexer(
2525
private val width: Int,
2626
private val height: Int,
2727
context: Context,
28-
listener: ProcessorListener<Long, StoredEmbedding>? = null,
28+
listener: ProcessorListener<Long, Pair<Long, FloatArray>>? = null,
2929
batchSize: Int = 10,
3030
memoryOptions: MemoryOptions = MemoryOptions(),
3131
private val store: EmbeddingStore,
32-
): BatchProcessor<Long, StoredEmbedding>(context, listener, memoryOptions, batchSize){
32+
): BatchProcessor<Long, Pair<Long, FloatArray>>(context, listener, memoryOptions, batchSize){
3333

34-
override suspend fun onBatchComplete(context: Context, batch: List<StoredEmbedding>) {
35-
store.add(batch)
34+
override suspend fun onBatchComplete(context: Context, batch: List<Pair<Long, FloatArray>>) {
35+
val videoIdToDateMap = getVideoToDateMap(context, batch.map { it.first })
36+
val embedsToStore = batch.map{
37+
val date = videoIdToDateMap[it.first]?: System.currentTimeMillis()
38+
StoredEmbedding(it.first, date, it.second)
39+
}
40+
store.add(embedsToStore)
3641
listener?.onBatchComplete(context, batch)
3742
}
3843

39-
override suspend fun onProcess(context: Context, item: Long): StoredEmbedding {
40-
val contentUri = ContentUris.withAppendedId(
41-
MediaStore.Video.Media.EXTERNAL_CONTENT_URI, item
42-
)
44+
override suspend fun onProcess(context: Context, item: Long): Pair<Long, FloatArray> {
45+
val contentUri = ContentUris.withAppendedId(MediaStore.Video.Media.EXTERNAL_CONTENT_URI, item)
4346
val frameBitmaps = extractFramesFromVideo(context, contentUri, width = width, height = height, frameCount = frameCount)?: throw IllegalStateException("Invalid frames")
4447
val rawEmbeddings = embedBatch(context, embedder, frameBitmaps)
4548
val embedding: FloatArray = generatePrototypeEmbedding(rawEmbeddings)
49+
return Pair(item, embedding)
50+
}
4651

47-
return StoredEmbedding(
48-
id = item,
49-
date = System.currentTimeMillis(),
50-
embedding = embedding
52+
private fun getVideoToDateMap(context: Context, ids: List<Long>): Map<Long, Long> {
53+
val result = mutableMapOf<Long, Long>()
54+
val uri = MediaStore.Video.Media.EXTERNAL_CONTENT_URI
55+
val projection = arrayOf(
56+
MediaStore.Video.Media._ID,
57+
MediaStore.Video.Media.DATE_ADDED
5158
)
59+
60+
val chunkSize = 500
61+
62+
ids.chunked(chunkSize).forEach { chunk ->
63+
64+
val selection = "${MediaStore.Video.Media._ID} IN (${
65+
chunk.joinToString(",")
66+
})"
67+
68+
context.applicationContext.contentResolver.query(
69+
uri,
70+
projection,
71+
selection,
72+
null,
73+
null
74+
)?.use { cursor ->
75+
76+
val idIdx = cursor.getColumnIndexOrThrow(MediaStore.Video.Media._ID)
77+
val dateIdx = cursor.getColumnIndexOrThrow(MediaStore.Video.Media.DATE_ADDED)
78+
79+
while (cursor.moveToNext()) {
80+
result[cursor.getLong(idIdx)] = cursor.getLong(dateIdx)
81+
}
82+
}
83+
}
84+
85+
return result
5286
}
5387

5488
}

0 commit comments

Comments
 (0)