Skip to content

Commit 01e186d

Browse files
authored
Merge pull request #43 from Paulanerus/dev
Semantic Search (1.5.0)
2 parents 372ef30 + 3f573ae commit 01e186d

32 files changed

Lines changed: 1256 additions & 288 deletions

.github/workflows/build-and-publish.yml

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ jobs:
4141
uses: actions/checkout@v4
4242

4343
- name: Set up Java
44-
uses: actions/setup-java@v4.5.0
44+
uses: actions/setup-java@v5.0.0
4545
with:
4646
java-version: '21'
4747
distribution: 'corretto'
@@ -70,7 +70,7 @@ jobs:
7070
uses: actions/checkout@v4
7171

7272
- name: Set up Java
73-
uses: actions/setup-java@v4.5.0
73+
uses: actions/setup-java@v5.0.0
7474
with:
7575
java-version: '21'
7676
distribution: 'corretto'
@@ -93,17 +93,23 @@ jobs:
9393

9494
strategy:
9595
matrix:
96-
os: [windows-latest, macos-latest, ubuntu-latest]
96+
include:
97+
- os: windows-latest
98+
distribution: microsoft
99+
- os: macos-latest
100+
distribution: corretto
101+
- os: ubuntu-latest
102+
distribution: corretto
97103

98104
steps:
99105
- name: Check out code
100106
uses: actions/checkout@v4
101107

102-
- name: Set up Java
103-
uses: actions/setup-java@v4.5.0
108+
- name: Set up Java (${{ matrix.distribution }})
109+
uses: actions/setup-java@v5.0.0
104110
with:
105111
java-version: '21'
106-
distribution: 'corretto'
112+
distribution: ${{ matrix.distribution }}
107113
cache: 'gradle'
108114

109115
- name: Build ${{ matrix.os }}
@@ -113,10 +119,10 @@ jobs:
113119
if: matrix.os == 'windows-latest'
114120
run: gh release upload v${{ env.VERSION }} build/compose/binaries/main-release/msi/TextVariantExplorer-${{ env.VERSION }}.msi --clobber
115121

116-
- name: Upload Linux Artifact
122+
- name: Upload Linux Artifact
117123
if: matrix.os == 'ubuntu-latest'
118124
run: gh release upload v${{ env.VERSION }} build/compose/binaries/main-release/deb/textvariantexplorer_${{ env.VERSION }}_amd64.deb --clobber
119125

120126
- name: Upload macOS Artifact
121127
if: matrix.os == 'macos-latest'
122-
run: gh release upload v${{ env.VERSION }} build/compose/binaries/main-release/dmg/TextVariantExplorer-${{ env.VERSION }}.Dmg --clobber
128+
run: gh release upload v${{ env.VERSION }} build/compose/binaries/main-release/dmg/TextVariantExplorer-${{ env.VERSION }}.dmg --clobber

api/src/main/kotlin/dev/paulee/api/data/Data.kt

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import com.fasterxml.jackson.annotation.JsonInclude
44
import com.fasterxml.jackson.annotation.JsonSubTypes
55
import com.fasterxml.jackson.annotation.JsonTypeInfo
66
import dev.paulee.api.data.provider.StorageType
7+
import dev.paulee.api.internal.Embedding
78

89
enum class Language {
910
ARABIC,
@@ -62,7 +63,7 @@ annotation class ViewFilter(
6263
val name: String,
6364
val fields: Array<String> = [],
6465
val alwaysShow: Array<String> = [],
65-
val global: Boolean = true
66+
val global: Boolean = true,
6667
)
6768

6869
enum class FieldType {
@@ -93,7 +94,7 @@ sealed interface SourceField {
9394
data class BasicField(
9495
override val name: String,
9596
override val fieldType: FieldType,
96-
@param:JsonInclude(JsonInclude.Include.NON_EMPTY) override val sourceLink: String = ""
97+
@param:JsonInclude(JsonInclude.Include.NON_EMPTY) override val sourceLink: String = "",
9798
) :
9899
SourceField
99100

@@ -103,15 +104,16 @@ data class IndexField(
103104
@param:JsonInclude(JsonInclude.Include.NON_EMPTY)
104105
override val sourceLink: String = "",
105106
val lang: Language,
106-
val default: Boolean = false
107+
val default: Boolean = false,
108+
val embeddingModel: Embedding.Model? = null,
107109
) : SourceField
108110

109111
data class UniqueField(
110112
override val name: String,
111113
override val fieldType: FieldType,
112114
@param:JsonInclude(JsonInclude.Include.NON_EMPTY)
113115
override val sourceLink: String = "",
114-
val identify: Boolean = false
116+
val identify: Boolean = false,
115117
) : SourceField
116118

117119
data class VariantMapping(val base: String, val variants: List<String>)
@@ -123,7 +125,7 @@ data class Source(
123125
val name: String,
124126
val fields: List<SourceField>,
125127
val variantMapping: VariantMapping? = null,
126-
val preFilter: PreFilter? = null
128+
val preFilter: PreFilter? = null,
127129
)
128130

129131
data class DataInfo(val name: String, val sources: List<Source>, val storageType: StorageType = StorageType.SQLITE)

api/src/main/kotlin/dev/paulee/api/data/IDataService.kt

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,13 @@ package dev.paulee.api.data
22

33
import dev.paulee.api.data.provider.IStorageProvider
44
import dev.paulee.api.data.provider.QueryOrder
5+
import dev.paulee.api.internal.Embedding
56
import java.io.Closeable
67
import java.nio.file.Path
78

89
interface IDataService : Closeable {
910

10-
suspend fun createDataPool(dataInfo: DataInfo, path: Path, onProgress: (progress: Int) -> Unit): Boolean
11-
12-
fun loadDataPools(path: Path): Int
11+
suspend fun createDataPool(dataInfo: DataInfo, onProgress: (progress: Int) -> Unit): Boolean
1312

1413
fun selectDataPool(selection: String)
1514

@@ -23,13 +22,26 @@ interface IDataService : Closeable {
2322

2423
fun getSuggestions(field: String, value: String): List<String>
2524

26-
fun getPage(query: String, order: QueryOrder?, pageCount: Int): Pair<List<Map<String, String>>, Map<String, List<Map<String, String>>>>
25+
suspend fun downloadModel(model: Embedding.Model, path: Path, onProgress: (progress: Int) -> Unit)
26+
27+
fun getPage(
28+
query: String,
29+
isSemantic: Boolean,
30+
order: QueryOrder?,
31+
pageCount: Int,
32+
): Pair<List<Map<String, String>>, Map<String, List<Map<String, String>>>>
2733

28-
fun getPageCount(query: String): Triple<Long, Long, Set<String>>
34+
fun getPageCount(query: String, isSemantic: Boolean): Triple<Long, Long, Set<String>>
2935

3036
fun createStorageProvider(infoName: String, path: Path): IStorageProvider?
3137

3238
fun dataInfoToString(dataInfo: DataInfo): String?
3339

3440
fun dataInfoFromString(dataInfo: String): DataInfo?
41+
42+
fun appDir(): Path
43+
44+
fun dataDir(): Path
45+
46+
fun modelDir(): Path
3547
}

api/src/main/kotlin/dev/paulee/api/data/provider/StorageProvider.kt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ interface IStorageProvider : Closeable {
2424

2525
fun get(
2626
name: String,
27-
ids: Set<Long> = emptySet(),
27+
ids: List<Long> = emptyList(),
2828
whereClause: List<String> = emptyList(),
2929
filter: List<String> = emptyList(),
3030
order: QueryOrder? = null,
@@ -34,7 +34,7 @@ interface IStorageProvider : Closeable {
3434

3535
fun count(
3636
name: String,
37-
ids: Set<Long> = emptySet(),
37+
ids: List<Long> = emptyList(),
3838
whereClause: List<String> = emptyList(),
3939
filter: List<String> = emptyList(),
4040
): Long
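
api/src/main/kotlin/dev/paulee/api/internal/Embedding.kt (file header missing from the extracted page; path inferred from the package declaration and object name in the hunk below)

Lines changed: 41 additions & 0 deletions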
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
package dev.paulee.api.internal
2+
3+
object Embedding {
4+
enum class Model(
5+
val id: String,
6+
val description: String,
7+
val author: String,
8+
val parameter: String,
9+
val link: String,
10+
val modelData: ModelData,
11+
) {
12+
EmbeddingGemma(
13+
"onnx-community/embeddinggemma-300m-ONNX",
14+
"A lightweight open embedding model from Google, built on Gemma 3 and trained on 100+ spoken languages.",
15+
"Google DeepMind",
16+
"300M",
17+
"https://huggingface.co/google/embeddinggemma-300m",
18+
ModelData()
19+
),
20+
AncientGreekBert(
21+
"onnx-community/Ancient-Greek-BERT-ONNX",
22+
"A BERT model specialized for Greek and Ancient Greek texts.",
23+
"Pranaydeep Singh, Gorik Rutten and Els Lefever",
24+
"110M",
25+
"https://huggingface.co/pranaydeeps/Ancient-Greek-BERT",
26+
ModelData(
27+
maxLength = 512,
28+
modelData = ""
29+
)
30+
)
31+
}
32+
33+
data class ModelData(
34+
val dimension: Int = 768,
35+
val maxLength: Int = 2048,
36+
val model: String = "onnx/model.onnx",
37+
val modelData: String = "onnx/model.onnx_data",
38+
val tokenizer: String = "tokenizer.json",
39+
val tokenizerConfig: String = "tokenizer_config.json",
40+
)
41+
}

api/src/main/kotlin/dev/paulee/api/plugin/IPluginService.kt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,17 @@ import java.nio.file.Path
66

77
interface IPluginService {
88

9-
fun loadFromDirectory(path: Path): Int
10-
119
fun loadPlugin(path: Path): IPlugin?
1210

1311
fun getPluginMetadata(plugin: IPlugin): PluginMetadata?
1412

1513
fun getDataInfo(plugin: IPlugin): String?
1614

17-
fun initAll(dataService: IDataService, path: Path)
15+
fun initAll(dataService: IDataService)
1816

1917
fun getPlugins(): List<IPlugin>
2018

2119
fun getViewFilter(plugin: IPlugin): ViewFilter?
20+
21+
fun pluginDir(): Path
2222
}

build.gradle.kts

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ compose.desktop {
3737

3838
jvmArgs += listOf(
3939
"--add-modules", "java.sql",
40+
"--add-modules", "jdk.incubator.vector",
4041
"-Dapi.version=${property("api.version")}",
4142
"-Dcore.version=${property("core.version")}",
4243
"-Dui.version=${property("ui.version")}",
@@ -47,7 +48,26 @@ compose.desktop {
4748
targetFormats(TargetFormat.Dmg, TargetFormat.Msi, TargetFormat.Deb)
4849
packageName = "TextVariantExplorer"
4950

50-
modules += listOf("java.sql")
51+
licenseFile.set(project.file("LICENSE.md"))
52+
53+
modules += listOf("java.sql", "jdk.incubator.vector")
54+
55+
linux {
56+
iconFile.set(project.file("ui/src/main/resources/icon.png"))
57+
}
58+
59+
windows {
60+
iconFile.set(project.file("ui/src/main/resources/icon.ico"))
61+
62+
menu = true
63+
upgradeUuid = "C4AF61D5-8472-482D-B2A0-F92E32D7A18C"
64+
}
65+
66+
macOS {
67+
iconFile.set(project.file("ui/src/main/resources/icon.icns"))
68+
69+
appCategory = "public.app-category.utilities"
70+
}
5171
}
5272
}
5373
}

core/build.gradle.kts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,14 @@
1+
import org.gradle.internal.os.OperatingSystem
2+
13
plugins {
24
kotlin("jvm")
35
}
46

57
group = "dev.paulee"
68
version = rootProject.extra["core.version"] as String
79

10+
val os = OperatingSystem.current()
11+
812
repositories {
913
mavenCentral()
1014
}
@@ -36,6 +40,11 @@ dependencies {
3640

3741
implementation("org.jetbrains.kotlinx:kotlinx-coroutines-core:${rootProject.extra["coroutines.version"]}")
3842

43+
implementation("ai.djl.huggingface:tokenizers:${rootProject.extra["tokenizers.version"]}")
44+
45+
if(os.isMacOsX) implementation("com.microsoft.onnxruntime:onnxruntime:${rootProject.extra["onnx.version"]}")
46+
else implementation("com.microsoft.onnxruntime:onnxruntime_gpu:${rootProject.extra["onnx.version"]}")
47+
3948
testImplementation(kotlin("test"))
4049
}
4150

0 commit comments

Comments
 (0)