@@ -5,10 +5,8 @@ import dev.paulee.api.data.Language
55import dev.paulee.api.data.Source
66import dev.paulee.api.data.UniqueField
77import dev.paulee.core.normalizeDataSource
8- import dev.paulee.core.splitStr
98import org.apache.lucene.analysis.Analyzer
109import org.apache.lucene.analysis.core.WhitespaceAnalyzer
11- import org.apache.lucene.analysis.en.EnglishAnalyzer
1210import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper
1311import org.apache.lucene.document.Document
1412import org.apache.lucene.document.Field
@@ -18,25 +16,33 @@ import org.apache.lucene.index.DirectoryReader
1816import org.apache.lucene.index.IndexWriter
1917import org.apache.lucene.index.IndexWriterConfig
2018import org.apache.lucene.index.Term
21- import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser
22- import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler
23- import org.apache.lucene.search.BooleanClause
24- import org.apache.lucene.search.BooleanQuery
19+ import org.apache.lucene.queryparser.classic.QueryParser
2520import org.apache.lucene.search.IndexSearcher
26- import org.apache.lucene.search.PhraseQuery
27- import org.apache.lucene.search.TermQuery
21+ import org.apache.lucene.search.Query
2822import org.apache.lucene.store.BaseDirectory
2923import org.apache.lucene.store.FSDirectory
3024import org.apache.lucene.store.NIOFSDirectory
3125import org.slf4j.LoggerFactory.getLogger
3226import java.io.Closeable
3327import java.nio.file.Path
3428
35- class Indexer (path : Path , sources : List <Source >) : Closeable {
29+ internal class CustomParser (private val defaultField : String , defaultAnalyzer : Analyzer ) :
30+ QueryParser (defaultField, defaultAnalyzer) {
31+
32+ override fun newFieldQuery (
33+ analyzer : Analyzer ,
34+ field : String? ,
35+ queryText : String? ,
36+ quoted : Boolean ,
37+ ): Query {
38+ val target = if (quoted) " $defaultField .ws" else (field ? : defaultField)
39+ return super .newFieldQuery(analyzer, target, queryText, quoted)
40+ }
41+ }
3642
37- companion object {
38- private val QUOTE_REGEX = Regex (" \" ([^\" ]+?)\" " )
43+ internal class Indexer (path : Path , sources : List <Source >) : Closeable {
3944
45+ companion object {
4046 private val OPERATOR_CASCADE_REGEX =
4147 Regex (" (?i)\\ b(AND(?:\\ s+NOT)?|OR(?:\\ s+NOT)?|NOT)\\ b(?:\\ s+(?:AND|OR|NOT)\\ b)*" )
4248
@@ -125,11 +131,7 @@ class Indexer(path: Path, sources: List<Source>) : Closeable {
125131 }
126132
127133 fun searchFieldIndex (field : String , query : String ): List <Document > {
128- val exactTerms = QUOTE_REGEX .findAll(query).map { it.groupValues[1 ] }.toList()
129-
130- val stripped = query.replace(QUOTE_REGEX , " " ).trim()
131-
132- val normalized = normalizeOperator(stripped)
134+ val normalized = normalizeOperator(query)
133135
134136 DirectoryReader .openIfChanged(this .reader)?.let {
135137 this .reader.close()
@@ -139,32 +141,20 @@ class Indexer(path: Path, sources: List<Source>) : Closeable {
139141
140142 val searcher = IndexSearcher (this .reader)
141143
142- val queryBuilder = BooleanQuery .Builder ()
144+ val perField =
145+ PerFieldAnalyzerWrapper (
146+ mappedAnalyzer[field] ? : LangAnalyzer .new(Language .ENGLISH ),
147+ mappedAnalyzer
148+ )
143149
144- if (normalized.isNotBlank()) {
145- val parser = StandardQueryParser (mappedAnalyzer[field] ? : EnglishAnalyzer ()).apply {
146- defaultOperator = StandardQueryConfigHandler .Operator .AND
147- allowLeadingWildcard = true
148- }
149-
150- queryBuilder.add(parser.parse(normalized, field), BooleanClause .Occur .MUST )
150+ val parser = CustomParser (field, perField).apply {
151+ defaultOperator = QueryParser .Operator .AND
152+ allowLeadingWildcard = true
151153 }
152154
153- exactTerms.map { splitStr(it, ' ' ) }.forEach { rawTerm ->
154- if (rawTerm.isEmpty()) return @forEach
155-
156- if (rawTerm.size == 1 ) {
157- queryBuilder.add(TermQuery (Term (" $field .ws" , rawTerm[0 ])), BooleanClause .Occur .MUST )
158- } else {
159- val phraseBuilder = PhraseQuery .Builder ()
160-
161- rawTerm.forEach { phraseBuilder.add(Term (" $field .ws" , it)) }
162-
163- queryBuilder.add(phraseBuilder.build(), BooleanClause .Occur .MUST )
164- }
165- }
155+ val query = parser.parse(normalized)
166156
167- val hits = searcher.search(queryBuilder.build() , Int .MAX_VALUE )
157+ val hits = searcher.search(query , Int .MAX_VALUE )
168158 return hits.scoreDocs.map { searcher.storedFields().document(it.doc) }
169159 }
170160
0 commit comments