Skip to content

Commit e157b0b

Browse files
committed
Adds use_global_scoring feature
Adds the ability to use global scoring in OpenSearch queries. This feature allows for more accurate relevance scoring across multiple shards, improving the quality of search results. It comes at a performance cost, because it requires additional coordination between shards. For that reason it is disabled by default, so that only users who require this functionality enable it. From the OpenSearch documentation:

```text
dfs_query_then_fetch scores documents using global term and document frequencies across all shards. It’s usually slower but more accurate.
```
1 parent 56a27b9 commit e157b0b

4 files changed

Lines changed: 128 additions & 7 deletions

File tree

app/graphql/types/query_type.rb

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,9 @@ def record_id(id:, index:)
6868
'of the words much match. Options include: "OR", "AND"'
6969
argument :query_mode, String, required: false, default_value: 'keyword',
7070
description: 'Search mode: "keyword" (lexical search), "semantic" (vector search), or "hybrid" (both)'
71+
argument :use_global_scoring, Boolean, required: false, default_value: false,
72+
description: 'Calculate relevance scores globally across all shards ' \
73+
'instead of per-shard. Defaults to false.'
7174

7275
# applied filters
7376
argument :access_to_files_filter, [String],
@@ -105,12 +108,15 @@ def record_id(id:, index:)
105108
end
106109

107110
def search(searchterm:, citation:, contributors:, funding_information:, geodistance:, geobox:, identifiers:,
108-
locations:, subjects:, title:, index:, source:, from:, boolean_type:, fulltext:, per_page: 20, query_mode: 'keyword', **filters)
111+
locations:, subjects:, title:, index:, source:, from:, boolean_type:, fulltext:, per_page: 20,
112+
query_mode: 'keyword', use_global_scoring: false, **filters)
109113
query = construct_query(searchterm, citation, contributors, funding_information, geodistance, geobox, identifiers,
110114
locations, subjects, title, source, boolean_type, filters, per_page, query_mode)
111115

112116
results = Opensearch.new.search(from, query, Timdex::OSClient, highlight: highlight_requested?, index: index,
113-
fulltext: fulltext, query_mode: query_mode, requested_aggregations: requested_aggregations)
117+
fulltext: fulltext, query_mode: query_mode,
118+
requested_aggregations: requested_aggregations,
119+
use_global_scoring: use_global_scoring)
114120

115121
response = {}
116122
response[:hits] = results['hits']['total']['value']

app/models/opensearch.rb

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,16 @@ class Opensearch
44
MAX_SIZE = 200
55

66
# Runs a search through the supplied client and returns the client's response.
#
# @param from forwarded to build_query
# @param params stored in @params
# @param client object responding to #search; it is called with keyword arguments
# @param highlight stored in @highlight
# @param index index to query; default_index is used when this is blank
# @param fulltext normalized via fulltext? before being stored in @fulltext
# @param query_mode stored in @query_mode
# @param requested_aggregations stored in @requested_aggregations
# @param use_global_scoring when truthy, the request carries
#   search_type 'dfs_query_then_fetch'; otherwise no search_type is sent
# @return whatever client.search returns
def search(from, params, client, highlight: false, index: nil, fulltext: false, query_mode: 'keyword',
           requested_aggregations: [], use_global_scoring: false)
  @params = params
  @highlight = highlight
  @fulltext = fulltext?(fulltext)
  @query_mode = query_mode
  @requested_aggregations = requested_aggregations

  target_index = index.present? ? index : default_index
  request = { index: target_index, body: build_query(from) }
  # search_type is only added on request, so the default call shape is unchanged.
  request[:search_type] = 'dfs_query_then_fetch' if use_global_scoring

  client.search(**request)
end
1718

1819
# Only treat fulltext as true if it is boolean true or the string 'true' (case insensitive)

test/controllers/graphql_controller_test.rb

Lines changed: 86 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -872,7 +872,7 @@ class GraphqlControllerTest < ActionDispatch::IntegrationTest
872872
initial_hits_count = json_dataset['data']['search']['hits']
873873
initial_still_images_count = json_dataset['data']['search']['aggregations']['contentType'].find do |x|
874874
x['key'] == 'still image'
875-
end ['docCount']
875+
end['docCount']
876876

877877
post '/graphql', params: { query:
878878
'{
@@ -940,7 +940,7 @@ class GraphqlControllerTest < ActionDispatch::IntegrationTest
940940

941941
test 'graphql search respects perPage argument' do
942942
VCR.use_cassette('opensearch_init') do
943-
VCR.use_cassette('graphql_search_per_page_5', match_requests_on: [:method, :uri]) do
943+
VCR.use_cassette('graphql_search_per_page_5', match_requests_on: %i[method uri]) do
944944
post '/graphql', params: { query: '{
945945
search(perPage:5) {
946946
hits
@@ -1140,4 +1140,88 @@ class GraphqlControllerTest < ActionDispatch::IntegrationTest
11401140
end
11411141
end
11421142
end
1143+
1144+
test 'graphql search with useGlobalScoring true passes use_global_scoring true to opensearch' do
  # Minimal OpenSearch-style payload returned by the stubbed search call.
  mock_response = {
    'hits' => {
      'total' => { 'value' => 1 },
      'hits' => [
        {
          '_source' => {
            'title' => 'Data analytics and big data'
          }
        }
      ]
    }
  }

  # Opensearch#search is stubbed here, so this test cannot observe search_type itself.
  # It verifies that the GraphQL layer forwards use_global_scoring: true to the model.
  Opensearch.any_instance.expects(:search).with do |_from, _params, _client, **kwargs|
    kwargs[:use_global_scoring] == true
  end.returns(mock_response)

  post '/graphql', params: { query: '{
                                search(searchterm: "data analytics", useGlobalScoring: true) {
                                  records {
                                    title
                                  }
                                }
                              }' }
  assert_equal(200, response.status)
end
1171+
1172+
test 'graphql search with useGlobalScoring false passes use_global_scoring false to opensearch' do
  # Minimal OpenSearch-style payload returned by the stubbed search call.
  stubbed_hit = { '_source' => { 'title' => 'Data analytics and big data' } }
  stubbed_results = { 'hits' => { 'total' => { 'value' => 1 }, 'hits' => [stubbed_hit] } }

  # The expectation only matches when the keyword forwarded to Opensearch#search is
  # exactly false for an explicit useGlobalScoring: false argument.
  Opensearch.any_instance
            .expects(:search)
            .with { |_from, _params, _client, **kwargs| kwargs[:use_global_scoring] == false }
            .returns(stubbed_results)

  graphql_query = '{
                     search(searchterm: "data analytics", useGlobalScoring: false) {
                       records {
                         title
                       }
                     }
                   }'
  post '/graphql', params: { query: graphql_query }

  assert_equal(200, response.status)
end
1199+
1200+
test 'graphql search useGlobalScoring defaults to false' do
  # Minimal OpenSearch-style payload returned by the stubbed search call.
  stubbed_hit = { '_source' => { 'title' => 'Data analytics and big data' } }
  stubbed_results = { 'hits' => { 'total' => { 'value' => 1 }, 'hits' => [stubbed_hit] } }

  # The query below omits useGlobalScoring, so the keyword reaching Opensearch#search
  # must be the declared default of false.
  Opensearch.any_instance
            .expects(:search)
            .with { |_from, _params, _client, **kwargs| kwargs[:use_global_scoring] == false }
            .returns(stubbed_results)

  graphql_query = '{
                     search(searchterm: "data analytics") {
                       records {
                         title
                       }
                     }
                   }'
  post '/graphql', params: { query: graphql_query }

  assert_equal(200, response.status)
end
11431227
end

test/models/opensearch_test.rb

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,4 +202,34 @@ class OpensearchTest < ActiveSupport::TestCase
202202
assert json['aggregations'].key?('source')
203203
assert_not json['aggregations'].key?('invalid_agg')
204204
end
205+
206+
test 'search with use_global_scoring false does not set search_type' do
  empty_results = { 'hits' => { 'hits' => [], 'total' => { 'value' => 0 } } }

  # The fake client only accepts a request that carries no :search_type key.
  fake_client = mock
  fake_client.expects(:search)
             .with { |request| !request.key?(:search_type) }
             .returns(empty_results)

  Opensearch.new.search(0, {}, fake_client, use_global_scoring: false)
end
215+
216+
test 'search with use_global_scoring true sets search_type to dfs_query_then_fetch' do
  empty_results = { 'hits' => { 'hits' => [], 'total' => { 'value' => 0 } } }

  # The fake client only accepts a request whose :search_type is the DFS mode.
  fake_client = mock
  fake_client.expects(:search)
             .with { |request| request[:search_type] == 'dfs_query_then_fetch' }
             .returns(empty_results)

  Opensearch.new.search(0, {}, fake_client, use_global_scoring: true)
end
225+
226+
test 'search defaults to use_global_scoring false' do
  empty_results = { 'hits' => { 'hits' => [], 'total' => { 'value' => 0 } } }

  # use_global_scoring is omitted from the call, so no :search_type key may be sent.
  fake_client = mock
  fake_client.expects(:search)
             .with { |request| !request.key?(:search_type) }
             .returns(empty_results)

  Opensearch.new.search(0, {}, fake_client)
end
205235
end

0 commit comments

Comments
 (0)