Skip to content

Commit 6ce5b61

Browse files
authored
Merge pull request #964 from MITLibraries/dfs_query_then_fetch
Adds use_global_scoring feature
2 parents 56a27b9 + e157b0b commit 6ce5b61

4 files changed

Lines changed: 128 additions & 7 deletions

File tree

app/graphql/types/query_type.rb

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,9 @@ def record_id(id:, index:)
6868
'of the words much match. Options include: "OR", "AND"'
6969
argument :query_mode, String, required: false, default_value: 'keyword',
7070
description: 'Search mode: "keyword" (lexical search), "semantic" (vector search), or "hybrid" (both)'
71+
argument :use_global_scoring, Boolean, required: false, default_value: false,
72+
description: 'Calculate relevance scores globally across all shards ' \
73+
'instead of per-shard. Defaults to false.'
7174

7275
# applied filters
7376
argument :access_to_files_filter, [String],
@@ -105,12 +108,15 @@ def record_id(id:, index:)
105108
end
106109

107110
def search(searchterm:, citation:, contributors:, funding_information:, geodistance:, geobox:, identifiers:,
108-
locations:, subjects:, title:, index:, source:, from:, boolean_type:, fulltext:, per_page: 20, query_mode: 'keyword', **filters)
111+
locations:, subjects:, title:, index:, source:, from:, boolean_type:, fulltext:, per_page: 20,
112+
query_mode: 'keyword', use_global_scoring: false, **filters)
109113
query = construct_query(searchterm, citation, contributors, funding_information, geodistance, geobox, identifiers,
110114
locations, subjects, title, source, boolean_type, filters, per_page, query_mode)
111115

112116
results = Opensearch.new.search(from, query, Timdex::OSClient, highlight: highlight_requested?, index: index,
113-
fulltext: fulltext, query_mode: query_mode, requested_aggregations: requested_aggregations)
117+
fulltext: fulltext, query_mode: query_mode,
118+
requested_aggregations: requested_aggregations,
119+
use_global_scoring: use_global_scoring)
114120

115121
response = {}
116122
response[:hits] = results['hits']['total']['value']

app/models/opensearch.rb

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,16 @@ class Opensearch
44
MAX_SIZE = 200
55

66
def search(from, params, client, highlight: false, index: nil, fulltext: false, query_mode: 'keyword',
7-
requested_aggregations: [])
7+
requested_aggregations: [], use_global_scoring: false)
88
@params = params
99
@highlight = highlight
1010
@fulltext = fulltext?(fulltext)
1111
@query_mode = query_mode
1212
@requested_aggregations = requested_aggregations
1313
index = default_index unless index.present?
14-
client.search(index:,
15-
body: build_query(from))
14+
search_params = { index:, body: build_query(from) }
15+
search_params[:search_type] = 'dfs_query_then_fetch' if use_global_scoring
16+
client.search(**search_params)
1617
end
1718

1819
# Only treat fulltext as true if it is boolean true or the string 'true' (case insensitive)

test/controllers/graphql_controller_test.rb

Lines changed: 86 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -872,7 +872,7 @@ class GraphqlControllerTest < ActionDispatch::IntegrationTest
872872
initial_hits_count = json_dataset['data']['search']['hits']
873873
initial_still_images_count = json_dataset['data']['search']['aggregations']['contentType'].find do |x|
874874
x['key'] == 'still image'
875-
end ['docCount']
875+
end['docCount']
876876

877877
post '/graphql', params: { query:
878878
'{
@@ -940,7 +940,7 @@ class GraphqlControllerTest < ActionDispatch::IntegrationTest
940940

941941
test 'graphql search respects perPage argument' do
942942
VCR.use_cassette('opensearch_init') do
943-
VCR.use_cassette('graphql_search_per_page_5', match_requests_on: [:method, :uri]) do
943+
VCR.use_cassette('graphql_search_per_page_5', match_requests_on: %i[method uri]) do
944944
post '/graphql', params: { query: '{
945945
search(perPage:5) {
946946
hits
@@ -1140,4 +1140,88 @@ class GraphqlControllerTest < ActionDispatch::IntegrationTest
11401140
end
11411141
end
11421142
end
1143+
1144+
test 'graphql search with useGlobalScoring true passes search_type to opensearch' do
1145+
mock_response = {
1146+
'hits' => {
1147+
'total' => { 'value' => 1 },
1148+
'hits' => [
1149+
{
1150+
'_source' => {
1151+
'title' => 'Data analytics and big data'
1152+
}
1153+
}
1154+
]
1155+
}
1156+
}
1157+
# Verify that when useGlobalScoring is true, use_global_scoring: true is passed to Opensearch#search
1158+
Opensearch.any_instance.expects(:search).with do |_from, _params, _client, **kwargs|
1159+
kwargs[:use_global_scoring] == true
1160+
end.returns(mock_response)
1161+
1162+
post '/graphql', params: { query: '{
1163+
search(searchterm: "data analytics", useGlobalScoring: true) {
1164+
records {
1165+
title
1166+
}
1167+
}
1168+
}' }
1169+
assert_equal(200, response.status)
1170+
end
1171+
1172+
test 'graphql search with useGlobalScoring false passes use_global_scoring false to opensearch' do
1173+
mock_response = {
1174+
'hits' => {
1175+
'total' => { 'value' => 1 },
1176+
'hits' => [
1177+
{
1178+
'_source' => {
1179+
'title' => 'Data analytics and big data'
1180+
}
1181+
}
1182+
]
1183+
}
1184+
}
1185+
# Verify that when useGlobalScoring is explicitly false, use_global_scoring: false is passed to Opensearch#search
1186+
Opensearch.any_instance.expects(:search).with do |_from, _params, _client, **kwargs|
1187+
kwargs[:use_global_scoring] == false
1188+
end.returns(mock_response)
1189+
1190+
post '/graphql', params: { query: '{
1191+
search(searchterm: "data analytics", useGlobalScoring: false) {
1192+
records {
1193+
title
1194+
}
1195+
}
1196+
}' }
1197+
assert_equal(200, response.status)
1198+
end
1199+
1200+
test 'graphql search useGlobalScoring defaults to false' do
1201+
mock_response = {
1202+
'hits' => {
1203+
'total' => { 'value' => 1 },
1204+
'hits' => [
1205+
{
1206+
'_source' => {
1207+
'title' => 'Data analytics and big data'
1208+
}
1209+
}
1210+
]
1211+
}
1212+
}
1213+
# Verify that when useGlobalScoring is not specified, use_global_scoring defaults to false
1214+
Opensearch.any_instance.expects(:search).with do |_from, _params, _client, **kwargs|
1215+
kwargs[:use_global_scoring] == false
1216+
end.returns(mock_response)
1217+
1218+
post '/graphql', params: { query: '{
1219+
search(searchterm: "data analytics") {
1220+
records {
1221+
title
1222+
}
1223+
}
1224+
}' }
1225+
assert_equal(200, response.status)
1226+
end
11431227
end

test/models/opensearch_test.rb

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,4 +202,34 @@ class OpensearchTest < ActiveSupport::TestCase
202202
assert json['aggregations'].key?('source')
203203
assert_not json['aggregations'].key?('invalid_agg')
204204
end
205+
206+
test 'search with use_global_scoring false does not set search_type' do
207+
mock_client = mock
208+
mock_client.expects(:search).with do |params|
209+
!params.key?(:search_type)
210+
end.returns({ 'hits' => { 'hits' => [], 'total' => { 'value' => 0 } } })
211+
212+
os = Opensearch.new
213+
os.search(0, {}, mock_client, use_global_scoring: false)
214+
end
215+
216+
test 'search with use_global_scoring true sets search_type to dfs_query_then_fetch' do
217+
mock_client = mock
218+
mock_client.expects(:search).with do |params|
219+
params[:search_type] == 'dfs_query_then_fetch'
220+
end.returns({ 'hits' => { 'hits' => [], 'total' => { 'value' => 0 } } })
221+
222+
os = Opensearch.new
223+
os.search(0, {}, mock_client, use_global_scoring: true)
224+
end
225+
226+
test 'search defaults to use_global_scoring false' do
227+
mock_client = mock
228+
mock_client.expects(:search).with do |params|
229+
!params.key?(:search_type)
230+
end.returns({ 'hits' => { 'hits' => [], 'total' => { 'value' => 0 } } })
231+
232+
os = Opensearch.new
233+
os.search(0, {}, mock_client)
234+
end
205235
end

0 commit comments

Comments
 (0)