@@ -7,11 +7,16 @@ class MlCitation
77 # For now the initialize method just needs to consult the external lambda.
88 #
99 # @param phrase String. Often a `Term.phrase`.
10- # @return Nothing intentional. Data is written to Hash `@detections` during processing.
10+ # @return Nothing intentional. Data is written to Boolean `@detections` during processing.
1111 def initialize ( phrase )
1212 return unless self . class . expected_env?
1313
14- response = fetch ( phrase )
14+ @detections = false
15+
16+ features = extract_features ( phrase )
17+ return unless enough_nonzero_values? ( features )
18+
19+ response = fetch ( features )
1520 @detections = response unless response == 'Error'
1621 end
1722
@@ -111,10 +116,10 @@ def define_lambda
111116 # define_payload defines the Hash that will be sent to the lambda.
112117 #
113118 # @return Hash
114- def define_payload ( phrase )
119+ def define_payload ( features )
115120 {
116121 action : 'predict' ,
117- features : extract_features ( phrase ) ,
122+ features : features ,
118123 challenge_secret : self . class . lambda_secret
119124 }
120125 end
@@ -135,9 +140,9 @@ def extract_features(phrase)
135140 # error handling with the response.
136141 #
137142 # @return Boolean or 'Error'
138- def fetch ( phrase )
143+ def fetch ( features )
139144 lambda = define_lambda
140- payload = define_payload ( phrase )
145+ payload = define_payload ( features )
141146
142147 response = lambda . post ( self . class . lambda_path , payload . to_json )
143148
@@ -151,5 +156,18 @@ def fetch(phrase)
151156 'Error'
152157 end
153158 end
159+
160+ # enough_nonzero_values? checks that a provided hash contains at least three values that are not zero.
161+ #
162+ # @note We chose 3 as our value here after analyzing the behavior of the citation detector across nearly a year of
163+ # search traffic. For searches which had only one or two features that are not zero, we found no actual citations.
164+ # To see the analyses, look at the "Filtering results" and "Surprising predictions" notebooks at
165+ # https://github.com/MITLibraries/tacos-notebooks/tree/main/notebooks/explorations
166+ #
167+ # @param hash Hash
168+ # @return Boolean
169+ def enough_nonzero_values? ( hash )
170+ hash . values . count { |v | v != 0 } >= 3
171+ end
154172 end
155173end
0 commit comments