@@ -69,7 +69,8 @@ def random_typo(str_err: StrErrer, random: Random) -> StrErrer:
class FaqEntry(BaseModel):
    """A single FAQ entry: one answer plus the texts associated with it."""

    # Optional title of the entry; may be None.
    title: str | None
    # Answer text; each entry in `questions` is paired with it as the
    # correct answer when question/answer samples are built.
    answer: str
    # Keyword strings for the entry (no usage is visible in this section
    # of the file -- TODO confirm how they are consumed).
    keywords: list[str]
    # Question phrasings that this entry answers.
    questions: list[str]
7374
7475
7576class FaqConfig (BaseModel ):
@@ -102,21 +103,21 @@ def iterate_answers(self):
102103
def iterate_questions(self):
    """Yield every question of every FAQ, FAQ by FAQ, in stored order."""
    for faq in self.faqs:
        # Delegate to the list's own iterator instead of a nested loop.
        yield from faq.questions
107108
def question_count(self) -> int:
    """Return the total number of questions summed over all FAQs."""
    return sum(len(faq.questions) for faq in self.faqs)
110111
def filter_short_questions(self, min_words: int) -> None:
    """
    Filters out questions shorter than min_words and removes empty entries.

    A question's length is its number of whitespace-separated words
    (``str.split()``). Each FAQ's ``questions`` list is replaced by the
    questions that survive, then ``self.faqs`` is rebuilt without the FAQs
    that were left with no questions at all.
    """
    for faq in self.faqs:
        faq.questions = [
            question
            for question in faq.questions
            if len(question.split()) >= min_words
        ]
    # Drop FAQs whose question list is now empty.
    self.faqs = [faq for faq in self.faqs if len(faq.questions) > 0]
120121
121122 def make_typos (
122123 self ,
@@ -149,7 +150,7 @@ def make_typos(
149150 for faq in self .faqs :
150151 new_qs : list [str ] = []
151152
152- for question in faq .matched_questions :
153+ for question in faq .questions :
153154 q_min_typos = min_typos
154155 q_max_typos = max_typos
155156 if scale_max_per_word :
@@ -168,7 +169,7 @@ def make_typos(
168169 new_qs .append (typo_q .result )
169170 typo_count += num_typos
170171
171- faq .matched_questions .extend (new_qs )
172+ faq .questions .extend (new_qs )
172173 typo_entry_count += len (new_qs )
173174
174175 return typo_entry_count , typo_count
@@ -178,7 +179,7 @@ def make_question_pairs(self) -> Dataset:
178179 Makes question-to-question pairs from the FAQs, where each question is paired with all
179180 other questions in its set (positive samples) and from other sets (negative sample).
180181 """
181- return make_entry_pairs ([faq .matched_questions for faq in self .faqs ])
182+ return make_entry_pairs ([faq .questions for faq in self .faqs ])
182183
183184 def make_question_answer_pairs (self ) -> Dataset :
184185 """
@@ -188,7 +189,7 @@ def make_question_answer_pairs(self) -> Dataset:
188189 questions , answers , scores = [], [], []
189190
190191 for faq in self .faqs :
191- for question in faq .matched_questions :
192+ for question in faq .questions :
192193 # Positive sample (correct answer)
193194 questions .append (question )
194195 answers .append (faq .answer )
@@ -212,7 +213,7 @@ def make_everything_pairs(self) -> Dataset:
212213 Makes pairs of titles, answers, and questions from the FAQs, where each set is paired with its correct
213214 answer (positive sample) and other incorrect answers (negative samples).
214215 """
215- return make_entry_pairs ([[faq .title , faq .answer , * faq .matched_questions ] for faq in self .faqs ])
216+ return make_entry_pairs ([[faq .title , faq .answer , * faq .questions ] for faq in self .faqs ])
216217
217218
218219def make_wiki_qa_dataset (faqs : FaqConfig , max_count : int = - 1 ) -> Dataset :
0 commit comments