Skip to content

Commit 882e9f9

Browse files
Split "matched_questions" into "keywords" and "questions" (#24)
* Split "matched_questions" to separate data better
* Update code for "matched_questions" split
* Update docker images
* Update GitHub Actions workflows
* Fix missing questions field
* Filter special characters from dictionary matching
* Add keywords to dictionary matching
1 parent: cfd561f; commit: 882e9f9

10 files changed

Lines changed: 402 additions & 250 deletions

File tree

.github/workflows/codeql.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@ jobs:
3232
strategy:
3333
fail-fast: false
3434
matrix:
35-
language: [ 'csharp', 'javascript' ]
35+
language: [ 'csharp', 'javascript', 'python' ]
3636
# CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
3737
# Use only 'java' to analyze code written in Java, Kotlin or both
3838
# Use only 'javascript' to analyze code written in JavaScript, TypeScript or both

.github/workflows/frontend.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@ jobs:
3838
- name: Use Node.js
3939
uses: actions/setup-node@v4
4040
with:
41-
node-version: 20
41+
node-version: 22
4242

4343
- name: Setup Pages
4444
uses: actions/configure-pages@v5

BingusApi/config/faq_config.json

Lines changed: 351 additions & 228 deletions
Large diffs are not rendered by default.

BingusLib/FaqHandling/FaqConfig.cs

Lines changed: 15 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,10 @@ public record FaqConfigEntry
1414
[JsonPropertyName("answer")]
1515
public string Answer { get; set; } = "";
1616

17-
[JsonPropertyName("matched_questions")]
17+
[JsonPropertyName("keywords")]
18+
public string[] Keywords { get; set; } = [];
19+
20+
[JsonPropertyName("questions")]
1821
public string[] Questions { get; set; } = [];
1922

2023
public FaqConfigEntry() { }
@@ -25,9 +28,14 @@ public FaqConfigEntry(string answer)
2528
Answer = answer;
2629
}
2730

28-
public FaqConfigEntry(string answer, IEnumerable<string> questions)
31+
public FaqConfigEntry(
32+
string answer,
33+
IEnumerable<string> keywords,
34+
IEnumerable<string> questions
35+
)
2936
: this(answer)
3037
{
38+
Keywords = keywords.ToArray();
3139
Questions = questions.ToArray();
3240
}
3341
}
@@ -50,6 +58,11 @@ public FaqConfigEntry(string answer, IEnumerable<string> questions)
5058
{
5159
foreach (var entry in FaqEntries)
5260
{
61+
foreach (var keyword in entry.Keywords)
62+
{
63+
yield return (entry.Title, keyword, entry.Answer);
64+
}
65+
5366
foreach (var question in entry.Questions)
5467
{
5568
yield return (entry.Title, question, entry.Answer);

BingusLib/FaqHandling/FaqDict.cs

Lines changed: 17 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,9 @@
1+
using System.Text.RegularExpressions;
12
using static BingusLib.FaqHandling.FaqConfig;
23

34
namespace BingusLib.FaqHandling
45
{
5-
public class FaqDict
6+
public partial class FaqDict
67
{
78
private readonly Dictionary<string, FaqEntry> _faqDict = [];
89

@@ -20,6 +21,16 @@ public FaqDict(IEnumerable<FaqConfigEntry> faqConfigEntries)
2021
Answer = entry.Answer,
2122
};
2223

24+
foreach (var keyword in entry.Keywords)
25+
{
26+
_faqDict[CleanQuery(keyword)] = new FaqEntry()
27+
{
28+
Title = entry.Title,
29+
Question = keyword,
30+
Answer = entry.Answer,
31+
};
32+
}
33+
2334
foreach (var question in entry.Questions)
2435
{
2536
_faqDict[CleanQuery(question)] = new FaqEntry()
@@ -32,11 +43,15 @@ public FaqDict(IEnumerable<FaqConfigEntry> faqConfigEntries)
3243
}
3344
}
3445

35-
private static string CleanQuery(string query) => query.Trim().ToLowerInvariant();
46+
private static string CleanQuery(string query) =>
47+
QueryFilterRegex().Replace(query.ToLowerInvariant(), "");
3648

3749
public FaqEntry? Search(string query)
3850
{
3951
return _faqDict.TryGetValue(CleanQuery(query), out var entry) ? entry : null;
4052
}
53+
54+
[GeneratedRegex("[^A-Za-z]")]
55+
private static partial Regex QueryFilterRegex();
4156
}
4257
}

bingus-bot/src/commands/ask.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@ import { EmbedList, fetchBingus, fetchBingusData } from "../util.js";
99

1010
async function getFaqConfig() {
1111
return (await fetchBingusData()).faqs.flatMap((x) =>
12-
x.matched_questions.filter((x) => x.length > 0 && x.length <= 100),
12+
x.keywords.filter((x) => x.length > 0 && x.length <= 100),
1313
);
1414
}
1515

bingus-bot/src/util.ts

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -168,11 +168,11 @@ export class EmbedList {
168168
}
169169

170170
export interface FaqConfig {
171-
average_questions: boolean;
172171
faqs: {
173172
title: string;
174173
answer: string;
175-
matched_questions: string[];
174+
keywords: string[];
175+
questions: string[];
176176
}[];
177177
}
178178

bingus-python-encoder/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
FROM python:3.11-slim
1+
FROM python:3.14-slim
22

33
WORKDIR /usr/src/app
44

bingus-python-encoder/data_utils.py

Lines changed: 12 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,8 @@ def random_typo(str_err: StrErrer, random: Random) -> StrErrer:
6969
class FaqEntry(BaseModel):
7070
title: str | None
7171
answer: str
72-
matched_questions: list[str]
72+
keywords: list[str]
73+
questions: list[str]
7374

7475

7576
class FaqConfig(BaseModel):
@@ -102,21 +103,21 @@ def iterate_answers(self):
102103

103104
def iterate_questions(self):
104105
for faq in self.faqs:
105-
for question in faq.matched_questions:
106+
for question in faq.questions:
106107
yield question
107108

108109
def question_count(self):
109-
return sum((len(faq.matched_questions) for faq in self.faqs))
110+
return sum((len(faq.questions) for faq in self.faqs))
110111

111112
def filter_short_questions(self, min_words: int):
112113
"""
113114
Filters out questions shorter than min_words and removes empty entries.
114115
"""
115116
for faq in self.faqs:
116-
faq.matched_questions = [
117-
q for q in faq.matched_questions if len(q.split()) >= min_words]
117+
faq.questions = [
118+
q for q in faq.questions if len(q.split()) >= min_words]
118119
self.faqs = [faq for faq in self.faqs if len(
119-
faq.matched_questions) > 0]
120+
faq.questions) > 0]
120121

121122
def make_typos(
122123
self,
@@ -149,7 +150,7 @@ def make_typos(
149150
for faq in self.faqs:
150151
new_qs: list[str] = []
151152

152-
for question in faq.matched_questions:
153+
for question in faq.questions:
153154
q_min_typos = min_typos
154155
q_max_typos = max_typos
155156
if scale_max_per_word:
@@ -168,7 +169,7 @@ def make_typos(
168169
new_qs.append(typo_q.result)
169170
typo_count += num_typos
170171

171-
faq.matched_questions.extend(new_qs)
172+
faq.questions.extend(new_qs)
172173
typo_entry_count += len(new_qs)
173174

174175
return typo_entry_count, typo_count
@@ -178,7 +179,7 @@ def make_question_pairs(self) -> Dataset:
178179
Makes question-to-question pairs from the FAQs, where each question is paired with all
179180
other questions in its set (positive samples) and from other sets (negative sample).
180181
"""
181-
return make_entry_pairs([faq.matched_questions for faq in self.faqs])
182+
return make_entry_pairs([faq.questions for faq in self.faqs])
182183

183184
def make_question_answer_pairs(self) -> Dataset:
184185
"""
@@ -188,7 +189,7 @@ def make_question_answer_pairs(self) -> Dataset:
188189
questions, answers, scores = [], [], []
189190

190191
for faq in self.faqs:
191-
for question in faq.matched_questions:
192+
for question in faq.questions:
192193
# Positive sample (correct answer)
193194
questions.append(question)
194195
answers.append(faq.answer)
@@ -212,7 +213,7 @@ def make_everything_pairs(self) -> Dataset:
212213
Makes pairs of titles, answers, and questions from the FAQs, where each set is paired with its correct
213214
answer (positive sample) and other incorrect answers (negative samples).
214215
"""
215-
return make_entry_pairs([[faq.title, faq.answer, *faq.matched_questions] for faq in self.faqs])
216+
return make_entry_pairs([[faq.title, faq.answer, *faq.questions] for faq in self.faqs])
216217

217218

218219
def make_wiki_qa_dataset(faqs: FaqConfig, max_count: int = -1) -> Dataset:

bot.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
FROM node:20-slim AS base
1+
FROM node:22-slim AS base
22

33
COPY ./bingus-bot/ /app/bingus-bot/
44
COPY ./package*.json /app/

0 commit comments

Comments
 (0)