Skip to content

Commit 91f1c8a

Browse files
authored
Merge pull request CMU-313#18 from CMU-313/feat/fuzzy-search
Feature: Add "Fuzzy match" search dropdown option for typo tolerance
2 parents 8c35c80 + 18d7729 commit 91f1c8a

10 files changed

Lines changed: 174 additions & 5 deletions

File tree

.github/workflows/test.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ jobs:
5757
env:
5858
SETUP: >-
5959
{
60-
"url": "http://127.0.0.1:4567/forum",
60+
"url": "http://127.0.0.1:4567",
6161
"secret": "abcdef",
6262
"admin:username": "admin",
6363
"admin:email": "test@example.org",

public/language/en-GB/search.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
"match-all-words": "Match all words",
2424
"match-any-word": "Match any word",
2525
"match-contains": "Match contains",
26+
"match-fuzzy": "Fuzzy match",
2627
"all": "All",
2728
"any": "Any",
2829
"posted-by": "Posted by",

public/language/en-US/search.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
"match-all-words": "Match all words",
2424
"match-any-word": "Match any word",
2525
"match-contains": "Match contains",
26+
"match-fuzzy": "Fuzzy match",
2627
"all": "All",
2728
"any": "Any",
2829
"posted-by": "Posted by",

src/search.js

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ const privileges = require('./privileges');
1313
const activitypub = require('./activitypub');
1414
const utils = require('./utils');
1515

16+
const fuzzy = require('./search/fuzzy');
17+
1618
const search = module.exports;
1719

1820
search.search = async function (data) {
@@ -210,6 +212,9 @@ async function searchInBookmarks(data, searchCids, searchUids) {
210212
const needle = queryStr.toLowerCase();
211213
return content.toLowerCase().includes(needle) || title.toLowerCase().includes(needle);
212214
}
215+
if (matchWords === 'fuzzy') {
216+
return fuzzy.fuzzyMatches(queryStr, content) || fuzzy.fuzzyMatches(queryStr, title);
217+
}
213218
const tokens = queryStr.split(' ');
214219
const method = (matchWords === 'any' ? 'some' : 'every');
215220
return tokens[method](

src/search/fuzzy.js

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
'use strict';
2+
3+
/**
 * Compute the Levenshtein (edit) distance between two strings.
 *
 * The distance is the minimum number of single-character insertions,
 * deletions and substitutions needed to turn `a` into `b`.
 *
 * Characters are compared by Unicode code point rather than by UTF-16 code
 * unit, so a character outside the Basic Multilingual Plane (e.g. an emoji)
 * counts as exactly one edit instead of two.
 *
 * @param {string} a - source string
 * @param {string} b - target string
 * @returns {number} the edit distance (0 when the strings are identical)
 */
function levenshtein(a, b) {
	// Array.from iterates by code point, keeping surrogate pairs together.
	const source = Array.from(a);
	const target = Array.from(b);
	if (source.length === 0) return target.length;
	if (target.length === 0) return source.length;

	// Only the previous row of the DP table is needed to compute the next one,
	// so keep two rows instead of the full (m + 1) x (n + 1) matrix.
	let previous = Array.from({ length: target.length + 1 }, (_, j) => j);
	for (let i = 1; i <= source.length; i++) {
		const current = [i];
		for (let j = 1; j <= target.length; j++) {
			const cost = source[i - 1] === target[j - 1] ? 0 : 1;
			current[j] = Math.min(
				previous[j] + 1, // deletion
				current[j - 1] + 1, // insertion
				previous[j - 1] + cost // substitution (or match)
			);
		}
		previous = current;
	}
	return previous[target.length];
}
19+
20+
/**
 * Return how many edits a query token of the given length may differ by and
 * still be considered a fuzzy match.
 *
 * Tiers:
 *   - length <= 2: 0 edits (exact match only)
 *   - length 3-5:  1 edit
 *   - length 6-9:  2 edits
 *   - length >= 10: 3 edits
 *
 * Tokens of one or two characters must match exactly: a single edit on such a
 * token rewrites half or all of it, so it would match almost any short word.
 *
 * @param {number} tokenLength - length of the query token
 * @returns {number} maximum allowed edit distance
 */
function maxFuzzyEdits(tokenLength) {
	if (tokenLength <= 2) return 0;
	if (tokenLength <= 5) return 1;
	if (tokenLength <= 9) return 2;
	return 3;
}
26+
27+
/**
 * Decide whether `text` fuzzily matches `query`.
 *
 * Both inputs are lower-cased and split into tokens made of Unicode letters
 * and digits. The text matches when ANY query token matches ANY text token,
 * where two tokens match if one of the following holds:
 *   - the text token contains the query token,
 *   - the query token contains the text token and the text token is at least
 *     three characters long,
 *   - their edit distance is within `maxFuzzyEdits(queryToken.length)`.
 *
 * @param {string} query - the user's search string
 * @param {string} text - the content (or title) being searched
 * @returns {boolean} true when at least one query token matches; false for
 *   empty/blank input on either side
 */
function fuzzyMatches(query, text) {
	// Tokenize both sides the same way. Keeping digits lets numeric queries
	// such as "2024" match, and stripping punctuation from the query stops
	// trailing commas/periods from counting as edits.
	const tokenize = value => String(value || '').toLowerCase().match(/[\p{L}\p{N}]+/gu) || [];
	// A text token must be at least this long before "query contains text
	// token" counts; otherwise one-letter words like "a" or "i" make almost
	// every query match almost every post.
	const minContainedLength = 3;

	const queryTokens = tokenize(query);
	const textTokens = tokenize(text);
	if (!queryTokens.length || !textTokens.length) return false;

	return queryTokens.some((qt) => {
		const allowed = maxFuzzyEdits(qt.length);
		return textTokens.some((tt) => {
			// Cheap substring checks first; only fall back to the edit
			// distance computation when they fail.
			if (tt.includes(qt)) return true;
			if (tt.length >= minContainedLength && qt.includes(tt)) return true;
			return levenshtein(qt, tt) <= allowed;
		});
	});
}
39+
40+
exports.levenshtein = levenshtein;
41+
exports.maxFuzzyEdits = maxFuzzyEdits;
42+
exports.fuzzyMatches = fuzzyMatches;

test/api.js

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -470,7 +470,8 @@ describe('API', async () => {
470470
});
471471
}
472472

473-
url = nconf.get('url') + (prefix || '') + testPath;
473+
const base = nconf.get('url') + (prefix || '') + (nconf.get('relative_path') || '');
474+
url = base + testPath;
474475
});
475476

476477
it('should contain a valid request body (if present) with application/json or multipart/form-data type if POST/PUT/DELETE', () => {

test/mocks/databasemock.js

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,17 +46,21 @@ nconf.defaults({
4646
relative_path: '',
4747
});
4848

49-
const urlObject = url.parse(nconf.get('url'));
49+
const testPort = process.env.TEST_PORT || 4568;
50+
let urlObject = url.parse(nconf.get('url'));
51+
// Use a different port for tests to avoid EADDRINUSE when NodeBB or another process is on the default port
52+
urlObject = url.parse(`${urlObject.protocol}//${urlObject.hostname}:${testPort}${urlObject.path || ''}`);
5053
const relativePath = urlObject.pathname !== '/' ? urlObject.pathname : '';
5154
nconf.set('relative_path', relativePath);
5255
nconf.set('asset_base_url', `${relativePath}/assets`);
5356
nconf.set('upload_path', path.join(nconf.get('base_dir'), nconf.get('upload_path')));
5457
nconf.set('upload_url', '/assets/uploads');
5558
nconf.set('url_parsed', urlObject);
59+
nconf.set('url', `${urlObject.protocol}//${urlObject.host}`);
5660
nconf.set('base_url', `${urlObject.protocol}//${urlObject.host}`);
5761
nconf.set('secure', urlObject.protocol === 'https:');
58-
nconf.set('use_port', !!urlObject.port);
59-
nconf.set('port', urlObject.port || nconf.get('port') || (nconf.get('PORT_ENV_VAR') ? nconf.get(nconf.get('PORT_ENV_VAR')) : false) || 4567);
62+
nconf.set('use_port', true);
63+
nconf.set('port', testPort);
6064

6165
// cookies don't provide isolation by port: http://stackoverflow.com/a/16328399/122353
6266
const domain = nconf.get('cookieDomain') || urlObject.hostname;

test/search-fuzzy.js

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
'use strict';
2+
3+
const assert = require('assert');
4+
const { levenshtein, fuzzyMatches } = require('../src/search/fuzzy');
5+
6+
describe('Search fuzzy (unit)', () => {
7+
describe('levenshtein()', () => {
8+
it('computes correct edit distance for one-char difference', () => {
9+
assert.strictEqual(levenshtein('helo', 'hello'), 1);
10+
});
11+
12+
it('returns 0 for identical strings', () => {
13+
assert.strictEqual(levenshtein('hello', 'hello'), 0);
14+
});
15+
16+
it('returns length of non-empty string when other is empty', () => {
17+
assert.strictEqual(levenshtein('', 'hello'), 5);
18+
assert.strictEqual(levenshtein('hello', ''), 5);
19+
});
20+
21+
it('returns 0 when both strings are empty', () => {
22+
assert.strictEqual(levenshtein('', ''), 0);
23+
});
24+
25+
it('computes correct edit distance for unrelated words', () => {
26+
assert.strictEqual(levenshtein('cat', 'banana'), 5);
27+
});
28+
29+
it('computes 1 for character replacement', () => {
30+
assert.strictEqual(levenshtein('type', 'typo'), 1);
31+
});
32+
33+
it('computes 1 for character removal', () => {
34+
assert.strictEqual(levenshtein('type', 'typ'), 1);
35+
});
36+
37+
it('computes 1 for character insert', () => {
38+
assert.strictEqual(levenshtein('type', 'types'), 1);
39+
});
40+
41+
it('computes 1 for number replacement', () => {
42+
assert.strictEqual(levenshtein('type1', 'type2'), 1);
43+
});
44+
45+
it('computes 1 for number insert', () => {
46+
assert.strictEqual(levenshtein('type', 'type1'), 1);
47+
});
48+
49+
it('computes 1 for number removal', () => {
50+
assert.strictEqual(levenshtein('type1', 'type'), 1);
51+
});
52+
53+
it('computes correct edit distance for different numbers', () => {
54+
assert.strictEqual(levenshtein('123', '456'), 3);
55+
assert.strictEqual(levenshtein('111', '123'), 2);
56+
});
57+
58+
it('computes correct edit distance for punctuation', () => {
59+
assert.strictEqual(levenshtein('', '.,;!'), 4);
60+
assert.strictEqual(levenshtein('ab.c', 'abc'), 1);
61+
assert.strictEqual(levenshtein('abc.', 'abc'), 1);
62+
});
63+
64+
it('computes correct edit distance for punctuation insertion', () => {
65+
assert.strictEqual(levenshtein('abc', 'ab.c'), 1);
66+
assert.strictEqual(levenshtein('abc', 'abc.'), 1);
67+
});
68+
69+
it('computes correct edit distance for punctuation removal', () => {
70+
assert.strictEqual(levenshtein('ab.c', 'abc'), 1);
71+
assert.strictEqual(levenshtein('abc.', 'abc'), 1);
72+
});
73+
});
74+
75+
describe('fuzzyMatches()', () => {
76+
it('matches small edit distance', () => {
77+
assert.strictEqual(fuzzyMatches('helo', 'hello'), true);
78+
});
79+
80+
it('matches small edit distance in phrase', () => {
81+
assert.strictEqual(fuzzyMatches('helo', 'hello world'), true);
82+
});
83+
84+
it('returns false for unrelated words', () => {
85+
assert.strictEqual(fuzzyMatches('cat', 'banana'), false);
86+
});
87+
88+
it('matches substring cases (query token contained in text token)', () => {
89+
assert.strictEqual(fuzzyMatches('node', 'nodebb'), true);
90+
});
91+
92+
it('matches when text token is contained in query token', () => {
93+
assert.strictEqual(fuzzyMatches('nodebb', 'node'), true);
94+
});
95+
96+
it('matches one of multiple tokens', () => {
97+
assert.strictEqual(fuzzyMatches('helo world', 'hello there'), true);
98+
});
99+
100+
it('returns false for empty query', () => {
101+
assert.strictEqual(fuzzyMatches('', 'hello world'), false);
102+
});
103+
104+
it('returns false for empty text', () => {
105+
assert.strictEqual(fuzzyMatches('hello', ''), false);
106+
});
107+
108+
it('is case insensitive', () => {
109+
assert.strictEqual(fuzzyMatches('HELO', 'hello'), true);
110+
assert.strictEqual(fuzzyMatches('helo', 'HELLO'), true);
111+
});
112+
});
113+
});

vendor/nodebb-theme-harmony-2.1.35/templates/search.tpl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
<option value="all">[[search:match-all-words]]</option>
2626
<option value="any">[[search:match-any-word]]</option>
2727
<option value="contains">[[search:match-contains]]</option>
28+
<option value="fuzzy">[[search:match-fuzzy]]</option>
2829
</select>
2930

3031
<select id="show-results-as" name="showAs" class="post-search-item form-select text-sm py-2 ps-2 pe-3">

vendor/nodebb-theme-harmony-main/templates/search.tpl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
<option value="all">[[search:match-all-words]]</option>
2626
<option value="any">[[search:match-any-word]]</option>
2727
<option value="contains">[[search:match-contains]]</option>
28+
<option value="fuzzy">[[search:match-fuzzy]]</option>
2829
</select>
2930

3031
<select id="show-results-as" name="showAs" class="post-search-item form-select text-sm py-2 ps-2 pe-3">

0 commit comments

Comments
 (0)