Skip to content

Commit ec3b7da

Browse files
lenovobenben and lihaidong authored
fix(zhihu): decode numeric entities in answer detail (#1629)
Co-authored-by: lihaidong <lihaidong@kingsoft.com>
1 parent 4de04c4 commit ec3b7da

2 files changed

Lines changed: 20 additions & 0 deletions

File tree

clis/zhihu/answer-detail.js

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,18 @@ function stripHtml(html) {
1919
.replace(/&amp;/g, '&')
2020
.replace(/&quot;/g, '"')
2121
.replace(/&#39;/g, "'")
22+
.replace(/&#(\d+);/g, (_, value) => {
23+
const codePoint = Number(value);
24+
return Number.isInteger(codePoint) && codePoint >= 0 && codePoint <= 0x10FFFF
25+
? String.fromCodePoint(codePoint)
26+
: _;
27+
})
28+
.replace(/&#x([0-9a-f]+);/gi, (_, value) => {
29+
const codePoint = Number.parseInt(value, 16);
30+
return Number.isInteger(codePoint) && codePoint >= 0 && codePoint <= 0x10FFFF
31+
? String.fromCodePoint(codePoint)
32+
: _;
33+
})
2234
.replace(/\n{3,}/g, '\n\n')
2335
.trim();
2436
}

clis/zhihu/answer-detail.test.js

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -304,6 +304,14 @@ describe('zhihu answer-detail helpers', () => {
304304
expect(out).toBe('hi there & you\n\nsecond');
305305
});
306306

307+
it('stripHtml decodes numeric entities', () => {
308+
expect(helpers.stripHtml('&#34;中文&#34; &#x26; &#39;test&#39;')).toBe('"中文" & \'test\'');
309+
});
310+
311+
it('stripHtml keeps invalid numeric entities unchanged', () => {
312+
expect(helpers.stripHtml('bad &#9999999999; entity')).toBe('bad &#9999999999; entity');
313+
});
314+
307315
it('stripHtml maps <br> to single newline', () => {
308316
expect(helpers.stripHtml('a<br>b<br/>c')).toBe('a\nb\nc');
309317
});

0 commit comments

Comments
 (0)