Skip to content

Commit 6f0824b

Browse files
committed
Add test for Japanese diacritics
1 parent a943852 commit 6f0824b

2 files changed

Lines changed: 11 additions & 18 deletions

File tree

src/utils/stripDiacritics.test.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,4 +41,8 @@ describe('stripDiacritics', () => {
4141
expect(str.length).toBe(112);
4242
expect(stripDiacritics(str)).toBe('');
4343
});
44+
45+
it('removes combining marks from Japanese characters', () => {
46+
expect(stripDiacritics('ネバダ州')).toBe('ネハタ州');
47+
});
4448
});

src/utils/stripDiacritics.ts

Lines changed: 7 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,5 @@
1-
/**
2-
* Licensed under the Apache License, Version 2.0 (the "License");
3-
* you may not use this file except in compliance with the License.
4-
* You may obtain a copy of the License at
5-
*
6-
* http://www.apache.org/licenses/LICENSE-2.0
7-
*
8-
* Unless required by applicable law or agreed to in writing, software
9-
* distributed under the License is distributed on an "AS IS" BASIS,
10-
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11-
* See the License for the specific language governing permissions and
12-
* limitations under the License.
13-
*
14-
* Taken from: http://stackoverflow.com/questions/990904/remove-accents-diacritics-in-a-string-in-javascript/18391901#18391901
15-
*/
16-
171
// prettier-ignore
2+
183
const map = [
194
{ base: 'A', letters: '\u0041\u24B6\uFF21\u00C0\u00C1\u00C2\u1EA6\u1EA4\u1EAA\u1EA8\u00C3\u0100\u0102\u1EB0\u1EAE\u1EB4\u1EB2\u0226\u01E0\u00C4\u01DE\u1EA2\u00C5\u01FA\u01CD\u0200\u0202\u1EA0\u1EAC\u1EB6\u1E00\u0104\u023A\u2C6F' },
205
{ base: 'AA', letters: '\uA732' },
@@ -109,12 +94,16 @@ const map = [
10994
return acc;
11095
}, {});
11196

112-
// "what?" version ... http://jsperf.com/diacritics/12
97+
// Combining marks, written as regex character-class fragments; stripped after NFD normalization
98+
const latin = '\u0300-\u036F';
99+
const japanese = '\u3099\u309A';
100+
113101
export default function stripDiacritics(str: string): string {
114102
return (
115103
str
116104
.normalize('NFD')
117-
.replace(/[\u0300-\u036F\u3099\u309A]/g, '') // Remove combining diacritics
105+
// Remove combining diacritics
106+
.replace(new RegExp(`[${latin}${japanese}]`, 'g'), '')
118107
/* eslint-disable-next-line no-control-regex */
119108
.replace(/[^\u0000-\u007E]/g, (a) => map[a] || a)
120109
);

0 commit comments

Comments
 (0)