File tree Expand file tree Collapse file tree 1 file changed +36
-1
lines changed
Expand file tree Collapse file tree 1 file changed +36
-1
lines changed Original file line number Diff line number Diff line change @@ -55,6 +55,40 @@ function detectBOM(bytes) {
5555 return null ;
5656}
5757
/**
 * Checks whether a byte sequence is well-formed UTF-8 (RFC 3629).
 *
 * Beyond matching lead/continuation bit patterns, this rejects sequences that
 * a conforming decoder must refuse: overlong encodings (0xC0/0xC1 leads,
 * 0xE0 0x80-0x9F, 0xF0 0x80-0x8F), UTF-16 surrogates (0xED 0xA0-0xBF) and
 * code points above U+10FFFF (0xF4 0x90+, and 0xF5-0xFF leads).
 *
 * A multi-byte sequence that is cut off by the end of the input is reported
 * as invalid. An empty input is reported as valid.
 *
 * @param {ArrayLike<number>} bytes - Byte values to validate.
 * @returns {boolean} `true` if every byte belongs to a well-formed sequence.
 */
function isValidUTF8(bytes) {
  // Continuation bytes have the form 10xxxxxx. An index past the end is
  // never a continuation, which also covers truncated sequences.
  const isContinuation = (index) =>
    index < bytes.length && bytes[index] >> 6 === 0x02;

  let i = 0;
  while (i < bytes.length) {
    const lead = bytes[i];

    if (lead < 0x80) {
      // Single-byte (ASCII) code point.
      i += 1;
    } else if (lead >= 0xc2 && lead <= 0xdf) {
      // Two-byte sequence. 0xC0 and 0xC1 are excluded because they could
      // only encode code points below U+0080 (overlong).
      if (!isContinuation(i + 1)) return false;
      i += 2;
    } else if (lead >= 0xe0 && lead <= 0xef) {
      // Three-byte sequence.
      if (!isContinuation(i + 1) || !isContinuation(i + 2)) return false;
      const second = bytes[i + 1];
      // 0xE0 followed by 0x80-0x9F is an overlong encoding of < U+0800.
      if (lead === 0xe0 && second < 0xa0) return false;
      // 0xED followed by 0xA0-0xBF encodes a surrogate (U+D800-U+DFFF).
      if (lead === 0xed && second > 0x9f) return false;
      i += 3;
    } else if (lead >= 0xf0 && lead <= 0xf4) {
      // Four-byte sequence. Leads above 0xF4 would exceed U+10FFFF.
      if (
        !isContinuation(i + 1) ||
        !isContinuation(i + 2) ||
        !isContinuation(i + 3)
      ) {
        return false;
      }
      const second = bytes[i + 1];
      // 0xF0 followed by 0x80-0x8F is an overlong encoding of < U+10000.
      if (lead === 0xf0 && second < 0x90) return false;
      // 0xF4 followed by 0x90-0xBF encodes a value above U+10FFFF.
      if (lead === 0xf4 && second > 0x8f) return false;
      i += 4;
    } else {
      // Stray continuation byte (0x80-0xBF) or a lead byte that can never
      // appear in UTF-8 (0xC0, 0xC1, 0xF5-0xFF).
      return false;
    }
  }
  return true;
}

91+
5892export async function detectEncoding ( buffer ) {
5993 if ( ! buffer || buffer . byteLength === 0 ) {
6094 return settings . value . defaultFileEncoding || "UTF-8" ;
@@ -74,9 +108,10 @@ export async function detectEncoding(buffer) {
74108 else if ( byte < 0x80 ) ascii ++ ;
75109 }
76110
77- if ( ascii / sample . length > 0.95 ) return "UTF-8" ;
78111 if ( nulls > sample . length * 0.3 ) return "UTF-16LE" ;
79112
113+ if ( isValidUTF8 ( sample ) ) return "UTF-8" ;
114+
80115 const encodings = [
81116 ...new Set ( [
82117 "UTF-8" ,
You can’t perform that action at this time.
0 commit comments