Skip to content

Commit 86447e9

Browse files
committed
We have extra and unnecessary checks.
1 parent 2adc753 commit 86447e9

4 files changed

Lines changed: 15 additions & 7 deletions

File tree

src/UTF8.cs

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,8 @@ private static (int utfAdjust, int scalarAdjust) GetFinalScalarUtfAdjustments(by
124124
} // Too short
125125
// range check
126126
codePoint = (uint)(firstByte & 0b00011111) << 6 | (uint)(buf[pos + 1] & 0b00111111);
127-
if ((codePoint < 0x80) || (0x7ff < codePoint))
127+
// codePoint is necessarily <= 0x7ff
128+
if (codePoint < 0x80)
128129
{
129130
return buf + pos;
130131
} // Overlong
@@ -141,7 +142,8 @@ private static (int utfAdjust, int scalarAdjust) GetFinalScalarUtfAdjustments(by
141142
(uint)(buf[pos + 1] & 0b00111111) << 6 |
142143
(uint)(buf[pos + 2] & 0b00111111);
143144
// Either overlong or too large:
144-
if ((codePoint < 0x800) || (0xffff < codePoint) ||
145+
// codePoint is necessarily <= 0xffff
146+
if ((codePoint < 0x800) ||
145147
(0xd7ff < codePoint && codePoint < 0xe000))
146148
{
147149
return buf + pos;
@@ -238,7 +240,8 @@ private static (int utfAdjust, int scalarAdjust) GetFinalScalarUtfAdjustments(by
238240
} // Too short
239241
// range check
240242
codePoint = (uint)(firstByte & 0b00011111) << 6 | (uint)(pInputBuffer[pos + 1] & 0b00111111);
241-
if ((codePoint < 0x80) || (0x7ff < codePoint))
243+
// codePoint is necessarily <= 0x7ff
244+
if (codePoint < 0x80)
242245
{
243246
utf16CodeUnitCountAdjustment = TempUtf16CodeUnitCountAdjustment;
244247
scalarCountAdjustment = TempScalarCountAdjustment;
@@ -261,7 +264,8 @@ private static (int utfAdjust, int scalarAdjust) GetFinalScalarUtfAdjustments(by
261264
(uint)(pInputBuffer[pos + 1] & 0b00111111) << 6 |
262265
(uint)(pInputBuffer[pos + 2] & 0b00111111);
263266
// Either overlong or too large:
264-
if ((codePoint < 0x800) || (0xffff < codePoint) ||
267+
// codePoint is necessarily <= 0xffff
268+
if ((codePoint < 0x800) ||
265269
(0xd7ff < codePoint && codePoint < 0xe000))
266270
{
267271
utf16CodeUnitCountAdjustment = TempUtf16CodeUnitCountAdjustment;

test/AsciiTest.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
namespace tests;
1+
namespace tests;
2+
23
using System.Text;
34
using SimdUnicode;
45

test/UTF8ValidationTests.cs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
namespace tests;
2+
23
using System.Text;
34
using SimdUnicode;
45
using System.Diagnostics;
@@ -1245,7 +1246,8 @@ public static bool ValidateUtf8Fuschia(byte[] data)
12451246
if ((data[pos + 1] & 0b11000000) != 0b10000000) return false;
12461247

12471248
codePoint = (uint)((byte1 & 0b00011111) << 6 | (data[pos + 1] & 0b00111111));
1248-
if (codePoint < 0x80 || 0x7ff < codePoint) return false;
1249+
// codePoint is necessarily <= 0x7ff
1250+
if (codePoint < 0x80) return false;
12491251
pos += 2;
12501252
}
12511253
else if ((byte1 & 0b11110000) == 0b11100000)
@@ -1255,7 +1257,7 @@ public static bool ValidateUtf8Fuschia(byte[] data)
12551257
if ((data[pos + 2] & 0b11000000) != 0b10000000) return false;
12561258

12571259
codePoint = (uint)((byte1 & 0b00001111) << 12 | (data[pos + 1] & 0b00111111) << 6 | (data[pos + 2] & 0b00111111));
1258-
if (codePoint < 0x800 || 0xffff < codePoint || (0xd7ff < codePoint && codePoint < 0xe000)) return false;
1260+
if (codePoint < 0x800 || (0xd7ff < codePoint && codePoint < 0xe000)) return false;
12591261
pos += 3;
12601262
}
12611263
else if ((byte1 & 0b11111000) == 0b11110000)

test/helpers/randomutf8.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
namespace tests;
2+
23
using System;
34
using System.Collections.Generic;
45
using System.Linq;

0 commit comments

Comments
 (0)