Skip to content

Commit 15bbecf

Browse files
committed
Switched name encoding to more reliable Secomba Base4K implementation
1 parent 29c32d8 commit 15bbecf

4 files changed

Lines changed: 296 additions & 5 deletions

File tree

Lines changed: 266 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,266 @@
1+
// Parts of the following code are adapted from Secomba/Base4K and are used under the terms of the MIT License.
2+
// See the associated license file for more information.
3+
4+
using System;
5+
using System.Buffers;
6+
using System.Collections.Generic;
7+
using System.IO;
8+
using System.Runtime.CompilerServices;
9+
using System.Text;
10+
11+
namespace SecureFolderFS.Core.Cryptography.Cipher
{
    /// <summary>
    /// Identifies the Unicode region that Base4K payload characters are mapped into.
    /// </summary>
    public enum Base4KVersion
    {
        /// <summary>Legacy layout: payload characters occupy U+5000..U+5FFF.</summary>
        V1,

        /// <summary>Current layout: payload characters occupy U+6000..U+6FFF.</summary>
        V2
    }

    /// <summary>
    /// Base4K codec. Packs binary data into characters that each carry 12 bits (three nibbles),
    /// followed by at most one trailing "flag" character that carries the 4 or 8 bits left over.
    /// </summary>
    /// <remarks>
    /// Every code point produced lies in U+4000..U+6FFF, so each one is exactly one UTF-16
    /// <see cref="char"/>. The codec therefore works on characters directly and needs no
    /// intermediate UTF-8 representation.
    /// </remarks>
    public static class SecombaBase4K
    {
        // First code point of the region used by the trailing flag (remainder) character.
        private const int BASE_FLAG_START = 0x04000;

        // First code point of the 12-bit payload region (V2).
        private const int BASE1_START = 0x06000;

        // First code point of the 12-bit payload region (V1, legacy).
        private const int BASE1_START_LEGACY = 0x05000;

        // Number of code points in the flag region (one per possible remainder byte).
        private const int BASE_FLAG_SIZE = 0x100;

        // Number of code points in the payload region (one per possible 12-bit value).
        private const int BASE1_SIZE = 0x01000;

        // Encoded outputs of up to this many characters are assembled on the stack.
        private const int STACK_CHAR_LIMIT = 256;

        /// <summary>
        /// Encodes the specified raw bytes as a Base4K string, mapping each group of 12 bits
        /// to a Unicode character in a specific range, suitable for use as file names.
        /// </summary>
        /// <param name="raw">The raw bytes to encode.</param>
        /// <param name="version">The version of Base4K encoding to use. Defaults to <see cref="Base4KVersion.V2"/>.</param>
        /// <returns>A Base4K-encoded string representation of the input bytes.</returns>
        /// <exception cref="ArgumentException">Thrown when <paramref name="raw"/> contains fewer than 2 bytes.</exception>
        public static string Encode(ReadOnlySpan<byte> raw, Base4KVersion version = Base4KVersion.V2)
        {
            if (raw.Length <= 1)
                throw new ArgumentException("Input must be at least 2 bytes long.", nameof(raw));

            var payloadStart = version == Base4KVersion.V1 ? BASE1_START_LEGACY : BASE1_START;
            var nibbleCount = raw.Length * 2;

            // One character per three nibbles, plus one flag character when nibbles are left over.
            var charCount = (nibbleCount + 2) / 3;
            Span<char> chars = charCount <= STACK_CHAR_LIMIT ? stackalloc char[charCount] : new char[charCount];
            var position = 0;

            // Walk the input three nibbles at a time. The bound stops one step early whenever
            // fewer than three nibbles would remain, so raw[index + 1] is always in range.
            for (var nibble = 0; nibble < nibbleCount - 2; nibble += 3)
            {
                var index = nibble / 2;
                var group = nibble % 2 == 0
                    ? (raw[index] << 4) | (raw[index + 1] >> 4)        // whole byte + high nibble of the next
                    : ((raw[index] & 0x0f) << 8) | raw[index + 1];     // low nibble + whole next byte

                chars[position++] = (char)(payloadStart + group);
            }

            // Emit the remainder (if any) in the flag region.
            switch (nibbleCount % 3)
            {
                case 2:
                    // A whole byte is left over.
                    chars[position++] = (char)(BASE_FLAG_START + raw[^1]);
                    break;

                case 1:
                    // Only the low nibble of the last byte is left over.
                    chars[position++] = (char)(BASE_FLAG_START + (raw[^1] & 0x0f));
                    break;
            }

            return new string(chars.Slice(0, position));
        }

        /// <summary>
        /// Decodes a Base4K-encoded string back to the original raw bytes.
        /// Attempts decoding with both V2 and V1 (legacy) base addresses automatically.
        /// </summary>
        /// <param name="encoded">The Base4K-encoded string to decode.</param>
        /// <returns>
        /// The decoded bytes, or <see langword="null"/> if decoding failed due to invalid or malformed input.
        /// An empty input decodes to an empty array.
        /// </returns>
        public static byte[]? Decode(ReadOnlySpan<char> encoded)
        {
            return DecodeInternal(encoded, BASE1_START) ?? DecodeInternal(encoded, BASE1_START_LEGACY);
        }

        /// <summary>
        /// Decodes <paramref name="encoded"/> assuming payload characters start at <paramref name="base1Start"/>.
        /// </summary>
        /// <param name="encoded">The Base4K-encoded characters.</param>
        /// <param name="base1Start">First code point of the payload region to decode against.</param>
        /// <returns>
        /// The decoded bytes, or <see langword="null"/> when any character lies outside the payload region
        /// (a flag-region character is accepted only in the last position).
        /// </returns>
        private static byte[]? DecodeInternal(ReadOnlySpan<char> encoded, int base1Start)
        {
            if (encoded.IsEmpty)
                return Array.Empty<byte>();

            // Only the final character may come from the flag region.
            int last = encoded[^1];
            var hasFlag = last >= BASE_FLAG_START && last < BASE_FLAG_START + BASE_FLAG_SIZE;
            var payloadCount = hasFlag ? encoded.Length - 1 : encoded.Length;

            // Payload characters alternately complete one byte (even index) and two bytes (odd index);
            // the flag character always completes exactly one more byte.
            var result = new byte[payloadCount * 3 / 2 + (hasFlag ? 1 : 0)];
            var position = 0;
            var previous = 0;

            for (var i = 0; i < payloadCount; i++)
            {
                // Anything outside the payload region (ASCII, surrogates, a misplaced flag, ...) is malformed.
                var group = encoded[i] - base1Start;
                if (group < 0 || group >= BASE1_SIZE)
                    return null;

                if (i % 2 == 0)
                {
                    // Byte-aligned group: the top 8 bits form a byte, the low nibble waits for the next character.
                    result[position++] = (byte)(group >> 4);
                }
                else
                {
                    // Pending low nibble of the previous group + high nibble of this one, then the low byte.
                    result[position++] = (byte)(((previous << 4) | (group >> 8)) & 0xff);
                    result[position++] = (byte)(group & 0xff);
                }

                previous = group;
            }

            if (hasFlag)
            {
                var flag = last - BASE_FLAG_START;
                result[position] = payloadCount % 2 == 0
                    ? (byte)flag                                            // flag carries a whole byte
                    : (byte)(((previous << 4) | (flag & 0x0f)) & 0xff);    // flag carries the missing low nibble
            }

            return result;
        }
    }
}

src/Core/SecureFolderFS.Core.Cryptography/NameCrypt/BaseNameCrypt.cs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
using System.Buffers.Text;
33
using System.Runtime.CompilerServices;
44
using System.Text;
5-
using Lex4K;
5+
using SecureFolderFS.Core.Cryptography.Cipher;
66

77
namespace SecureFolderFS.Core.Cryptography.NameCrypt
88
{
@@ -35,7 +35,7 @@ public virtual string EncryptName(ReadOnlySpan<char> plaintextName, ReadOnlySpan
3535
return fileNameEncodingId switch
3636
{
3737
Constants.CipherId.ENCODING_BASE64URL => Base64Url.EncodeToString(ciphertextNameBuffer),
38-
Constants.CipherId.ENCODING_BASE4K => Base4K.EncodeChainToString(ciphertextNameBuffer),
38+
Constants.CipherId.ENCODING_BASE4K => SecombaBase4K.Encode(ciphertextNameBuffer),
3939
_ => throw new ArgumentOutOfRangeException(nameof(fileNameEncodingId))
4040
};
4141
}
@@ -46,7 +46,7 @@ public virtual string EncryptName(ReadOnlySpan<char> plaintextName, ReadOnlySpan
4646
{
4747
try
4848
{
49-
if (!ciphertextName.IsNormalized(NORMALIZATION))
49+
if (fileNameEncodingId == Constants.CipherId.ENCODING_BASE4K && !ciphertextName.IsNormalized(NORMALIZATION))
5050
{
5151
var normalizedLength = ciphertextName.GetNormalizedLength(NORMALIZATION);
5252
var destination = normalizedLength < 256 ? stackalloc char[normalizedLength] : new char[normalizedLength];
@@ -73,7 +73,7 @@ public virtual string EncryptName(ReadOnlySpan<char> plaintextName, ReadOnlySpan
7373
var decoded = fileNameEncodingId switch
7474
{
7575
Constants.CipherId.ENCODING_BASE64URL => Base64Url.DecodeFromChars(name),
76-
Constants.CipherId.ENCODING_BASE4K => Base4K.DecodeChainToNewBuffer(name),
76+
Constants.CipherId.ENCODING_BASE4K => SecombaBase4K.Decode(name),
7777
_ => throw new ArgumentOutOfRangeException(nameof(fileNameEncodingId))
7878
};
7979

src/Core/SecureFolderFS.Core.FileSystem/Helpers/Paths/Abstract/AbstractPathHelpers.cs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
using System;
22
using System.IO;
3-
using System.Text;
43
using SecureFolderFS.Core.Cryptography;
54

65
namespace SecureFolderFS.Core.FileSystem.Helpers.Paths.Abstract
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
Secomba Base4K
2+
https://github.com/secomba/base4k/blob/master/LICENSE
3+
MIT
4+
https://github.com/secomba/base4k
5+
6+
The MIT License (MIT)
7+
8+
Copyright (c) 2014 Secomba GmbH
9+
10+
Permission is hereby granted, free of charge, to any person obtaining a copy
11+
of this software and associated documentation files (the "Software"), to deal
12+
in the Software without restriction, including without limitation the rights
13+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14+
copies of the Software, and to permit persons to whom the Software is
15+
furnished to do so, subject to the following conditions:
16+
17+
The above copyright notice and this permission notice shall be included in all
18+
copies or substantial portions of the Software.
19+
20+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26+
SOFTWARE.

0 commit comments

Comments
 (0)