// Parts of the following code are taken from Secomba/Base4K and are used under the MIT License.
// See the associated license file for more information.
3+
4+ using System ;
5+ using System . Buffers ;
6+ using System . Collections . Generic ;
7+ using System . IO ;
8+ using System . Runtime . CompilerServices ;
9+ using System . Text ;
10+
namespace SecureFolderFS.Core.Cryptography.Cipher
{
    /// <summary>
    /// Selects the Unicode range that Base4K payload characters are mapped into.
    /// </summary>
    public enum Base4KVersion
    {
        /// <summary>Legacy layout; payload characters start at U+5000.</summary>
        V1,

        /// <summary>Current layout; payload characters start at U+6000.</summary>
        V2
    }

    /// <summary>
    /// Base4K codec. Every output character carries 12 bits of input (three nibbles);
    /// a leftover byte or nibble is carried by one trailing marker character taken
    /// from a separate range.
    /// </summary>
    /// <remarks>
    /// All code points used by the codec (U+4000..U+40FF and U+5000..U+6FFF) fit in a single
    /// UTF-16 code unit, so encoding and decoding operate directly on <see cref="char"/> values.
    /// </remarks>
    public static class SecombaBase4K
    {
        // First code point of the trailing-marker region and of the two payload regions.
        private const int BASE_FLAG_START = 0x04000;
        private const int BASE1_START = 0x06000;
        private const int BASE1_START_LEGACY = 0x05000;

        // Number of code points in each region.
        private const int BASE_FLAG_SIZE = 0x100;
        private const int BASE1_SIZE = 0x01000;

        /// <summary>
        /// Encodes the specified raw bytes as a Base4K string, mapping each group of 12 bits
        /// to one Unicode character in the payload range selected by <paramref name="version"/>.
        /// </summary>
        /// <param name="raw">The raw bytes to encode. Must contain at least two bytes.</param>
        /// <param name="version">The version of Base4K encoding to use. Defaults to <see cref="Base4KVersion.V2"/>.</param>
        /// <returns>A Base4K-encoded string representation of the input bytes.</returns>
        /// <exception cref="ArgumentException">Thrown when <paramref name="raw"/> contains fewer than two bytes.</exception>
        public static string Encode(ReadOnlySpan<byte> raw, Base4KVersion version = Base4KVersion.V2)
        {
            if (raw.Length <= 1)
                throw new ArgumentException("Input must be at least 2 bytes long.", nameof(raw));

            var base1Start = version == Base4KVersion.V1 ? BASE1_START_LEGACY : BASE1_START;

            // Three bytes (six nibbles) become two payload characters.
            // One leftover byte adds a marker; two leftover bytes add a payload character plus a marker.
            var charCount = (raw.Length / 3) * 2 + raw.Length % 3;

            var rented = ArrayPool<char>.Shared.Rent(charCount);
            try
            {
                var pos = 0;
                var i = 0;

                for (; i + 3 <= raw.Length; i += 3)
                {
                    // First character: byte 0 and the high nibble of byte 1.
                    rented[pos++] = (char)(base1Start + ((raw[i] << 4) | (raw[i + 1] >> 4)));

                    // Second character: the low nibble of byte 1 and byte 2.
                    rented[pos++] = (char)(base1Start + (((raw[i + 1] & 0x0f) << 8) | raw[i + 2]));
                }

                switch (raw.Length - i)
                {
                    case 1:
                        // A whole byte is left over: the marker carries all eight bits.
                        rented[pos++] = (char)(BASE_FLAG_START + raw[i]);
                        break;

                    case 2:
                        // Twelve bits fill one more payload character; the marker carries the last nibble.
                        rented[pos++] = (char)(base1Start + ((raw[i] << 4) | (raw[i + 1] >> 4)));
                        rented[pos++] = (char)(BASE_FLAG_START + (raw[i + 1] & 0x0f));
                        break;
                }

                return new string(rented, 0, pos);
            }
            finally
            {
                ArrayPool<char>.Shared.Return(rented);
            }
        }

        /// <summary>
        /// Decodes a Base4K-encoded string back to the original raw bytes.
        /// Decoding is attempted with the V2 payload range first and then with the V1 (legacy) range.
        /// </summary>
        /// <param name="encoded">The Base4K-encoded string to decode.</param>
        /// <returns>
        /// The decoded bytes, or <see langword="null"/> if the input contains a character outside the
        /// expected ranges (including surrogates) or a trailing marker anywhere but the last position.
        /// </returns>
        public static byte[]? Decode(ReadOnlySpan<char> encoded)
        {
            return DecodeInternal(encoded, BASE1_START) ?? DecodeInternal(encoded, BASE1_START_LEGACY);
        }

        /// <summary>
        /// Decodes <paramref name="encoded"/> assuming payload characters start at <paramref name="base1Start"/>.
        /// </summary>
        /// <param name="encoded">The Base4K-encoded characters.</param>
        /// <param name="base1Start">First code point of the payload range to accept.</param>
        /// <returns>The decoded bytes, or <see langword="null"/> when the input does not match the layout.</returns>
        private static byte[]? DecodeInternal(ReadOnlySpan<char> encoded, int base1Start)
        {
            if (encoded.IsEmpty)
                return Array.Empty<byte>();

            // Every character except the last must be a payload character.
            var lastIndex = encoded.Length - 1;
            for (var i = 0; i < lastIndex; i++)
            {
                if (!IsInRegion(encoded[i], base1Start, BASE1_SIZE))
                    return null;
            }

            // The last character may be a payload character or the trailing marker.
            var hasMarker = false;
            if (!IsInRegion(encoded[lastIndex], base1Start, BASE1_SIZE))
            {
                if (!IsInRegion(encoded[lastIndex], BASE_FLAG_START, BASE_FLAG_SIZE))
                    return null;

                hasMarker = true;
            }

            var payloadCount = hasMarker ? lastIndex : encoded.Length;
            var pairedCount = payloadCount - payloadCount % 2;

            // Two payload characters yield three bytes, an unpaired one yields a single byte,
            // and the marker completes or adds exactly one byte.
            var result = new byte[(payloadCount / 2) * 3 + payloadCount % 2 + (hasMarker ? 1 : 0)];
            var pos = 0;

            for (var i = 0; i < pairedCount; i += 2)
            {
                var first = encoded[i] - base1Start;
                var second = encoded[i + 1] - base1Start;

                result[pos++] = (byte)(first >> 4);
                result[pos++] = (byte)(((first & 0x0f) << 4) | (second >> 8));
                result[pos++] = (byte)(second & 0xff);
            }

            if (payloadCount % 2 == 1)
            {
                var first = encoded[payloadCount - 1] - base1Start;
                result[pos++] = (byte)(first >> 4);

                if (hasMarker)
                {
                    // The marker supplies the low nibble that completes the pending byte.
                    var marker = encoded[payloadCount] - BASE_FLAG_START;
                    result[pos++] = (byte)(((first & 0x0f) << 4) | (marker & 0x0f));
                }

                // Without a marker the remaining four bits are discarded,
                // matching the previous behavior of this method.
            }
            else if (hasMarker)
            {
                // The marker carries a complete byte.
                result[pos++] = (byte)((encoded[payloadCount] - BASE_FLAG_START) & 0xff);
            }

            return result;
        }

        /// <summary>
        /// Tells whether <paramref name="code"/> lies in the half-open range
        /// [<paramref name="start"/>, <paramref name="start"/> + <paramref name="size"/>).
        /// </summary>
        private static bool IsInRegion(int code, int start, int size)
        {
            return code >= start && code < start + size;
        }
    }
}
0 commit comments