Skip to content

Commit 79e5ceb

Browse files
authored
[2016] 升级 Goldfish 到 18.11.15(base64/string->utf8 改用 C++ 实现) (#3851)
1 parent 18dfdd9 commit 79e5ceb

7 files changed

Lines changed: 431 additions & 160 deletions

File tree

TeXmacs/plugins/goldfish/goldfish/liii/base64.scm

Lines changed: 2 additions & 111 deletions
Original file line numberDiff line numberDiff line change
@@ -8,56 +8,9 @@
88
base64-decode
99
) ;export
1010
(begin
11-
(define-constant BYTE2BASE64_BV
12-
(string->utf8 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
13-
) ;string->utf8
14-
) ;define-constant
15-
16-
(define-constant BASE64_PAD_BYTE (char->integer #\=))
17-
1811
(define bytevector-base64-encode
1912
(typed-lambda ((bv bytevector?))
20-
(define (encode b1 b2 b3)
21-
(let* ((p1 b1)
22-
(p2 (if b2 b2 0))
23-
(p3 (if b3 b3 0))
24-
(combined (bitwise-ior (ash p1 16) (ash p2 8) p3))
25-
(c1 (bitwise-and (ash combined -18) 63))
26-
(c2 (bitwise-and (ash combined -12) 63))
27-
(c3 (bitwise-and (ash combined -6) 63))
28-
(c4 (bitwise-and combined 63))
29-
) ;
30-
(values (BYTE2BASE64_BV c1)
31-
(BYTE2BASE64_BV c2)
32-
(if b2 (BYTE2BASE64_BV c3) BASE64_PAD_BYTE)
33-
(if b3 (BYTE2BASE64_BV c4) BASE64_PAD_BYTE)
34-
) ;values
35-
) ;let*
36-
) ;define
37-
(let* ((input-N (bytevector-length bv))
38-
(output-N (* 4 (ceiling (/ input-N 3))))
39-
(output (make-bytevector output-N))
40-
) ;
41-
(let loop
42-
((i 0) (j 0))
43-
(when (< i input-N)
44-
(let* ((b1 (bv i))
45-
(b2 (if (< (+ i 1) input-N) (bv (+ i 1)) #f))
46-
(b3 (if (< (+ i 2) input-N) (bv (+ i 2)) #f))
47-
) ;
48-
(receive (r1 r2 r3 r4)
49-
(encode b1 b2 b3)
50-
(bytevector-u8-set! output j r1)
51-
(bytevector-u8-set! output (+ j 1) r2)
52-
(bytevector-u8-set! output (+ j 2) r3)
53-
(bytevector-u8-set! output (+ j 3) r4)
54-
(loop (+ i 3) (+ j 4))
55-
) ;receive
56-
) ;let*
57-
) ;when
58-
) ;let
59-
output
60-
) ;let*
13+
(g_bytevector-base64-encode bv (bytevector-length bv))
6114
) ;typed-lambda
6215
) ;define
6316

@@ -74,70 +27,8 @@
7427
) ;cond
7528
) ;define
7629

77-
(define-constant BASE64_TO_BYTE_V
78-
(let ((byte2base64-N (bytevector-length BYTE2BASE64_BV)))
79-
(let loop
80-
((i 0) (v (make-vector 256 -1)))
81-
(if (< i byte2base64-N)
82-
(begin
83-
(vector-set! v (BYTE2BASE64_BV i) i)
84-
(loop (+ i 1) v)
85-
) ;begin
86-
v
87-
) ;if
88-
) ;let
89-
) ;let
90-
) ;define-constant
91-
9230
(define (bytevector-base64-decode bv)
93-
(define (decode c1 c2 c3 c4)
94-
(let* ((b1 (BASE64_TO_BYTE_V c1))
95-
(b2 (BASE64_TO_BYTE_V c2))
96-
(b3 (BASE64_TO_BYTE_V c3))
97-
(b4 (BASE64_TO_BYTE_V c4))
98-
) ;
99-
(if (or (negative? b1)
100-
(negative? b2)
101-
(and (negative? b3) (not (equal? c3 BASE64_PAD_BYTE)))
102-
(and (negative? b4) (not (equal? c4 BASE64_PAD_BYTE)))
103-
) ;or
104-
(value-error "Invalid base64 input")
105-
(values (bitwise-ior (ash b1 2) (ash b2 -4))
106-
(bitwise-and (bitwise-ior (ash b2 4) (ash b3 -2)) 255)
107-
(bitwise-and (bitwise-ior (ash b3 6) b4) 255)
108-
(if (negative? b3) 1 (if (negative? b4) 2 3))
109-
) ;values
110-
) ;if
111-
) ;let*
112-
) ;define
113-
(let* ((input-N (bytevector-length bv))
114-
(output-N (* input-N 3/4))
115-
(output (make-bytevector output-N))
116-
) ;
117-
(unless (zero? (modulo input-N 4))
118-
(value-error "length of the input bytevector must be 4X")
119-
) ;unless
120-
(let loop
121-
((i 0) (j 0))
122-
(if (< i input-N)
123-
(receive (r1 r2 r3 cnt)
124-
(decode (bv i) (bv (+ i 1)) (bv (+ i 2)) (bv (+ i 3)))
125-
(bytevector-u8-set! output j r1)
126-
(when (>= cnt 2)
127-
(bytevector-u8-set! output (+ j 1) r2)
128-
) ;when
129-
(when (>= cnt 3)
130-
(bytevector-u8-set! output (+ j 2) r3)
131-
) ;when
132-
(loop (+ i 4) (+ j cnt))
133-
) ;receive
134-
(let ((final (make-bytevector j)))
135-
(vector-copy! final 0 output 0 j)
136-
final
137-
) ;let
138-
) ;if
139-
) ;let
140-
) ;let*
31+
(g_bytevector-base64-decode bv (bytevector-length bv))
14132
) ;define
14233

14334
(define string-base64-decode

TeXmacs/plugins/goldfish/goldfish/scheme/base.scm

Lines changed: 1 addition & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -574,54 +574,7 @@
574574
) ;if
575575
) ;define*
576576

577-
(define* (string->utf8 str (start 0) (end #t))
578-
;; start < end in this case
579-
(define (string->utf8-sub str start end)
580-
(let ((bv (string->byte-vector str)) (N (string-length str)))
581-
(let loop
582-
((pos 0) (cnt 0) (start-pos 0))
583-
(let ((next-pos (bytevector-advance-utf8 bv pos N)))
584-
(cond ((and (not (zero? start)) (zero? start-pos) (= cnt start))
585-
(loop next-pos (+ cnt 1) pos)
586-
) ;
587-
((and (integer? end) (= cnt end))
588-
(copy bv (make-byte-vector (- pos start-pos)) start-pos pos)
589-
) ;
590-
((and end (= next-pos N))
591-
(copy bv (make-byte-vector (- N start-pos)) start-pos N)
592-
) ;
593-
((= next-pos pos) (error 'value-error "Invalid UTF-8 sequence at index: " pos))
594-
(else (loop next-pos (+ cnt 1) start-pos))
595-
) ;cond
596-
) ;let
597-
) ;let
598-
) ;let
599-
) ;define
600-
601-
(when (not (string? str))
602-
(error 'type-error "str must be string")
603-
) ;when
604-
(let ((N (utf8-string-length str)))
605-
(when (and (> N 0) (or (< start 0) (>= start N)))
606-
(error 'out-of-range
607-
(string-append "start must >= 0 and < " (number->string N))
608-
) ;error
609-
) ;when
610-
(when (and (integer? end) (or (< end 0) (>= end (+ N 1))))
611-
(error 'out-of-range
612-
(string-append "end must >= 0 and < " (number->string (+ N 1)))
613-
) ;error
614-
) ;when
615-
(when (and (integer? end) (> start end))
616-
(error 'out-of-range "start <= end failed" start end)
617-
) ;when
618-
619-
(if (and (integer? end) (= start end))
620-
(byte-vector)
621-
(string->utf8-sub str start end)
622-
) ;if
623-
) ;let
624-
) ;define*
577+
(define* (string->utf8 str (start 0) (end #t)) (g_string->utf8 str start end))
625578

626579
(define (raise . args)
627580
(apply throw #t args)

TeXmacs/plugins/goldfish/src/goldfish.hpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@
6868
#include <isocline.h>
6969
#endif
7070

71-
#define GOLDFISH_VERSION "18.11.13"
71+
#define GOLDFISH_VERSION "18.11.15"
7272

7373
#define GOLDFISH_PATH_MAXN TB_PATH_MAXN
7474

@@ -108,6 +108,8 @@ static vector<string> find_function_libraries_in_load_path (s7_scheme* sc, const
108108
void glue_njson (s7_scheme* sc);
109109
void glue_http (s7_scheme* sc);
110110
void glue_http_async (s7_scheme* sc);
111+
void glue_liii_base64 (s7_scheme* sc);
112+
void glue_scheme_base (s7_scheme* sc);
111113
void glue_liii_hashlib (s7_scheme* sc);
112114
void glue_liii_os (s7_scheme* sc);
113115
void glue_liii_path (s7_scheme* sc);
@@ -699,6 +701,8 @@ glue_for_community_edition (s7_scheme* sc) {
699701
glue_liii_datetime (sc);
700702
glue_liii_uuid (sc);
701703
glue_liii_hashlib (sc);
704+
glue_liii_base64 (sc);
705+
glue_scheme_base (sc);
702706
glue_njson (sc);
703707
glue_http (sc);
704708
glue_http_async (sc);
Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
//
2+
// Copyright (C) 2024-2026 The Goldfish Scheme Authors
3+
//
4+
// Licensed under the Apache License, Version 2.0 (the "License");
5+
// you may not use this file except in compliance with the License.
6+
// You may obtain a copy of the License at
7+
//
8+
// http://www.apache.org/licenses/LICENSE-2.0
9+
//
10+
// Unless required by applicable law or agreed to in writing, software
11+
// distributed under the License is distributed on an "AS IS" BASIS,
12+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied See the
13+
// License for the specific language governing permissions and limitations
14+
// under the License.
15+
//
16+
17+
#include "s7.h"
18+
#include <cstring>
19+
20+
namespace goldfish {
21+
22+
static s7_pointer
23+
base64_error (s7_scheme* sc, const char* caller, const char* kind, const char* msg, s7_pointer arg) {
24+
return s7_error (sc, s7_make_symbol (sc, kind), s7_list (sc, 2, s7_make_string (sc, msg), arg));
25+
}
26+
27+
static s7_pointer
28+
f_bytevector_base64_decode (s7_scheme* sc, s7_pointer args) {
29+
s7_pointer arg= s7_car (args);
30+
31+
if (!s7_is_byte_vector (arg)) {
32+
return base64_error (sc, "bytevector-base64-decode", "type-error",
33+
"bytevector-base64-decode: input must be bytevector", arg);
34+
}
35+
36+
s7_int in_len= s7_integer (s7_cadr (args));
37+
uint8_t* in = (uint8_t*) s7_byte_vector_elements (arg);
38+
39+
if (in_len % 4 != 0) {
40+
return base64_error (sc, "bytevector-base64-decode", "value-error",
41+
"bytevector-base64-decode: length of the input bytevector must be 4X", arg);
42+
}
43+
44+
static const uint8_t decode_table[256]= {
45+
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
46+
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62,
47+
255, 255, 255, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, 255, 255, 255, 255, 255, 0,
48+
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
49+
23, 24, 25, 255, 255, 255, 255, 255, 255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
50+
39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 255, 255, 255, 255, 255, 255, 255, 255, 255,
51+
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
52+
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
53+
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
54+
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
55+
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
56+
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255};
57+
58+
const uint8_t PAD= (uint8_t) '=';
59+
60+
s7_int out_cap= (in_len / 4) * 3;
61+
s7_pointer out = s7_make_byte_vector (sc, out_cap, 1, NULL);
62+
uint8_t* out_buf= (uint8_t*) s7_byte_vector_elements (out);
63+
s7_int out_len= 0;
64+
65+
for (s7_int i= 0; i < in_len; i+= 4) {
66+
uint8_t c1= in[i];
67+
uint8_t c2= in[i + 1];
68+
uint8_t c3= in[i + 2];
69+
uint8_t c4= in[i + 3];
70+
71+
bool c3_pad= (c3 == PAD);
72+
bool c4_pad= (c4 == PAD);
73+
74+
if (c1 == PAD || c2 == PAD) {
75+
return base64_error (sc, "bytevector-base64-decode", "value-error",
76+
"bytevector-base64-decode: Invalid base64 input", arg);
77+
}
78+
79+
uint8_t v1= decode_table[c1];
80+
uint8_t v2= decode_table[c2];
81+
uint8_t v3= c3_pad ? 0 : decode_table[c3];
82+
uint8_t v4= c4_pad ? 0 : decode_table[c4];
83+
84+
if (v1 == 0xFF || v2 == 0xFF || (!c3_pad && v3 == 0xFF) || (!c4_pad && v4 == 0xFF) || (c3_pad && !c4_pad)) {
85+
return base64_error (sc, "bytevector-base64-decode", "value-error",
86+
"bytevector-base64-decode: Invalid base64 input", arg);
87+
}
88+
89+
out_buf[out_len++]= (uint8_t) ((v1 << 2) | (v2 >> 4));
90+
if (!c3_pad) {
91+
out_buf[out_len++]= (uint8_t) ((v2 << 4) | (v3 >> 2));
92+
if (!c4_pad) {
93+
out_buf[out_len++]= (uint8_t) ((v3 << 6) | v4);
94+
}
95+
}
96+
}
97+
98+
if (out_len != out_cap) {
99+
s7_pointer final_out= s7_make_byte_vector (sc, out_len, 1, NULL);
100+
memcpy (s7_byte_vector_elements (final_out), out_buf, out_len);
101+
return final_out;
102+
}
103+
return out;
104+
}
105+
106+
static s7_pointer
107+
f_bytevector_base64_encode (s7_scheme* sc, s7_pointer args) {
108+
s7_pointer arg= s7_car (args);
109+
110+
if (!s7_is_byte_vector (arg)) {
111+
return base64_error (sc, "bytevector-base64-encode", "type-error",
112+
"bytevector-base64-encode: input must be bytevector", arg);
113+
}
114+
115+
s7_int in_len= s7_integer (s7_cadr (args));
116+
uint8_t* in = (uint8_t*) s7_byte_vector_elements (arg);
117+
118+
static const uint8_t encode_table[64]= {
119+
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
120+
'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r',
121+
's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'};
122+
123+
const uint8_t PAD= (uint8_t) '=';
124+
125+
s7_int out_len= (in_len == 0) ? 0 : 4 * ((in_len + 2) / 3);
126+
s7_pointer out = s7_make_byte_vector (sc, out_len, 1, NULL);
127+
uint8_t* out_buf= (uint8_t*) s7_byte_vector_elements (out);
128+
129+
s7_int i= 0;
130+
s7_int j= 0;
131+
132+
while (i + 2 < in_len) {
133+
uint32_t triple= ((uint32_t) in[i] << 16) | ((uint32_t) in[i + 1] << 8) | in[i + 2];
134+
out_buf[j] = encode_table[(triple >> 18) & 0x3F];
135+
out_buf[j + 1] = encode_table[(triple >> 12) & 0x3F];
136+
out_buf[j + 2] = encode_table[(triple >> 6) & 0x3F];
137+
out_buf[j + 3] = encode_table[triple & 0x3F];
138+
i+= 3;
139+
j+= 4;
140+
}
141+
142+
s7_int rem= in_len - i;
143+
if (rem == 1) {
144+
uint32_t triple= (uint32_t) in[i] << 16;
145+
out_buf[j] = encode_table[(triple >> 18) & 0x3F];
146+
out_buf[j + 1] = encode_table[(triple >> 12) & 0x3F];
147+
out_buf[j + 2] = PAD;
148+
out_buf[j + 3] = PAD;
149+
}
150+
else if (rem == 2) {
151+
uint32_t triple= ((uint32_t) in[i] << 16) | ((uint32_t) in[i + 1] << 8);
152+
out_buf[j] = encode_table[(triple >> 18) & 0x3F];
153+
out_buf[j + 1] = encode_table[(triple >> 12) & 0x3F];
154+
out_buf[j + 2] = encode_table[(triple >> 6) & 0x3F];
155+
out_buf[j + 3] = PAD;
156+
}
157+
158+
return out;
159+
}
160+
161+
static void
162+
glue_bytevector_base64_decode (s7_scheme* sc) {
163+
const char* name= "g_bytevector-base64-decode";
164+
const char* desc= "(g_bytevector-base64-decode bv len) => bytevector";
165+
s7_define_function (sc, name, f_bytevector_base64_decode, 2, 0, false, desc);
166+
}
167+
168+
static void
169+
glue_bytevector_base64_encode (s7_scheme* sc) {
170+
const char* name= "g_bytevector-base64-encode";
171+
const char* desc= "(g_bytevector-base64-encode bv len) => bytevector";
172+
s7_define_function (sc, name, f_bytevector_base64_encode, 2, 0, false, desc);
173+
}
174+
175+
void
176+
glue_liii_base64 (s7_scheme* sc) {
177+
glue_bytevector_base64_decode (sc);
178+
glue_bytevector_base64_encode (sc);
179+
}
180+
181+
} // namespace goldfish

0 commit comments

Comments
 (0)