|
| 1 | +/* |
| 2 | +** 2022-11-18 |
| 3 | +** |
| 4 | +** The author disclaims copyright to this source code. In place of |
| 5 | +** a legal notice, here is a blessing: |
| 6 | +** |
| 7 | +** May you do good and not evil. |
| 8 | +** May you find forgiveness for yourself and forgive others. |
| 9 | +** May you share freely, never taking more than you give. |
| 10 | +** |
| 11 | +************************************************************************* |
| 12 | +** |
| 13 | +** This is a SQLite extension for converting in either direction |
| 14 | +** between a (binary) blob and base64 text. Base64 can transit a |
| 15 | +** sane USASCII channel unmolested. It also plays nicely in CSV or |
| 16 | +** written as TCL brace-enclosed literals or SQL string literals, |
| 17 | +** and can be used unmodified in XML-like documents. |
| 18 | +** |
| 19 | +** This is an independent implementation of conversions specified in |
| 20 | +** RFC 4648, done on the above date by the author (Larry Brasfield) |
| 21 | +** who thereby has the right to put this into the public domain. |
| 22 | +** |
| 23 | +** The conversions meet RFC 4648 requirements, provided that this |
| 24 | +** C source specifies that line-feeds are included in the encoded |
| 25 | +** data to limit visible line lengths to 72 characters and to |
| 26 | +** terminate any encoded blob having non-zero length. |
| 27 | +** |
| 28 | +** Length limitations are not imposed except that the runtime |
| 29 | +** SQLite string or blob length limits are respected. Otherwise, |
| 30 | +** any length binary sequence can be represented and recovered. |
| 31 | +** Generated base64 sequences, with their line-feeds included, |
| 32 | +** can be concatenated; the result converted back to binary will |
| 33 | +** be the concatenation of the represented binary sequences. |
| 34 | +** |
| 35 | +** This SQLite3 extension creates a function, base64(x), which |
| 36 | +** either: converts text x containing base64 to a returned blob; |
| 37 | +** or converts a blob x to returned text containing base64. An |
| 38 | +** error will be thrown for other input argument types. |
| 39 | +** |
| 40 | +** This code relies on UTF-8 encoding only with respect to the |
| 41 | +** meaning of the first 128 (7-bit) codes matching that of USASCII. |
| 42 | +** It will fail miserably if somehow made to try to convert EBCDIC. |
| 43 | +** Because it is table-driven, it could be enhanced to handle that, |
| 44 | +** but the world and SQLite have moved on from that anachronism. |
| 45 | +** |
| 46 | +** To build the extension: |
| 47 | +** Set shell variable SQDIR=<your favorite SQLite checkout directory> |
| 48 | +** *Nix: gcc -O2 -shared -I$SQDIR -fPIC -o base64.so base64.c |
| 49 | +** OSX: gcc -O2 -dynamiclib -fPIC -I$SQDIR -o base64.dylib base64.c |
| 50 | +** Win32: gcc -O2 -shared -I%SQDIR% -o base64.dll base64.c |
| 51 | +** Win32: cl /Os -I%SQDIR% base64.c -link -dll -out:base64.dll |
| 52 | +*/ |
| 53 | + |
| 54 | +#include <assert.h> |
| 55 | + |
| 56 | +#include "sqlite3ext.h" |
| 57 | + |
#ifndef deliberate_fall_through
/* Quiet some compilers about some of our intentional code.
**
** deliberate_fall_through marks a switch case that intentionally runs
** into the next one. GCC_VERSION is not defined anywhere in this file
** (NOTE(review): it looks like a SQLite-internal macro; confirm), so a
** standalone build would always pick the empty definition. The compiler's
** own __GNUC__ is therefore consulted as well.
*/
# if (defined(GCC_VERSION) && GCC_VERSION>=7000000) \
     || (defined(__GNUC__) && __GNUC__>=7)
#  define deliberate_fall_through __attribute__((fallthrough));
# else
#  define deliberate_fall_through
# endif
#endif
| 66 | + |
| 67 | +SQLITE_EXTENSION_INIT1; |
| 68 | + |
/* Marker values kept in the decoding table for characters that are not
** base64 digits. Each has bit 7 set, so none can be mistaken for a digit
** value in the range 0..63.
*/
#define PC 0x80 /* the pad character */
#define WS 0x81 /* a whitespace character */
#define ND 0x82 /* neither pad, whitespace nor digit */
#define PAD_CHAR '='

#ifndef U8_TYPEDEF
#define U8_TYPEDEF
typedef unsigned char u8;
#endif

/* Decoding table: maps a 7-bit ASCII code to its base64 digit value, or to
** one of the PC / WS / ND markers. One row per 16 codes.
*/
static const u8 b64DigitValues[128] = {
  /* 0x00..0x0f: HT LF VT FF CR are whitespace */
  ND,ND,ND,ND, ND,ND,ND,ND, ND,WS,WS,WS, WS,WS,ND,ND,
  /* 0x10..0x1f */
  ND,ND,ND,ND, ND,ND,ND,ND, ND,ND,ND,ND, ND,ND,ND,ND,
  /* 0x20..0x2f: space, '+' is 62, '/' is 63 */
  WS,ND,ND,ND, ND,ND,ND,ND, ND,ND,ND,62, ND,ND,ND,63,
  /* 0x30..0x3f: '0'..'9' are 52..61, '=' is the pad */
  52,53,54,55, 56,57,58,59, 60,61,ND,ND, ND,PC,ND,ND,
  /* 0x40..0x4f: 'A'..'O' are 0..14 */
  ND, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
  /* 0x50..0x5f: 'P'..'Z' are 15..25 */
  15,16,17,18, 19,20,21,22, 23,24,25,ND, ND,ND,ND,ND,
  /* 0x60..0x6f: 'a'..'o' are 26..40 */
  ND,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
  /* 0x70..0x7f: 'p'..'z' are 41..51 */
  41,42,43,44, 45,46,47,48, 49,50,51,ND, ND,ND,ND,ND
};

/* Encoding table: base64 digit value (0..63) to its ASCII numeral. */
static const char b64Numerals[64+1]
  = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789+/";

/* Classify character c: table lookup for 7-bit codes, 0x80 for any code
** with bit 7 set. */
#define BX_DV_PROTO(c) \
  ((((u8)(c)) & 0x80)? 0x80 : (u8)(b64DigitValues[(u8)(c)]))
/* Tests applied to a BX_DV_PROTO() result. */
#define IS_BX_DIGIT(bdp) ((((u8)(bdp)) & 0x80)==0)
#define IS_BX_WS(bdp) (WS==(bdp))
#define IS_BX_PAD(bdp) (PC==(bdp))
/* ASCII numeral for digit value dv. */
#define BX_NUMERAL(dv) (b64Numerals[(u8)(dv)])
/* Width of base64 lines. Should be an integer multiple of 4. */
#define B64_DARK_MAX 72
| 110 | + |
| 111 | +/* Encode a byte buffer into base64 text with linefeeds appended to limit |
| 112 | +** encoded group lengths to B64_DARK_MAX or to terminate the last group. |
| 113 | +*/ |
| 114 | +static char* toBase64( u8 *pIn, int nbIn, char *pOut ){ |
| 115 | + int nCol = 0; |
| 116 | + while( nbIn >= 3 ){ |
| 117 | + /* Do the bit-shuffle, exploiting unsigned input to avoid masking. */ |
| 118 | + pOut[0] = BX_NUMERAL(pIn[0]>>2); |
| 119 | + pOut[1] = BX_NUMERAL(((pIn[0]<<4)|(pIn[1]>>4))&0x3f); |
| 120 | + pOut[2] = BX_NUMERAL(((pIn[1]&0xf)<<2)|(pIn[2]>>6)); |
| 121 | + pOut[3] = BX_NUMERAL(pIn[2]&0x3f); |
| 122 | + pOut += 4; |
| 123 | + nbIn -= 3; |
| 124 | + pIn += 3; |
| 125 | + if( (nCol += 4)>=B64_DARK_MAX || nbIn<=0 ){ |
| 126 | + *pOut++ = '\n'; |
| 127 | + nCol = 0; |
| 128 | + } |
| 129 | + } |
| 130 | + if( nbIn > 0 ){ |
| 131 | + signed char nco = nbIn+1; |
| 132 | + int nbe; |
| 133 | + unsigned long qv = *pIn++; |
| 134 | + for( nbe=1; nbe<3; ++nbe ){ |
| 135 | + qv <<= 8; |
| 136 | + if( nbe<nbIn ) qv |= *pIn++; |
| 137 | + } |
| 138 | + for( nbe=3; nbe>=0; --nbe ){ |
| 139 | + char ce = (nbe<nco)? BX_NUMERAL((u8)(qv & 0x3f)) : PAD_CHAR; |
| 140 | + qv >>= 6; |
| 141 | + pOut[nbe] = ce; |
| 142 | + } |
| 143 | + pOut += 4; |
| 144 | + *pOut++ = '\n'; |
| 145 | + } |
| 146 | + *pOut = 0; |
| 147 | + return pOut; |
| 148 | +} |
| 149 | + |
/* Advance over characters that are not base64 numerals.
**
** Looks at no more than nc characters starting at s. Returns a pointer to
** the first base64 numeral or NUL character found, or to s+nc if neither
** occurs within that span.
*/
static char * skipNonB64( char *s, int nc ){
  for( ; nc>0; --nc, ++s ){
    char c = *s;
    if( c==0 ) break;
    if( IS_BX_DIGIT(BX_DV_PROTO(c)) ) break;
  }
  return s;
}
| 156 | + |
| 157 | +/* Decode base64 text into a byte buffer. */ |
| 158 | +static u8* fromBase64( char *pIn, int ncIn, u8 *pOut ){ |
| 159 | + if( ncIn>0 && pIn[ncIn-1]=='\n' ) --ncIn; |
| 160 | + while( ncIn>0 && *pIn!=PAD_CHAR ){ |
| 161 | + static signed char nboi[] = { 0, 0, 1, 2, 3 }; |
| 162 | + char *pUse = skipNonB64(pIn, ncIn); |
| 163 | + unsigned long qv = 0L; |
| 164 | + int nti, nbo, nac; |
| 165 | + ncIn -= (pUse - pIn); |
| 166 | + pIn = pUse; |
| 167 | + nti = (ncIn>4)? 4 : ncIn; |
| 168 | + ncIn -= nti; |
| 169 | + nbo = nboi[nti]; |
| 170 | + if( nbo==0 ) break; |
| 171 | + for( nac=0; nac<4; ++nac ){ |
| 172 | + char c = (nac<nti)? *pIn++ : b64Numerals[0]; |
| 173 | + u8 bdp = BX_DV_PROTO(c); |
| 174 | + switch( bdp ){ |
| 175 | + case ND: |
| 176 | + /* Treat dark non-digits as pad, but they terminate decode too. */ |
| 177 | + ncIn = 0; |
| 178 | + deliberate_fall_through; /* FALLTHRU */ |
| 179 | + case WS: |
| 180 | + /* Treat whitespace as pad and terminate this group.*/ |
| 181 | + nti = nac; |
| 182 | + deliberate_fall_through; /* FALLTHRU */ |
| 183 | + case PC: |
| 184 | + bdp = 0; |
| 185 | + --nbo; |
| 186 | + deliberate_fall_through; /* FALLTHRU */ |
| 187 | + default: /* bdp is the digit value. */ |
| 188 | + qv = qv<<6 | bdp; |
| 189 | + break; |
| 190 | + } |
| 191 | + } |
| 192 | + switch( nbo ){ |
| 193 | + case 3: |
| 194 | + pOut[2] = (qv) & 0xff; |
| 195 | + deliberate_fall_through; /* FALLTHRU */ |
| 196 | + case 2: |
| 197 | + pOut[1] = (qv>>8) & 0xff; |
| 198 | + deliberate_fall_through; /* FALLTHRU */ |
| 199 | + case 1: |
| 200 | + pOut[0] = (qv>>16) & 0xff; |
| 201 | + break; |
| 202 | + } |
| 203 | + pOut += nbo; |
| 204 | + } |
| 205 | + return pOut; |
| 206 | +} |
| 207 | + |
| 208 | +/* This function does the work for the SQLite base64(x) UDF. */ |
| 209 | +static void base64(sqlite3_context *context, int na, sqlite3_value *av[]){ |
| 210 | + sqlite3_int64 nb; |
| 211 | + sqlite3_int64 nv = sqlite3_value_bytes(av[0]); |
| 212 | + sqlite3_int64 nc; |
| 213 | + int nvMax = sqlite3_limit(sqlite3_context_db_handle(context), |
| 214 | + SQLITE_LIMIT_LENGTH, -1); |
| 215 | + char *cBuf; |
| 216 | + u8 *bBuf; |
| 217 | + assert(na==1); |
| 218 | + switch( sqlite3_value_type(av[0]) ){ |
| 219 | + case SQLITE_BLOB: |
| 220 | + nb = nv; |
| 221 | + nc = 4*((nv+2)/3); /* quads needed */ |
| 222 | + nc += (nc+(B64_DARK_MAX-1))/B64_DARK_MAX + 1; /* LFs and a 0-terminator */ |
| 223 | + if( nvMax < nc ){ |
| 224 | + sqlite3_result_error(context, "blob expanded to base64 too big", -1); |
| 225 | + return; |
| 226 | + } |
| 227 | + bBuf = (u8*)sqlite3_value_blob(av[0]); |
| 228 | + if( !bBuf ){ |
| 229 | + if( SQLITE_NOMEM==sqlite3_errcode(sqlite3_context_db_handle(context)) ){ |
| 230 | + goto memFail; |
| 231 | + } |
| 232 | + sqlite3_result_text(context,"",-1,SQLITE_STATIC); |
| 233 | + break; |
| 234 | + } |
| 235 | + cBuf = sqlite3_malloc64(nc); |
| 236 | + if( !cBuf ) goto memFail; |
| 237 | + nc = (int)(toBase64(bBuf, nb, cBuf) - cBuf); |
| 238 | + sqlite3_result_text(context, cBuf, nc, sqlite3_free); |
| 239 | + break; |
| 240 | + case SQLITE_TEXT: |
| 241 | + nc = nv; |
| 242 | + nb = 3*((nv+3)/4); /* may overestimate due to LF and padding */ |
| 243 | + if( nvMax < nb ){ |
| 244 | + sqlite3_result_error(context, "blob from base64 may be too big", -1); |
| 245 | + return; |
| 246 | + }else if( nb<1 ){ |
| 247 | + nb = 1; |
| 248 | + } |
| 249 | + cBuf = (char *)sqlite3_value_text(av[0]); |
| 250 | + if( !cBuf ){ |
| 251 | + if( SQLITE_NOMEM==sqlite3_errcode(sqlite3_context_db_handle(context)) ){ |
| 252 | + goto memFail; |
| 253 | + } |
| 254 | + sqlite3_result_zeroblob(context, 0); |
| 255 | + break; |
| 256 | + } |
| 257 | + bBuf = sqlite3_malloc64(nb); |
| 258 | + if( !bBuf ) goto memFail; |
| 259 | + nb = (int)(fromBase64(cBuf, nc, bBuf) - bBuf); |
| 260 | + sqlite3_result_blob(context, bBuf, nb, sqlite3_free); |
| 261 | + break; |
| 262 | + default: |
| 263 | + sqlite3_result_error(context, "base64 accepts only blob or text", -1); |
| 264 | + return; |
| 265 | + } |
| 266 | + return; |
| 267 | + memFail: |
| 268 | + sqlite3_result_error(context, "base64 OOM", -1); |
| 269 | +} |
| 270 | + |
| 271 | +/* |
| 272 | +** Establish linkage to running SQLite library. |
| 273 | +*/ |
| 274 | +#ifndef SQLITE_SHELL_EXTFUNCS |
| 275 | +#ifdef _WIN32 |
| 276 | +__declspec(dllexport) |
| 277 | +#endif |
| 278 | +int sqlite3_base64_init |
| 279 | +#else |
| 280 | +static int sqlite3_base64_init |
| 281 | +#endif |
| 282 | +(sqlite3 *db, char **pzErr, const sqlite3_api_routines *pApi){ |
| 283 | + SQLITE_EXTENSION_INIT2(pApi); |
| 284 | + (void)pzErr; |
| 285 | + return sqlite3_create_function |
| 286 | + (db, "base64", 1, |
| 287 | + SQLITE_DETERMINISTIC|SQLITE_INNOCUOUS|SQLITE_DIRECTONLY|SQLITE_UTF8, |
| 288 | + 0, base64, 0, 0); |
| 289 | +} |
| 290 | + |
/*
** Convenience macros for building this extension into the shell in
** conjunction with SQLITE_SHELL_EXTFUNCS, so that shell.c, as distributed,
** can have this extension built in.
**
**   BASE64_INIT(db)           registers the extension on connection db.
**   BASE64_EXPOSE(db, pzErr)  expands to nothing.
*/
#define BASE64_INIT(db) sqlite3_base64_init((db), 0, 0)
#define BASE64_EXPOSE(db, pzErr) /* Not needed, ..._init() does this. */
0 commit comments