|
1 | | -# tinyBits |
| 1 | +# TinyBits |
2 | 2 |
|
3 | | -TinyBits is a lightweight serialization library designed for efficient encoding and decoding of structured data. It provides compact binary representations for various data types, including integers, strings, arrays, maps, and more. |
| 3 | +A compact, efficient binary serialization library designed for performance and small message sizes. |
| 4 | + |
| 5 | +## Overview |
| 6 | + |
| 7 | +TinyBits is a lightweight C library for serializing and deserializing data in a compact binary format. It features string deduplication, optimized floating-point encoding, and a straightforward API. |
4 | 8 |
|
5 | 9 | ## Features |
6 | 10 |
|
7 | | -- **Compact Serialization**: Optimized for minimal size and fast encoding/decoding. |
8 | | -- **String Deduplication**: Reduces redundancy by reusing previously encoded strings. |
9 | | -- **Floating-Point Compression**: Supports compressed encoding for floating-point numbers. |
10 | | -- **Customizable**: Easily extendable for additional data types or features. |
| 11 | +- Minimal dependencies (standard C library only) |
| 12 | +- Single-header implementation |
| 13 | +- Fast encoding and decoding |
| 14 | +- String deduplication |
| 15 | +- Optimized floating-point representation |
| 16 | +- Support for integers, strings, arrays, maps, doubles, booleans, null, and binary blobs |
| 17 | +- Configurable feature flags |
| 18 | + |
| 19 | +## Building |
| 20 | + |
| 21 | +TinyBits is designed as a single-header library. The repository contains separate implementation files that are combined into a single header via the build script. |
| 22 | + |
| 23 | +```bash |
| 24 | +# Generate the amalgamated header |
| 25 | +./build.sh |
11 | 26 |
|
12 | | -## File Structure |
| 27 | +# The resulting file will be created at dist/tinybits.h |
| 28 | +``` |
13 | 29 |
|
14 | | -- `src/`: Contains the core implementation of TinyBits. |
15 | | - - `tinybits.h`: Main header file for the library. |
16 | | - - `internal/`: Internal implementation details. |
17 | | - - `common.h`: Shared utilities and constants. |
18 | | - - `packer.h`: Functions for encoding data. |
19 | | - - `unpacker.h`: Functions for decoding data. |
20 | | -- `test/`: Placeholder for unit tests. |
21 | | -- `bench/`: Placeholder for benchmarking tools. |
22 | | -- `build.sh`: Script to generate an amalgamated header file. |
| 30 | +Simply include this generated header in your project to use TinyBits. |
23 | 31 |
|
24 | 32 | ## Usage |
25 | 33 |
|
26 | | -### Building the Amalgamated Header |
| 34 | +### Basic Example |
27 | 35 |
|
28 | | -Run the `build.sh` script to generate a single header file (`dist/tinybits_amalgamated.h`) that includes all necessary components: |
| 36 | +```c |
| 37 | +#include "tinybits.h" |
| 38 | +#include <stdio.h> |
| 39 | + |
| 40 | +int main() { |
| 41 | + // Create a packer |
| 42 | + tiny_bits_packer *packer = tiny_bits_packer_create(1024, |
| 43 | + TB_FEATURE_STRING_DEDUPE | TB_FEATURE_COMPRESS_FLOATS); |
| 44 | + |
| 45 | + // Pack some values |
| 46 | + pack_map(packer, 3); |
| 47 | + |
| 48 | + // Add a string key-value pair |
| 49 | + pack_str(packer, "name", 4); |
| 50 | + pack_str(packer, "TinyBits Library", 16); |
| 51 | + |
| 52 | + // Add a number key-value pair |
| 53 | + pack_str(packer, "version", 7); |
| 54 | + pack_double(packer, 1.0); |
| 55 | + |
| 56 | + // Add a nested array |
| 57 | + pack_str(packer, "features", 8); |
| 58 | + pack_arr(packer, 3); |
| 59 | + pack_str(packer, "compact", 7); |
| 60 | + pack_str(packer, "fast", 4); |
| 61 | + pack_str(packer, "flexible", 8); |
| 62 | + |
| 63 | + // Create unpacker and set buffer |
| 64 | + tiny_bits_unpacker *unpacker = tiny_bits_unpacker_create(); |
| 65 | + tiny_bits_unpacker_set_buffer(unpacker, packer->buffer, packer->current_pos); |
| 66 | + |
| 67 | + // Unpack and process values |
| 68 | + tiny_bits_value value; |
| 69 | + enum tiny_bits_type type = unpack_value(unpacker, &value); |
| 70 | + |
| 71 | + // Process the data... |
| 72 | + |
| 73 | + // Clean up |
| 74 | + tiny_bits_packer_destroy(packer); |
| 75 | + tiny_bits_unpacker_destroy(unpacker); |
| 76 | + |
| 77 | + return 0; |
| 78 | +} |
| 79 | +``` |
| 80 | + |
| 81 | +## API Reference |
| 82 | + |
| 83 | +### Encoder API |
| 84 | + |
| 85 | +```c |
| 86 | +// Create a new packer with initial capacity and features |
| 87 | +// Features: |
| 88 | +// - TB_FEATURE_STRING_DEDUPE (0x01): Enable string deduplication |
| 89 | +// - TB_FEATURE_COMPRESS_FLOATS (0x02): Enable float compression |
| 90 | +tiny_bits_packer *tiny_bits_packer_create(size_t initial_capacity, uint8_t features); |
| 91 | + |
| 92 | +// Reset the packer (reuse existing memory) |
| 93 | +void tiny_bits_packer_reset(tiny_bits_packer *encoder); |
| 94 | + |
| 95 | +// Free all resources |
| 96 | +void tiny_bits_packer_destroy(tiny_bits_packer *encoder); |
| 97 | + |
| 98 | +// Core packing functions |
| 99 | +int pack_int(tiny_bits_packer *encoder, int64_t value); |
| 100 | +int pack_str(tiny_bits_packer *encoder, char *str, uint32_t str_len); |
| 101 | +int pack_double(tiny_bits_packer *encoder, double val); |
| 102 | +int pack_arr(tiny_bits_packer *encoder, int arr_len); |
| 103 | +int pack_map(tiny_bits_packer *encoder, int map_len); |
| 104 | +int pack_null(tiny_bits_packer *encoder); |
| 105 | +int pack_true(tiny_bits_packer *encoder); |
| 106 | +int pack_false(tiny_bits_packer *encoder); |
| 107 | +int pack_blob(tiny_bits_packer *encoder, const char *blob, int blob_size); |
| 108 | + |
| 109 | +// Special float values |
| 110 | +int pack_nan(tiny_bits_packer *encoder); |
| 111 | +int pack_infinity(tiny_bits_packer *encoder); |
| 112 | +int pack_negative_infinity(tiny_bits_packer *encoder); |
| 113 | +``` |
| 114 | +
|
| 115 | +### Decoder API |
| 116 | +
|
| 117 | +```c |
| 118 | +// Create a new unpacker |
| 119 | +tiny_bits_unpacker *tiny_bits_unpacker_create(void); |
| 120 | +
|
| 121 | +// Set the buffer to decode |
| 122 | +void tiny_bits_unpacker_set_buffer(tiny_bits_unpacker *decoder, |
| 123 | + const unsigned char *buffer, |
| 124 | + size_t size); |
| 125 | +
|
| 126 | +// Reset the unpacker to start position |
| 127 | +void tiny_bits_unpacker_reset(tiny_bits_unpacker *decoder); |
| 128 | +
|
| 129 | +// Free all resources |
| 130 | +void tiny_bits_unpacker_destroy(tiny_bits_unpacker *decoder); |
| 131 | +
|
| 132 | +// Unpack the next value |
| 133 | +enum tiny_bits_type unpack_value(tiny_bits_unpacker *decoder, tiny_bits_value *value); |
| 134 | +``` |
| 135 | + |
| 136 | +### Return Types |
| 137 | + |
| 138 | +```c |
| 139 | +// Value types returned by the unpacker |
| 140 | +enum tiny_bits_type { |
| 141 | + TINY_BITS_ARRAY, // Array of values |
| 142 | + TINY_BITS_MAP, // Map of key-value pairs |
| 143 | + TINY_BITS_INT, // Integer value |
| 144 | + TINY_BITS_DOUBLE, // Floating-point value |
| 145 | + TINY_BITS_STR, // String value |
| 146 | + TINY_BITS_BLOB, // Binary blob |
| 147 | + TINY_BITS_TRUE, // Boolean true |
| 148 | + TINY_BITS_FALSE, // Boolean false |
| 149 | + TINY_BITS_NULL, // Null value |
| 150 | + TINY_BITS_NAN, // Not-a-Number |
| 151 | + TINY_BITS_INF, // Positive infinity |
| 152 | + TINY_BITS_N_INF, // Negative infinity |
| 153 | + TINY_BITS_EXT, // Extension type (reserved) |
| 154 | + TINY_BITS_FINISHED, // End of buffer |
| 155 | + TINY_BITS_ERROR // Parsing error |
| 156 | +}; |
| 157 | +``` |
| 158 | + |
| 159 | +## Working with Collections |
| 160 | + |
| 161 | +When encoding arrays and maps, first call `pack_arr()` or `pack_map()` with the number of elements (for a map, the number of key-value pairs), then encode each element in sequence, writing each map key immediately before its value: |
| 162 | + |
| 163 | +```c |
| 164 | +// Array with 3 strings |
| 165 | +pack_arr(packer, 3); |
| 166 | +pack_str(packer, "one", 3); |
| 167 | +pack_str(packer, "two", 3); |
| 168 | +pack_str(packer, "three", 5); |
| 169 | + |
| 170 | +// Map with 2 key-value pairs |
| 171 | +pack_map(packer, 2); |
| 172 | +pack_str(packer, "key1", 4); |
| 173 | +pack_int(packer, 42); |
| 174 | +pack_str(packer, "key2", 4); |
| 175 | +pack_str(packer, "value", 5); |
| 176 | +``` |
| 177 | +
|
| 178 | +When decoding, the unpacker returns `TINY_BITS_ARRAY` or `TINY_BITS_MAP` with the element count in `value.length`; you should then read that many elements: |
| 179 | +
|
| 180 | +```c |
| 181 | +tiny_bits_value value; |
| 182 | +enum tiny_bits_type type = unpack_value(unpacker, &value); |
| 183 | +
|
| 184 | +if (type == TINY_BITS_ARRAY) { |
| 185 | + size_t count = value.length; |
| 186 | + // Read 'count' elements from the array |
| 187 | + for (size_t i = 0; i < count; i++) { |
| 188 | + // Unpack the next value |
| 189 | + } |
| 190 | +} |
| 191 | +``` |
| 192 | + |
| 193 | +## Memory Management |
| 194 | + |
| 195 | +- `tiny_bits_packer_create()` allocates memory for the encoder |
| 196 | +- `tiny_bits_packer_reset()` reuses existing memory |
| 197 | +- `tiny_bits_packer_destroy()` frees all allocated memory |
| 198 | +- The encoder automatically grows its buffer as needed |
| 199 | + |
| 200 | +## Feature Flags |
| 201 | + |
| 202 | +### String Deduplication |
| 203 | + |
| 204 | +When `TB_FEATURE_STRING_DEDUPE` is enabled, the packer maintains a hash table of previously encoded strings (those between 2 and 128 bytes long) and emits references to them instead of duplicating the string data. |
| 205 | + |
| 206 | +### Float Compression |
| 207 | + |
| 208 | +When `TB_FEATURE_COMPRESS_FLOATS` is enabled, floating-point values with 12 or fewer decimal places are encoded as scaled integers for space efficiency. |
| 209 | + |
| 210 | +## Performance Considerations |
| 211 | + |
| 212 | +- Enable string deduplication for data with many repeated strings |
| 213 | +- Reuse encoder/decoder instances when processing multiple messages |
| 214 | +- Floating-point compression adds some CPU overhead |
29 | 215 |
|
30 | | -```bash |
31 | | -[build.sh](http://_vscodecontentref_/1) |
|
0 commit comments