Skip to content

Commit a8a9107

Browse files
committed
.
1 parent 9c560f4 commit a8a9107

2 files changed

Lines changed: 39 additions & 14 deletions

File tree

c/examples/json_struct_metadata.c

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#define JSON_STRUCT_CODEC_HEADER_SIZE 21
99

1010
const uint8_t json_struct_codec_magic[4] = { 'J', 'B', 'L', 'B' };
11+
const uint8_t json_struct_codec_version = 1;
1112

1213
// little-endian read of a uint64_t from an address
1314
static uint64_t
@@ -38,8 +39,9 @@ set_u64_le(uint8_t *dest, uint64_t value)
3839
dest[7] = (uint8_t) ((value >> 56) & 0xFF);
3940
}
4041

41-
// extract the json and binary payloads from the `json+struct` codec data buffer passed
42-
// in note that the output pointers reference memory inside the buffer passed in
42+
// Extract the json and binary payloads from the `json+struct` codec data buffer.
43+
// Note that the output pointers `json` and `binary` reference memory
44+
// inside the `metadata` buffer passed in.
4345
void
4446
json_struct_codec_get_components(uint8_t *metadata, tsk_size_t metadata_length,
4547
uint8_t **json, tsk_size_t *json_length, uint8_t **binary, tsk_size_t *binary_length)
@@ -54,7 +56,7 @@ json_struct_codec_get_components(uint8_t *metadata, tsk_size_t metadata_length,
5456
errx(EXIT_FAILURE, "bad magic bytes.");
5557

5658
uint8_t version = metadata[4];
57-
if (version != 1)
59+
if (version != json_struct_codec_version)
5860
errx(EXIT_FAILURE, "bad version number.");
5961

6062
uint64_t json_length_u64 = load_u64_le(metadata + 5);
@@ -89,15 +91,14 @@ json_struct_codec_get_components(uint8_t *metadata, tsk_size_t metadata_length,
8991
*binary_length = (tsk_size_t) binary_length_u64;
9092
}
9193

92-
// malloc and return a data buffer for the `json+struct` codec that contains the given
93-
// components
94+
// malloc and return a data buffer for the `json+struct` codec
95+
// that contains the given components
9496
void
9597
json_struct_codec_create_buffer(const uint8_t *json, tsk_size_t json_length,
9698
const uint8_t *binary, tsk_size_t binary_length, uint8_t **buffer,
9799
tsk_size_t *buffer_length)
98100
{
99-
// first figure out the total length of the codec's data and allocate the buffer for
100-
// it
101+
// figure out the total length of the codec's data and allocate the buffer for it
101102
tsk_size_t header_length = JSON_STRUCT_CODEC_HEADER_SIZE;
102103
tsk_size_t padding_length = (8 - ((header_length + json_length) & 0x07)) % 8;
103104
tsk_size_t total_length
@@ -108,7 +109,7 @@ json_struct_codec_create_buffer(const uint8_t *json, tsk_size_t json_length,
108109

109110
// then set up the bytes for the codec header
110111
memcpy(bytes, json_struct_codec_magic, 4);
111-
bytes[4] = 1;
112+
bytes[4] = json_struct_codec_version;
112113
set_u64_le(bytes + 5, (uint64_t) json_length);
113114
set_u64_le(bytes + 13, (uint64_t) binary_length);
114115

@@ -146,13 +147,13 @@ main(int argc, char **argv)
146147

147148
// print the recovered data to demonstrate that the round-trip worked
148149
// note that the JSON data is not NULL-terminated unless you put a NULL there!
149-
printf("JSON payload: %.*s\n", (int) decoded_json_length, decoded_json);
150+
printf("JSON: %.*s\n", (int) decoded_json_length, decoded_json);
150151

151-
printf("Binary payload:");
152-
for (tsk_size_t binary_index = 0; binary_index < decoded_binary_length;
153-
binary_index++)
154-
printf(" %#04x", decoded_binary[binary_index]);
152+
printf("Binary data:");
153+
for (tsk_size_t j = 0; j < decoded_binary_length; j++)
154+
printf(" %#04x", decoded_binary[j]);
155155
printf("\n");
156156

157+
free(metadata);
157158
return EXIT_SUCCESS;
158159
}

docs/c-api.rst

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -959,13 +959,37 @@ parse metadata using an external JSON library, and for
959959
struct-encoded metadata the values can be directly unpacked.
960960
Examples of both can be found in
961961
`the SLiM code <https://messerlab.github.com/slim/>`_.
962+
(In Python, tskit automatically decodes both JSON and binary
963+
metadata and provides it as Python-data-typed metadata,
964+
just as for other codecs.)
962965

963966
The :ref:`"json+struct" <sec_metadata_codecs_jsonstruct>`
964967
metadata codec is a little less straightforward to use,
965968
so we provide here an example of how to write to it
966-
and read from it in C.
969+
and read from it in C. See :ref:`sec_metadata_codecs_jsonstruct`
970+
for details of how the metadata is encoded.
971+
972+
The structure of this example is as follows:
973+
974+
1. Values specific to the metadata's header (e.g., the magic bytes ``JBLB``).
975+
2. Functions that encode/decode ``uint64_t`` values, used to store the lengths
976+
of the two components in the header.
977+
3. A function to "read" the metadata: really, to get pointers to the
978+
json and struct components.
979+
4. A function to write the metadata, again just given pointers to
980+
and lengths of the two components.
981+
5. The program itself just round-trips a very simple chunk of metadata,
982+
consisting of the JSON ``{"a": 1}`` and some binary ``uint8_t`` bytes (``1234``).
967983

968984
.. literalinclude:: ../c/examples/json_struct_metadata.c
969985
:language: c
970986

987+
Much of the complexity of the code is careful error checking of the lengths.
971988

989+
Here ``json_struct_codec_get_components`` takes a pointer to binary metadata
990+
and returns pointers to *within that memory*.
991+
A different approach might have copied the two portions of the metadata
992+
into two buffers (to then be decoded, for instance).
993+
However, that would double the memory footprint,
994+
and since this codec is intended for large metadata,
995+
we did not use that approach in this example.

0 commit comments

Comments
 (0)