Skip to content

Commit 74c5697

Browse files
committed
Fix ASan leak in corpus token_map and Windows/Linux vector blob assembly
- semantic.c: free strdup'd keys+values in token_map via cbm_ht_foreach before cbm_ht_free (44KB leak in 7920 allocations)
- code_vectors_blob.S: add Windows COFF branch (.rdata,"dr") alongside macOS Mach-O and Linux ELF sections
1 parent 2350240 commit 74c5697

File tree

2 files changed

+22
-1
lines changed

2 files changed

+22
-1
lines changed

src/semantic/semantic.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1476,6 +1476,12 @@ const char *cbm_sem_corpus_token_at(const cbm_sem_corpus_t *corpus, int index,
14761476
return corpus->entries[index].token;
14771477
}
14781478

1479+
/* Callback passed to cbm_ht_foreach by cbm_sem_corpus_free: releases one
 * token_map entry by calling free() on both its key and its value.
 * The const qualifier on key is cast away only so the pointer can be handed
 * to free(); userdata is not used.
 * NOTE(review): assumes every key and value in the table is an individually
 * heap-allocated (strdup'd, per the commit message) buffer -- confirm at the
 * insertion sites. */
static void free_ht_kv(const char *key, void *value, void *userdata) {
1480+
(void)userdata;
1481+
free((void *)key);
1482+
free(value);
1483+
}
1484+
14791485
void cbm_sem_corpus_free(cbm_sem_corpus_t *corpus) {
14801486
if (!corpus) {
14811487
return;
@@ -1490,6 +1496,7 @@ void cbm_sem_corpus_free(cbm_sem_corpus_t *corpus) {
14901496
free(corpus->doc_token_ids);
14911497
free(corpus->doc_token_counts);
14921498
if (corpus->token_map) {
1499+
cbm_ht_foreach(corpus->token_map, free_ht_kv, NULL);
14931500
cbm_ht_free(corpus->token_map);
14941501
}
14951502
free(corpus);

vendored/nomic/code_vectors_blob.S

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/* nomic-embed-code vector blob embedded via assembler.
2-
* Cross-platform: macOS (Mach-O) vs Linux/Windows (ELF/COFF). */
2+
* Cross-platform: macOS (Mach-O) vs Linux (ELF) vs Windows (COFF). */
33

44
#if defined(__APPLE__)
55
.section __DATA,__const
@@ -15,6 +15,20 @@ _PRETRAINED_VECTOR_BLOB_END:
1515
_PRETRAINED_VECTOR_BLOB_LEN:
1616
.long _PRETRAINED_VECTOR_BLOB_END - _PRETRAINED_VECTOR_BLOB
1717

18+
#elif defined(_WIN32) || defined(__CYGWIN__) || defined(__MINGW32__)
19+
.section .rdata,"dr"
20+
.globl PRETRAINED_VECTOR_BLOB
21+
.globl PRETRAINED_VECTOR_BLOB_LEN
22+
.p2align 4
23+
PRETRAINED_VECTOR_BLOB:
24+
.incbin "vendored/nomic/code_vectors.bin"
25+
PRETRAINED_VECTOR_BLOB_END:
26+
27+
.section .rdata,"dr"
28+
.p2align 2
29+
PRETRAINED_VECTOR_BLOB_LEN:
30+
.long PRETRAINED_VECTOR_BLOB_END - PRETRAINED_VECTOR_BLOB
31+
1832
#else
1933
.section .rodata,"a",@progbits
2034
.globl PRETRAINED_VECTOR_BLOB

0 commit comments

Comments
 (0)