Skip to content

Commit 13fe55d

Browse files
framework: implement dynamic hash table for properties
- Replace the fixed 199-bucket hash with a dynamic power-of-two table.
- Implement the DJB2 hash algorithm with linear probing for O(1) average-case lookups.
- Add lazy allocation to reduce memory overhead for simple objects.
- Maintain ABI compatibility by preserving the linear names/values arrays.
- Add a robust fallback to linear search for OOM (out-of-memory) safety.
- Significantly improves performance for projects with large property lists.
1 parent c5534c8 commit 13fe55d

1 file changed

Lines changed: 169 additions & 36 deletions

File tree

src/framework/mlt_properties.c

Lines changed: 169 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,11 @@
5050

5151
typedef struct
5252
{
53-
int hash[199];
53+
int *buckets;
54+
unsigned int capacity;
55+
unsigned int mask;
56+
unsigned int used;
57+
5458
char **name;
5559
mlt_property *value;
5660
int count;
@@ -98,10 +102,14 @@ int mlt_properties_init(mlt_properties self, void *child)
98102
// Allocate the local structure
99103
self->local = calloc(1, sizeof(property_list));
100104

105+
((property_list *) self->local)->buckets = NULL;
106+
((property_list *) self->local)->capacity = 0;
107+
((property_list *) self->local)->mask = 0;
108+
((property_list *) self->local)->used = 0;
109+
101110
// Increment the ref count
102111
((property_list *) self->local)->ref_count = 1;
103112
pthread_mutex_init(&((property_list *) self->local)->mutex, NULL);
104-
;
105113
}
106114

107115
// Check that initialisation was successful
@@ -318,15 +326,86 @@ int mlt_properties_preset(mlt_properties self, const char *name)
318326
*
319327
* \private \memberof mlt_properties_s
320328
* \param name a string
321-
* \return an integer
329+
* \return an unsigned integer
322330
*/
323331

static inline unsigned int generate_hash(const char *name)
{
    // DJB2: start from 5381 and fold in each byte as hash * 33 + byte.
    // Arithmetic is unsigned, so it wraps instead of overflowing.
    unsigned int hash = 5381;

    // Read bytes as unsigned char so the result does not depend on the
    // platform's signedness of plain char.
    for (const unsigned char *p = (const unsigned char *) name; *p != '\0'; p++)
        hash = hash * 33u + *p;

    // The full-width value is returned; callers reduce it to a bucket index.
    return hash;
}
344+
345+
/** Store a property index in the open-addressed bucket array.
 *
 * Starting at the slot selected by the name's hash, steps forward one slot
 * at a time (wrapping through the mask) until a slot holding -1 is found,
 * then writes the index there. The walk only stops at a -1 slot, so the
 * array must contain at least one.
 *
 * \private \memberof mlt_properties_s
 * \param buckets the bucket array; -1 marks an unused slot
 * \param mask bitmask applied to the hash and to every probe position
 * \param name the key whose hash selects the starting slot
 * \param index the value to write into the first unused slot found
 */
static void hash_insert(int *buckets, unsigned int mask, const char *name, int index)
{
    unsigned int slot = generate_hash(name) & mask;

    // Linear probing: skip occupied slots, wrapping at the end of the array.
    for (; buckets[slot] != -1; slot = (slot + 1) & mask)
        ;

    buckets[slot] = index;
}
365+
366+
/** Check and trigger a rehash of the properties bucket list.
367+
*
368+
* Maintains a load factor below 0.75 to ensure O(1) performance.
369+
* If memory allocation fails during expansion, the hash table is
370+
* gracefully disabled, falling back to safe linear searches.
371+
* \private \memberof mlt_properties_s
372+
* \param list the property_list object to be resized or initialized
373+
*/
374+
static void check_rehash(property_list *list)
375+
{
376+
//(used * 4 >= capacity * 3) avoids slow floating-point division.
377+
if (list->buckets == NULL || (list->used * 4 >= list->capacity * 3)) {
378+
// Start with 64 to reduce early reallocations for medium-sized objects
379+
unsigned int new_capacity = list->capacity == 0 ? 64 : list->capacity * 2;
380+
381+
unsigned int new_mask = new_capacity - 1;
382+
int *new_buckets = malloc(new_capacity * sizeof(int));
383+
384+
if (new_buckets) {
385+
// Initialize buckets to -1 (empty)
386+
for (unsigned int i = 0; i < new_capacity; i++)
387+
new_buckets[i] = -1;
388+
389+
if (list->buckets) {
390+
// Rehash existing items into the new bucket array
391+
for (int i = 0; i < list->count; i++) {
392+
if (list->name[i])
393+
hash_insert(new_buckets, new_mask, list->name[i], i);
394+
}
395+
free(list->buckets);
396+
}
397+
list->buckets = new_buckets;
398+
list->capacity = new_capacity;
399+
list->mask = new_mask;
400+
} else if (list->buckets) {
401+
// Memory failure: invalidate hash table and fallback to linear search
402+
free(list->buckets);
403+
list->buckets = NULL;
404+
list->capacity = 0;
405+
list->mask = 0;
406+
list->used = 0;
407+
}
408+
}
330409
}
331410

332411
/** Copy a serializable property to a properties list that is mirroring this one.
@@ -535,25 +614,42 @@ static inline mlt_property mlt_properties_find(mlt_properties self, const char *
535614
{
536615
if (!self || !name)
537616
return NULL;
617+
538618
property_list *list = self->local;
539619
mlt_property value = NULL;
540-
int key = generate_hash(name);
541620

542621
mlt_properties_lock(self);
543622

544-
int i = list->hash[key] - 1;
545-
if (i >= 0) {
546-
// Check if we're hashed
547-
if (list->count > 0 && list->name[i] && !strcmp(list->name[i], name))
548-
value = list->value[i];
623+
// If the hash table is active, it is the authoritative source for O(1) lookups.
624+
if (list->buckets) {
625+
unsigned int hash = generate_hash(name);
626+
unsigned int i = hash & list->mask;
627+
628+
// Linear probing: traverse the bucket array until an empty slot (-1) is found
629+
while (list->buckets[i] != -1) {
630+
int index = list->buckets[i];
631+
if (list->name[index] && !strcmp(list->name[index], name)) {
632+
value = list->value[index];
633+
mlt_properties_unlock(self);
634+
return value;
635+
}
636+
i = (i + 1) & list->mask;
637+
}
549638

550-
// Locate the item
551-
for (i = list->count - 1; value == NULL && i >= 0; i--)
552-
if (list->name[i] && !strcmp(list->name[i], name))
553-
value = list->value[i];
639+
// If we reached an empty slot (-1) in the hash table, the property definitely does not exist.
640+
mlt_properties_unlock(self);
641+
return NULL;
642+
}
643+
644+
// Fallback Linear Search
645+
for (int i = list->count - 1; i >= 0; i--) {
646+
if (list->name[i] && !strcmp(list->name[i], name)) {
647+
value = list->value[i];
648+
break;
649+
}
554650
}
555-
mlt_properties_unlock(self);
556651

652+
mlt_properties_unlock(self);
557653
return value;
558654
}
559655

@@ -562,37 +658,57 @@ static inline mlt_property mlt_properties_find(mlt_properties self, const char *
562658
* \private \memberof mlt_properties_s
563659
* \param self a properties list
564660
* \param name the name of the new property
565-
* \return the new property
661+
* \return the new property or NULL for failure
566662
*/
567663

568664
static mlt_property mlt_properties_add(mlt_properties self, const char *name)
{
    if (!self || !name)
        return NULL;

    property_list *list = self->local;
    mlt_property result = NULL;

    mlt_properties_lock(self);

    // Grow the parallel name/value arrays geometrically (16, 32, 64, ...).
    if (list->count == list->size) {
        int new_size = list->size == 0 ? 16 : list->size * 2;

        // realloc() leaves the old block valid on failure, so go through
        // temporaries and keep whichever array did move.
        char **new_names = realloc(list->name, new_size * sizeof *new_names);
        if (new_names)
            list->name = new_names;

        mlt_property *new_values = realloc(list->value, new_size * sizeof *new_values);
        if (new_values)
            list->value = new_values;

        if (!new_names || !new_values) {
            // list->size is unchanged, so the arrays are still treated as full.
            mlt_properties_unlock(self);
            return NULL;
        }
        list->size = new_size;
    }

    // Build both halves of the entry before publishing it, so a failed
    // allocation never leaves a half-initialised slot counted in the list.
    char *key = strdup(name);
    if (key)
        result = mlt_property_init();
    if (!result) {
        free(key);
        mlt_properties_unlock(self);
        return NULL;
    }

    list->name[list->count] = key;
    list->value[list->count] = result;

    // Keep the hash table in step. check_rehash() may create, grow or
    // (on allocation failure) drop the table; it only looks at entries below
    // list->count, so the new entry is inserted explicitly afterwards.
    check_rehash(list);

    if (list->buckets) {
        hash_insert(list->buckets, list->mask, key, list->count);
        list->used++;
    }

    list->count++;

    mlt_properties_unlock(self);
    return result;
}
598714

@@ -1278,21 +1394,32 @@ int mlt_properties_rename(mlt_properties self, const char *source, const char *d
12781394

12791395
if (value == NULL) {
12801396
property_list *list = self->local;
1281-
int i = 0;
1282-
1283-
// Locate the item
1397+
int found = 0;
12841398
mlt_properties_lock(self);
1285-
for (i = 0; i < list->count; i++) {
1399+
1400+
for (int i = 0; i < list->count; i++) {
1401+
// Check if the property name matches the source.
12861402
if (list->name[i] && !strcmp(list->name[i], source)) {
12871403
free(list->name[i]);
12881404
list->name[i] = strdup(dest);
1289-
list->hash[generate_hash(dest)] = i + 1;
1405+
found = 1;
12901406
break;
12911407
}
12921408
}
1409+
1410+
// Rebuild the hash table if a property was renamed.
1411+
if (found && list->buckets) {
1412+
for (unsigned int j = 0; j < list->capacity; j++)
1413+
list->buckets[j] = -1;
1414+
1415+
for (int j = 0; j < list->count; j++) {
1416+
if (list->name[j]) {
1417+
hash_insert(list->buckets, list->mask, list->name[j], j);
1418+
}
1419+
}
1420+
}
12931421
mlt_properties_unlock(self);
12941422
}
1295-
12961423
return value != NULL;
12971424
}
12981425

@@ -1522,6 +1649,12 @@ void mlt_properties_close(mlt_properties self)
15221649
pthread_mutex_destroy(&list->mutex);
15231650
free(list->name);
15241651
free(list->value);
1652+
1653+
if (list->buckets) {
1654+
free(list->buckets);
1655+
list->buckets = NULL;
1656+
}
1657+
15251658
free(list);
15261659

15271660
// Free self now if self has no child

0 commit comments

Comments
 (0)