Skip to content

Commit b8e2e9f

Browse files
Merge pull request #33 from fastly/sv/cdata-codegen-updates-part-3
CDATA codegen updates, part 3
2 parents c111e3f + 3827dc5 commit b8e2e9f

33 files changed

Lines changed: 1523 additions & 376 deletions

fuzz/target.c

Lines changed: 3 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -508,8 +508,6 @@ fuzz_eager_output(const uint8_t *data, size_t size)
508508
}
509509
}
510510

511-
enum re_is_anchored_res anchorage[MAX_PATTERNS] = {0};
512-
513511
/* for each pattern, attempt to compile to a DFA */
514512
for (size_t p_i = 0; p_i < env.pattern_count; p_i++) {
515513
const char *p = env.patterns[p_i];
@@ -528,14 +526,9 @@ fuzz_eager_output(const uint8_t *data, size_t size)
528526
continue; /* invalid regex */
529527
}
530528

531-
const fsm_output_id_t endid = (fsm_output_id_t)p_i;
532-
ret = fsm_seteageroutputonends(fsm, endid);
533-
assert(ret == 1);
534-
535529
if (verbose) {
536530
fprintf(stderr, "==== pattern %zd, pre det\n", p_i);
537531
fsm_dump(stderr, fsm);
538-
fsm_eager_output_dump(stderr, fsm);
539532
fprintf(stderr, "====\n");
540533

541534
fsm_state_t c = fsm_countstates(fsm);
@@ -544,12 +537,6 @@ fuzz_eager_output(const uint8_t *data, size_t size)
544537
}
545538
}
546539

547-
ret = fsm_determinise(fsm);
548-
assert(ret == 1);
549-
550-
ret = fsm_minimise(fsm);
551-
assert(ret == 1);
552-
553540
fsm_state_t start;
554541
if (!fsm_getstart(fsm, &start)) {
555542
fsm_free(fsm);
@@ -578,7 +565,7 @@ fuzz_eager_output(const uint8_t *data, size_t size)
578565
/* copy and combine fsms into one DFA */
579566
{
580567
size_t used = 0;
581-
struct fsm_union_entry entries[MAX_PATTERNS] = {0};
568+
struct fsm *nfas[MAX_PATTERNS] = {0};
582569

583570
for (size_t i = 0; i < env.fsm_count; i++) {
584571
/* there can be gaps, fsms[] lines up with patterns[] */
@@ -604,9 +591,7 @@ fuzz_eager_output(const uint8_t *data, size_t size)
604591
}
605592
}
606593

607-
entries[used].fsm = cp;
608-
entries[used].anchored_start = anchorage[i] & RE_IS_ANCHORED_START;
609-
entries[used].anchored_end = anchorage[i] & RE_IS_ANCHORED_END;
594+
nfas[used] = cp;
610595
used++;
611596
}
612597

@@ -615,7 +600,7 @@ fuzz_eager_output(const uint8_t *data, size_t size)
615600
}
616601

617602
/* consumes nfas[] */
618-
struct fsm *fsm = fsm_union_repeated_pattern_group(used, entries, NULL);
603+
struct fsm *fsm = fsm_union_repeated_pattern_group(used, nfas, NULL, 0);
619604
assert(fsm != NULL);
620605

621606
if (verbose) {

include/fsm/bool.h

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -52,15 +52,22 @@ struct fsm *
5252
fsm_union_array(size_t fsm_count,
5353
struct fsm **fsms, struct fsm_combined_base_pair *bases);
5454

55-
struct fsm_union_entry {
56-
struct fsm *fsm;
57-
bool anchored_start;
58-
bool anchored_end;
59-
};
60-
55+
/* Combine an array of NFAs into a single NFA that attempts to match them
56+
* all in one pass, with an extra loop so that more than one pattern with
57+
* eager outputs can match. Ownership of the NFAs is transferred: they will
58+
* be combined (or freed, if they don't have a start state).
59+
*
60+
* This MUST be called with NFAs constructed via re_comp. Calling it with
61+
* manually constructed NFAs or DFAs is unsupported.
62+
*
63+
* This will set end IDs and/or output IDs representing matching each
64+
* of the original NFAs on the combined result, where nfas[i] will
65+
* get ID of (id_base + i).
66+
*
67+
* Returns NULL on error. */
6168
struct fsm *
62-
fsm_union_repeated_pattern_group(size_t entry_count,
63-
struct fsm_union_entry *entries, struct fsm_combined_base_pair *bases);
69+
fsm_union_repeated_pattern_group(size_t nfa_count,
70+
struct fsm **nfas, struct fsm_combined_base_pair *bases, size_t id_base);
6471

6572
struct fsm *
6673
fsm_intersect(struct fsm *a, struct fsm *b);

src/libfsm/closure.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ epsilon_closure_single(const struct fsm *fsm, struct state_set **closures, fsm_s
128128
}
129129

130130
struct state_set **
131-
epsilon_closure(struct fsm *fsm)
131+
epsilon_closure(const struct fsm *fsm)
132132
{
133133
struct state_set **closures;
134134
fsm_state_t s;
@@ -190,7 +190,7 @@ epsilon_closure(struct fsm *fsm)
190190
}
191191

192192
void
193-
closure_free(struct fsm *fsm, struct state_set **closures, size_t n)
193+
closure_free(const struct fsm *fsm, struct state_set **closures, size_t n)
194194
{
195195
fsm_state_t s;
196196

src/libfsm/determinise.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,7 @@ fsm_determinise_with_config(struct fsm *nfa,
240240
assert(dfa->states[m->dfastate].edges == NULL);
241241

242242
dfa->states[m->dfastate].edges = m->edges;
243+
m->edges = NULL; /* transfer ownership */
243244

244245
/*
245246
* The current DFA state is an end state if any of its associated NFA
@@ -616,6 +617,8 @@ map_free(struct map *map)
616617
if (b == NULL) {
617618
continue;
618619
}
620+
/* free any edge sets that didn't get transferred */
621+
edge_set_free(map->alloc, b->edges);
619622
f_free(map->alloc, b);
620623
}
621624

src/libfsm/eager_output.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -408,7 +408,10 @@ fsm_eager_output_compact(struct fsm *fsm, fsm_state_t *mapping, size_t mapping_c
408408

409409
assert(ob->state < mapping_count);
410410
const fsm_state_t nstate = mapping[ob->state];
411-
if (nstate == FSM_STATE_REMAP_NO_STATE) { continue; }
411+
if (nstate == FSM_STATE_REMAP_NO_STATE) {
412+
f_free(fsm->alloc, ob->entry);
413+
continue;
414+
}
412415

413416
const uint64_t hash = hash_id(nstate);
414417

src/libfsm/internal.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,10 +94,10 @@ state_hasnondeterminism(const struct fsm *fsm, fsm_state_t state, struct bm *bm)
9494
* for states, with wrapper to populate malloced array of user-facing structs.
9595
*/
9696
struct state_set **
97-
epsilon_closure(struct fsm *fsm);
97+
epsilon_closure(const struct fsm *fsm);
9898

9999
void
100-
closure_free(struct fsm *fsm, struct state_set **closures, size_t n);
100+
closure_free(const struct fsm *fsm, struct state_set **closures, size_t n);
101101

102102
/*
103103
* Internal free function that invokes free(3) by default, or a user-provided

0 commit comments

Comments
 (0)