Skip to content

Commit 019b88a

Browse files
committed
Ported robinst's changes to link parsing.
See commonmark/commonmark.js#101. This uses a separate stack for brackets, instead of putting them on the delimiter stack. This avoids the need to look through the delimiter stack for the next bracket. It also avoids a shortcut reference lookup when the reference text contains brackets. The change dramatically improved performance on the nested links pathological test for commonmark.js. It has a smaller but measurable effect here.
1 parent 899b2bd commit 019b88a

1 file changed

Lines changed: 67 additions & 34 deletions

File tree

src/inlines.c

Lines changed: 67 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -38,15 +38,25 @@ typedef struct delimiter {
3838
unsigned char delim_char;
3939
bool can_open;
4040
bool can_close;
41-
bool active;
4241
} delimiter;
4342

43+
// One entry on the subject's stack of open brackets ("[" or "![").
typedef struct bracket {
44+
struct bracket *previous; // entry below this one on the bracket stack (NULL at the bottom)
45+
struct delimiter *previous_delimiter; // top of the delimiter stack at the time this bracket was pushed
46+
cmark_node *inl_text; // text node created for the literal "[" or "!["
47+
bufsize_t position; // subject position recorded at push time (taken after the opener was consumed)
48+
bool image; // true for "![", false for "["
49+
bool active; // set to true on push; a "]" that meets an inactive opener is emitted as plain text
50+
bool bracket_after; // set to true once another bracket has been pushed on top of this one
51+
} bracket;
52+
4453
// Parser state for one run of inline content.
typedef struct {
4554
cmark_mem *mem; // allocator used for delimiter and bracket stack entries
4655
cmark_chunk input; // the text being parsed
4756
bufsize_t pos; // current offset into input
4857
cmark_reference_map *refmap; // map consulted when resolving reference links
4958
delimiter *last_delim; // top of the delimiter stack (NULL when empty)
59+
bracket *last_bracket; // top of the bracket stack (NULL when empty)
5060
} subject;
5161

5262
static CMARK_INLINE bool S_is_line_end_char(char c) {
@@ -139,6 +149,7 @@ static void subject_from_buf(cmark_mem *mem, subject *e, cmark_strbuf *buffer,
139149
e->pos = 0;
140150
e->refmap = refmap;
141151
e->last_delim = NULL;
152+
e->last_bracket = NULL;
142153
}
143154

144155
static CMARK_INLINE int isbacktick(int c) { return (c == '`'); }
@@ -342,6 +353,16 @@ static void remove_delimiter(subject *subj, delimiter *delim) {
342353
free(delim);
343354
}
344355

356+
// Remove the top entry from the subject's bracket stack and release it.
// Does nothing when the stack is empty.
static void pop_bracket(subject *subj) {
  bracket *top = subj->last_bracket;

  if (top == NULL) {
    return;
  }

  subj->last_bracket = top->previous;

  // push_bracket obtains entries from subj->mem->calloc, so they have to be
  // released through the same allocator; calling the libc free() directly is
  // wrong whenever a custom cmark_mem is in use.
  // NOTE(review): assumes cmark_mem pairs calloc with a free hook -- confirm.
  subj->mem->free(top);
}
364+
365+
345366
static void push_delimiter(subject *subj, unsigned char c, bool can_open,
346367
bool can_close, cmark_node *inl_text) {
347368
delimiter *delim = (delimiter *)subj->mem->calloc(1, sizeof(delimiter));
@@ -355,10 +376,24 @@ static void push_delimiter(subject *subj, unsigned char c, bool can_open,
355376
delim->previous->next = delim;
356377
}
357378
delim->position = subj->pos;
358-
delim->active = true;
359379
subj->last_delim = delim;
360380
}
361381

382+
// Push a new opener onto the subject's bracket stack.
//   image    - true for "![", false for "["
//   inl_text - text node that was created for the opener
// The entry records the current top of the delimiter stack and the current
// subject position, and starts out active. The bracket that was on top
// before (if any) is flagged as having a bracket after it.
static void push_bracket(subject *subj, bool image, cmark_node *inl_text) {
  bracket *entry = (bracket *)subj->mem->calloc(1, sizeof(bracket));
  bracket *enclosing = subj->last_bracket;

  if (enclosing != NULL) {
    enclosing->bracket_after = true;
  }

  // Links into the two stacks.
  entry->previous = enclosing;
  entry->previous_delimiter = subj->last_delim;

  // Description of the opener itself.
  entry->inl_text = inl_text;
  entry->position = subj->pos;
  entry->image = image;
  entry->active = true;
  entry->bracket_after = false;

  subj->last_bracket = entry;
}
396+
362397
// Assumes the subject has a c at the current position.
363398
static cmark_node *handle_delim(subject *subj, unsigned char c, bool smart) {
364399
bufsize_t numdelims;
@@ -465,9 +500,7 @@ static void process_emphasis(subject *subj, delimiter *stack_bottom) {
465500

466501
// now move forward, looking for closers, and handling each
467502
while (closer != NULL) {
468-
if (closer->can_close &&
469-
(closer->delim_char == '*' || closer->delim_char == '_' ||
470-
closer->delim_char == '"' || closer->delim_char == '\'')) {
503+
if (closer->can_close) {
471504
// Now look backwards for first matching opener:
472505
opener = closer->previous;
473506
opener_found = false;
@@ -755,42 +788,35 @@ static cmark_node *handle_close_bracket(subject *subj) {
755788
bufsize_t starturl, endurl, starttitle, endtitle, endall;
756789
bufsize_t n;
757790
bufsize_t sps;
758-
cmark_reference *ref;
759-
bool is_image = false;
791+
cmark_reference *ref = NULL;
760792
cmark_chunk url_chunk, title_chunk;
761793
cmark_chunk url, title;
762-
delimiter *opener;
794+
bracket *opener;
763795
cmark_node *inl;
764796
cmark_chunk raw_label;
765797
int found_label;
766798
cmark_node *tmp, *tmpnext;
799+
bool is_image;
767800

768801
advance(subj); // advance past ]
769802
initial_pos = subj->pos;
770803

771-
// look through list of delimiters for a [ or !
772-
opener = subj->last_delim;
773-
while (opener) {
774-
if (opener->delim_char == '[' || opener->delim_char == '!') {
775-
break;
776-
}
777-
opener = opener->previous;
778-
}
804+
// get last [ or ![
805+
opener = subj->last_bracket;
779806

780807
if (opener == NULL) {
781808
return make_str(subj->mem, cmark_chunk_literal("]"));
782809
}
783810

784811
if (!opener->active) {
785812
// take bracket off stack
786-
remove_delimiter(subj, opener);
813+
pop_bracket(subj);
787814
return make_str(subj->mem, cmark_chunk_literal("]"));
788815
}
789816

790817
// If we got here, we matched a potential link/image text.
791-
is_image = opener->delim_char == '!';
792-
793818
// Now we check to see if it's a link/image.
819+
is_image = opener->image;
794820

795821
// First, look for an inline link.
796822
if (peek_char(subj) == '(' &&
@@ -830,20 +856,23 @@ static cmark_node *handle_close_bracket(subject *subj) {
830856
// skip spaces
831857
raw_label = cmark_chunk_literal("");
832858
found_label = link_label(subj, &raw_label);
833-
if (!found_label || raw_label.len == 0) {
834-
cmark_chunk_free(subj->mem, &raw_label);
835-
raw_label = cmark_chunk_dup(&subj->input, opener->position,
836-
initial_pos - opener->position - 1);
837-
}
838-
839859
if (!found_label) {
840860
// If we have a shortcut reference link, back up
841861
// to before the spaces we skipped.
842862
subj->pos = initial_pos;
843863
}
844864

845-
ref = cmark_reference_lookup(subj->refmap, &raw_label);
846-
cmark_chunk_free(subj->mem, &raw_label);
865+
if ((!found_label || raw_label.len == 0) && !opener->bracket_after) {
866+
cmark_chunk_free(subj->mem, &raw_label);
867+
raw_label = cmark_chunk_dup(&subj->input, opener->position,
868+
initial_pos - opener->position - 1);
869+
found_label = true;
870+
}
871+
872+
if (found_label) {
873+
ref = cmark_reference_lookup(subj->refmap, &raw_label);
874+
cmark_chunk_free(subj->mem, &raw_label);
875+
}
847876

848877
if (ref != NULL) { // found
849878
url = chunk_clone(subj->mem, &ref->url);
@@ -855,7 +884,7 @@ static cmark_node *handle_close_bracket(subject *subj) {
855884

856885
noMatch:
857886
// If we fall through to here, it means we didn't match a link:
858-
remove_delimiter(subj, opener); // remove this opener from delimiter list
887+
pop_bracket(subj); // remove this opener from delimiter list
859888
subj->pos = initial_pos;
860889
return make_str(subj->mem, cmark_chunk_literal("]"));
861890

@@ -875,16 +904,16 @@ static cmark_node *handle_close_bracket(subject *subj) {
875904
// Free the bracket [:
876905
cmark_node_free(opener->inl_text);
877906

878-
process_emphasis(subj, opener);
907+
process_emphasis(subj, opener->previous_delimiter);
908+
pop_bracket(subj);
879909

880910
// Now, if we have a link, we also want to deactivate earlier link
881911
// delimiters. (This code can be removed if we decide to allow links
882912
// inside links.)
883-
remove_delimiter(subj, opener);
884913
if (!is_image) {
885-
opener = subj->last_delim;
914+
opener = subj->last_bracket;
886915
while (opener != NULL) {
887-
if (opener->delim_char == '[') {
916+
if (!opener->image) {
888917
if (!opener->active) {
889918
break;
890919
} else {
@@ -1005,7 +1034,7 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) {
10051034
case '[':
10061035
advance(subj);
10071036
new_inl = make_str(subj->mem, cmark_chunk_literal("["));
1008-
push_delimiter(subj, '[', true, false, new_inl);
1037+
push_bracket(subj, false, new_inl);
10091038
break;
10101039
case ']':
10111040
new_inl = handle_close_bracket(subj);
@@ -1015,7 +1044,7 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) {
10151044
if (peek_char(subj) == '[') {
10161045
advance(subj);
10171046
new_inl = make_str(subj->mem, cmark_chunk_literal("!["));
1018-
push_delimiter(subj, '!', false, true, new_inl);
1047+
push_bracket(subj, true, new_inl);
10191048
} else {
10201049
new_inl = make_str(subj->mem, cmark_chunk_literal("!"));
10211050
}
@@ -1050,6 +1079,10 @@ extern void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent, cmark_refere
10501079
;
10511080

10521081
process_emphasis(&subj, NULL);
1082+
// free bracket stack
1083+
while (subj.last_bracket) {
1084+
pop_bracket(&subj);
1085+
}
10531086
}
10541087

10551088
// Parse zero or more space characters, including at most one newline.

0 commit comments

Comments
 (0)