Skip to content

Commit 1c0a5b5

Browse files
fix markdown parser to deal with void tags/unclosed void tags
1 parent fb6cd93 commit 1c0a5b5

5 files changed

Lines changed: 186 additions & 14 deletions

File tree

include/basic/builtin_string_functions.h

Lines changed: 64 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -242,14 +242,77 @@ char* basic_str(struct basic_ctx* ctx);
242242
*/
243243
char* basic_bool(struct basic_ctx* ctx);
244244

245+
/**
246+
* @brief Replaces all occurrences of a substring.
247+
*
248+
* This function searches a string for every occurrence of a target substring and replaces it
249+
* with another string.
250+
*
251+
* @param ctx BASIC interpreter context
252+
* @return The string with matching substrings replaced
253+
*/
245254
char* basic_replace(struct basic_ctx* ctx);
246255

256+
/**
257+
* @brief Applies BASIC syntax highlighting to a string.
258+
*
259+
* This function returns a copy of the supplied BASIC source string with ANSI colour escape
260+
* sequences inserted for keywords, functions, strings, comments, numbers, and operators.
261+
*
262+
* @param ctx BASIC interpreter context
263+
* @return The syntax-highlighted string
264+
*/
247265
char* basic_highlight(struct basic_ctx* ctx);
248266

267+
/**
268+
* @brief Executes the BASIC MATCH statement.
269+
*
270+
* This statement evaluates a regular expression against a string and stores the match result
271+
* in the requested BASIC variable. Optional capture variables receive captured substrings.
272+
*
273+
* @param ctx BASIC interpreter context
274+
*/
249275
void match_statement(struct basic_ctx *ctx);
250276

277+
/**
278+
* @brief Returns a secure random string.
279+
*
280+
* This function generates a string using the system secure random source.
281+
*
282+
* @param ctx BASIC interpreter context
283+
* @return A secure random string
284+
*/
251285
char* basic_secure_random_string(struct basic_ctx* ctx);
252286

287+
/**
288+
* @brief Copies a BASIC string into a memory buffer.
289+
*
290+
* This function writes the contents of a BASIC string to a raw memory buffer, translating escaped
291+
* NUL and escape bytes back to their original byte values.
292+
*
293+
* @param ctx BASIC interpreter context
294+
* @return The number of bytes written to the buffer
295+
*/
253296
int64_t basic_string_to_buffer(struct basic_ctx *ctx);
254297

255-
char *basic_buffer_to_string(struct basic_ctx *ctx);
298+
/**
299+
* @brief Converts a memory buffer to a BASIC string.
300+
*
301+
* This function reads bytes from a raw memory buffer and returns a BASIC string, escaping embedded
302+
* NUL and escape bytes so they can be stored safely.
303+
*
304+
* @param ctx BASIC interpreter context
305+
* @return The encoded BASIC string
306+
*/
307+
char *basic_buffer_to_string(struct basic_ctx *ctx);
308+
309+
/**
310+
* @brief Converts HTML to Markdown.
311+
*
312+
* This function converts an HTML string into Markdown text using the built-in HTML to Markdown
313+
* converter.
314+
*
315+
* @param ctx BASIC interpreter context
316+
* @return The converted Markdown string
317+
*/
318+
char* basic_markdown(struct basic_ctx* ctx);

os/programs/voyager.rrbasic

Lines changed: 58 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -83,32 +83,77 @@ DEF PROCtext
8383
PROCshowCursor
8484
ENDPROC
8585

86+
REM Return the Markdown heading prefix ("# " through "###### ", including the
REM trailing space) that L$ begins with, or an empty string if L$ does not
REM start with one of those prefixes.
DEF FNmarkdown_header$(L$)
87+
R$ = ""
88+
REM Each prefix ends in a space directly after its run of hashes, so a given
REM line can match at most one of the six tests below.
IF LEFT$(L$, 7) = "###### " THEN R$ = "###### "
89+
IF LEFT$(L$, 6) = "##### " THEN R$ = "##### "
90+
IF LEFT$(L$, 5) = "#### " THEN R$ = "#### "
91+
IF LEFT$(L$, 4) = "### " THEN R$ = "### "
92+
IF LEFT$(L$, 3) = "## " THEN R$ = "## "
93+
IF LEFT$(L$, 2) = "# " THEN R$ = "# "
94+
= R$
95+
96+
REM Choose the character position at which L$ should be cut so that it fits in
REM width columns without splitting bracketed text such as [label](target).
REM Returns LEN(L$) when the line already fits; otherwise the position of the
REM last space outside brackets within the first width characters; otherwise
REM the first such space after width; otherwise LEN(L$).
DEF FNmarkdown_wrap_pos(L$, width)
97+
depth = 0
98+
last_space = 0
99+
i = 1
100+
result = 0
101+
102+
IF LEN(L$) <= width THEN
103+
result = LEN(L$)
104+
ELSE
105+
REM First pass: scan positions 1..width, tracking bracket nesting in depth.
REM "(" only counts once a "[" has been opened (depth > 0), and a space is
REM only remembered as a break candidate while depth is 0.
REPEAT
106+
C$ = MID$(L$, i, 1)
107+
IF C$ = "[" THEN depth = depth + 1
108+
IF C$ = "(" AND depth > 0 THEN depth = depth + 1
109+
IF C$ = ")" AND depth > 0 THEN depth = depth - 1
110+
IF C$ = "]" AND depth > 0 THEN depth = depth - 1
111+
IF C$ = " " AND depth = 0 THEN last_space = i
112+
i = i + 1
113+
UNTIL i > width
114+
IF last_space > 0 THEN
115+
result = last_space
116+
ELSE
117+
REM Second pass: no usable space within width, so continue from width + 1.
REM depth carries over from the first pass; any "]" or ")" resets it to 0,
REM and the first space seen while depth is 0 becomes the result.
i = width + 1
118+
REPEAT
119+
C$ = MID$(L$, i, 1)
120+
IF C$ = "]" THEN depth = 0
121+
IF C$ = ")" THEN depth = 0
122+
IF C$ = " " AND depth = 0 AND result = 0 THEN result = i
123+
i = i + 1
124+
UNTIL i > LEN(L$) OR result <> 0
125+
REM Nothing breakable was found: keep the whole line.
IF result = 0 THEN result = LEN(L$)
126+
ENDIF
127+
ENDIF
128+
= result
129+
86130
REM load a file into the array, translating TAB to four spaces
87131
DEF PROCload(file$)
88-
PROChttp_get("https://brainbox.cc/")
132+
PROChttp_get("http://frogfind.com/")
89133
body$ = FNhttp_result$("body")
134+
body$ = MARKDOWN$(body$)
90135
body$ = REPLACE$(body$, CHR$(13) + CHR$(10), CHR$(13))
91136
body$ = REPLACE$(body$, CHR$(10) + CHR$(13), CHR$(13))
92137
body$ = REPLACE$(body$, CHR$(10), CHR$(13))
93138
lines = 0
94139
REPEAT
95140
CONT$ = TOKENIZE$(body$, CHR$(13))
96-
IF LEN(CONT$) > TERMWIDTH - 2 THEN
97-
REPEAT
98-
L$ = LEFT$(CONT$, TERMWIDTH - 2)
99-
CONT$ = MID$(CONT$, TERMWIDTH - 2, LEN(CONT$))
141+
header$ = FNmarkdown_header$(CONT$)
142+
REPEAT
143+
cut = FNmarkdown_wrap_pos(CONT$, TERMWIDTH - 2)
144+
IF cut < LEN(CONT$) THEN
145+
L$ = LEFT$(CONT$, cut)
100146
content$(lines) = L$
101147
lines = lines + 1
102-
IF lines = max THEN
103-
PROCgrow
104-
ENDIF
105-
UNTIL LEN(CONT$) <= TERMWIDTH - 2
106-
ENDIF
148+
IF lines >= max THEN PROCgrow
149+
CONT$ = MID$(CONT$, cut + 1, LEN(CONT$))
150+
IF LEFT$(CONT$, 1) = " " THEN CONT$ = MID$(CONT$, 2, LEN(CONT$))
151+
IF header$ <> "" THEN CONT$ = header$ + CONT$
152+
ENDIF
153+
UNTIL cut >= LEN(CONT$)
107154
content$(lines) = CONT$
108155
lines = lines + 1
109-
IF lines = max THEN
110-
PROCgrow
111-
ENDIF
156+
IF lines >= max THEN PROCgrow
112157
UNTIL body$ = ""
113158
ENDPROC
114159

src/basic/function.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,7 @@ struct basic_str_fn builtin_str[] =
275275
{ basic_bigshr, "BIGSHR$" },
276276
{ basic_bigmodpow, "BIGMODPOW$" },
277277
{ basic_bigmodinv, "BIGMODINV$" },
278+
{ basic_markdown, "MARKDOWN$" },
278279
{ NULL, NULL },
279280
};
280281

src/basic/string.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,20 @@ char* basic_lower(struct basic_ctx* ctx)
320320
return modified;
321321
}
322322

323+
char* basic_markdown(struct basic_ctx* ctx) {
324+
PARAMS_START;
325+
PARAMS_GET_ITEM(BIP_STRING);
326+
PARAMS_END("MARKDOWN$", "");
327+
html2md_result_t out = {};
328+
if (html2md_convert(strval, NULL, &out)) {
329+
char* out_md = (char*)gc_strdup(ctx, out.markdown);
330+
html2md_free(&out);
331+
html2md_define_glyphs();
332+
return out_md;
333+
}
334+
return "";
335+
}
336+
323337
char* basic_highlight(struct basic_ctx* ctx) {
324338
GENERATE_ENUM_STRING_NAMES(TOKEN, token_names)
325339
GENERATE_ENUM_STRING_LENGTHS(TOKEN, token_name_lengths)

src/html_md.c

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,11 +85,27 @@ typedef struct {
8585
html2md_options_t opt;
8686

8787
int suppress_text_depth;
88+
int form_depth;
8889
} html2md_ctx_t;
8990

9091
static void md_append_char(html2md_ctx_t *ctx, char ch);
9192
static char md_prev_char(const html2md_ctx_t *ctx);
9293

94+
/**
 * @brief Report whether a tag is treated as a void element by the converter.
 *
 * The tags recognised here are br, img, hr, link, meta and input.
 *
 * @param tag Tag identifier to test
 * @return 1 if the tag is one of the listed void tags, 0 otherwise
 */
static int tag_is_void(tag_t tag)
{
	const int is_void =
		tag == tag_br ||
		tag == tag_img ||
		tag == tag_hr ||
		tag == tag_link ||
		tag == tag_meta ||
		tag == tag_input;

	return is_void;
}
108+
93109
static void html2md_set_default_options(html2md_options_t *opt)
94110
{
95111
memset(opt, 0, sizeof(*opt));
@@ -577,6 +593,9 @@ static int decode_entity(const char *s, size_t *consumed, const char **replaceme
577593
static void open_tag(html2md_ctx_t *ctx, tag_t tag)
578594
{
579595
if (tag_is_ignored(tag, &ctx->opt)) {
596+
if (!tag_is_void(tag)) {
597+
ctx->ignore_depth++;
598+
}
580599
ctx->ignore_depth++;
581600
return;
582601
}
@@ -1082,8 +1101,38 @@ static void finish_tag(html2md_ctx_t *ctx)
10821101

10831102
tag_t tag = tag_from_name(ctx->tag_name);
10841103

1104+
if (tag == tag_form) {
1105+
if (!ctx->closing_tag) {
1106+
ctx->form_depth++;
1107+
} else {
1108+
if (ctx->form_depth == 0) {
1109+
return;
1110+
}
1111+
1112+
ctx->form_depth--;
1113+
}
1114+
}
1115+
1116+
if (tag_is_ignored(tag, &ctx->opt)) {
1117+
if (!ctx->closing_tag) {
1118+
if (!tag_is_void(tag)) {
1119+
ctx->ignore_depth++;
1120+
}
1121+
} else {
1122+
if (ctx->ignore_depth != 0) {
1123+
ctx->ignore_depth--;
1124+
}
1125+
}
1126+
1127+
return;
1128+
}
1129+
10851130
if (!ctx->closing_tag) {
10861131
open_tag(ctx, tag);
1132+
1133+
if (tag_is_void(tag)) {
1134+
close_tag(ctx, tag);
1135+
}
10871136
} else {
10881137
close_tag(ctx, tag);
10891138
}

0 commit comments

Comments
 (0)