Skip to content

Commit 65cf7d0

Browse files
authored
Fix argument parsing (#16)
* Fix argument parsing
1 parent f17c420 commit 65cf7d0

File tree

4 files changed

+435
-38
lines changed

4 files changed

+435
-38
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ check: ## Static analysis
5555
--check-level=exhaustive --project=$(BUILD_DIR)/compile_commands.json \
5656
--suppress=missingIncludeSystem -i$(BUILD_DIR)
5757

58-
check-all: format lint check ## Run all checks
58+
check-all: test format lint check ## Run all checks
5959

6060
fix: ## Fix code formatting and linting issues
6161
@test -n "$(CLANG_FORMAT)" || { echo "error: clang-format not found"; exit 1; }

include/tl_flag.h

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -44,10 +44,12 @@ typedef struct {
4444
/**
4545
* @brief Parses the given command line arguments.
4646
*
47-
* Parses argv into flags. A flag is anything starting with "--". It can carry
48-
* a value written as --name=value, or as --name value in the next entry.
49-
* A bare "--" ends flag parsing; everything after it is a positional, even if
50-
* it starts with dashes. Any previously parsed state is thrown away first.
47+
* Parses argv into flags and positionals. A flag is anything starting
48+
* with "-" or "--" (e.g. "-h", "--help"). It can carry a value written as
49+
* --name=value, or as --name value in the next entry. A bare "-" is a
50+
* positional. A bare "--" ends flag parsing; everything after it is a
51+
* positional, even if it starts with dashes. Any previously parsed state
52+
* is thrown away first.
5153
*
5254
* @param argc The number of command line arguments.
5355
* @param argv The command line arguments.
@@ -124,8 +126,8 @@ const char *tl_get_flag_at(const char *flag, size_t index);
124126
/**
125127
* @brief Returns the number of positional arguments.
126128
*
127-
* Positionals are bare arguments (not starting with `--`) and everything
128-
* after a bare `--` terminator, in the order they appeared.
129+
* Positionals are arguments that are not parsed as flags or flag values (a bare `-` is a positional), and
130+
* everything after a bare `--` terminator, in the order they appeared.
129131
*
130132
* @return The positional argument count.
131133
*/

src/tl_flag.c

Lines changed: 69 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -13,17 +13,39 @@ static char *line_buf = NULL;
1313
static char **line_tokens = NULL;
1414

1515
/**
16-
* @brief Returns whether the token is a long flag (starts with "--" and has content).
16+
* @brief Returns whether the token is a flag.
17+
*
18+
* A flag starts with "-" or "--" and is not a bare "-" or "--".
19+
* A bare "-" is not a flag (it's a common stdin placeholder).
20+
* A bare "--" is the positional terminator and is handled separately.
1721
*/
18-
static bool is_long_flag(const char *s) {
19-
return s != NULL && s[0] == '-' && s[1] == '-' && s[2] != '\0';
22+
static bool is_flag(const char *s) {
    // NULL, anything that does not begin with '-', and a bare "-" (commonly
    // used as a stdin placeholder) are never flags. The checks short-circuit
    // left to right, so s[1] is only read once s[0] is known to be '-'.
    if (s == NULL || s[0] != '-' || s[1] == '\0') {
        return false;
    }
    // Exactly "--" is the positional terminator; callers detect it separately.
    if (s[1] == '-' && s[2] == '\0') {
        return false;
    }
    // Everything else that starts with a dash ("-h", "--help", "--k=v", ...).
    return true;
}
2131

2232
/**
2333
* @brief Returns whether the token is the bare "--" terminator.
2434
*/
2535
static bool is_dash_dash(const char *s) {
    // True only for the exact two-character string "--". Short-circuit
    // evaluation guarantees each index is read only after the previous
    // character was found to be non-NUL, so no read goes past the terminator.
    return s != NULL && s[0] == '-' && s[1] == '-' && s[2] == '\0';
}
2850

2951
/**
@@ -40,7 +62,7 @@ static bool flag_matches(const tl_flag_t *f, const char *name, size_t name_len)
4062
* @brief Fills the flag and positional tables from a token list.
4163
*
4264
* The first token is the program name and is skipped. The rest are
43-
* sorted into flags (anything starting with "--") and positionals
65+
* sorted into flags (tokens starting with "-" or "--", except a bare "-") and positionals
4466
* (everything else, plus anything after a bare "--").
4567
*/
4668
static bool parse_tokens(char **tokens, int count) {
@@ -70,8 +92,8 @@ static bool parse_tokens(char **tokens, int count) {
7092
after_dd = true;
7193
continue;
7294
}
73-
// Long flag
74-
if (is_long_flag(tok)) {
95+
// Flag
96+
if (is_flag(tok)) {
7597
char *eq = strchr(tok, '=');
7698
if (eq) {
7799
flags[flag_count].name = tok;
@@ -81,7 +103,7 @@ static bool parse_tokens(char **tokens, int count) {
81103
const char *value = NULL;
82104
// Consume the next token as the value if it is not another flag
83105
// and not the "--" terminator
84-
if (i + 1 < count && !is_long_flag(tokens[i + 1]) && !is_dash_dash(tokens[i + 1])) {
106+
if (i + 1 < count && !is_flag(tokens[i + 1]) && !is_dash_dash(tokens[i + 1])) {
85107
value = tokens[i + 1];
86108
i++;
87109
}
@@ -98,6 +120,43 @@ static bool parse_tokens(char **tokens, int count) {
98120
return true;
99121
}
100122

123+
/**
124+
* @brief Reads one token from `line` starting at `*i` into `line_buf` at `*bi`.
125+
*
126+
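* Stops at an unquoted space or tab, or at end of line. Double quotes are dropped and a
* backslash copies the following character literally. Writes the NUL terminator on success.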
127+
* Returns true on success, false if a quoted string was never closed.
128+
*/
129+
static bool read_one_token(const char *line, size_t len, size_t *i, size_t *bi) {
130+
bool in_quote = false;
131+
while (*i < len) {
132+
char c = line[*i];
133+
if (!in_quote && (c == ' ' || c == '\t')) {
134+
break;
135+
}
136+
if (c == '"') {
137+
if (in_quote) {
138+
in_quote = false;
139+
} else {
140+
in_quote = true;
141+
}
142+
(*i)++;
143+
continue;
144+
}
145+
if (c == '\\' && *i + 1 < len) {
146+
line_buf[(*bi)++] = line[*i + 1];
147+
*i += 2;
148+
continue;
149+
}
150+
line_buf[(*bi)++] = c;
151+
(*i)++;
152+
}
153+
if (in_quote) {
154+
return false;
155+
}
156+
line_buf[(*bi)++] = '\0';
157+
return true;
158+
}
159+
101160
/**
102161
* @brief Splits a command line string into tokens stored in line_tokens.
103162
*
@@ -131,31 +190,10 @@ static int tokenize_line(const char *line) {
131190
if (i >= len) {
132191
break;
133192
}
134-
// Start a new token at the current buffer position
135193
line_tokens[n++] = &line_buf[bi];
136-
bool in_quote = false;
137-
while (i < len) {
138-
char c = line[i];
139-
if (!in_quote && (c == ' ' || c == '\t')) {
140-
break;
141-
}
142-
if (c == '"') {
143-
in_quote = in_quote ? false : true;
144-
i++;
145-
continue;
146-
}
147-
if (c == '\\' && i + 1 < len) {
148-
line_buf[bi++] = line[i + 1];
149-
i += 2;
150-
continue;
151-
}
152-
line_buf[bi++] = c;
153-
i++;
154-
}
155-
if (in_quote) {
156-
return -1; // unterminated quoted string
194+
if (!read_one_token(line, len, &i, &bi)) {
195+
return -1;
157196
}
158-
line_buf[bi++] = '\0';
159197
}
160198
return (int)n;
161199
}

0 commit comments

Comments
 (0)