|
| 1 | +#pragma once |
| 2 | + |
#include <cstddef>
#include <cstdio>
#include <filesystem>
#include <memory>
#include <optional>
#include <string>
#include <string_view>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <variant>
#include <vector>
| 13 | + |
| 14 | +namespace cfbox::sh { |
| 15 | + |
// ── Token ────────────────────────────────────────────────────────
/// Kinds of lexical tokens. Enumerators take the implicit 0-based values of
/// their declaration order; the `DGreat` alias does not shift any of them.
enum class TokType {
    Word, Newline, Eof,
    Pipe, Semi, And, Or,             // | ; && ||
    LParen, RParen, LBrace, RBrace,
    Less, Great, DGreate,            // < > >>
    DGreat = DGreate,                // correctly spelled alias; `DGreate` is kept for existing callers
    LessAnd, GreatAnd,               // <& >&
    // Keywords stored as Word with keyword flag
};
| 25 | + |
| 26 | +struct Token { |
| 27 | + TokType type = TokType::Word; |
| 28 | + std::string value; |
| 29 | + bool keyword = false; // true for if/then/else/fi/while/until/do/done/for/in/case/esac |
| 30 | +}; |
| 31 | + |
// ── Redirection ──────────────────────────────────────────────────
/// One I/O redirection attached to a command.
struct Redir {
    /// Kind of redirection. Unscoped on purpose: callers spell these `Redir::Read`, etc.
    enum Type { Read, Write, Append, DupIn, DupOut };
    int fd = -1;          // target fd (default inferred: 0 for Read, 1 for Write/Append)
    Type type = Read;     // redirection kind; defaults to Read
    std::string target;   // filename or fd number for dup
};
| 39 | + |
| 40 | +// ── AST Nodes ──────────────────────────────────────────────────── |
| 41 | +struct SimpleCommand { |
| 42 | + std::vector<std::string> words; // command + args (pre-expansion) |
| 43 | + std::vector<Redir> redirs; |
| 44 | + std::vector<std::pair<std::string, std::string>> assigns; // VAR=val before cmd |
| 45 | +}; |
| 46 | + |
| 47 | +struct Pipeline; |
| 48 | +struct IfClause; |
| 49 | +struct WhileClause; |
| 50 | +struct ForClause; |
| 51 | +struct Subshell; |
| 52 | +struct BraceGroup; |
| 53 | + |
| 54 | +using Command = std::variant<SimpleCommand, |
| 55 | + std::unique_ptr<Pipeline>, |
| 56 | + std::unique_ptr<IfClause>, |
| 57 | + std::unique_ptr<WhileClause>, |
| 58 | + std::unique_ptr<ForClause>, |
| 59 | + std::unique_ptr<Subshell>, |
| 60 | + std::unique_ptr<BraceGroup>>; |
| 61 | + |
| 62 | +struct Pipeline { |
| 63 | + std::vector<Command> commands; |
| 64 | + bool negated = false; |
| 65 | +}; |
| 66 | + |
| 67 | +struct AndOr { |
| 68 | + enum class Op { Semi, And, Or }; |
| 69 | + // entries[0].first is unused (Semi); entries[0].second is the first pipeline |
| 70 | + std::vector<std::pair<Op, std::unique_ptr<Pipeline>>> entries; |
| 71 | +}; |
| 72 | + |
| 73 | +struct IfClause { |
| 74 | + std::vector<std::unique_ptr<AndOr>> conditions; // if cond; elif cond; ... |
| 75 | + std::vector<std::unique_ptr<AndOr>> bodies; // then body; elif body; ... |
| 76 | + std::unique_ptr<AndOr> else_body; // optional else |
| 77 | +}; |
| 78 | + |
| 79 | +struct WhileClause { |
| 80 | + bool is_until = false; |
| 81 | + std::unique_ptr<AndOr> condition; |
| 82 | + std::unique_ptr<AndOr> body; |
| 83 | +}; |
| 84 | + |
| 85 | +struct ForClause { |
| 86 | + std::string var_name; |
| 87 | + std::vector<std::string> words; // empty means "$@" |
| 88 | + std::unique_ptr<AndOr> body; |
| 89 | +}; |
| 90 | + |
| 91 | +struct Subshell { |
| 92 | + std::unique_ptr<AndOr> body; |
| 93 | +}; |
| 94 | + |
| 95 | +struct BraceGroup { |
| 96 | + std::unique_ptr<AndOr> body; |
| 97 | +}; |
| 98 | + |
// ── Shell State ──────────────────────────────────────────────────
/// Mutable interpreter state: variables, the exported-name set, positional
/// parameters, the last status, the script name, and public control-flow
/// flags. Member functions declared without a body are defined elsewhere.
class ShellState {
public:
    // Variables
    [[nodiscard]] auto get_var(std::string_view name) const -> std::string;
    auto set_var(const std::string& name, const std::string& value) -> void;
    auto unset_var(const std::string& name) -> void;
    auto export_var(const std::string& name) -> void;
    [[nodiscard]] auto is_exported(const std::string& name) const -> bool;
    /// Read-only view of the whole variable table.
    [[nodiscard]] auto all_vars() const noexcept -> const std::unordered_map<std::string, std::string>& { return vars_; }

    // Positional parameters
    /// Read-only view of the positional parameter list.
    [[nodiscard]] auto positional_params() const noexcept -> const std::vector<std::string>& { return positional_; }
    auto set_positional(std::vector<std::string> args) -> void;
    auto shift(int n) -> void;

    // Special parameters
    /// Most recently stored status; 0 until set_last_status() is called.
    [[nodiscard]] auto last_status() const noexcept -> int { return last_status_; }
    auto set_last_status(int s) noexcept -> void { last_status_ = s; }
    [[nodiscard]] auto shell_pid() const -> int;

    // Script name ($0)
    [[nodiscard]] auto script_name() const noexcept -> const std::string& { return script_name_; }
    /// Takes the name by value and moves it into place.
    auto set_script_name(std::string name) -> void { script_name_ = std::move(name); }

    // Control flow flags
    // Public and freely writable. NOTE(review): presumably set by builtins
    // such as exit/break/continue and read by the executor — confirm there.
    bool should_exit = false;
    int exit_status = 0;
    bool break_loop = false;
    int break_count = 0;
    bool continue_loop = false;

private:
    std::unordered_map<std::string, std::string> vars_;  // name -> value
    std::unordered_set<std::string> exported_;           // names marked exported
    std::vector<std::string> positional_;
    int last_status_ = 0;
    std::string script_name_;
};
| 138 | + |
| 139 | +// ── Lexer ──────────────────────────────────────────────────────── |
| 140 | +class Lexer { |
| 141 | +public: |
| 142 | + explicit Lexer(std::string_view input); |
| 143 | + |
| 144 | + auto next_token() -> Token; |
| 145 | + auto peek_token() -> const Token&; |
| 146 | + |
| 147 | +private: |
| 148 | + auto advance() -> char; |
| 149 | + auto peek() const -> char; |
| 150 | + auto at_end() const -> bool; |
| 151 | + |
| 152 | + auto skip_spaces_and_comments() -> void; |
| 153 | + auto read_word() -> Token; |
| 154 | + auto read_operator() -> std::optional<Token>; |
| 155 | + auto read_single_quoted() -> std::string; |
| 156 | + auto read_double_quoted() -> std::string; |
| 157 | + |
| 158 | + auto is_keyword(std::string_view word) const -> bool; |
| 159 | + |
| 160 | + std::string_view input_; |
| 161 | + std::size_t pos_ = 0; |
| 162 | + Token cached_; |
| 163 | + bool has_cached_ = false; |
| 164 | +}; |
| 165 | + |
| 166 | +// ── Parser ─────────────────────────────────────────────────────── |
| 167 | +class Parser { |
| 168 | +public: |
| 169 | + explicit Parser(Lexer& lexer); |
| 170 | + |
| 171 | + auto parse_program() -> std::unique_ptr<AndOr>; |
| 172 | + |
| 173 | +private: |
| 174 | + auto advance() -> const Token&; |
| 175 | + auto peek() const -> const Token&; |
| 176 | + auto expect(TokType type) -> bool; |
| 177 | + auto expect_keyword(std::string_view kw) -> bool; |
| 178 | + |
| 179 | + auto parse_compound_list() -> std::unique_ptr<AndOr>; |
| 180 | + auto parse_and_or() -> std::unique_ptr<AndOr>; |
| 181 | + auto parse_pipeline() -> std::unique_ptr<Pipeline>; |
| 182 | + auto parse_command() -> Command; |
| 183 | + auto parse_simple_command() -> SimpleCommand; |
| 184 | + auto parse_redirect() -> std::optional<Redir>; |
| 185 | + auto parse_if() -> std::unique_ptr<IfClause>; |
| 186 | + auto parse_while() -> std::unique_ptr<WhileClause>; |
| 187 | + auto parse_for() -> std::unique_ptr<ForClause>; |
| 188 | + auto parse_subshell() -> std::unique_ptr<Subshell>; |
| 189 | + auto parse_brace_group() -> std::unique_ptr<BraceGroup>; |
| 190 | + |
| 191 | + Lexer& lexer_; |
| 192 | + Token current_; |
| 193 | +}; |
| 194 | + |
| 195 | +// ── Executor ───────────────────────────────────────────────────── |
| 196 | +auto execute(AndOr& node, ShellState& state) -> int; |
| 197 | +auto execute_command(Command& cmd, ShellState& state) -> int; |
| 198 | + |
| 199 | +// ── Builtins ───────────────────────────────────────────────────── |
| 200 | +using BuiltinFunc = int (*)(std::vector<std::string>& args, ShellState& state); |
| 201 | + |
| 202 | +auto get_builtins() -> const std::unordered_map<std::string, BuiltinFunc>&; |
| 203 | +auto is_builtin(const std::string& name) -> bool; |
| 204 | +auto run_builtin(const std::string& name, std::vector<std::string>& args, ShellState& state) -> int; |
| 205 | + |
| 206 | +// ── Word Expansion ─────────────────────────────────────────────── |
| 207 | +auto expand_word(const std::string& word, const ShellState& state) -> std::vector<std::string>; |
| 208 | +auto expand_words(const std::vector<std::string>& words, const ShellState& state) -> std::vector<std::string>; |
| 209 | + |
| 210 | +} // namespace cfbox::sh |
0 commit comments