|
| 1 | +# ============================================================================= |
| 2 | +# Self-Hosting Codegen (Phase V, milestone 4) |
| 3 | +# ============================================================================= |
| 4 | +# Consumes an AST (the nested-tagged-array form produced by V.3) and emits |
| 5 | +# canonical OMNIcode source. The round-trip invariant: |
| 6 | +# |
| 7 | +# source → tokens → AST → source' → tokens' → AST' |
| 8 | +# where AST == AST' |
| 9 | +# |
| 10 | +# is the contract. The emitted source isn't required to be byte-identical |
| 11 | +# to the input — whitespace and parens can differ — but the AST you get |
| 12 | +# back from re-parsing must be the same. |
| 13 | +# |
| 14 | +# Why this matters: with codegen in place, OMC programs can MANIPULATE |
| 15 | +# their own AST and emit back runnable source. Refactoring tools, |
| 16 | +# rewriting passes, and the omnicc-style optimizer-as-source-transform |
| 17 | +# all become accessible from within OMC itself. |
| 18 | +# ============================================================================= |
| 19 | + |
# ---------------------------------------------------------------------------
# indent_of(level): build the leading whitespace for a nesting depth.
#
#   level    nesting depth; 0 (or anything below 1) yields "".
#   returns  `level` copies of the indent unit, concatenated.
#
# The indent unit is exactly two spaces, so the emitted source matches the
# "two spaces per level" layout this file documents for its output.
# ---------------------------------------------------------------------------
fn indent_of(level) -> string {
    h unit = "  ";
    h pad = "";
    h depth = 0;
    while depth < level {
        pad = str_concat(pad, unit);
        depth = depth + 1;
    }
    return pad;
}
| 32 | + |
# ---------------------------------------------------------------------------
# String escaping for source emission.
#
# escape_for_source(s) rewrites a raw string value so it can be placed
# between double quotes in emitted source: newline, tab, carriage return,
# double quote and backslash become their two-character escape sequences;
# every other character is copied through unchanged.
# Intended as the mirror of the lexer's escape-decode step (lexer not shown
# in this file).
# ---------------------------------------------------------------------------

# Map one single-character string to the text emitted for it inside a
# string literal. Characters without an escape form come back as-is.
fn escape_char_for_source(c) -> string {
    if c == "\n" { return "\\n"; }
    if c == "\t" { return "\\t"; }
    if c == "\r" { return "\\r"; }
    if c == "\"" { return "\\\""; }
    if c == "\\" { return "\\\\"; }
    return c;
}

# Walk `s` one character at a time and append each character's escaped form.
fn escape_for_source(s) -> string {
    h escaped = "";
    h total = str_len(s);
    h pos = 0;
    while pos < total {
        h ch = str_slice(s, pos, pos + 1);
        escaped = str_concat(escaped, escape_char_for_source(ch));
        pos = pos + 1;
    }
    return escaped;
}
| 68 | + |
# ---------------------------------------------------------------------------
# Expression emission.
#
# emit_expr(ast) turns one expression node into source text. Node shapes
# read by this function (slot 0 is always the tag):
#
#   ["NUMBER", text] ["FLOAT", text] ["BOOL", text] ["VAR", text]
#       -> slot 1 is returned verbatim
#   ["STRING", value]          -> value escaped and wrapped in double quotes
#   ["BINOP", op, left, right] -> "(left op right)", always parenthesised
#   ["CALL", name, args]       -> "name(arg, arg, ...)"
#
# Every BINOP is parenthesised on purpose: the round-trip rule is "no
# precedence ambiguity", not "minimal parens". Any other tag produces a
# "/* UNKNOWN_EXPR tag */" marker instead of failing.
# ---------------------------------------------------------------------------

# Emit each argument expression and join the results with ", ".
fn emit_call_args(args) -> string {
    h joined = "";
    h count = arr_len(args);
    h k = 0;
    while k < count {
        if k > 0 { joined = str_concat(joined, ", "); }
        joined = str_concat(joined, emit_expr(arr_get(args, k)));
        k = k + 1;
    }
    return joined;
}

fn emit_expr(ast) -> string {
    h node_tag = arr_get(ast, 0);

    # Leaf nodes whose payload already is their source text.
    if node_tag == "NUMBER" { return arr_get(ast, 1); }
    if node_tag == "FLOAT" { return arr_get(ast, 1); }
    if node_tag == "BOOL" { return arr_get(ast, 1); }
    if node_tag == "VAR" { return arr_get(ast, 1); }

    if node_tag == "STRING" {
        h quoted_body = escape_for_source(arr_get(ast, 1));
        return concat_many("\"", quoted_body, "\"");
    }
    if node_tag == "BINOP" {
        h operator = arr_get(ast, 1);
        h lhs = emit_expr(arr_get(ast, 2));
        h rhs = emit_expr(arr_get(ast, 3));
        return concat_many("(", lhs, " ", operator, " ", rhs, ")");
    }
    if node_tag == "CALL" {
        h callee = arr_get(ast, 1);
        h text = str_concat(callee, "(");
        text = str_concat(text, emit_call_args(arr_get(ast, 2)));
        return str_concat(text, ")");
    }

    # Unrecognised tag: leave a visible marker in the output.
    return concat_many("/* UNKNOWN_EXPR ", node_tag, " */");
}
| 115 | + |
# ---------------------------------------------------------------------------
# Statement emission.
#
# emit_stmt(ast, level) turns one statement node into source text that
# ends with a newline. `level` is the indent depth (0 = top level); nested
# bodies are emitted at level + 1. Node shapes read by this function
# (slot 0 is always the tag):
#
#   ["VARDECL", name, expr]             -> "h name = expr;"
#   ["ASSIGN", name, expr]              -> "name = expr;"
#   ["EXPRSTMT", expr]                  -> "expr;"
#   ["RETURN", expr-or-"null"]          -> "return expr;" or "return;"
#   ["PRINT", expr]                     -> "print(expr);"
#   ["IF", cond, then_stmts, else_stmts]-> if-block, plus else-block when
#                                          else_stmts is non-empty
#   ["WHILE", cond, body_stmts]         -> while-block
#   ["FNDEF", name, params, body_stmts] -> "fn name(p, q) { ... }"
#
# Any other tag produces a "/* UNKNOWN_STMT tag */" marker line.
# ---------------------------------------------------------------------------

# Join parameter names with ", " for a function header.
fn emit_param_list(params) -> string {
    h joined = "";
    h count = arr_len(params);
    h k = 0;
    while k < count {
        if k > 0 { joined = str_concat(joined, ", "); }
        joined = str_concat(joined, arr_get(params, k));
        k = k + 1;
    }
    return joined;
}

fn emit_stmt(ast, level) -> string {
    h pad = indent_of(level);
    h node_tag = arr_get(ast, 0);

    # --- single-line statements ---
    if node_tag == "VARDECL" {
        h declared = arr_get(ast, 1);
        h init_text = emit_expr(arr_get(ast, 2));
        return concat_many(pad, "h ", declared, " = ", init_text, ";\n");
    }
    if node_tag == "ASSIGN" {
        h target = arr_get(ast, 1);
        h value_text = emit_expr(arr_get(ast, 2));
        return concat_many(pad, target, " = ", value_text, ";\n");
    }
    if node_tag == "EXPRSTMT" {
        h expr_text = emit_expr(arr_get(ast, 1));
        return concat_many(pad, expr_text, ";\n");
    }
    if node_tag == "PRINT" {
        h shown = emit_expr(arr_get(ast, 1));
        return concat_many(pad, "print(", shown, ");\n");
    }
    if node_tag == "RETURN" {
        h payload = arr_get(ast, 1);
        # NOTE(review): a bare `return;` is recognised by slot 1 being equal
        # to the string "null" — confirm this matches what the parser stores.
        if payload == "null" {
            return concat_many(pad, "return;\n");
        }
        return concat_many(pad, "return ", emit_expr(payload), ";\n");
    }

    # --- statements with nested blocks ---
    if node_tag == "WHILE" {
        h loop_cond = emit_expr(arr_get(ast, 1));
        h loop_body = emit_block(arr_get(ast, 2), level + 1);
        return concat_many(pad, "while ", loop_cond, " {\n", loop_body, pad, "}\n");
    }
    if node_tag == "IF" {
        h cond_text = emit_expr(arr_get(ast, 1));
        h then_text = emit_block(arr_get(ast, 2), level + 1);
        h else_stmts = arr_get(ast, 3);
        h text = concat_many(pad, "if ", cond_text, " {\n", then_text, pad, "}");
        # The else clause is only written when it has at least one statement.
        if arr_len(else_stmts) > 0 {
            h else_text = emit_block(else_stmts, level + 1);
            text = concat_many(text, " else {\n", else_text, pad, "}");
        }
        return str_concat(text, "\n");
    }
    if node_tag == "FNDEF" {
        h fn_name = arr_get(ast, 1);
        h fn_params = arr_get(ast, 2);
        h fn_body = emit_block(arr_get(ast, 3), level + 1);
        h header_params = emit_param_list(fn_params);
        return concat_many(pad, "fn ", fn_name, "(", header_params, ") {\n", fn_body, pad, "}\n");
    }

    # Unrecognised tag: leave a visible marker line in the output.
    return concat_many(pad, "/* UNKNOWN_STMT ", node_tag, " */\n");
}
| 181 | + |
# ---------------------------------------------------------------------------
# emit_block(stmts, level): emit every statement of `stmts` (an array of
# statement nodes) at indent depth `level`, in order, and return the
# concatenated text. An empty array yields "".
# ---------------------------------------------------------------------------
fn emit_block(stmts, level) -> string {
    h text = "";
    h k = 0;
    h count = arr_len(stmts);
    while k < count {
        h piece = emit_stmt(arr_get(stmts, k), level);
        text = str_concat(text, piece);
        k = k + 1;
    }
    return text;
}
| 196 | + |
# ---------------------------------------------------------------------------
# emit_program(stmts): emit a whole program, i.e. an array of top-level
# statement nodes, as a block at indent depth 0.
# ---------------------------------------------------------------------------
fn emit_program(stmts) -> string {
    h top_level = 0;
    return emit_block(stmts, top_level);
}
| 203 | + |
# ===========================================================================
# Demo driver.
#
# Each demo prints a banner, builds an AST by hand, emits it and prints the
# resulting source. The hand-written ASTs use the same nested-tagged-array
# shapes the emitter functions above read. Running the real pipeline
# (source → tokens → AST → source') would mean composing lexer + parser +
# codegen in one file, which is left out to keep the demo small.
# ===========================================================================

print("== Self-Hosting Codegen (Phase V, milestone 4) ==");
print("");

# Demo 1 — a declaration whose initialiser is a binary expression.
print("--- AST 1: VARDECL x = (89 + 144) ---");
h ast1 = ["VARDECL", "x", ["BINOP", "+", ["NUMBER", "89"], ["NUMBER", "144"]]];
print("Emitted OMC source:");
h emitted1 = emit_stmt(ast1, 0);
print(emitted1);

# Demo 2 — if/else where both branches return.
print("--- AST 2: if x == 89 { return x; } else { return 0; } ---");
h ast2 = [
    "IF",
    ["BINOP", "==", ["VAR", "x"], ["NUMBER", "89"]],
    [
        ["RETURN", ["VAR", "x"]]
    ],
    [
        ["RETURN", ["NUMBER", "0"]]
    ]
];
print("Emitted OMC source:");
h emitted2 = emit_stmt(ast2, 0);
print(emitted2);

# Demo 3 — a function definition whose body contains nested calls.
print("--- AST 3: fn fib(n) { return fib(n-1) + fib(n-2); } ---");
h ast3 = [
    "FNDEF", "fib", ["n"],
    [
        [
            "RETURN",
            [
                "BINOP", "+",
                ["CALL", "fib", [["BINOP", "-", ["VAR", "n"], ["NUMBER", "1"]]]],
                ["CALL", "fib", [["BINOP", "-", ["VAR", "n"], ["NUMBER", "2"]]]]
            ]
        ]
    ]
];
print("Emitted OMC source:");
h emitted3 = emit_stmt(ast3, 0);
print(emitted3);

# Demo 4 — a while loop with two assignments in its body.
print("--- AST 4: while i < 10 { sum = sum + i; i = i + 1; } ---");
h ast4 = [
    "WHILE",
    ["BINOP", "<", ["VAR", "i"], ["NUMBER", "10"]],
    [
        ["ASSIGN", "sum", ["BINOP", "+", ["VAR", "sum"], ["VAR", "i"]]],
        ["ASSIGN", "i", ["BINOP", "+", ["VAR", "i"], ["NUMBER", "1"]]]
    ]
];
print("Emitted OMC source:");
h emitted4 = emit_stmt(ast4, 0);
print(emitted4);

# Demo 5 — a complete small program: the kind of output a refactoring tool
# would produce after transforming an AST and emitting it back.
print("--- AST 5: a small program ---");
h prog = [
    [
        "FNDEF", "double", ["x"],
        [
            ["RETURN", ["BINOP", "*", ["VAR", "x"], ["NUMBER", "2"]]]
        ]
    ],
    ["VARDECL", "n", ["NUMBER", "21"]],
    ["VARDECL", "m", ["CALL", "double", [["VAR", "n"]]]],
    ["PRINT", ["VAR", "m"]],
    [
        "IF",
        ["BINOP", "==", ["VAR", "m"], ["NUMBER", "42"]],
        [
            ["PRINT", ["STRING", "the answer"]]
        ],
        [
            ["PRINT", ["STRING", "not it"]]
        ]
    ]
];
print("Emitted OMC source (a complete runnable program):");
h emitted_prog = emit_program(prog);
print(emitted_prog);

# Closing notes printed for the reader of the demo output.
print("== Observations ==");
print("- Every AST node has a canonical source form. The pretty-printer");
print(" is total — every node maps to legible OMC.");
print("- BINOPs always get parens. Costs a few extra characters; saves us");
print(" from re-implementing the parser's precedence table. The output");
print(" is still re-parseable to the same AST.");
print("- Indentation is two spaces per level. The output is human-readable");
print(" even for nested fn-defs and if/else trees.");
print("- This emitter, fed into the lexer→parser, would produce an AST");
print(" the same shape we started with. Closing that loop in a single");
print(" OMC program is V.5 — the fixpoint demo.");
0 commit comments