From 05800fd715a243dd44baff818289080f2d834ebf Mon Sep 17 00:00:00 2001 From: Mitchell Paulus Date: Sun, 1 Feb 2026 08:46:48 -0600 Subject: [PATCH 1/2] Start config loading development --- DEVELOP.md | 21 ++++++++++ config.msh | 9 +++++ lib/std.msh | 2 +- mshell/Config.go | 99 ++++++++++++++++++++++++++++++++++++++++++++++++ mshell/Main.go | 36 ++++++++++++++++-- 5 files changed, 162 insertions(+), 5 deletions(-) create mode 100644 config.msh create mode 100644 mshell/Config.go diff --git a/DEVELOP.md b/DEVELOP.md index 851c60e4..d5ac87cc 100644 --- a/DEVELOP.md +++ b/DEVELOP.md @@ -94,6 +94,27 @@ MShellObject MShellFloat ``` +# Executable Lookups + +Two steps: + +- Find the absolute path to file +- Understand how to transform CLI to run file + +## Linux/Unix like: + +Finding: + +- PATH: all files with an executable bit +- Explicit set + +Running: + +- If it has shebang -> Run directly, let OS handle interpreter setup. +- No shebang -> Can check for extension/explicit pattern configuration. + +Map exact name -> [ 'asdf' 'asdfasdf' ] + ## References [fish shell built in](https://github.com/fish-shell/fish-shell/tree/master/src/builtins) diff --git a/config.msh b/config.msh new file mode 100644 index 00000000..a2c2391c --- /dev/null +++ b/config.msh @@ -0,0 +1,9 @@ +{ + 'bins': { + 'name': ['name'], + 'othername': ['uv' 'run'] + }, + 'binPatterns': { + '*.py': ['uv' 'run' '--script' @file] + } +} diff --git a/lib/std.msh b/lib/std.msh index 54e18edf..bc64eec6 100644 --- a/lib/std.msh +++ b/lib/std.msh @@ -461,7 +461,7 @@ end # msh {{{ def __mshCompletion { 'complete': ['msh' 'mshell'] } ([str] -- [str]) input! - ['-h' '--help' '--html' '--lex' '--parse' '--version' '-c'] options! + ['-h' '--help' '--config' '--html' '--lex' '--parse' '--version' '-c'] options! ['lsp' 'bin' 'completions'] subcommands! 
@input len 0 > if diff --git a/mshell/Config.go b/mshell/Config.go new file mode 100644 index 00000000..97448c18 --- /dev/null +++ b/mshell/Config.go @@ -0,0 +1,99 @@ +package main + +import ( + "fmt" + "os" + "path/filepath" +) + +const mshellConfigEnvVar = "MSH_CONFIG" + +var loadedConfigDict *MShellParseDict +var loadedConfigPath string + +// LoadConfig resolves, reads, and parses the config dictionary. +// Cases: +// - --config PATH: returns parsed dict + path, or error if missing/unreadable/invalid. +// - MSH_CONFIG set: returns parsed dict + path, or error if missing/unreadable/invalid. +// - Default path exists: returns parsed dict + path, or error if unreadable/invalid. +// - Default path missing: returns (nil, "", nil). +func LoadConfig(configFlagPath string) (*MShellParseDict, string, error) { + configPath, explicit, err := resolveConfigPath(configFlagPath) + if err != nil { + return nil, "", err + } + if configPath == "" { + return nil, "", nil + } + + contents, err := os.ReadFile(configPath) + if err != nil { + if !explicit && os.IsNotExist(err) { + return nil, "", nil + } + return nil, "", fmt.Errorf("config file %s: %w", configPath, err) + } + + dict, err := parseConfigDict(configPath, string(contents)) + if err != nil { + return nil, "", err + } + + return dict, configPath, nil +} + +// resolveConfigPath returns the resolved path and whether it was explicitly set. +func resolveConfigPath(configFlagPath string) (string, bool, error) { + if configFlagPath != "" { + return configFlagPath, true, nil + } + + if envValue, ok := os.LookupEnv(mshellConfigEnvVar); ok && envValue != "" { + return envValue, true, nil + } + + defaultPath, err := defaultConfigPath() + if err != nil { + return "", false, err + } + + return defaultPath, false, nil +} + +// defaultConfigPath returns the XDG or ~/.config default config path. 
// defaultConfigPath returns the default location of the config file.
//
// If XDG_CONFIG_HOME is set to an absolute path the result is
// $XDG_CONFIG_HOME/mshell/config.msh. Otherwise the result is
// <home>/.config/mshell/config.msh, where <home> comes from os.UserHomeDir;
// its error is returned unchanged when the home directory cannot be
// determined.
//
// An unset, empty, or relative XDG_CONFIG_HOME is ignored. The XDG Base
// Directory specification requires the value to be absolute, and honouring a
// relative one would make the "default" config depend on the current working
// directory.
func defaultConfigPath() (string, error) {
	// filepath.IsAbs("") is false, so this also covers the unset/empty case.
	if xdgConfigHome := os.Getenv("XDG_CONFIG_HOME"); filepath.IsAbs(xdgConfigHome) {
		return filepath.Join(xdgConfigHome, "mshell", "config.msh"), nil
	}

	homeDir, err := os.UserHomeDir()
	if err != nil {
		return "", err
	}

	return filepath.Join(homeDir, ".config", "mshell", "config.msh"), nil
}
config from PATH (overrides MSH_CONFIG and defaults)") fmt.Println(" --html Render the input as HTML") fmt.Println(" --lex Print the tokens lexed from the input") fmt.Println(" --parse Print the parsed Abstract Syntax Tree as JSON") @@ -189,6 +201,14 @@ func main() { return } + if command == CLIEXECUTE { + loadedConfigDict, loadedConfigPath, err = LoadConfig(configPathFlag) + if err != nil { + fmt.Fprintln(os.Stderr, err.Error()) + os.Exit(1) + } + } + if len(input) == 0 && term.IsTerminal(stdOutFd) && term.IsTerminal(int(os.Stdin.Fd())) { // fmt.Fprintf(os.Stdout, "Got here\n") numRows, numCols, err := term.GetSize(stdOutFd) @@ -3554,9 +3574,14 @@ _msh_completion() { cur="${COMP_WORDS[COMP_CWORD]}" prev="${COMP_WORDS[COMP_CWORD-1]}" + if [[ $prev == "--config" ]]; then + COMPREPLY=( $(compgen -f -- "$cur") ) + return 0 + fi + if [[ $COMP_CWORD -eq 1 ]]; then if [[ "$cur" == -* ]]; then - COMPREPLY=( $(compgen -W "--html --lex --parse --version --help -h -c" -- "$cur") ) + COMPREPLY=( $(compgen -W "--config --html --lex --parse --version --help -h -c" -- "$cur") ) return 0 fi COMPREPLY=( $(compgen -W "bin lsp completions" -- "$cur") ) @@ -3584,7 +3609,7 @@ _msh_completion() { esac if [[ "$cur" == -* ]]; then - COMPREPLY=( $(compgen -W "--html --lex --parse --version --help -h -c" -- "$cur") ) + COMPREPLY=( $(compgen -W "--config --html --lex --parse --version --help -h -c" -- "$cur") ) return 0 fi @@ -3598,6 +3623,7 @@ complete -F _msh_completion msh mshell func fishCompletionScript() string { return `function __msh_register_completions --argument-names cmd + complete -c $cmd -f -l config -r -d 'Load config from PATH' complete -c $cmd -f -l html -d 'Render the input as HTML' complete -c $cmd -f -l lex -d 'Print tokens from the input' complete -c $cmd -f -l parse -d 'Print the parsed AST as JSON' @@ -3629,6 +3655,7 @@ def "msh_bin_subcommands" [] { } export extern "msh" [ + --config: string --html --lex --parse @@ -3651,6 +3678,7 @@ export extern "msh completions" [ 
] export extern "mshell" [ + --config: string --html --lex --parse @@ -3677,7 +3705,7 @@ export extern "mshell completions" [ func elvishCompletionScript() string { return `fn _msh_complete { |@args| if (== (count $args) 0) { - put bin lsp completions --html --lex --parse --version --help -h -c + put bin lsp completions --config --html --lex --parse --version --help -h -c return } From a0b9787f37f2b5ad25855e7423cb834c22792374 Mon Sep 17 00:00:00 2001 From: Mitchell Paulus Date: Sun, 1 Feb 2026 09:45:23 -0600 Subject: [PATCH 2/2] Add more example config.msh --- DEVELOP.md | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++- config.msh | 22 +++++++++++++++++++--- 2 files changed, 69 insertions(+), 4 deletions(-) diff --git a/DEVELOP.md b/DEVELOP.md index d5ac87cc..db1800c9 100644 --- a/DEVELOP.md +++ b/DEVELOP.md @@ -105,16 +105,65 @@ Two steps: Finding: +- Explicitly set, and has executable - PATH: all files with an executable bit -- Explicit set Running: - If it has shebang -> Run directly, let OS handle interpreter setup. - No shebang -> Can check for extension/explicit pattern configuration. +## Windows: + +Finding: + +- Explicit set +- PATH: All files with specific extension: Default .exe, .bat, .cmd, .msh. + Have to get others from configuration. + Here we also add extensions in order to try to match. +- What about trying to get many files in directory that don't have extension, because they are done like linux? + For example, my script directory? A special comment string? + - Grab anything with a SHEBANG + +Running: + +- Special pattern by extension/explicit configuration. +- Else: + - Check for SHEBANG. If we have a shebang, then we know it was a script file, and is text. + - We also know that it must essentially be text. + - Then can check for a shebang map configuration. 
Full string -> new CLI list + +Example: + + Map exact name -> [ 'asdf' 'asdfasdf' ] +## Shebang info + +Short answer: Linux does a raw byte check for #!, parses only the first line into interpreter + at most one optional-arg string, + then rebuilds argv as interpreter [optional-arg] script_path argv[1...] and re-execs the interpreter. It does not split the rest of + the line into multiple args. (sources.debian.org (https://sources.debian.org/src/linux/6.12.8-1/fs/binfmt_script.c)) + + More exact flow (from kernel source + execve man page): + + - Detection: the binfmt_script handler checks bprm->buf[0] == '#' and bprm->buf[1] == '!'; if not, it returns -ENOEXEC. That’s a + literal first‑two‑bytes test. (sources.debian.org (https://sources.debian.org/src/linux/6.12.8-1/fs/binfmt_script.c)) + - Parsing: it looks for a newline (or buffer end), trims trailing spaces/tabs, skips leading spaces/tabs after #!, then takes the + interpreter path up to the first space/tab/NUL. If anything remains, it is treated as one optional argument string (Linux passes + the entire remainder as a single argument, even if it contains spaces). (sources.debian.org (https://sources.debian.org/src/ + linux/6.12.8-1/fs/binfmt_script.c)) + - Argv construction: it removes the original argv[0] and splices in interpreter, optional-arg (if any), then the script path (the + pathname passed to execve), followed by the original argv[1...]. The original argv[0] is not recoverable. (sources.debian.org + (https://sources.debian.org/src/linux/6.12.8-1/fs/binfmt_script.c)) + - Exec: it opens the interpreter path and restarts execution using that interpreter. (sources.debian.org (https:// + sources.debian.org/src/linux/6.12.8-1/fs/binfmt_script.c)) + - Length limit: Linux caps the text after #! (before newline) to 127 chars pre‑5.1 and 255 chars since 5.1; excess is ignored. 
+ (man7.org (https://man7.org/linux/man-pages/man2/execve.2.html?utm_source=openai)) + + So your “splits on whitespace and adds the filepath as the last argument” is close but not quite: it splits only once (interpreter + vs optional-arg), and the script path is inserted before the original argv[1...], not always last (only last if no extra args were + supplied). (man7.org (https://man7.org/linux/man-pages/man2/execve.2.html?utm_source=openai)) + ## References [fish shell built in](https://github.com/fish-shell/fish-shell/tree/master/src/builtins) diff --git a/config.msh b/config.msh index a2c2391c..a82742c0 100644 --- a/config.msh +++ b/config.msh @@ -1,9 +1,25 @@ { 'bins': { - 'name': ['name'], - 'othername': ['uv' 'run'] + 'examplename': '/full/path', }, 'binPatterns': { - '*.py': ['uv' 'run' '--script' @file] + '.py': ['uv' 'run' '--script'], + '.msh': ['uv' 'run' '--script'], + }, + 'binRun':{ + 'myfile.py': ['pipenv' 'run'] + }, + 'shebangs': { + # '#!/bin/bash' + # '#!/bin/sh' + # '#!/usr/bin/awk -E' + # '#!/usr/bin/awk -f' + '#!/usr/bin/env -S uv run --script': ['uv' 'run' '--script'], + # '#!/usr/bin/env bash' + '#!/usr/bin/env mshell': ['msh'], + '#!/usr/bin/env python': ['uv' 'run' '--script'], + '#!/usr/bin/env python3': ['uv' 'run' '--script'], + # '#!/usr/bin/fish' + '#!/usr/bin/python3': ['uv' 'run' '--script'], } }