diff --git a/DEVELOP.md b/DEVELOP.md index 03ad9f2c..84e8a441 100644 --- a/DEVELOP.md +++ b/DEVELOP.md @@ -94,6 +94,76 @@ MShellObject MShellFloat ``` +# Executable Lookups + +Two steps: + +- Find the absolute path to the file +- Understand how to transform the CLI to run the file + +## Linux/Unix-like: + +Finding: + +- Explicitly set, and has the executable bit +- PATH: all files with an executable bit + +Running: + +- If it has a shebang -> Run directly, let the OS handle interpreter setup. +- No shebang -> Can check for extension/explicit pattern configuration. + +## Windows: + +Finding: + +- Explicitly set +- PATH: All files with a specific extension: Default .exe, .bat, .cmd, .msh. + Have to get others from configuration. + Here we also add extensions in order to try to match. +- What about trying to get many files in a directory that don't have an extension, because they are written like Linux scripts? + For example, my script directory? A special comment string? + - Grab anything with a SHEBANG + +Running: + +- Special pattern by extension/explicit configuration. +- Else: + - Check for SHEBANG. If we have a shebang, then we know it was a script file, and is text. + - We also know that it must essentially be text. + - Then can check for a shebang map configuration. Full string -> new CLI list + +Example: + + +Map exact name -> [ 'asdf' 'asdfasdf' ] + +## Shebang info + +Short answer: Linux does a raw byte check for #!, parses only the first line into interpreter + at most one optional-arg string, + then rebuilds argv as interpreter [optional-arg] script_path argv[1...] and re-execs the interpreter. It does not split the rest of + the line into multiple args. (sources.debian.org (https://sources.debian.org/src/linux/6.12.8-1/fs/binfmt_script.c)) + + More exact flow (from kernel source + execve man page): + + - Detection: the binfmt_script handler checks bprm->buf[0] == '#' and bprm->buf[1] == '!'; if not, it returns -ENOEXEC. That’s a + literal first‑two‑bytes test. 
(sources.debian.org (https://sources.debian.org/src/linux/6.12.8-1/fs/binfmt_script.c)) + - Parsing: it looks for a newline (or buffer end), trims trailing spaces/tabs, skips leading spaces/tabs after #!, then takes the + interpreter path up to the first space/tab/NUL. If anything remains, it is treated as one optional argument string (Linux passes + the entire remainder as a single argument, even if it contains spaces). + (sources.debian.org (https://sources.debian.org/src/linux/6.12.8-1/fs/binfmt_script.c)) + - Argv construction: it removes the original argv[0] and splices in interpreter, optional-arg (if any), then the script path (the + pathname passed to execve), followed by the original argv[1...]. The original argv[0] is not recoverable. (sources.debian.org + (https://sources.debian.org/src/linux/6.12.8-1/fs/binfmt_script.c)) + - Exec: it opens the interpreter path and restarts execution using that interpreter. + (sources.debian.org (https://sources.debian.org/src/linux/6.12.8-1/fs/binfmt_script.c)) + - Length limit: Linux caps the text after #! (before newline) to 127 chars pre‑5.1 and 255 chars since 5.1; excess is ignored. + (man7.org (https://man7.org/linux/man-pages/man2/execve.2.html?utm_source=openai)) + + So the shorthand “splits on whitespace and adds the filepath as the last argument” is close but not quite: it splits only once (interpreter + vs optional-arg), and the script path is inserted before the original argv[1...], not always last (only last if no extra args were + supplied). 
(man7.org (https://man7.org/linux/man-pages/man2/execve.2.html?utm_source=openai)) + ## References [fish shell built in](https://github.com/fish-shell/fish-shell/tree/master/src/builtins) diff --git a/config.msh b/config.msh new file mode 100644 index 00000000..a82742c0 --- /dev/null +++ b/config.msh @@ -0,0 +1,25 @@ +{ + 'bins': { + 'examplename': '/full/path', + }, + 'binPatterns': { + '.py': ['uv' 'run' '--script'], + '.msh': ['uv' 'run' '--script'], + }, + 'binRun':{ + 'myfile.py': ['pipenv' 'run'] + }, + 'shebangs': { + # '#!/bin/bash' + # '#!/bin/sh' + # '#!/usr/bin/awk -E' + # '#!/usr/bin/awk -f' + '#!/usr/bin/env -S uv run --script': ['uv' 'run' '--script'], + # '#!/usr/bin/env bash' + '#!/usr/bin/env mshell': ['msh'], + '#!/usr/bin/env python': ['uv' 'run' '--script'], + '#!/usr/bin/env python3': ['uv' 'run' '--script'], + # '#!/usr/bin/fish' + '#!/usr/bin/python3': ['uv' 'run' '--script'], + } +} diff --git a/lib/std.msh b/lib/std.msh index 524bc5e0..7c898fc6 100644 --- a/lib/std.msh +++ b/lib/std.msh @@ -481,7 +481,7 @@ end # msh {{{ def __mshCompletion { 'complete': ['msh' 'mshell'] } ([str] -- [str]) input! - ['-h' '--help' '--html' '--lex' '--parse' '--version' '-c'] options! + ['-h' '--help' '--config' '--html' '--lex' '--parse' '--version' '-c'] options! ['lsp' 'bin' 'completions'] subcommands! @input len 0 > if diff --git a/mshell/Config.go b/mshell/Config.go new file mode 100644 index 00000000..97448c18 --- /dev/null +++ b/mshell/Config.go @@ -0,0 +1,99 @@ +package main + +import ( + "fmt" + "os" + "path/filepath" +) + +const mshellConfigEnvVar = "MSH_CONFIG" + +var loadedConfigDict *MShellParseDict +var loadedConfigPath string + +// LoadConfig resolves, reads, and parses the config dictionary. +// Cases: +// - --config PATH: returns parsed dict + path, or error if missing/unreadable/invalid. +// - MSH_CONFIG set: returns parsed dict + path, or error if missing/unreadable/invalid. 
+// - Default path exists: returns parsed dict + path, or error if unreadable/invalid. +// - Default path missing: returns (nil, "", nil). +func LoadConfig(configFlagPath string) (*MShellParseDict, string, error) { + configPath, explicit, err := resolveConfigPath(configFlagPath) + if err != nil { + return nil, "", err + } + if configPath == "" { + return nil, "", nil + } + + contents, err := os.ReadFile(configPath) + if err != nil { + if !explicit && os.IsNotExist(err) { + return nil, "", nil + } + return nil, "", fmt.Errorf("config file %s: %w", configPath, err) + } + + dict, err := parseConfigDict(configPath, string(contents)) + if err != nil { + return nil, "", err + } + + return dict, configPath, nil +} + +// resolveConfigPath returns the resolved path and whether it was explicitly set (via the --config flag or MSH_CONFIG). +func resolveConfigPath(configFlagPath string) (string, bool, error) { + if configFlagPath != "" { + return configFlagPath, true, nil + } + + if envValue, ok := os.LookupEnv(mshellConfigEnvVar); ok && envValue != "" { + return envValue, true, nil + } + + defaultPath, err := defaultConfigPath() + if err != nil { + return "", false, err + } + + return defaultPath, false, nil +} + +// defaultConfigPath returns $XDG_CONFIG_HOME/mshell/config.msh when XDG_CONFIG_HOME is an absolute path (empty or relative values are ignored, as the XDG Base Directory spec requires), otherwise ~/.config/mshell/config.msh; it errors only if the home directory cannot be determined. +func defaultConfigPath() (string, error) { + if xdgConfigHome, ok := os.LookupEnv("XDG_CONFIG_HOME"); ok && filepath.IsAbs(xdgConfigHome) { + return filepath.Join(xdgConfigHome, "mshell", "config.msh"), nil + } + + homeDir, err := os.UserHomeDir() + if err != nil { + return "", err + } + + return filepath.Join(homeDir, ".config", "mshell", "config.msh"), nil +} + +// parseConfigDict parses a single dictionary literal and rejects extra items. 
+func parseConfigDict(path string, input string) (*MShellParseDict, error) { + lexer := NewLexer(input, &TokenFile{Path: path}) + parser := NewMShellParser(lexer) + file, err := parser.ParseFile() + if err != nil { + return nil, fmt.Errorf("%s: %w", path, err) + } + + if len(file.Definitions) != 0 { + return nil, fmt.Errorf("%s: config file must not contain definitions", path) + } + if len(file.Items) != 1 { + return nil, fmt.Errorf("%s: config file must contain a single dictionary literal", path) + } + + dict, ok := file.Items[0].(*MShellParseDict) + if !ok { + return nil, fmt.Errorf("%s: config file must contain a dictionary literal", path) + } + + return dict, nil +} diff --git a/mshell/Main.go b/mshell/Main.go index 22edf32e..398a89fd 100644 --- a/mshell/Main.go +++ b/mshell/Main.go @@ -106,6 +106,7 @@ func main() { input := "" inputSet := false positionalArgs := []string{} + configPathFlag := "" var inputFile *TokenFile inputFile = nil @@ -117,7 +118,17 @@ func main() { for i < len(os.Args) { arg := os.Args[i] i++ - if arg == "--lex" { + if strings.HasPrefix(arg, "--config=") { + configPathFlag = strings.TrimPrefix(arg, "--config=") + } else if arg == "--config" { + if i >= len(os.Args) { + fmt.Fprintln(os.Stderr, "Error: --config requires a path") + os.Exit(1) + return + } + configPathFlag = os.Args[i] + i++ + } else if arg == "--lex" { command = CLILEX // printLex = true } else if arg == "--typecheck" { @@ -137,6 +148,7 @@ func main() { fmt.Println("Usage: msh fm") fmt.Println("") fmt.Println("Options:") + fmt.Println(" --config PATH Load config from PATH (overrides MSH_CONFIG and defaults)") fmt.Println(" --html Render the input as HTML") fmt.Println(" --lex Print the tokens lexed from the input") fmt.Println(" --parse Print the parsed Abstract Syntax Tree as JSON") @@ -200,6 +212,14 @@ func main() { return } + if command == CLIEXECUTE { + loadedConfigDict, loadedConfigPath, err = LoadConfig(configPathFlag) + if err != nil { + fmt.Fprintln(os.Stderr, err.Error()) + 
os.Exit(1) + } + } + if !inputSet && term.IsTerminal(stdOutFd) && term.IsTerminal(int(os.Stdin.Fd())) { // fmt.Fprintf(os.Stdout, "Got here\n") numRows, numCols, err := term.GetSize(stdOutFd) @@ -3622,9 +3642,14 @@ _msh_completion() { cur="${COMP_WORDS[COMP_CWORD]}" prev="${COMP_WORDS[COMP_CWORD-1]}" + if [[ $prev == "--config" ]]; then + COMPREPLY=( $(compgen -f -- "$cur") ) + return 0 + fi + if [[ $COMP_CWORD -eq 1 ]]; then if [[ "$cur" == -* ]]; then - COMPREPLY=( $(compgen -W "--html --lex --parse --version --help -h -c" -- "$cur") ) + COMPREPLY=( $(compgen -W "--config --html --lex --parse --version --help -h -c" -- "$cur") ) return 0 fi COMPREPLY=( $(compgen -W "bin lsp completions" -- "$cur") ) @@ -3652,7 +3677,7 @@ _msh_completion() { esac if [[ "$cur" == -* ]]; then - COMPREPLY=( $(compgen -W "--html --lex --parse --version --help -h -c" -- "$cur") ) + COMPREPLY=( $(compgen -W "--config --html --lex --parse --version --help -h -c" -- "$cur") ) return 0 fi @@ -3666,6 +3691,7 @@ complete -F _msh_completion msh mshell func fishCompletionScript() string { return `function __msh_register_completions --argument-names cmd + complete -c $cmd -f -l config -r -d 'Load config from PATH' complete -c $cmd -f -l html -d 'Render the input as HTML' complete -c $cmd -f -l lex -d 'Print tokens from the input' complete -c $cmd -f -l parse -d 'Print the parsed AST as JSON' @@ -3697,6 +3723,7 @@ def "msh_bin_subcommands" [] { } export extern "msh" [ + --config: string --html --lex --parse @@ -3719,6 +3746,7 @@ export extern "msh completions" [ ] export extern "mshell" [ + --config: string --html --lex --parse @@ -3745,7 +3773,7 @@ export extern "mshell completions" [ func elvishCompletionScript() string { return `fn _msh_complete { |@args| if (== (count $args) 0) { - put bin lsp completions --html --lex --parse --version --help -h -c + put bin lsp completions --config --html --lex --parse --version --help -h -c return }