55 "context"
66 "encoding/json"
77 "errors"
8- "flag"
98 "fmt"
109 "io"
1110 "log/slog"
@@ -20,6 +19,7 @@ import (
2019 "github.com/Azure/agentbaker/aks-node-controller/parser"
2120 "github.com/Azure/agentbaker/aks-node-controller/pkg/nodeconfigutils"
2221 "github.com/fsnotify/fsnotify"
22+ "github.com/urfave/cli/v3"
2323)
2424
2525type App struct {
@@ -36,42 +36,6 @@ type App struct {
3636 nodeCustomDataPath string
3737}
3838
39- // commandMetadata holds all metadata for a command in one place.
40- type commandMetadata struct {
41- taskName string
42- handler func (* App , context.Context , []string ) error
43- }
44-
45- // getCommandRegistry returns the command registry mapping command names to their metadata.
46- // Adding a new command only requires adding one entry here.
47- func getCommandRegistry () map [string ]commandMetadata {
48- return map [string ]commandMetadata {
49- "provision" : {
50- taskName : "Provision" ,
51- handler : func (a * App , ctx context.Context , args []string ) error {
52- provisionResult , err := a .runProvision (ctx , args [2 :])
53- // Always notify after provisioning attempt (success is a no-op inside notifier)
54- a .writeCompleteFileOnError (provisionResult , err )
55- return err
56- },
57- },
58- "provision-wait" : {
59- taskName : "ProvisionWait" ,
60- handler : func (a * App , ctx context.Context , args []string ) error {
61- provisionStatusFiles := ProvisionStatusFiles {
62- ProvisionJSONFile : provisionJSONFilePath ,
63- ProvisionCompleteFile : provisionCompleteFilePath ,
64- }
65- provisionOutput , err := a .ProvisionWait (ctx , provisionStatusFiles )
66- //nolint:forbidigo // stdout is part of the interface
67- fmt .Println (provisionOutput )
68- slog .Info ("provision-wait finished" , "provisionOutput" , provisionOutput )
69- return err
70- },
71- },
72- }
73- }
74-
7539// provision.json values are emitted as strings by the shell jq invocation.
7640// We only care about ExitCode + Error + Output (snippet) for failure detection.
7741type ProvisionResult struct {
@@ -99,76 +63,120 @@ type ProvisionStatusFiles struct {
9963}
10064
10165func (a * App ) Run (ctx context.Context , args []string ) int {
102- if handled := handleInfoCommand (args ); handled {
103- return 0
66+ cmd := & cli.Command {
67+ Name : "aks-node-controller" ,
68+ Usage : "Parse contract and run csecmd" ,
69+ Version : Version ,
70+ ExitErrHandler : func (context.Context , * cli.Command , error ) {
71+ // Return errors to the caller so exit codes are derived consistently in one place.
72+ },
73+ Action : func (context.Context , * cli.Command ) error {
74+ if len (args ) > 1 {
75+ return fmt .Errorf ("unknown command: %s" , args [1 ])
76+ }
77+ return errors .New ("missing command argument" )
78+ },
79+ Commands : []* cli.Command {
80+ {
81+ Name : "provision" ,
82+ Usage : "Run node provisioning" ,
83+ Flags : []cli.Flag {
84+ & cli.StringFlag {Name : "provision-config" , Usage : "path to the provision config file" },
85+ & cli.StringFlag {Name : "nbc-cmd" , Usage : "path to the NBC command file" },
86+ & cli.BoolFlag {Name : "dry-run" , Usage : "print the command that would be run without executing it" },
87+ },
88+ Action : func (ctx context.Context , cmd * cli.Command ) error {
89+ return a .runProvisionCommand (ctx , ProvisionFlags {
90+ ProvisionConfig : cmd .String ("provision-config" ),
91+ NBCCmd : cmd .String ("nbc-cmd" ),
92+ }, cmd .Bool ("dry-run" ))
93+ },
94+ },
95+ {
96+ Name : "provision-wait" ,
97+ Usage : "Wait for provisioning to complete" ,
98+ Action : func (ctx context.Context , cmd * cli.Command ) error {
99+ provisionStatusFiles := ProvisionStatusFiles {
100+ ProvisionJSONFile : provisionJSONFilePath ,
101+ ProvisionCompleteFile : provisionCompleteFilePath ,
102+ }
103+ provisionOutput , err := a .runProvisionWaitCommand (ctx , provisionStatusFiles )
104+ _ , _ = fmt .Fprintln (cmd .Root ().Writer , provisionOutput )
105+ return err
106+ },
107+ },
108+ {
109+ Name : "version" ,
110+ Usage : "Print the version" ,
111+ Action : func (_ context.Context , cmd * cli.Command ) error {
112+ _ , _ = fmt .Fprintln (cmd .Root ().Writer , Version )
113+ return nil
114+ },
115+ },
116+ {
117+ Name : "download-hotfix" ,
118+ Usage : "Download the requested hotfix binary" ,
119+ Action : func (ctx context.Context , cmd * cli.Command ) error {
120+ if len (cmd .Args ().Slice ()) > 0 {
121+ return fmt .Errorf ("unexpected download-hotfix arguments: %s" , strings .Join (cmd .Args ().Slice (), " " ))
122+ }
123+ return a .runDownloadHotfixCommand (ctx )
124+ },
125+ },
126+ },
104127 }
105128
106- slog .Info ("aks-node-controller started" , "args" , args )
107- err := a .run (ctx , args )
108- exitCode := errToExitCode (err )
109- if exitCode == 0 {
110- slog .Info ("aks-node-controller finished successfully." )
111- } else {
112- slog .Error ("aks-node-controller failed" , "error" , err )
113- }
114- return exitCode
129+ err := cmd .Run (ctx , args )
130+ return errToExitCode (err )
115131}
116132
117- // handleInfoCommand handles --version, version, --help, and help commands
118- // without logging noise. Returns true if handled.
119- func handleInfoCommand (args []string ) bool {
120- if len (args ) < 2 {
121- return false
122- }
123- switch args [1 ] {
124- case "--version" , "version" :
125- _ , _ = fmt .Fprintln (os .Stdout , Version )
126- return true
127- case "--help" , "help" :
128- _ , _ = fmt .Fprintln (os .Stdout , "Usage: aks-node-controller <command> [options]" )
129- _ , _ = fmt .Fprintln (os .Stdout )
130- _ , _ = fmt .Fprintln (os .Stdout , "Commands:" )
131- _ , _ = fmt .Fprintln (os .Stdout , " provision Run node provisioning" )
132- _ , _ = fmt .Fprintln (os .Stdout , " provision-wait Wait for provisioning to complete" )
133- _ , _ = fmt .Fprintln (os .Stdout , " version Print the version" )
134- _ , _ = fmt .Fprintln (os .Stdout , " help Print this help message" )
135- return true
136- default :
137- return false
138- }
139- }
133+ func (a * App ) runProvisionCommand (ctx context.Context , flags ProvisionFlags , dryRun bool ) error {
134+ slog .Info ("aks-node-controller started" , "task" , "Provision" )
140135
141- func ( a * App ) run ( ctx context. Context , args [] string ) error {
142- command := ""
143- if len ( args ) >= 2 {
144- command = args [ 1 ]
145- }
146- if command == "" {
147- return errors . New ( "missing command argument" )
148- }
149-
150- cmd , ok := getCommandRegistry ()[ command ]
151- if ! ok {
152- return fmt . Errorf ( "unknown command: %s" , command )
136+ startTime := time . Now ()
137+ a . eventLogger . LogEvent ( "Provision" , "Starting" , helpers . EventLevelInformational , startTime , startTime )
138+ provisionResult , err := a . runProvision ( ctx , flags , dryRun )
139+ a . writeCompleteFileOnError ( provisionResult , err )
140+ endTime := time . Now ()
141+ if err != nil {
142+ message := fmt . Sprintf ( "aks-node-controller exited with error %s" , err . Error () )
143+ a . eventLogger . LogEvent ( "Provision" , message , helpers . EventLevelError , startTime , endTime )
144+ slog . Error ( "aks-node-controller failed" , "error" , err )
145+ } else {
146+ a . eventLogger . LogEvent ( "Provision" , "Completed" , helpers . EventLevelInformational , startTime , endTime )
147+ slog . Info ( "aks-node-controller finished successfully." )
153148 }
149+ return err
150+ }
154151
155- // Self-update before provisioning: check for hotfix version and install if needed.
156- // On success, syscall.Exec replaces this process and never returns.
157- // On any failure, selfUpdate logs a warning and returns nil (best-effort).
158- a .selfUpdate (ctx )
152+ func (a * App ) runProvisionWaitCommand (ctx context.Context , provisionStatusFiles ProvisionStatusFiles ) (string , error ) {
153+ slog .Info ("aks-node-controller started" , "task" , "ProvisionWait" )
159154
160155 startTime := time .Now ()
161- a .eventLogger .LogEvent (cmd .taskName , "Starting" , helpers .EventLevelInformational , startTime , startTime )
162-
163- err := cmd .handler (a , ctx , args )
156+ a .eventLogger .LogEvent ("ProvisionWait" , "Starting" , helpers .EventLevelInformational , startTime , startTime )
157+ provisionOutput , err := a .ProvisionWait (ctx , provisionStatusFiles )
164158 endTime := time .Now ()
165159 if err != nil {
166160 message := fmt .Sprintf ("aks-node-controller exited with error %s" , err .Error ())
167- a .eventLogger .LogEvent (cmd .taskName , message , helpers .EventLevelError , startTime , endTime )
161+ a .eventLogger .LogEvent ("ProvisionWait" , message , helpers .EventLevelError , startTime , endTime )
162+ slog .Error ("aks-node-controller failed" , "error" , err )
168163 } else {
169- a .eventLogger .LogEvent (cmd .taskName , "Completed" , helpers .EventLevelInformational , startTime , endTime )
164+ a .eventLogger .LogEvent ("ProvisionWait" , "Completed" , helpers .EventLevelInformational , startTime , endTime )
165+ slog .Info ("aks-node-controller finished successfully." )
170166 }
171- return err
167+ slog .Info ("provision-wait finished" , "provisionOutput" , provisionOutput )
168+ return provisionOutput , err
169+ }
170+
171+ func (a * App ) runDownloadHotfixCommand (ctx context.Context ) error {
172+ slog .Info ("aks-node-controller hotfix download started" )
173+ err := a .downloadHotfix (ctx )
174+ if err != nil {
175+ slog .Error ("aks-node-controller hotfix download failed" , "error" , err )
176+ return err
177+ }
178+ slog .Info ("aks-node-controller hotfix download finished" )
179+ return nil
172180}
173181
174182func buildCmdFromProvisionConfig (ctx context.Context , path string ) (* exec.Cmd , error ) {
@@ -275,10 +283,10 @@ func (a *App) Provision(ctx context.Context, flags ProvisionFlags) (*ProvisionRe
275283 return provisionResult , err
276284}
277285
278- // runProvision encapsulates argument parsing and execution for the "provision" subcommand.
286+ // runProvision encapsulates execution for the "provision" subcommand after CLI parsing .
279287// It returns an error describing any failure; callers should pass that error to
280288// writeCompleteFileOnError so the sentinel file can be written on fail-fast paths.
281- func (a * App ) runProvision (ctx context.Context , args [] string ) (* ProvisionResult , error ) {
289+ func (a * App ) runProvision (ctx context.Context , flags ProvisionFlags , dryRun bool ) (* ProvisionResult , error ) {
282290 // Handle panics gracefully to ensure we always return a valid result
283291 provisionResult := & ProvisionResult {}
284292 var err error
@@ -293,25 +301,15 @@ func (a *App) runProvision(ctx context.Context, args []string) (*ProvisionResult
293301 }
294302 }()
295303
296- fs := flag .NewFlagSet ("provision" , flag .ContinueOnError )
297- provisionConfig := fs .String ("provision-config" , "" , "path to the provision config file" )
298- nbcCMD := fs .String ("nbc-cmd" , "" , "path to the NBC command file" )
299- dryRun := fs .Bool ("dry-run" , false , "print the command that would be run without executing it" )
300- if parseErr := fs .Parse (args ); parseErr != nil {
301- provisionResult .ExitCode = strconv .Itoa (240 )
302- provisionResult .Error = fmt .Sprintf ("parse args: %v" , parseErr )
303- return provisionResult , errors .New (provisionResult .Error )
304- }
305-
306- if * provisionConfig == "" && * nbcCMD == "" {
304+ if flags .ProvisionConfig == "" && flags .NBCCmd == "" {
307305 provisionResult .ExitCode = strconv .Itoa (240 )
308306 provisionResult .Error = "--provision-config or --nbc-cmd is required"
309307 return provisionResult , errors .New (provisionResult .Error )
310308 }
311- if * dryRun {
309+ if dryRun {
312310 a .cmdRun = cmdRunnerDryRun
313311 }
314- return a .Provision (ctx , ProvisionFlags { ProvisionConfig : * provisionConfig , NBCCmd : * nbcCMD } )
312+ return a .Provision (ctx , flags )
315313}
316314
317315// writeCompleteFileOnError writes the provision.complete sentinel if err is non-nil,
0 commit comments