@@ -18,9 +18,11 @@ import (
1818 "context"
1919 "fmt"
2020 "log"
21+ "net/http"
2122 "net/url"
2223 "path"
2324 "regexp"
25+ "time"
2426
2527 apiv1 "github.com/google/cloud-android-orchestration/api/v1"
2628 "github.com/google/cloud-android-orchestration/pkg/app/accounts"
@@ -41,6 +43,7 @@ type GCPIMConfig struct {
4143 UseExternalIP bool
4244 // If true, instances created should be compatible with `acloud CLI`.
4345 AcloudCompatible bool
46+ HostReadyTimeout time.Duration
4447}
4548
4649const (
@@ -78,28 +81,40 @@ func (m *GCEInstanceManager) ListZones() (*apiv1.ListZonesResponse, error) {
7881 }, nil
7982}
8083
81- func (m * GCEInstanceManager ) GetHostAddr (zone string , host string ) (string , error ) {
82- instance , err := m .getHostInstance (zone , host )
83- if err != nil {
84- return "" , err
85- }
86- ilen := len (instance .NetworkInterfaces )
84+ func getHostAddrWithIns (ins * compute.Instance ) (string , error ) {
85+ ilen := len (ins .NetworkInterfaces )
8786 if ilen == 0 {
88- log .Printf ("host instance %s in zone %s is missing a network interface" , host , zone )
87+ log .Printf ("host instance %s in zone %s is missing a network interface" , ins . Name , ins . Zone )
8988 return "" , errors .NewInternalError ("host instance missing a network interface" , nil )
9089 }
9190 if ilen > 1 {
92- log .Printf ("host instance %s in zone %s has %d network interfaces" , host , zone , ilen )
91+ log .Printf ("host instance %s in zone %s has %d network interfaces" , ins .Name , ins .Zone , ilen )
92+ }
93+ return ins .NetworkInterfaces [0 ].NetworkIP , nil
94+ }
95+
96+ func (m * GCEInstanceManager ) GetHostAddr (zone string , host string ) (string , error ) {
97+ ins , err := m .getHostInstance (zone , host )
98+ if err != nil {
99+ return "" , err
100+ }
101+ return getHostAddrWithIns (ins )
102+ }
103+
104+ func getHostURLWithIns (ins * compute.Instance , config * Config ) (* url.URL , error ) {
105+ addr , err := getHostAddrWithIns (ins )
106+ if err != nil {
107+ return nil , err
93108 }
94- return instance . NetworkInterfaces [ 0 ]. NetworkIP , nil
109+ return url . Parse ( fmt . Sprintf ( "%s://%s:%d" , config . HostOrchestratorProtocol , addr , config . GCP . HostOrchestratorPort ))
95110}
96111
97112func (m * GCEInstanceManager ) GetHostURL (zone string , host string ) (* url.URL , error ) {
98- addr , err := m .GetHostAddr (zone , host )
113+ ins , err := m .getHostInstance (zone , host )
99114 if err != nil {
100115 return nil , err
101116 }
102- return url . Parse ( fmt . Sprintf ( "%s://%s:%d" , m .Config . HostOrchestratorProtocol , addr , m . Config . GCP . HostOrchestratorPort ) )
117+ return getHostURLWithIns ( ins , & m .Config )
103118}
104119
105120const operationStatusDone = "DONE"
@@ -253,16 +268,81 @@ func (m *GCEInstanceManager) WaitOperation(zone string, user accounts.User, name
253268 if op .Status != operationStatusDone {
254269 return nil , errors .NewServiceUnavailableError ("Wait for operation timed out" , nil )
255270 }
256- getter := opResultGetter {Service : m .Service , Op : op }
257- return getter .Get ()
271+ getter := opResultGetter {
272+ Service : m .Service ,
273+ Op : op ,
274+ Config : & m .Config ,
275+ }
276+ res , err := getter .Get ()
277+ if err != nil {
278+ return nil , err
279+ }
280+ if hostInst , ok := res .(* apiv1.HostInstance ); ok && op .OperationType == "insert" {
281+ return m .WaitHostAvailability (zone , user , hostInst .Name )
282+ }
283+ return res , nil
284+ }
285+
286+ func (m * GCEInstanceManager ) WaitHostAvailability (zone string , user accounts.User , host string ) (* apiv1.HostInstance , error ) {
287+ ins , err := m .getHostInstance (zone , host )
288+ if err != nil {
289+ return nil , err
290+ }
291+ hostInstance , err := BuildHostInstance (ins )
292+ if err != nil {
293+ return nil , err
294+ }
295+ client , err := getHostClientWithIns (ins , & m .Config )
296+ if err != nil {
297+ return nil , err
298+ }
299+ if err := m .waitForOrchestrator (zone , hostInstance , client ); err != nil {
300+ return nil , err
301+ }
302+ return hostInstance , nil
303+ }
304+
305+ func (m * GCEInstanceManager ) waitForOrchestrator (zone string , host * apiv1.HostInstance , client HostClient ) error {
306+ timeout := m .Config .GCP .HostReadyTimeout
307+ if timeout == 0 {
308+ timeout = 5 * time .Minute
309+ }
310+ retryDelay := 5 * time .Second
311+ deadline := time .Now ().Add (timeout )
312+
313+ for time .Now ().Before (deadline ) {
314+ _ , err := m .Service .Instances .Get (m .Config .GCP .ProjectID , zone , host .Name ).Context (context .TODO ()).Do ()
315+ if err != nil {
316+ if apiErr , ok := err .(* googleapi.Error ); ok && apiErr .Code == http .StatusNotFound {
317+ return errors .NewNotFoundError ("Host was deleted concurrently" , err )
318+ }
319+ return fmt .Errorf ("failed to check host existence: %w" , err )
320+ }
321+
322+ status , err := client .Get ("/" , "" , nil )
323+ if err == nil && status != http .StatusBadGateway {
324+ return nil
325+ }
326+
327+ time .Sleep (retryDelay )
328+ }
329+ return errors .NewServiceUnavailableError ("Wait for host orchestrator timed out" , nil )
330+ }
331+
332+ func getHostClientWithIns (ins * compute.Instance , config * Config ) (HostClient , error ) {
333+ url , err := getHostURLWithIns (ins , config )
334+ if err != nil {
335+ return nil , err
336+ }
337+ return NewNetHostClient (url , config .AllowSelfSignedHostSSLCertificate ), nil
258338}
259339
260340func (m * GCEInstanceManager ) GetHostClient (zone string , host string ) (HostClient , error ) {
261- url , err := m .GetHostURL (zone , host )
341+ ins , err := m .getHostInstance (zone , host )
262342 if err != nil {
263343 return nil , err
264344 }
265- return NewNetHostClient ( url , m .Config . AllowSelfSignedHostSSLCertificate ), nil
345+ return getHostClientWithIns ( ins , & m .Config )
266346}
267347
268348func (m * GCEInstanceManager ) getHostInstance (zone string , host string ) (* compute.Instance , error ) {
@@ -326,6 +406,7 @@ var (
326406type opResultGetter struct {
327407 Service * compute.Service
328408 Op * compute.Operation
409+ Config * Config
329410}
330411
331412func (g * opResultGetter ) Get () (any , error ) {
0 commit comments