Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions pkg/app/instances/docker.go
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,18 @@ func (m *DockerInstanceManager) waitCreateHostOperation(host string) (*apiv1.Hos
return nil, fmt.Errorf("failed to inspect docker container: %w", err)
}
if res.State.Running {
client, err := m.GetHostClient("local", host)
if err != nil {
return nil, err
}
// There is a delay between host creation and its readiness, wait for host readiness and return when it is ready.
readinessChecker, ok := client.(HostReadinessChecker)
if !ok {
readinessChecker = &BasicHostReadinessChecker{Client: client}
}
if err := WaitForHostReady(readinessChecker, 2*time.Minute, 5*time.Second); err != nil {
return nil, err
}
return &apiv1.HostInstance{
Name: host,
}, nil
Expand Down
114 changes: 98 additions & 16 deletions pkg/app/instances/gce.go
Comment thread
k311093 marked this conversation as resolved.
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,11 @@ import (
"context"
"fmt"
"log"
"net/http"
"net/url"
"path"
"regexp"
"time"

apiv1 "github.com/google/cloud-android-orchestration/api/v1"
"github.com/google/cloud-android-orchestration/pkg/app/accounts"
Expand Down Expand Up @@ -79,27 +81,19 @@ func (m *GCEInstanceManager) ListZones() (*apiv1.ListZonesResponse, error) {
}

func (m *GCEInstanceManager) GetHostAddr(zone string, host string) (string, error) {
instance, err := m.getHostInstance(zone, host)
ins, err := m.getHostInstance(zone, host)
if err != nil {
return "", err
}
ilen := len(instance.NetworkInterfaces)
if ilen == 0 {
log.Printf("host instance %s in zone %s is missing a network interface", host, zone)
return "", errors.NewInternalError("host instance missing a network interface", nil)
}
if ilen > 1 {
log.Printf("host instance %s in zone %s has %d network interfaces", host, zone, ilen)
}
return instance.NetworkInterfaces[0].NetworkIP, nil
return getHostAddrWithIns(ins)
}

func (m *GCEInstanceManager) GetHostURL(zone string, host string) (*url.URL, error) {
addr, err := m.GetHostAddr(zone, host)
ins, err := m.getHostInstance(zone, host)
if err != nil {
return nil, err
}
return url.Parse(fmt.Sprintf("%s://%s:%d", m.Config.HostOrchestratorProtocol, addr, m.Config.GCP.HostOrchestratorPort))
return getHostURLWithIns(ins, &m.Config)

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you have getHostURLWithIns as a method of GCEInstanceManager in order to reuse its Config field rather than have it to pass as a parameter? Same for other similar functions in this PR

}

const operationStatusDone = "DONE"
Expand Down Expand Up @@ -253,16 +247,103 @@ func (m *GCEInstanceManager) WaitOperation(zone string, user accounts.User, name
if op.Status != operationStatusDone {
return nil, errors.NewServiceUnavailableError("Wait for operation timed out", nil)
}
getter := opResultGetter{Service: m.Service, Op: op}
return getter.Get()
getter := opResultGetter{
Service: m.Service,
Op: op,
Config: &m.Config,
}
res, err := getter.Get()
if err != nil {
return nil, err
}
if hostInst, ok := res.(*apiv1.HostInstance); ok && op.OperationType == "insert" {
return m.WaitHostAvailability(zone, user, hostInst.Name)
}
return res, nil
}

func (m *GCEInstanceManager) WaitHostAvailability(zone string, user accounts.User, host string) (*apiv1.HostInstance, error) {
ins, err := m.getHostInstance(zone, host)
if err != nil {
return nil, err
}
hostInstance, err := BuildHostInstance(ins)
if err != nil {
return nil, err
}
client, err := getHostClientWithIns(ins, &m.Config)
if err != nil {
return nil, err
}
if err := m.waitForOrchestrator(zone, hostInstance, client); err != nil {
return nil, err
}
return hostInstance, nil
}

type gceHostReadinessChecker struct {
basicChecker *BasicHostReadinessChecker
manager *GCEInstanceManager
zone string
hostName string
}

func (g *gceHostReadinessChecker) IsHostReady() (bool, error) {
_, err := g.manager.Service.Instances.Get(g.manager.Config.GCP.ProjectID, g.zone, g.hostName).Context(context.TODO()).Do()
if err != nil {
if apiErr, ok := err.(*googleapi.Error); ok && apiErr.Code == http.StatusNotFound {
return false, errors.NewNotFoundError("Host was deleted concurrently", err)
}
return false, fmt.Errorf("failed to check host existence: %w", err)
}
return g.basicChecker.IsHostReady()
}

func (m *GCEInstanceManager) waitForOrchestrator(zone string, host *apiv1.HostInstance, client HostClient) error {
gceChecker := &gceHostReadinessChecker{
basicChecker: &BasicHostReadinessChecker{Client: client},
manager: m,
zone: zone,
hostName: host.Name,
}

return WaitForHostReady(gceChecker, 5*time.Minute, 5*time.Second)
}

func getHostAddrWithIns(ins *compute.Instance) (string, error) {
ilen := len(ins.NetworkInterfaces)
if ilen == 0 {
log.Printf("host instance %s in zone %s is missing a network interface", ins.Name, ins.Zone)
return "", errors.NewInternalError("host instance missing a network interface", nil)
}
if ilen > 1 {
log.Printf("host instance %s in zone %s has %d network interfaces", ins.Name, ins.Zone, ilen)
}
return ins.NetworkInterfaces[0].NetworkIP, nil
}

func getHostURLWithIns(ins *compute.Instance, config *Config) (*url.URL, error) {
addr, err := getHostAddrWithIns(ins)
if err != nil {
return nil, err
}
return url.Parse(fmt.Sprintf("%s://%s:%d", config.HostOrchestratorProtocol, addr, config.GCP.HostOrchestratorPort))
}

func getHostClientWithIns(ins *compute.Instance, config *Config) (HostClient, error) {
url, err := getHostURLWithIns(ins, config)
if err != nil {
return nil, err
}
return NewNetHostClient(url, config.AllowSelfSignedHostSSLCertificate), nil
}

func (m *GCEInstanceManager) GetHostClient(zone string, host string) (HostClient, error) {
url, err := m.GetHostURL(zone, host)
ins, err := m.getHostInstance(zone, host)
if err != nil {
return nil, err
}
return NewNetHostClient(url, m.Config.AllowSelfSignedHostSSLCertificate), nil
return getHostClientWithIns(ins, &m.Config)
}

func (m *GCEInstanceManager) getHostInstance(zone string, host string) (*compute.Instance, error) {
Expand Down Expand Up @@ -326,6 +407,7 @@ var (
type opResultGetter struct {
Service *compute.Service
Op *compute.Operation
Config *Config
}

func (g *opResultGetter) Get() (any, error) {
Expand Down
19 changes: 18 additions & 1 deletion pkg/app/instances/gce_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"net/http/httptest"
"net/url"
"reflect"
"strconv"
"strings"
"testing"

Expand Down Expand Up @@ -572,9 +573,14 @@ func TestWaitCreateInstanceOperationSucceeds(t *testing.T) {
Name: "foo",
MachineType: "mt",
MinCpuPlatform: "mcp",
NetworkInterfaces: []*compute.NetworkInterface{
{NetworkIP: "127.0.0.1"},
},
}
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
switch path := r.URL.Path; path {
case "/":
replyJSON(w, map[string]string{})
case "/projects/google.com:test-project/zones/us-central1-a/operations/operation-1/wait":
replyJSON(w, operation)
case "/projects/google.com:test-project/zones/us-central1-a/instances/foo":
Expand All @@ -584,7 +590,18 @@ func TestWaitCreateInstanceOperationSucceeds(t *testing.T) {
}
}))
defer ts.Close()
im := NewGCEInstanceManager(testConfig, buildTestService(t, ts), testNameGenerator)

tsURL, _ := url.Parse(ts.URL)
port, _ := strconv.Atoi(tsURL.Port())

cfg := testConfig
cfg.HostOrchestratorProtocol = "http"

gcpCfg := *cfg.GCP
gcpCfg.HostOrchestratorPort = port
cfg.GCP = &gcpCfg

im := NewGCEInstanceManager(cfg, buildTestService(t, ts), testNameGenerator)

res, _ := im.WaitOperation(zone, &TestUser{}, opName)

Expand Down
50 changes: 50 additions & 0 deletions pkg/app/instances/helper.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// Copyright 2026 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package instances

import (
"net/http"
"time"

"github.com/google/cloud-android-orchestration/pkg/app/errors"
)

func WaitForHostReady(checker HostReadinessChecker, maxWait time.Duration, retryDelay time.Duration) error {
deadline := time.Now().Add(maxWait)

for time.Now().Before(deadline) {
ready, err := checker.IsHostReady()
if err != nil {
return err
}
if ready {
return nil
}
time.Sleep(retryDelay)
}
return errors.NewServiceUnavailableError("wait for host orchestrator timed out", nil)

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's now return NewInternalError after time out, otherwise the client will retry again.

}

type BasicHostReadinessChecker struct {
Client HostClient
}

func (c *BasicHostReadinessChecker) IsHostReady() (bool, error) {
status, err := c.Client.Get("/", "", nil)
if err != nil || status == http.StatusBadGateway || status == http.StatusServiceUnavailable {
return false, nil
}
return true, nil
}
62 changes: 62 additions & 0 deletions pkg/app/instances/helper_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Copyright 2026 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package instances

import (
"testing"
"time"
)

type mockHostReadinessChecker struct {
isHostReadyFunc func() (bool, error)
}

func (c *mockHostReadinessChecker) IsHostReady() (bool, error) {
return c.isHostReadyFunc()
}

func TestWaitForHostReadySucceeds(t *testing.T) {
checker := &mockHostReadinessChecker{
isHostReadyFunc: func() (bool, error) {
return true, nil
},
}

err := WaitForHostReady(checker, 100*time.Millisecond, 1*time.Millisecond)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
}

func TestWaitForHostReadyRetries(t *testing.T) {
calls := 0
checker := &mockHostReadinessChecker{
isHostReadyFunc: func() (bool, error) {
calls++
if calls == 1 {
return false, nil
}
return true, nil
},
}

err := WaitForHostReady(checker, 100*time.Millisecond, 1*time.Millisecond)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if calls != 2 {
t.Errorf("expected 2 calls, got %d", calls)
}
}
4 changes: 4 additions & 0 deletions pkg/app/instances/instances.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ type Manager interface {
GetHostClient(zone string, host string) (HostClient, error)
}

type HostReadinessChecker interface {
IsHostReady() (bool, error)
}

type HostClient interface {
// Get and Post requests return the HTTP status code or an error.
// The response body is parsed into the res output parameter if provided.
Expand Down
Loading