Skip to content

Commit 8e4cb37

Browse files
authored
Add <wait 'string'> boot command that waits for text to appear (#178)
The functionality builds on the built-in Vision framework on macOS and on the existing VNC integration. A custom VNC driver has been added that picks up changes to the VNC framebuffer. It allows custom boot commands without changes to the packer-sdk bootcommand grammar by using a simple regex replacement that wraps the wait string in two marker symbols from the Unicode Private Use Area (PUA).
1 parent 8a2c4e6 commit 8e4cb37

5 files changed

Lines changed: 302 additions & 7 deletions

File tree

.web-docs/components/builder/tart/README.md

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,28 @@ For more examples of various boot commands, see the sample projects from our
271271
<!-- End of code generated from the comments of the BootConfig struct in bootcommand/config.go; -->
272272

273273

274+
In addition to the `<waitXX>` command, Tart builder supports a custom `<wait 'string'>`
275+
command that uses computer vision to wait for the given string to appear in the VM's
276+
screen output. The string is matched as a case-insensitive regular expression, so it can be a simple word, a sentence or a regular expression pattern.
277+
278+
In JSON:
279+
280+
```json
281+
"boot_command": [
282+
"<wait 'hello'>",
283+
"<wait '(foo|bar|baz)123'>"
284+
]
285+
```
286+
287+
In HCL2:
288+
289+
```hcl
290+
boot_command = [
291+
"<wait 'hello'>",
292+
"<wait '(foo|bar|baz)123'>"
293+
]
294+
```
295+
274296
#### Optional:
275297

276298
<!-- Code generated from the comments of the BootConfig struct in bootcommand/config.go; DO NOT EDIT MANUALLY -->

builder/tart/step_run.go

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ func typeBootCommandOverVNC(
166166
}
167167
}
168168

169-
ui.Say("Waiting for the VNC server credentials from Tart...")
169+
ui.Say("Waiting for VNC server credentials from Tart...")
170170

171171
vncCtx, cancel := context.WithTimeout(ctx, 30*time.Second)
172172
defer cancel()
@@ -201,41 +201,54 @@ func typeBootCommandOverVNC(
201201
dialer := net.Dialer{}
202202
netConn, err := dialer.DialContext(ctx, "tcp", fmt.Sprintf("%s:%s", vncHost, vncPort))
203203
if err != nil {
204-
err := fmt.Errorf("Failed to connect to the Tart's VNC server: %s", err)
204+
err := fmt.Errorf("Failed to connect to Tart's VNC server: %s", err)
205205
state.Put("error", err)
206206
ui.Error(err.Error())
207207

208208
return false
209209
}
210210
defer netConn.Close()
211211

212+
serverMessageChannel := make(chan vnc.ServerMessage)
212213
vncClient, err := vnc.Client(netConn, &vnc.ClientConfig{
213214
Auth: []vnc.ClientAuth{
214215
&vnc.PasswordAuth{Password: vncPassword},
215216
},
217+
ServerMessageCh: serverMessageChannel,
216218
})
217219
if err != nil {
218-
err := fmt.Errorf("Failed to connect to the Tart's VNC server: %s", err)
220+
err := fmt.Errorf("Failed to connect to Tart's VNC server: %s", err)
219221
state.Put("error", err)
220222
ui.Error(err.Error())
221223

222224
return false
223225
}
224226
defer vncClient.Close()
225227

226-
ui.Say("Connected to the VNC!")
228+
ui.Say("Connected to VNC server!")
229+
230+
err = vncClient.SetEncodings([]vnc.Encoding{
231+
&vnc.RawEncoding{},
232+
&DesktopSizePseudoEncoding{},
233+
})
234+
if err != nil {
235+
err := fmt.Errorf("Failed to set VNC encoding: %s", err)
236+
state.Put("error", err)
237+
ui.Error(err.Error())
238+
return false
239+
}
240+
241+
vncDriver := newCustomDriver(vncClient, serverMessageChannel, config, ctx)
227242

228243
if config.VNCConfig.BootWait > 0 {
229244
message := fmt.Sprintf("Waiting %v after the VM has booted...", config.VNCConfig.BootWait)
230245
ui.Say(message)
231246
time.Sleep(config.VNCConfig.BootWait)
232247
}
233248

234-
message := fmt.Sprintf("Typing commands with key interval %v...", config.BootKeyInterval)
249+
message := fmt.Sprintf("Typing commands with key interval %v...", vncDriver.KeyInterval())
235250
ui.Say(message)
236251

237-
vncDriver := bootcommand.NewVNCDriver(vncClient, config.BootKeyInterval)
238-
239252
command, err := interpolate.Render(config.VNCConfig.FlatBootCommand(), &config.ctx)
240253
if err != nil {
241254
err := fmt.Errorf("Failed to render the boot command: %s", err)
@@ -245,6 +258,10 @@ func typeBootCommandOverVNC(
245258
return false
246259
}
247260

261+
stringWaitRegex := regexp.MustCompile(`<wait\s*'(.+?)'>`)
262+
command = stringWaitRegex.ReplaceAllString(command,
263+
fmt.Sprintf(`%c${1}%c`, WaitForStringStart, WaitForStringEnd))
264+
248265
seq, err := bootcommand.GenerateExpressionSequence(command)
249266
if err != nil {
250267
err := fmt.Errorf("Failed to parse the boot command: %s", err)

builder/tart/vnc.go

Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
1+
package tart
2+
3+
/*
4+
#cgo CFLAGS: -x objective-c
5+
#cgo LDFLAGS: -framework Foundation -framework CoreGraphics -framework Vision
6+
7+
#import <stdlib.h>
8+
#import "vnc.mm"
9+
10+
*/
11+
import "C"
12+
13+
import (
14+
"context"
15+
"fmt"
16+
"github.com/hashicorp/packer-plugin-sdk/bootcommand"
17+
"github.com/mitchellh/go-vnc"
18+
"io"
19+
"os"
20+
"strings"
21+
"time"
22+
23+
"image"
24+
"image/color"
25+
26+
"unsafe"
27+
)
28+
29+
type customDriver struct {
30+
vncClient *vnc.ClientConn
31+
serverMessageChannel chan vnc.ServerMessage
32+
config *Config
33+
vncDriver bootcommand.BCDriver
34+
keyInterval time.Duration
35+
ctx context.Context
36+
frameBuffer *image.RGBA
37+
waitString strings.Builder
38+
}
39+
40+
func newCustomDriver(vncClient *vnc.ClientConn,
41+
serverMessageChannel chan vnc.ServerMessage,
42+
config *Config,
43+
ctx context.Context) *customDriver {
44+
45+
// Resolve key interval manually so we can accurately report it back
46+
keyInterval := bootcommand.PackerKeyDefault
47+
if delay, err := time.ParseDuration(os.Getenv(bootcommand.PackerKeyEnv)); err == nil {
48+
keyInterval = delay
49+
}
50+
if config.BootKeyInterval > time.Duration(0) {
51+
keyInterval = config.BootKeyInterval
52+
}
53+
54+
w, h := int(vncClient.FrameBufferWidth), int(vncClient.FrameBufferHeight)
55+
56+
d := &customDriver{
57+
vncClient: vncClient,
58+
serverMessageChannel: serverMessageChannel,
59+
config: config,
60+
vncDriver: bootcommand.NewVNCDriver(vncClient, keyInterval),
61+
keyInterval: keyInterval,
62+
ctx: ctx,
63+
frameBuffer: image.NewRGBA(image.Rect(0, 0, w, h)),
64+
}
65+
66+
return d
67+
}
68+
69+
func (d *customDriver) KeyInterval() time.Duration {
70+
return d.keyInterval
71+
}
72+
73+
// WaitForStringStart and WaitForStringEnd are the marker code points that
// bracket the text of a <wait 'string'> command once it has been rewritten in
// the rendered boot command. Both are taken from the Unicode Private Use Area
// (U+E000..U+F8FF), so they are not expected to collide with characters a user
// wants typed into the VM.
const (
	WaitForStringStart uint32 = 0xE000
	// Previously 0xE0001, which is U+E0001 LANGUAGE TAG and lies outside the
	// Private Use Area; 0xE001 is the intended neighbouring PUA code point.
	WaitForStringEnd uint32 = 0xE001
)
75+
76+
func (d *customDriver) SendKey(key rune, action bootcommand.KeyAction) error {
77+
switch uint32(key) {
78+
case WaitForStringStart:
79+
d.waitString.Grow(1)
80+
return nil
81+
case WaitForStringEnd:
82+
waitString := d.waitString.String()
83+
d.waitString.Reset()
84+
85+
waitStringCStr := C.CString(waitString)
86+
defer C.free(unsafe.Pointer(waitStringCStr))
87+
88+
for {
89+
fmt.Fprintf(os.Stderr, "🔎 Looking for '%s'...\n", waitString)
90+
if C.recognizeTextInFramebuffer(waitStringCStr,
91+
unsafe.Pointer(&d.frameBuffer.Pix[0]),
92+
C.int(d.frameBuffer.Bounds().Dx()),
93+
C.int(d.frameBuffer.Bounds().Dy())) {
94+
break
95+
}
96+
97+
if err := d.WaitForFramebufferUpdate(); err != nil {
98+
return err
99+
}
100+
}
101+
102+
return nil
103+
default:
104+
if d.waitString.Cap() > 0 {
105+
d.waitString.WriteRune(key)
106+
return nil
107+
} else {
108+
return d.vncDriver.SendKey(key, action)
109+
}
110+
}
111+
}
112+
113+
func (d *customDriver) SendSpecial(special string, action bootcommand.KeyAction) error {
114+
return d.vncDriver.SendSpecial(special, action)
115+
}
116+
117+
func (d *customDriver) Flush() error {
118+
return d.vncDriver.Flush()
119+
}
120+
121+
func (d *customDriver) WaitForFramebufferUpdate() error {
122+
123+
for {
124+
w, h := d.vncClient.FrameBufferWidth, d.vncClient.FrameBufferHeight
125+
fmt.Fprintf(os.Stderr, "📡 Requesting frame buffer update for %dx%d\n", w, h)
126+
127+
if err := d.vncClient.FramebufferUpdateRequest(true, 0, 0, w, h); err != nil {
128+
return err
129+
}
130+
131+
select {
132+
case msg := <-d.serverMessageChannel:
133+
if framebufferUpdateMessage, ok := msg.(*vnc.FramebufferUpdateMessage); ok {
134+
if len(framebufferUpdateMessage.Rectangles) == 0 {
135+
return fmt.Errorf("⚠️ Frame update did not have any rectangles")
136+
}
137+
138+
for _, rect := range framebufferUpdateMessage.Rectangles {
139+
switch encoding := rect.Enc.(type) {
140+
case *DesktopSizePseudoEncoding:
141+
w, h := int(d.vncClient.FrameBufferWidth), int(d.vncClient.FrameBufferHeight)
142+
d.frameBuffer = image.NewRGBA(image.Rect(0, 0, w, h))
143+
fmt.Fprintf(os.Stderr, "🖥️ New desktop size is %dx%d, resized framebuffer\n", w, h)
144+
continue
145+
case *vnc.RawEncoding:
146+
for i, c := range encoding.Colors {
147+
x, y := i%int(rect.Width), i/int(rect.Width)
148+
r, g, b := uint8(c.R), uint8(c.G), uint8(c.B)
149+
d.frameBuffer.Set(int(rect.X)+x, int(rect.Y)+y, color.RGBA{r, g, b, 255})
150+
}
151+
default:
152+
return fmt.Errorf("⚠️ Frame had unknown encoding %s", encoding)
153+
}
154+
}
155+
return nil
156+
} else {
157+
// Ignore messages we didn't ask for
158+
fmt.Fprintln(os.Stderr, "⚠️ Ignoring unknown message type", msg.Type(), msg)
159+
continue
160+
}
161+
case <-d.ctx.Done():
162+
return d.ctx.Err()
163+
}
164+
}
165+
}
166+
167+
type DesktopSizePseudoEncoding struct{}
168+
169+
func (*DesktopSizePseudoEncoding) Read(c *vnc.ClientConn, rect *vnc.Rectangle, r io.Reader) (vnc.Encoding, error) {
170+
c.FrameBufferWidth = rect.Width
171+
c.FrameBufferHeight = rect.Height
172+
return &DesktopSizePseudoEncoding{}, nil
173+
}
174+
175+
func (*DesktopSizePseudoEncoding) Type() int32 {
176+
return -223 // RFC 6143 7.8.2
177+
}

builder/tart/vnc.mm

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
#import <stdio.h>
2+
3+
#import <CoreGraphics/CoreGraphics.h>
4+
#import <Vision/Vision.h>
5+
6+
/// Returns a printable description for an optional error. Never returns NULL,
/// so the result is always safe to pass to a "%s" format.
static const char *describeError(NSError *error)
{
    const char *description = error.localizedDescription.UTF8String;
    return description != NULL ? description : "unknown error";
}

/// Runs text recognition on a framebuffer and reports whether any recognized
/// line matches a pattern.
///
/// @param text        NUL-terminated UTF-8 pattern. It is compiled as a
///                    case-insensitive regular expression.
/// @param framebuffer Pixel data, 4 bytes per pixel (8-bit R, G, B, A), rows
///                    tightly packed. The memory is only wrapped, not copied,
///                    so it must stay valid for the duration of the call.
/// @param width       Framebuffer width in pixels.
/// @param height      Framebuffer height in pixels.
/// @return true when the top candidate of at least one text observation
///         matches the pattern; false when nothing matches or on any failure
///         (failures are reported on stderr).
bool recognizeTextInFramebuffer(const char* text, void* framebuffer, int width, int height)
{
    if (text == NULL || framebuffer == NULL || width <= 0 || height <= 0) {
        fprintf(stderr, "⚠️ Nothing to recognize: missing search string or empty framebuffer\n");
        return false;
    }

    @autoreleasepool {
        // Prepare regular expression for needle. stringWithUTF8String: returns
        // nil for invalid UTF-8, and a nil pattern would raise an exception.
        NSString *pattern = [NSString stringWithUTF8String:text];
        if (pattern == nil) {
            fprintf(stderr, "⚠️ Search string is not valid UTF-8\n");
            return false;
        }

        NSError *error = nil;
        NSRegularExpression *regex =
            [NSRegularExpression regularExpressionWithPattern:pattern
                                                      options:NSRegularExpressionCaseInsensitive
                                                        error:&error];
        // Check the return value, not the error pointer.
        if (regex == nil) {
            fprintf(stderr, "⚠️ Failed to create search string regex: %s\n",
                describeError(error));
            return false;
        }

        // Create CGImage wrapper around framebuffer pixel data. Sizes are
        // computed in size_t so large screens cannot overflow int.
        size_t bytesPerRow = (size_t)width * 4;
        CGDataProviderRef provider = CGDataProviderCreateWithData(
            NULL, framebuffer, bytesPerRow * (size_t)height, NULL);
        CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceRGB();
        CGImageRef image = NULL;
        if (provider != NULL && colorSpace != NULL) {
            image = CGImageCreate((size_t)width, (size_t)height, 8, 32, bytesPerRow,
                colorSpace, (CGBitmapInfo)kCGImageAlphaPremultipliedLast, provider,
                NULL, false, kCGRenderingIntentDefault);
        }
        CGColorSpaceRelease(colorSpace);
        CGDataProviderRelease(provider);
        if (image == NULL) {
            fprintf(stderr, "⚠️ Failed to wrap framebuffer in an image\n");
            return false;
        }

        // Recognize text in the framebuffer
        VNRecognizeTextRequest *textRecognizer = [[VNRecognizeTextRequest alloc] init];
        textRecognizer.recognitionLevel = VNRequestTextRecognitionLevelAccurate;
        VNImageRequestHandler *imageRequest = [[VNImageRequestHandler alloc]
            initWithCGImage:image options:@{}];
        CGImageRelease(image);
#if !__has_feature(objc_arc)
        // Without ARC the two objects above are owned by us and would leak on
        // every call; hand them to the enclosing autorelease pool instead.
        [textRecognizer autorelease];
        [imageRequest autorelease];
#endif

        if (![imageRequest performRequests:@[textRecognizer] error:&error]) {
            fprintf(stderr, "⚠️ Failed to perform image recognition request: %s\n",
                describeError(error));
            return false;
        }

        // Then search for the needle
        for (VNRecognizedTextObservation *observation in textRecognizer.results) {
            for (VNRecognizedText *candidate in [observation topCandidates:1]) {
                NSString *observed = candidate.string;
                fprintf(stderr, "💬 Observed '%s' with confidence %f\n",
                    observed.UTF8String, candidate.confidence);
                NSRange wholeString = NSMakeRange(0, observed.length);
                // Only existence matters, so stop at the first match instead
                // of collecting every match into an array.
                if ([regex firstMatchInString:observed options:0 range:wholeString] != nil)
                    return true;
            }
        }
    }

    return false;
}

docs/builders/tart.mdx

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,28 @@ For more advanced examples, please refer to the [`example/` directory](https://g
8989

9090
@include 'packer-plugin-sdk/bootcommand/BootConfig.mdx'
9191

92+
In addition to the `<waitXX>` command, Tart builder supports a custom `<wait 'string'>`
93+
command that uses computer vision to wait for the given string to appear in the VM's
94+
screen output. The string is matched as a case-insensitive regular expression, so it can be a simple word, a sentence or a regular expression pattern.
95+
96+
In JSON:
97+
98+
```json
99+
"boot_command": [
100+
"<wait 'hello'>",
101+
"<wait '(foo|bar|baz)123'>"
102+
]
103+
```
104+
105+
In HCL2:
106+
107+
```hcl
108+
boot_command = [
109+
"<wait 'hello'>",
110+
"<wait '(foo|bar|baz)123'>"
111+
]
112+
```
113+
92114
#### Optional:
93115

94116
@include 'packer-plugin-sdk/bootcommand/BootConfig-not-required.mdx'

0 commit comments

Comments
 (0)