Skip to content

Commit b054508

Browse files
Merging 7c272bf into trunk-temp/pr-170951/d2f25e95-b7c6-44cc-ad25-d19f72945fd8
2 parents c945012 + 7c272bf commit b054508

2 files changed

Lines changed: 54 additions & 112 deletions

File tree

pkg/roachprod/failureinjection/failures/disk_stall.go

Lines changed: 54 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,17 @@ import (
2323

2424
var cockroachIOController = filepath.Join("/sys/fs/cgroup/system.slice", install.VirtualClusterLabel(install.SystemInterfaceName, 0)+".service", "io.max")
2525

26+
// diskMajMinExpr is a shell command substitution that expands to the
27+
// "MAJ:MIN" pair of the block device backing /mnt/data1 on the current node.
28+
// It is evaluated on each node so that the device is resolved locally, since
29+
// GCE does not guarantee consistent /dev/sdX letter assignment across VMs
30+
// (see #170379). All cgroup-disk-staller commands that need the device's
31+
// major:minor go through this single expression.
32+
// Double quotes (not single) around [:space:] keep this expression usable
33+
// inside an outer `bash -c '...'` invocation (setThroughputCmd) without
34+
// terminating the surrounding single-quoted string.
35+
const diskMajMinExpr = `$(lsblk -dn -o MAJ:MIN "$(findmnt -n -o SOURCE /mnt/data1)" | tr -d "[:space:]")`
36+
2637
const CgroupsDiskStallName = "cgroup-disk-stall"
2738

2839
type CGroupDiskStaller struct {
@@ -352,11 +363,6 @@ func (s *CGroupDiskStaller) setThroughputCmd(
352363
bw throughput,
353364
cockroachIOController string,
354365
) (string, error) {
355-
maj, min, err := s.DiskDeviceMajorMinor(ctx, l)
356-
if err != nil {
357-
return "", err
358-
}
359-
360366
var limits []string
361367
for _, rw := range readOrWrite {
362368
bytesPerSecondStr := "max"
@@ -367,9 +373,9 @@ func (s *CGroupDiskStaller) setThroughputCmd(
367373
}
368374
l.Printf("setting cgroup bandwith limits:\n%v", limits)
369375

370-
return fmt.Sprintf("sudo /bin/bash -c 'echo %d:%d %s > %s'",
371-
maj,
372-
min,
376+
return fmt.Sprintf(
377+
`sudo /bin/bash -c 'echo "%s %s" > %s'`,
378+
diskMajMinExpr,
373379
strings.Join(limits, " "),
374380
cockroachIOController,
375381
), nil
@@ -380,14 +386,9 @@ func (s *CGroupDiskStaller) setThroughputCmd(
380386
func (s *CGroupDiskStaller) GetReadWriteBytes(
381387
ctx context.Context, l *logger.Logger, node install.Nodes,
382388
) (int, int, error) {
383-
maj, min, err := s.DiskDeviceMajorMinor(ctx, l)
384-
if err != nil {
385-
return 0, 0, err
386-
}
387-
// Check the number of bytes read and written to disk.
388389
res, err := s.RunWithDetails(
389390
ctx, l, node,
390-
fmt.Sprintf(`grep -E '%d:%d' /sys/fs/cgroup/system.slice/io.stat |`, maj, min),
391+
fmt.Sprintf(`grep -E "^%s" /sys/fs/cgroup/system.slice/io.stat |`, diskMajMinExpr),
391392
`grep -oE 'rbytes=[0-9]+|wbytes=[0-9]+' |`,
392393
`awk -F= '{printf "%s ", $2} END {print ""}'`,
393394
)
@@ -415,9 +416,13 @@ const (
415416
DmsetupDiskStallName = "dmsetup-disk-stall"
416417
dmsetupStallCmd = "sudo dmsetup suspend --noflush --nolockfs data1"
417418
dmsetupUnstallCmd = "sudo dmsetup resume data1"
418-
// dmsetupStateFile stores the original disk device name so cleanup can find it
419-
// even when running as a separate stage after setup has modified the mount.
419+
// dmsetupStateFile stores the original disk device name (per node) so
420+
// cleanup can find it even when running as a separate stage after setup
421+
// has unmounted /mnt/data1 and remounted /dev/mapper/data1 over it.
420422
dmsetupStateFile = "/tmp/dmsetup-disk-stall-device"
423+
// dmsetupDevExpr is a shell expression that expands to the data disk
424+
// device name stored in dmsetupStateFile on the current node.
425+
dmsetupDevExpr = `"$(cat ` + dmsetupStateFile + `)"`
421426
)
422427

423428
type DmsetupDiskStaller struct {
@@ -446,36 +451,23 @@ func (s *DmsetupDiskStaller) Description() string {
446451
return "dmsetup disk staller"
447452
}
448453

449-
// getStoredDeviceName reads the device name from the state file saved during setup.
450-
// This allows cleanup to find the original device even when running as a separate stage.
451-
func (s *DmsetupDiskStaller) getStoredDeviceName(
452-
ctx context.Context, l *logger.Logger,
453-
) (string, error) {
454-
res, err := s.RunWithDetails(ctx, l, s.c.Nodes[:1], fmt.Sprintf("cat %s 2>/dev/null", dmsetupStateFile))
455-
if err != nil {
456-
return "", errors.Wrap(err, "failed to read stored device name")
457-
}
458-
dev := strings.TrimSpace(res.Stdout)
459-
if dev == "" {
460-
return "", errors.New("no stored device name found; run setup stage first")
461-
}
462-
return dev, nil
463-
}
464-
465-
// getOrStoreDeviceName gets the disk device name, storing it to the state file if not already stored.
466-
// This is used during setup to persist the device name for later stages.
467-
func (s *DmsetupDiskStaller) getOrStoreDeviceName(
468-
ctx context.Context, l *logger.Logger,
469-
) (string, error) {
470-
dev, err := s.DiskDeviceName(ctx, l)
471-
if err != nil {
472-
return "", err
473-
}
474-
// Save the device name for later use by cleanup when running stages independently.
475-
if err = s.Run(ctx, l, s.c.Nodes, fmt.Sprintf("echo '%s' > %s", dev, dmsetupStateFile)); err != nil {
476-
return "", errors.Wrap(err, "failed to save device name to state file")
454+
// storeDeviceName writes the device backing /mnt/data1 (per node, via
455+
// findmnt) to dmsetupStateFile so subsequent commands can read it back via
456+
// dmsetupDevExpr. Doing this per node is required because GCE does not
457+
// guarantee consistent /dev/sdX letter assignment across VMs (see #170379).
458+
//
459+
// If overwrite is true, the state file is always rewritten (used by Setup).
460+
// If false, the file is only written when missing (used by Cleanup, since
461+
// after Setup has run findmnt would return /dev/mapper/data1 rather than the
462+
// original device).
463+
func (s *DmsetupDiskStaller) storeDeviceName(
464+
ctx context.Context, l *logger.Logger, overwrite bool,
465+
) error {
466+
cmd := fmt.Sprintf(`findmnt -n -o SOURCE /mnt/data1 > %s`, dmsetupStateFile)
467+
if !overwrite {
468+
cmd = fmt.Sprintf(`[ -s %s ] || `, dmsetupStateFile) + cmd
477469
}
478-
return dev, nil
470+
return s.Run(ctx, l, s.c.Nodes, cmd)
479471
}
480472

481473
func (s *DmsetupDiskStaller) Setup(ctx context.Context, l *logger.Logger, args FailureArgs) error {
@@ -492,10 +484,11 @@ func (s *DmsetupDiskStaller) Setup(ctx context.Context, l *logger.Logger, args F
492484
}
493485
}
494486

495-
// Get the device name and store it for later stages (cleanup).
496-
dev, err := s.getOrStoreDeviceName(ctx, l)
497-
if err != nil {
498-
return err
487+
// Resolve the data disk device on each node and persist it for later
488+
// stages (cleanup may run independently after the mount has been
489+
// replaced).
490+
if err = s.storeDeviceName(ctx, l, true /* overwrite */); err != nil {
491+
return errors.Wrap(err, "resolving data disk device")
499492
}
500493

501494
// snapd will run "snapd auto-import /dev/dm-0" via udev triggers when
@@ -515,11 +508,12 @@ sudo udevadm settle`); err != nil {
515508
return err
516509
}
517510
// See https://github.com/cockroachdb/cockroach/issues/129619#issuecomment-2316147244.
518-
if err = s.Run(ctx, l, s.c.Nodes, `sudo tune2fs -O ^has_journal `+dev); err != nil {
511+
if err = s.Run(ctx, l, s.c.Nodes, `sudo tune2fs -O ^has_journal `+dmsetupDevExpr); err != nil {
519512
return errors.WithHintf(err, "disabling journaling fails if the cluster has been started")
520513
}
521-
if err = s.Run(ctx, l, s.c.Nodes, `echo "0 $(sudo blockdev --getsz `+dev+`) linear `+dev+` 0" | `+
522-
`sudo dmsetup create data1`); err != nil {
514+
if err = s.Run(ctx, l, s.c.Nodes,
515+
`dev=`+dmsetupDevExpr+`; echo "0 $(sudo blockdev --getsz "$dev") linear $dev 0" | `+
516+
`sudo dmsetup create data1`); err != nil {
523517
return err
524518
}
525519
// This has occasionally been seen to fail with "Device or resource busy",
@@ -605,15 +599,12 @@ func (s *DmsetupDiskStaller) Cleanup(
605599
}
606600
}
607601

608-
// Try to get the device name from the state file first (for independent stage runs),
609-
// fall back to live lookup (for full lifecycle runs where mount is still intact).
610-
dev, err := s.getStoredDeviceName(ctx, l)
611-
if err != nil {
612-
l.Printf("Could not read stored device name, trying live lookup: %v", err)
613-
dev, err = s.DiskDeviceName(ctx, l)
614-
if err != nil {
615-
return errors.Wrap(err, "failed to determine disk device")
616-
}
602+
// Ensure each node has its device name persisted in dmsetupStateFile.
603+
// In the common case Setup already wrote it; if cleanup is run standalone
604+
// it is populated here via findmnt while the original mount is still in
605+
// place.
606+
if err := s.storeDeviceName(ctx, l, false /* overwrite */); err != nil {
607+
return errors.Wrap(err, "determining disk device")
617608
}
618609

619610
if err := s.Run(ctx, l, s.c.Nodes, `sudo dmsetup resume data1`); err != nil {
@@ -625,11 +616,11 @@ func (s *DmsetupDiskStaller) Cleanup(
625616
if err := s.Run(ctx, l, s.c.Nodes, `sudo dmsetup remove data1`); err != nil {
626617
return err
627618
}
628-
if err := s.Run(ctx, l, s.c.Nodes, `sudo tune2fs -O has_journal `+dev); err != nil {
619+
if err := s.Run(ctx, l, s.c.Nodes, `sudo tune2fs -O has_journal `+dmsetupDevExpr); err != nil {
629620
return err
630621
}
631622
// Mount the original device back to /mnt/data1.
632-
if err := s.Run(ctx, l, s.c.Nodes, fmt.Sprintf("sudo mount %s /mnt/data1", dev)); err != nil {
623+
if err := s.Run(ctx, l, s.c.Nodes, `sudo mount `+dmsetupDevExpr+` /mnt/data1`); err != nil {
633624
return err
634625
}
635626
// Reenable snapd autoimport udev rules.

pkg/roachprod/failureinjection/failures/failure.go

Lines changed: 0 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ import (
1010
gosql "database/sql"
1111
"fmt"
1212
"net/url"
13-
"strconv"
1413
"strings"
1514
"time"
1615

@@ -65,12 +64,6 @@ type FailureMode interface {
6564
WaitForFailureToRecover(ctx context.Context, l *logger.Logger, args FailureArgs) error
6665
}
6766

68-
type diskDevice struct {
69-
name string
70-
major int
71-
minor int
72-
}
73-
7467
// GenericFailure is a generic helper struct that FailureModes can embed to
7568
// provide commonly used functionality that doesn't differ between failure modes,
7669
// e.g. running remote commands on the cluster.
@@ -80,7 +73,6 @@ type GenericFailure struct {
8073
// runTitle is the title to prefix command output with.
8174
runTitle string
8275
networkInterfaces []string
83-
diskDevice diskDevice
8476
connCache []*gosql.DB
8577
localCertsPath string
8678
replicationFactor int
@@ -206,47 +198,6 @@ func (f *GenericFailure) NetworkInterfaces(
206198
return f.networkInterfaces, nil
207199
}
208200

209-
func getDiskDevice(ctx context.Context, f *GenericFailure, l *logger.Logger) error {
210-
if f.diskDevice.name == "" {
211-
res, err := f.c.RunWithDetails(ctx, l, install.WithNodes(f.c.Nodes[:1]), "Get Disk Device", "lsblk -o NAME,MAJ:MIN,MOUNTPOINTS | grep /mnt/data1 | awk '{print $1, $2}'")
212-
if err != nil {
213-
return errors.Wrapf(err, "error when determining block device")
214-
}
215-
parts := strings.Split(strings.TrimSpace(res[0].Stdout), " ")
216-
if len(parts) != 2 {
217-
return errors.Newf("unexpected output from lsblk: %s", res[0].Stdout)
218-
}
219-
f.diskDevice.name = strings.TrimSpace(parts[0])
220-
major, minor, found := strings.Cut(parts[1], ":")
221-
if !found {
222-
return errors.Newf("unexpected output from lsblk: %s", res[0].Stdout)
223-
}
224-
if f.diskDevice.major, err = strconv.Atoi(major); err != nil {
225-
return err
226-
}
227-
if f.diskDevice.minor, err = strconv.Atoi(minor); err != nil {
228-
return err
229-
}
230-
}
231-
return nil
232-
}
233-
234-
func (f *GenericFailure) DiskDeviceName(ctx context.Context, l *logger.Logger) (string, error) {
235-
if err := getDiskDevice(ctx, f, l); err != nil {
236-
return "", err
237-
}
238-
return "/dev/" + f.diskDevice.name, nil
239-
}
240-
241-
func (f *GenericFailure) DiskDeviceMajorMinor(
242-
ctx context.Context, l *logger.Logger,
243-
) (int, int, error) {
244-
if err := getDiskDevice(ctx, f, l); err != nil {
245-
return 0, 0, err
246-
}
247-
return f.diskDevice.major, f.diskDevice.minor, nil
248-
}
249-
250201
func (f *GenericFailure) PingNode(ctx context.Context, l *logger.Logger, node install.Nodes) error {
251202
db, err := f.Conn(ctx, l, node)
252203
if err != nil {

0 commit comments

Comments
 (0)