diff --git a/internal/components/collector/collector_test.go b/internal/components/collector/collector_test.go index 08abdb7..786e6e0 100644 --- a/internal/components/collector/collector_test.go +++ b/internal/components/collector/collector_test.go @@ -122,6 +122,76 @@ func TestListPods_ResourceFormat(t *testing.T) { } } +func TestContainsGlob(t *testing.T) { + tests := []struct { + path string + want bool + }{ + {"/skywalking/logs/", false}, + {"/tmp/dump.hprof", false}, + {"/tmp/app[1].log", true}, // [1] is a valid shell character class + {"/tmp/app[].log", false}, // [] is not a valid character class + {"/skywalking/logs*", true}, + {"/tmp/*.hprof", true}, + {"/tmp/dump-[0-9].hprof", true}, + {"/var/log/?oo", true}, + } + for _, tt := range tests { + t.Run(tt.path, func(t *testing.T) { + if got := containsGlob(tt.path); got != tt.want { + t.Errorf("containsGlob(%q) = %v, want %v", tt.path, got, tt.want) + } + }) + } +} + +func TestValidateGlobPattern(t *testing.T) { + tests := []struct { + pattern string + wantErr bool + }{ + {"/skywalking/logs*", false}, + {"/tmp/*.hprof", false}, + {"/tmp/dump-[0-9].hprof", false}, + {"/var/log/app-?.log", false}, + {"'; rm -rf /; '", true}, + {"/path with spaces/*", true}, + {"/tmp/$(whoami)", true}, + {"/tmp/`id`", true}, + {"/tmp/foo|bar", true}, + {"/tmp/foo;bar", true}, + {"/tmp/foo&bar", true}, + } + for _, tt := range tests { + t.Run(tt.pattern, func(t *testing.T) { + err := validateGlobPattern(tt.pattern) + if (err != nil) != tt.wantErr { + t.Errorf("validateGlobPattern(%q) error = %v, wantErr %v", tt.pattern, err, tt.wantErr) + } + }) + } +} + +func TestExpandPodGlob_NoGlob(t *testing.T) { + paths, err := expandPodGlob("", "default", "pod-0", "", "/skywalking/logs/") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(paths) != 1 || paths[0] != "/skywalking/logs/" { + t.Errorf("expected [/skywalking/logs/], got %v", paths) + } +} + +func TestExpandContainerGlob_NoGlob(t *testing.T) { + paths, err := expandContainerGlob("abc123", "svc", "/var/log/app.log") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(paths) != 1 || paths[0] != "/var/log/app.log" { + t.Errorf("expected [/var/log/app.log], got %v", paths) + } +} + func TestComposeCollectItem_NoService(t *testing.T) { err := composeCollectItem("/fake/compose.yml", "test-project", t.TempDir(), &config.CollectItem{ Paths: []string{"/tmp"}, diff --git a/internal/components/collector/compose.go b/internal/components/collector/compose.go index 09474af..35cbca3 100644 --- a/internal/components/collector/compose.go +++ b/internal/components/collector/compose.go @@ -75,8 +75,15 @@ func composeCollectItem(composeFile, projectName, outputDir string, item *config // Collect specified files var errs []string for _, p := range item.Paths { - if err := collectContainerFile(outputDir, item.Service, containerID, p); err != nil { + paths, err := expandContainerGlob(containerID, item.Service, p) + if err != nil { errs = append(errs, fmt.Sprintf("service %s path %s: %v", item.Service, p, err)) + continue + } + for _, expanded := range paths { + if err := collectContainerFile(outputDir, item.Service, containerID, expanded); err != nil { + errs = append(errs, fmt.Sprintf("service %s path %s: %v", item.Service, expanded, err)) + } } } @@ -123,6 +130,41 @@ func collectContainerInspect(outputDir, service, containerID string) error { return nil } +// expandContainerGlob expands a glob pattern inside a Docker container. +// If the path has no glob characters it is returned as-is. +func expandContainerGlob(containerID, service, pattern string) ([]string, error) { + if !containsGlob(pattern) { + return []string{pattern}, nil + } + + if err := validateGlobPattern(pattern); err != nil { + return nil, err + } + + cmd := fmt.Sprintf("docker exec %s sh -c 'ls -d -- %s 2>/dev/null || true'", containerID, pattern) + stdout, stderr, err := util.ExecuteCommand(cmd) + if err != nil { + logger.Log.Warnf("failed to expand glob %s in service %s: %v, stderr: %s", pattern, service, err, stderr) + return nil, fmt.Errorf("glob expansion failed for %s: %v, stderr: %s", pattern, err, stderr) + } + + var paths []string + for _, line := range strings.Split(strings.TrimSpace(stdout), "\n") { + line = strings.TrimSpace(line) + if line != "" { + paths = append(paths, line) + } + } + + if len(paths) == 0 { + logger.Log.Warnf("glob %s matched no files in service %s", pattern, service) + return nil, fmt.Errorf("glob %s matched no files", pattern) + } + + logger.Log.Infof("glob %s expanded to %d path(s) in service %s", pattern, len(paths), service) + return paths, nil +} + func collectContainerFile(outputDir, service, containerID, srcPath string) error { // Preserve the full source path under the service directory to avoid collisions. // e.g. /var/log/nginx/ -> outputDir/serviceName/var/log/nginx/ diff --git a/internal/components/collector/kind.go b/internal/components/collector/kind.go index 29015b9..e3c2527 100644 --- a/internal/components/collector/kind.go +++ b/internal/components/collector/kind.go @@ -21,6 +21,7 @@ import ( "fmt" "os" "path/filepath" + "regexp" "strings" "github.com/apache/skywalking-infra-e2e/internal/config" @@ -74,8 +75,15 @@ func kindCollectItem(kubeConfigPath, outputDir string, item *config.CollectItem) // Collect specified files for _, p := range item.Paths { - if err := collectPodFile(kubeConfigPath, outputDir, pod.namespace, pod.name, item.Container, p); err != nil { + paths, err := expandPodGlob(kubeConfigPath, pod.namespace, pod.name, item.Container, p) + if err != nil { errs = append(errs, fmt.Sprintf("pod %s/%s path %s: %v", pod.namespace, pod.name, p, err)) + continue + } + for _, expanded := range paths { + if err := collectPodFile(kubeConfigPath, outputDir, pod.namespace, pod.name, item.Container, expanded); err != nil { + errs = append(errs, fmt.Sprintf("pod %s/%s path %s: %v", pod.namespace, pod.name, expanded, err)) + } } } } @@ -151,6 +159,88 @@ func collectPodDescribe(kubeConfigPath, outputDir, namespace, podName string) er return nil } +// containsGlob reports whether the path contains glob metacharacters. +func containsGlob(path string) bool { + if strings.ContainsAny(path, "*?") { + return true + } + // Only treat '[' as a glob when followed by a matching ']' with at least + // one character between them, so literal brackets (e.g. "app[1].log") + // that don't form a valid character class are not misidentified. + for i := 0; i < len(path); i++ { + if path[i] != '[' { + continue + } + for j := i + 1; j < len(path); j++ { + if path[j] != ']' { + continue + } + // Check there is at least one non-']' char between '[' and ']'. + for k := i + 1; k < j; k++ { + if path[k] != ']' { + return true + } + } + break + } + } + return false +} + +// validPathPattern matches paths that contain only safe characters for shell interpolation. +// Allowed: alphanumeric, /, ., -, _, *, ?, [, ]. +var validPathPattern = regexp.MustCompile(`^[a-zA-Z0-9/_.*?\[\]\-]+$`) + +// validateGlobPattern checks that a glob pattern contains only safe characters +// to prevent shell injection when interpolated into sh -c commands. +func validateGlobPattern(pattern string) error { + if !validPathPattern.MatchString(pattern) { + return fmt.Errorf("glob pattern %q contains unsupported characters", pattern) + } + return nil +} + +// expandPodGlob expands a glob pattern inside a pod. If the path has no glob +// characters it is returned as-is. Otherwise kubectl exec runs sh to expand +// the pattern and returns the matched paths. +func expandPodGlob(kubeConfigPath, namespace, podName, container, pattern string) ([]string, error) { + if !containsGlob(pattern) { + return []string{pattern}, nil + } + + if err := validateGlobPattern(pattern); err != nil { + return nil, err + } + + cmd := fmt.Sprintf("kubectl --kubeconfig %s -n %s exec %s", kubeConfigPath, namespace, podName) + if container != "" { + cmd += fmt.Sprintf(" -c %s", container) + } + cmd += fmt.Sprintf(" -- sh -c 'ls -d -- %s 2>/dev/null || true'", pattern) + + stdout, stderr, err := util.ExecuteCommand(cmd) + if err != nil { + logger.Log.Warnf("failed to expand glob %s in pod %s/%s: %v, stderr: %s", pattern, namespace, podName, err, stderr) + return nil, fmt.Errorf("glob expansion failed for %s: %v, stderr: %s", pattern, err, stderr) + } + + var paths []string + for _, line := range strings.Split(strings.TrimSpace(stdout), "\n") { + line = strings.TrimSpace(line) + if line != "" { + paths = append(paths, line) + } + } + + if len(paths) == 0 { + logger.Log.Warnf("glob %s matched no files in pod %s/%s", pattern, namespace, podName) + return nil, fmt.Errorf("glob %s matched no files", pattern) + } + + logger.Log.Infof("glob %s expanded to %d path(s) in pod %s/%s", pattern, len(paths), namespace, podName) + return paths, nil +} + func collectPodFile(kubeConfigPath, outputDir, namespace, podName, container, srcPath string) error { // Preserve the full source path under the pod directory to avoid collisions. // e.g. /skywalking/logs/ -> outputDir/namespace/podName/skywalking/logs/