Skip to content

Commit 76f9458

Browse files
committed
fix: copilot issues
Signed-off-by: Gustavo Carvalho <gustavo.carvalho@container-solutions.com>
1 parent d784310 commit 76f9458

3 files changed

Lines changed: 234 additions & 46 deletions

File tree

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ All plugin config fields are strings (agent gRPC `map<string,string>` contract).
4343
| `custodian_debug` | No | Boolean (`true`/`false`) toggle to pass `--debug` to Cloud Custodian. This increases Cloud Custodian diagnostic output on stderr. Default: `false`. |
4444
| `custodian_verbose` | No | Boolean (`true`/`false`) toggle to pass `-v` to Cloud Custodian. This increases Cloud Custodian diagnostic output on stderr. Default: `false`. |
4545
| `custodian_aws_api_trace` | No | Boolean (`true`/`false`) toggle to inject a temporary Python `sitecustomize.py` into the Custodian child process. Logs botocore API start/end/error events to stderr and `custodian-aws-api-trace.jsonl` in the check output directory. Default: `false`. |
46-
| `custodian_network_diagnostics` | No | Boolean (`true`/`false`) toggle to run Go DNS/TLS preflight probes for relevant AWS service endpoints before Custodian starts and log child process TCP socket snapshots while Custodian is running. Preflight failures stop the Custodian check before execution. Default: `false`. |
46+
| `custodian_network_diagnostics` | No | Boolean (`true`/`false`) toggle to run Go DNS/TLS preflight probes for relevant AWS service endpoints before Custodian starts and log child process TCP socket snapshots while Custodian is running. Preflight failures stop the Custodian check before execution. If `aws_regions` is unset or contains only `all`, service-derived probes are skipped unless `custodian_network_diagnostic_endpoints` is configured. Default: `false`. |
4747
| `custodian_network_diagnostic_endpoints` | No | Comma- or whitespace-separated list of additional endpoint hostnames or URLs to probe via DNS/TLS when `custodian_network_diagnostics` is enabled. Use this for AWS VPC endpoint DNS names such as `vpce-123.backup.eu-west-1.vpce.amazonaws.com`. Default: unset. |
4848
| `custodian_log_tail_during_run` | No | Boolean (`true`/`false`) toggle to tail discovered `custodian-run.log` artifacts during the monitor loop, not only after process exit. Default: `false`. |
4949
| `aws_regions` | No | Comma- or whitespace-separated AWS regions passed as repeated `--region` flags. Duplicate entries are removed while preserving order. Default: unset, which falls back to `--region all` for AWS checks. |

main.go

Lines changed: 109 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,43 @@ var lookPath = exec.LookPath
5050
var lookupHost = net.DefaultResolver.LookupHost
5151
var tlsProbeEndpoint = defaultTLSProbeEndpoint
5252

53+
// Lookup tables used to derive AWS endpoint hosts for network diagnostics.
var (
	// awsResourceServices maps a resource type (with any "aws." prefix already
	// removed) to the AWS service endpoint prefixes used for that resource.
	awsResourceServices = map[string][]string{
		// Compute, containers and serverless.
		"ebs":         {"ec2"},
		"ec2":         {"ec2"},
		"ecs-service": {"ecs"},
		"eks":         {"eks"},
		"lambda":      {"lambda"},

		// Networking and content delivery.
		"app-elb":      {"elasticloadbalancing"},
		"distribution": {"cloudfront"},
		"firewall":     {"network-firewall"},
		"hostedzone":   {"route53"},
		"wafv2":        {"wafv2"},

		// Storage, backup and file transfer.
		"backup-plan":     {"backup"},
		"backup-vault":    {"backup"},
		"efs":             {"elasticfilesystem"},
		"s3":              {"s3"},
		"transfer-server": {"transfer"},

		// Databases and caches.
		"cache-cluster":  {"elasticache"},
		"dynamodb-table": {"dynamodb"},
		"rds":            {"rds"},
		"rds-cluster":    {"rds"},

		// Identity, keys and secrets.
		"iam-group":       {"iam"},
		"iam-policy":      {"iam"},
		"iam-role":        {"iam"},
		"iam-user":        {"iam"},
		"kms-key":         {"kms"},
		"secrets-manager": {"secretsmanager"},

		// Messaging and logging.
		"log-group": {"logs"},
		"sns":       {"sns"},
		"sqs":       {"sqs"},
	}

	// awsGlobalEndpointServices maps a service endpoint prefix to the single
	// region-independent host used for that service.
	awsGlobalEndpointServices = map[string]string{
		"cloudfront": "cloudfront.amazonaws.com",
		"iam":        "iam.amazonaws.com",
		"route53":    "route53.amazonaws.com",
	}
)
89+
5390
// PluginConfig receives string-only config from the agent gRPC interface.
5491
type PluginConfig struct {
5592
PoliciesYAML string `mapstructure:"policies_yaml"`
@@ -475,6 +512,7 @@ func (e *CommandCustodianExecutor) Execute(ctx context.Context, req CustodianExe
475512
contextDoneLogged := false
476513
runCtxDone := runCtx.Done()
477514
lastCustodianLogTail := ""
515+
logTailCache := &custodianLogTailCache{}
478516
e.Logger.Debug("Starting custodian command monitor loop",
479517
"check_name", req.Check.Name,
480518
"pid", pid,
@@ -514,7 +552,7 @@ func (e *CommandCustodianExecutor) Execute(ctx context.Context, req CustodianExe
514552
e.logCustodianProcessSockets(pid, req.Check.Name)
515553
}
516554
if req.LogTailDuringRun {
517-
lastCustodianLogTail = e.logCustodianRunLogTail(req.OutputDir, req.Check.Name, lastCustodianLogTail)
555+
lastCustodianLogTail = e.logCustodianRunLogTail(req.OutputDir, req.Check.Name, lastCustodianLogTail, logTailCache)
518556
}
519557
case <-runCtxDone:
520558
if !contextDoneLogged {
@@ -532,7 +570,7 @@ func (e *CommandCustodianExecutor) Execute(ctx context.Context, req CustodianExe
532570
e.logCustodianProcessSockets(pid, req.Check.Name)
533571
}
534572
if req.LogTailDuringRun {
535-
lastCustodianLogTail = e.logCustodianRunLogTail(req.OutputDir, req.Check.Name, lastCustodianLogTail)
573+
lastCustodianLogTail = e.logCustodianRunLogTail(req.OutputDir, req.Check.Name, lastCustodianLogTail, logTailCache)
536574
}
537575
contextDoneLogged = true
538576
}
@@ -741,9 +779,8 @@ func (e *CommandCustodianExecutor) runAWSEndpointDiagnostics(ctx context.Context
741779
return err
742780
}
743781
if len(endpoints) == 0 {
744-
err := fmt.Errorf("no concrete AWS endpoint hosts are available for diagnostics; configure aws_regions or custodian_network_diagnostic_endpoints")
745-
e.Logger.Error("AWS endpoint diagnostics failed before custodian execution", "check_name", req.Check.Name, "resource", req.Check.Resource, "aws_regions", req.AWSRegions, "error", err)
746-
return err
782+
e.Logger.Warn("Skipping AWS endpoint diagnostics because no concrete endpoint hosts are available; configure aws_regions or custodian_network_diagnostic_endpoints for preflight probes", "check_name", req.Check.Name, "resource", req.Check.Resource, "aws_regions", req.AWSRegions)
783+
return nil
747784
}
748785
if !knownResource {
749786
e.Logger.Warn("AWS endpoint diagnostics will use only configured endpoints because resource service is not mapped", "check_name", req.Check.Name, "resource", req.Check.Resource)
@@ -817,7 +854,7 @@ func awsDiagnosticEndpointsForCheck(resource string, regions []string, configure
817854

818855
for _, region := range diagnosticRegions {
819856
for _, service := range services {
820-
host := fmt.Sprintf("%s.%s.amazonaws.com", service, region)
857+
host := awsServiceEndpointHost(service, region)
821858
endpoints = append(endpoints, networkDiagnosticEndpoint{
822859
Host: host,
823860
Port: "443",
@@ -829,6 +866,14 @@ func awsDiagnosticEndpointsForCheck(resource string, regions []string, configure
829866
return compactUniqueNetworkDiagnosticEndpoints(endpoints), true, nil
830867
}
831868

869+
func awsServiceEndpointHost(service string, region string) string {
870+
service = strings.TrimSpace(service)
871+
if host, ok := awsGlobalEndpointServices[service]; ok {
872+
return host
873+
}
874+
return fmt.Sprintf("%s.%s.amazonaws.com", service, strings.TrimSpace(region))
875+
}
876+
832877
func parseNetworkDiagnosticEndpoint(value string) (networkDiagnosticEndpoint, error) {
833878
original := strings.TrimSpace(value)
834879
if original == "" {
@@ -910,37 +955,7 @@ func defaultTLSProbeEndpoint(ctx context.Context, endpoint networkDiagnosticEndp
910955

911956
func awsServicesForResource(resource string) ([]string, bool) {
912957
resource = strings.TrimPrefix(strings.TrimSpace(resource), "aws.")
913-
resourceServices := map[string][]string{
914-
"app-elb": {"elasticloadbalancing"},
915-
"backup-plan": {"backup"},
916-
"backup-vault": {"backup"},
917-
"cache-cluster": {"elasticache"},
918-
"distribution": {"cloudfront"},
919-
"dynamodb-table": {"dynamodb"},
920-
"ebs": {"ec2"},
921-
"ec2": {"ec2"},
922-
"ecs-service": {"ecs"},
923-
"efs": {"elasticfilesystem"},
924-
"eks": {"eks"},
925-
"firewall": {"network-firewall"},
926-
"hostedzone": {"route53"},
927-
"iam-group": {"iam"},
928-
"iam-policy": {"iam"},
929-
"iam-role": {"iam"},
930-
"iam-user": {"iam"},
931-
"kms-key": {"kms"},
932-
"lambda": {"lambda"},
933-
"log-group": {"logs"},
934-
"rds": {"rds"},
935-
"rds-cluster": {"rds"},
936-
"s3": {"s3"},
937-
"secrets-manager": {"secretsmanager"},
938-
"sns": {"sns"},
939-
"sqs": {"sqs"},
940-
"transfer-server": {"transfer"},
941-
"wafv2": {"wafv2"},
942-
}
943-
services, ok := resourceServices[resource]
958+
services, ok := awsResourceServices[resource]
944959
if !ok {
945960
return nil, false
946961
}
@@ -971,8 +986,21 @@ func (e *CommandCustodianExecutor) logCustodianProcessSockets(pid int, checkName
971986
e.Logger.Info("Custodian child socket snapshot", "check_name", checkName, "pid", pid, "socket_count", len(sockets), "sockets", sockets)
972987
}
973988

974-
func (e *CommandCustodianExecutor) logCustodianRunLogTail(outputDir, checkName, previous string) string {
975-
_, logTail, err := readCustodianLogArtifacts(outputDir, custodianOutputTailBytes)
989+
// custodianLogTailCache holds the custodian run log paths found by the last
// discovery so that repeated log-tail calls can reuse them instead of
// searching the output directory again on every call.
type custodianLogTailCache struct {
990+
// paths is the result of the most recent successful log discovery.
paths []string
991+
// nextDiscovery is the earliest time at which discovery runs again; until
// then the cached paths are returned as-is.
nextDiscovery time.Time
992+
}
993+
994+
func (e *CommandCustodianExecutor) logCustodianRunLogTail(outputDir, checkName, previous string, cache *custodianLogTailCache) string {
995+
logPaths, err := cachedCustodianRunLogPaths(outputDir, cache)
996+
if err != nil {
997+
e.Logger.Warn("Failed locating custodian run logs while process is still running", "check_name", checkName, "error", err)
998+
return previous
999+
}
1000+
if len(logPaths) == 0 {
1001+
return previous
1002+
}
1003+
_, logTail, err := readCustodianLogArtifactsForPaths(logPaths, custodianOutputTailBytes)
9761004
if err != nil {
9771005
e.Logger.Warn("Failed reading custodian run log tail while process is still running", "check_name", checkName, "error", err)
9781006
return previous
@@ -984,6 +1012,26 @@ func (e *CommandCustodianExecutor) logCustodianRunLogTail(outputDir, checkName,
9841012
return logTail
9851013
}
9861014

1015+
func cachedCustodianRunLogPaths(outputDir string, cache *custodianLogTailCache) ([]string, error) {
1016+
if cache == nil {
1017+
return findCustodianRunLogs(outputDir)
1018+
}
1019+
now := time.Now()
1020+
if now.Before(cache.nextDiscovery) {
1021+
return cache.paths, nil
1022+
}
1023+
logPaths, err := findCustodianRunLogs(outputDir)
1024+
if err != nil {
1025+
return nil, err
1026+
}
1027+
cache.paths = logPaths
1028+
cache.nextDiscovery = now.Add(2 * time.Minute)
1029+
if len(cache.paths) == 0 {
1030+
cache.nextDiscovery = now.Add(30 * time.Second)
1031+
}
1032+
return cache.paths, nil
1033+
}
1034+
9871035
func custodianProcessSockets(pid int) ([]string, error) {
9881036
if runtime.GOOS != "linux" {
9891037
return []string{}, nil
@@ -1150,6 +1198,10 @@ func readCustodianLogArtifacts(outputDir string, maxBytes int) ([]string, string
11501198
if err != nil {
11511199
return nil, "", err
11521200
}
1201+
return readCustodianLogArtifactsForPaths(logPaths, maxBytes)
1202+
}
1203+
1204+
func readCustodianLogArtifactsForPaths(logPaths []string, maxBytes int) ([]string, string, error) {
11531205
if len(logPaths) == 0 {
11541206
return nil, "", nil
11551207
}
@@ -1183,6 +1235,7 @@ func findCustodianRunLogs(outputDir string) ([]string, error) {
11831235
if err != nil {
11841236
return nil, fmt.Errorf("failed to locate custodian logs: %w", err)
11851237
}
1238+
slices.Sort(logPaths)
11861239
return logPaths, nil
11871240
}
11881241

@@ -2669,13 +2722,28 @@ func logResourceExplorerLinkSkipped(logger hclog.Logger, payload *StandardizedRe
26692722
resourceType = payload.Resource.Type
26702723
provider = payload.Resource.Provider
26712724
}
2672-
logger.Warn("Skipping AWS Resource Explorer evidence link generation",
2725+
message := "Skipping AWS Resource Explorer evidence link generation"
2726+
args := []interface{}{
26732727
"reason", reason,
26742728
"check_name", checkName,
26752729
"resource_id", resourceID,
26762730
"resource_type", resourceType,
26772731
"provider", provider,
2678-
)
2732+
}
2733+
if shouldWarnResourceExplorerLinkSkipped(reason) {
2734+
logger.Warn(message, args...)
2735+
return
2736+
}
2737+
logger.Debug(message, args...)
2738+
}
2739+
2740+
// shouldWarnResourceExplorerLinkSkipped reports whether a skipped AWS Resource
// Explorer link should be logged as a warning. It returns false for the
// reasons "resource id is not an ARN" and "provider is not aws", and true for
// every other reason.
func shouldWarnResourceExplorerLinkSkipped(reason string) bool {
	quiet := reason == "resource id is not an ARN" || reason == "provider is not aws"
	return !quiet
}
26802748

26812749
func awsResourceExplorerLink(payload *StandardizedResourcePayload) (*proto.Link, string) {

0 commit comments

Comments
 (0)