Skip to content

Commit 2dfc605

Browse files
Zane/node level test coverage (#1673)
1 parent a9fd683 commit 2dfc605

4 files changed

Lines changed: 100 additions & 0 deletions

File tree

test/ginkgo-e2e/querylogs/querylogs_suite_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ var LogsClient *azquery.LogsClient
1818
var AKSResourceId string
1919
var RetinaNetworkFlowLogsEnabled string
2020
var GenevaIntegrationEnabled string
21+
var PerNodeLogCoverageEnabled string
2122
var Cfg *rest.Config
2223

2324
func TestQuerylogs(t *testing.T) {
@@ -34,6 +35,7 @@ var _ = BeforeSuite(func() {
3435
RetinaNetworkFlowLogsEnabled, err = utils.IsRetinaNetworkFlowLogsEnabled(K8sClient, "kube-system", "component", "ama-logs-agent", "ama-logs")
3536
Expect(err).NotTo(HaveOccurred())
3637
GenevaIntegrationEnabled = os.Getenv("GENEVA_INTEGRATION")
38+
PerNodeLogCoverageEnabled = os.Getenv("PER_NODE_LOG_COVERAGE")
3739
LogsClient, err = utils.SetupLogsClient()
3840
Expect(err).NotTo(HaveOccurred())
3941
})

test/ginkgo-e2e/querylogs/querylogs_test.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,25 @@ var _ = Describe("When querying the logs for the table", func() {
4141
)
4242
})
4343

44+
var _ = Describe("When querying Container logs per node", func() {
45+
It("Every node hosting an ama-logs DaemonSet pod should have Container logs", func() {
46+
if PerNodeLogCoverageEnabled != "true" {
47+
Skip("Per-node Container log coverage skipped because PER_NODE_LOG_COVERAGE is not set to 'true'")
48+
}
49+
if GenevaIntegrationEnabled == "true" {
50+
Skip("Container log per-node coverage skipped because GENEVA_INTEGRATION is set to 'true'")
51+
}
52+
53+
expectedNodes, err := utils.GetExpectedAmaLogsNodes(K8sClient)
54+
Expect(err).NotTo(HaveOccurred())
55+
56+
observed, err := utils.GetComputerFromContainerLog(LogsClient, AKSResourceId, "5m")
57+
Expect(err).NotTo(HaveOccurred())
58+
59+
Expect(utils.AssertContainerLogNodeCoverage(expectedNodes, observed)).NotTo(HaveOccurred())
60+
})
61+
})
62+
4463
var _ = Describe("When querying the logs for the ContainerInventory", func() {
4564
DescribeTable("Column should have zero empty values",
4665
func(column string) {

test/ginkgo-e2e/utils/kubernetes_api_utils.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -522,6 +522,26 @@ func GetAndUpdateConfigMap(clientset *kubernetes.Clientset, configMapName, confi
522522
return nil
523523
}
524524

525+
// GetExpectedAmaLogsNodes returns the names of all nodes in the cluster.
526+
// This is the set of nodes that ama-logs DaemonSets are expected to cover
527+
// for ContainerLogV2 ingestion. Crucially, this set is derived from the
528+
// Kubernetes node list directly (not from where ama-logs pods actually
529+
// landed), so a node where the DaemonSet failed to schedule (taint,
530+
// resource pressure, image pull failure, etc.) is still expected and will
531+
// be reported as missing by the per-node coverage check.
532+
func GetExpectedAmaLogsNodes(clientset *kubernetes.Clientset) ([]string, error) {
533+
nodes, err := GetAllNodes(clientset)
534+
if err != nil {
535+
return nil, err
536+
}
537+
538+
names := make([]string, 0, len(nodes))
539+
for _, n := range nodes {
540+
names = append(names, n.Name)
541+
}
542+
return names, nil
543+
}
544+
525545
func GetAllAgentPods(clientset *kubernetes.Clientset) ([]corev1.Pod, error) {
526546
podList, err := clientset.CoreV1().Pods("").List(context.TODO(), metav1.ListOptions{})
527547
if err != nil {

test/ginkgo-e2e/utils/query_logs_api_utils.go

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package utils
33
import (
44
"context"
55
"fmt"
6+
"strings"
67

78
"github.com/Azure/azure-sdk-for-go/sdk/azcore/to"
89
"github.com/Azure/azure-sdk-for-go/sdk/azidentity"
@@ -149,3 +150,61 @@ func CompareResourcesInLogsAndKubeAPI(K8sClient *kubernetes.Clientset, logsClien
149150

150151
return CompareResourcesHelper(logsClient, resourceID, query, resources)
151152
}
153+
154+
155+
func GetComputerFromContainerLog(logsClient *azquery.LogsClient, resourceID string, window string) (map[string]int64, error) {
156+
counts, v2Err := queryCountsByComputer(logsClient, resourceID, "ContainerLogV2", window)
157+
if v2Err == nil {
158+
return counts, nil
159+
}
160+
161+
fallback, fbErr := queryCountsByComputer(logsClient, resourceID, "ContainerLog", window)
162+
if fbErr != nil {
163+
return nil, fmt.Errorf("ContainerLogV2 query failed: %v; ContainerLog fallback failed: %v", v2Err, fbErr)
164+
}
165+
return fallback, nil
166+
}
167+
168+
func queryCountsByComputer(logsClient *azquery.LogsClient, resourceID string, table string, window string) (map[string]int64, error) {
169+
query := fmt.Sprintf("%s | where TimeGenerated > ago(%s) | summarize count() by Computer", table, window)
170+
tables, err := QueryLogs(logsClient, resourceID, query)
171+
if err != nil {
172+
return nil, err
173+
}
174+
175+
counts := map[string]int64{}
176+
for _, t := range tables {
177+
for _, row := range t.Rows {
178+
if len(row) < 2 {
179+
continue
180+
}
181+
computer, ok := row[0].(string)
182+
if !ok || computer == "" {
183+
continue
184+
}
185+
count, _ := row[1].(float64)
186+
counts[strings.ToLower(computer)] += int64(count)
187+
}
188+
}
189+
return counts, nil
190+
}
191+
192+
// AssertContainerLogNodeCoverage verifies that every expected node has
// container log rows.
//
// expectedNodes holds the node names that must be covered.
// observedCountsByComputer maps a lower-cased Computer name to its row count;
// node names are lower-cased before lookup, so the comparison is
// case-insensitive. A node that is absent from the map, or present with a
// count of zero or less, is treated as missing.
//
// It returns nil when every expected node is covered. It returns an error
// when expectedNodes is empty (nothing can be verified) or when at least one
// node is missing; in the latter case the error lists the missing nodes in
// the order they appear in expectedNodes.
//
// The messages refer to "container log" rather than a specific table because
// the observed counts may come from either ContainerLogV2 or ContainerLog.
func AssertContainerLogNodeCoverage(expectedNodes []string, observedCountsByComputer map[string]int64) error {
	if len(expectedNodes) == 0 {
		return fmt.Errorf("no expected nodes provided; cannot verify container log coverage")
	}

	var missing []string
	for _, node := range expectedNodes {
		// A lookup on a missing key yields 0, which also counts as missing.
		if observedCountsByComputer[strings.ToLower(node)] <= 0 {
			missing = append(missing, node)
		}
	}
	if len(missing) == 0 {
		return nil
	}
	return fmt.Errorf("container log ingestion is missing for %d/%d expected node(s): %s", len(missing), len(expectedNodes), strings.Join(missing, ", "))
}

0 commit comments

Comments
 (0)