Skip to content

Commit 5f8cb4a

Browse files
Add per-node ContainerLogV2 coverage check to e2e querylogs
1 parent 3bb5c30 commit 5f8cb4a

3 files changed

Lines changed: 106 additions & 0 deletions

File tree

test/ginkgo-e2e/querylogs/querylogs_test.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,22 @@ var _ = Describe("When querying the logs for the table", func() {
4141
)
4242
})
4343

44+
var _ = Describe("When querying ContainerLogV2 per node", func() {
45+
It("Every node hosting an ama-logs DaemonSet pod should have ContainerLogV2 rows", func() {
46+
if GenevaIntegrationEnabled == "true" {
47+
Skip("ContainerLogV2 per-node coverage skipped because GENEVA_INTEGRATION is set to 'true'")
48+
}
49+
50+
expectedNodes, err := utils.GetExpectedAmaLogsNodes(K8sClient)
51+
Expect(err).NotTo(HaveOccurred())
52+
53+
observed, err := utils.QueryContainerLogV2CountsByComputer(LogsClient, AKSResourceId, "5m")
54+
Expect(err).NotTo(HaveOccurred())
55+
56+
Expect(utils.AssertContainerLogV2NodeCoverage(expectedNodes, observed)).NotTo(HaveOccurred())
57+
})
58+
})
59+
4460
var _ = Describe("When querying the logs for the ContainerInventory", func() {
4561
DescribeTable("Column should have zero empty values",
4662
func(column string) {

test/ginkgo-e2e/utils/kubernetes_api_utils.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -522,6 +522,26 @@ func GetAndUpdateConfigMap(clientset *kubernetes.Clientset, configMapName, confi
522522
return nil
523523
}
524524

525+
// GetExpectedAmaLogsNodes returns the names of all nodes in the cluster.
526+
// This is the set of nodes that ama-logs DaemonSets are expected to cover
527+
// for ContainerLogV2 ingestion. Crucially, this set is derived from the
528+
// Kubernetes node list directly (not from where ama-logs pods actually
529+
// landed), so a node where the DaemonSet failed to schedule (taint,
530+
// resource pressure, image pull failure, etc.) is still expected and will
531+
// be reported as missing by the per-node coverage check.
532+
func GetExpectedAmaLogsNodes(clientset *kubernetes.Clientset) ([]string, error) {
533+
nodes, err := GetAllNodes(clientset)
534+
if err != nil {
535+
return nil, err
536+
}
537+
538+
names := make([]string, 0, len(nodes))
539+
for _, n := range nodes {
540+
names = append(names, n.Name)
541+
}
542+
return names, nil
543+
}
544+
525545
func GetAllAgentPods(clientset *kubernetes.Clientset) ([]corev1.Pod, error) {
526546
podList, err := clientset.CoreV1().Pods("").List(context.TODO(), metav1.ListOptions{})
527547
if err != nil {

test/ginkgo-e2e/utils/query_logs_api_utils.go

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package utils
33
import (
44
"context"
55
"fmt"
6+
"strings"
67

78
"github.com/Azure/azure-sdk-for-go/sdk/azcore/to"
89
"github.com/Azure/azure-sdk-for-go/sdk/azidentity"
@@ -149,3 +150,72 @@ func CompareResourcesInLogsAndKubeAPI(K8sClient *kubernetes.Clientset, logsClien
149150

150151
return CompareResourcesHelper(logsClient, resourceID, query, resources)
151152
}
153+
154+
// QueryContainerLogV2CountsByComputer queries the Log Analytics workspace for
155+
// the number of ContainerLogV2 rows ingested per node (Computer) within the
156+
// given time window (e.g. "5m"). Returns a map keyed by lowercased Computer
157+
// name.
158+
//
159+
// ContainerLogV2 and ContainerLog are mutually exclusive — a cluster writes
160+
// to one or the other based on its schema configuration. This helper only
161+
// falls back to ContainerLog when the ContainerLogV2 query *errors* (e.g.
162+
// the V2 table does not exist in a V1-configured workspace). A successful V2
163+
// query that returns zero rows is treated as a real ingestion failure and
164+
// surfaced as an empty map; callers must NOT interpret that as a reason to
165+
// fall back, otherwise V2 ingestion failures would be silently masked.
166+
func QueryContainerLogV2CountsByComputer(logsClient *azquery.LogsClient, resourceID string, window string) (map[string]int64, error) {
167+
counts, v2Err := queryCountsByComputer(logsClient, resourceID, "ContainerLogV2", window)
168+
if v2Err == nil {
169+
return counts, nil
170+
}
171+
172+
fallback, fbErr := queryCountsByComputer(logsClient, resourceID, "ContainerLog", window)
173+
if fbErr != nil {
174+
return nil, fmt.Errorf("ContainerLogV2 query failed: %v; ContainerLog fallback failed: %v", v2Err, fbErr)
175+
}
176+
return fallback, nil
177+
}
178+
179+
func queryCountsByComputer(logsClient *azquery.LogsClient, resourceID string, table string, window string) (map[string]int64, error) {
180+
query := fmt.Sprintf("%s | where TimeGenerated > ago(%s) | summarize count() by Computer", table, window)
181+
tables, err := QueryLogs(logsClient, resourceID, query)
182+
if err != nil {
183+
return nil, err
184+
}
185+
186+
counts := map[string]int64{}
187+
for _, t := range tables {
188+
for _, row := range t.Rows {
189+
if len(row) < 2 {
190+
continue
191+
}
192+
computer, ok := row[0].(string)
193+
if !ok || computer == "" {
194+
continue
195+
}
196+
count, _ := row[1].(float64)
197+
counts[strings.ToLower(computer)] += int64(count)
198+
}
199+
}
200+
return counts, nil
201+
}
202+
203+
// AssertContainerLogV2NodeCoverage checks that every name in expectedNodes has
// a positive row count in observedCountsByComputer.
//
// Names are compared case-insensitively on both sides: the expected node names
// and the keys of observedCountsByComputer are lower-cased before matching, so
// the caller does not have to pre-normalise the map. Counts of keys that differ
// only in case are summed.
//
// Returns nil when every expected node is covered. Returns an error when
// expectedNodes is empty (there is nothing to verify), or when one or more
// nodes have no rows; in the latter case the error lists the missing nodes in
// the order they appear in expectedNodes.
func AssertContainerLogV2NodeCoverage(expectedNodes []string, observedCountsByComputer map[string]int64) error {
	if len(expectedNodes) == 0 {
		return fmt.Errorf("no expected nodes provided; cannot verify ContainerLogV2 coverage")
	}

	// Fold the observed keys as well, so a mixed-case Computer name cannot make
	// a covered node look missing.
	normalized := make(map[string]int64, len(observedCountsByComputer))
	for computer, count := range observedCountsByComputer {
		normalized[strings.ToLower(computer)] += count
	}

	var missing []string
	for _, n := range expectedNodes {
		// A node that is absent from the map reads as 0 and is reported too.
		if normalized[strings.ToLower(n)] <= 0 {
			missing = append(missing, n)
		}
	}
	if len(missing) > 0 {
		return fmt.Errorf("ContainerLogV2 ingestion is missing for %d/%d expected node(s): %s", len(missing), len(expectedNodes), strings.Join(missing, ", "))
	}
	return nil
}

0 commit comments

Comments
 (0)