Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions test/ginkgo-e2e/querylogs/querylogs_suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ var LogsClient *azquery.LogsClient
var AKSResourceId string
var RetinaNetworkFlowLogsEnabled string
var GenevaIntegrationEnabled string
var PerNodeLogCoverageEnabled string
var Cfg *rest.Config

func TestQuerylogs(t *testing.T) {
Expand All @@ -34,6 +35,7 @@ var _ = BeforeSuite(func() {
RetinaNetworkFlowLogsEnabled, err = utils.IsRetinaNetworkFlowLogsEnabled(K8sClient, "kube-system", "component", "ama-logs-agent", "ama-logs")
Expect(err).NotTo(HaveOccurred())
GenevaIntegrationEnabled = os.Getenv("GENEVA_INTEGRATION")
PerNodeLogCoverageEnabled = os.Getenv("PER_NODE_LOG_COVERAGE")
LogsClient, err = utils.SetupLogsClient()
Expect(err).NotTo(HaveOccurred())
})
Expand Down
19 changes: 19 additions & 0 deletions test/ginkgo-e2e/querylogs/querylogs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,25 @@ var _ = Describe("When querying the logs for the table", func() {
)
})

var _ = Describe("When querying Container logs per node", func() {
It("Every node hosting an ama-logs DaemonSet pod should have Container logs", func() {
if PerNodeLogCoverageEnabled != "true" {
Skip("Per-node Container log coverage skipped because PER_NODE_LOG_COVERAGE is not set to 'true'")
}
if GenevaIntegrationEnabled == "true" {
Skip("Container log per-node coverage skipped because GENEVA_INTEGRATION is set to 'true'")
}

expectedNodes, err := utils.GetExpectedAmaLogsNodes(K8sClient)
Expect(err).NotTo(HaveOccurred())

observed, err := utils.GetComputerFromContainerLog(LogsClient, AKSResourceId, "5m")
Expect(err).NotTo(HaveOccurred())

Expect(utils.AssertContainerLogNodeCoverage(expectedNodes, observed)).NotTo(HaveOccurred())
})
})

var _ = Describe("When querying the logs for the ContainerInventory", func() {
DescribeTable("Column should have zero empty values",
func(column string) {
Expand Down
20 changes: 20 additions & 0 deletions test/ginkgo-e2e/utils/kubernetes_api_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -522,6 +522,26 @@ func GetAndUpdateConfigMap(clientset *kubernetes.Clientset, configMapName, confi
return nil
}

// GetExpectedAmaLogsNodes returns the names of all nodes in the cluster.
// This is the set of nodes that ama-logs DaemonSets are expected to cover
// for ContainerLogV2 ingestion. Crucially, this set is derived from the
// Kubernetes node list directly (not from where ama-logs pods actually
// landed), so a node where the DaemonSet failed to schedule (taint,
// resource pressure, image pull failure, etc.) is still expected and will
// be reported as missing by the per-node coverage check.
func GetExpectedAmaLogsNodes(clientset *kubernetes.Clientset) ([]string, error) {
nodes, err := GetAllNodes(clientset)
if err != nil {
return nil, err
}

names := make([]string, 0, len(nodes))
for _, n := range nodes {
names = append(names, n.Name)
}
return names, nil
}

func GetAllAgentPods(clientset *kubernetes.Clientset) ([]corev1.Pod, error) {
podList, err := clientset.CoreV1().Pods("").List(context.TODO(), metav1.ListOptions{})
if err != nil {
Expand Down
59 changes: 59 additions & 0 deletions test/ginkgo-e2e/utils/query_logs_api_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package utils
import (
"context"
"fmt"
"strings"

"github.com/Azure/azure-sdk-for-go/sdk/azcore/to"
"github.com/Azure/azure-sdk-for-go/sdk/azidentity"
Expand Down Expand Up @@ -149,3 +150,61 @@ func CompareResourcesInLogsAndKubeAPI(K8sClient *kubernetes.Clientset, logsClien

return CompareResourcesHelper(logsClient, resourceID, query, resources)
}


func GetComputerFromContainerLog(logsClient *azquery.LogsClient, resourceID string, window string) (map[string]int64, error) {
counts, v2Err := queryCountsByComputer(logsClient, resourceID, "ContainerLogV2", window)
if v2Err == nil {
return counts, nil
}

fallback, fbErr := queryCountsByComputer(logsClient, resourceID, "ContainerLog", window)
if fbErr != nil {
return nil, fmt.Errorf("ContainerLogV2 query failed: %v; ContainerLog fallback failed: %v", v2Err, fbErr)
}
return fallback, nil
}

func queryCountsByComputer(logsClient *azquery.LogsClient, resourceID string, table string, window string) (map[string]int64, error) {
query := fmt.Sprintf("%s | where TimeGenerated > ago(%s) | summarize count() by Computer", table, window)
tables, err := QueryLogs(logsClient, resourceID, query)
if err != nil {
return nil, err
}

counts := map[string]int64{}
for _, t := range tables {
for _, row := range t.Rows {
if len(row) < 2 {
continue
}
computer, ok := row[0].(string)
if !ok || computer == "" {
continue
}
count, _ := row[1].(float64)
counts[strings.ToLower(computer)] += int64(count)
}
}
return counts, nil
}

// AssertContainerLogNodeCoverage returns nil if every expected node appears
// in the per-Computer count map with a positive row count (compared
// case-insensitively), or an error listing the missing nodes otherwise.
func AssertContainerLogNodeCoverage(expectedNodes []string, observedCountsByComputer map[string]int64) error {
if len(expectedNodes) == 0 {
return fmt.Errorf("no expected nodes provided; cannot verify ContainerLogV2 coverage")
}

var missing []string
for _, n := range expectedNodes {
if observedCountsByComputer[strings.ToLower(n)] <= 0 {
missing = append(missing, n)
}
}
if len(missing) > 0 {
return fmt.Errorf("ContainerLogV2 ingestion is missing for %d/%d expected node(s): %s", len(missing), len(expectedNodes), strings.Join(missing, ", "))
}
return nil
}
Loading