diff --git a/Dockerfile b/Dockerfile index 96be8e57b0..f18e1448d5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,27 +12,15 @@ COPY ./Makefile ./Makefile COPY ./version ./version COPY ./cmd/main.go ./cmd/main.go COPY ./internal ./internal +COPY ./must-gather ./must-gather USER 0 RUN make build -FROM quay.io/openshift/origin-cli-artifacts:latest AS origincli - -RUN case $(uname -m) in \ - x86_64) cp /usr/share/openshift/linux_amd64/oc.rhel9 /tmp/oc ;; \ - aarch64) cp /usr/share/openshift/linux_arm64/oc.rhel9 /tmp/oc ;; \ - ppc64le) cp /usr/share/openshift/linux_ppc64le/oc.rhel9 /tmp/oc ;; \ - s390x) cp /usr/share/openshift/linux_s390x/oc /tmp/oc ;; \ - *) echo "Unsupported architecture"; exit 1 ;; \ -esac - FROM registry.access.redhat.com/ubi9/ubi-minimal RUN INSTALL_PKGS=" \ openssl \ - rsync \ - file \ - xz \ " && \ microdnf install -y ${INSTALL_PKGS} && \ rpm -V ${INSTALL_PKGS} && \ @@ -41,10 +29,9 @@ RUN INSTALL_PKGS=" \ chmod og+w /tmp/ocp-clo COPY --from=builder /opt/app-root/src/bin/cluster-logging-operator /usr/bin/ +COPY --from=builder /opt/app-root/src/bin/must-gather /usr/bin/ -COPY --from=origincli /tmp/oc /usr/bin/oc - -COPY ./must-gather/collection-scripts/* /usr/bin/ +RUN ln -s /usr/bin/must-gather /usr/bin/gather USER 1000 WORKDIR /usr/bin diff --git a/Dockerfile.art b/Dockerfile.art index 1125ed4168..face83cb91 100644 --- a/Dockerfile.art +++ b/Dockerfile.art @@ -8,24 +8,10 @@ WORKDIR /opt/app-root/src RUN make build BUILD_OPTS="-tags strictfipsruntime" - -FROM quay.io/openshift/origin-cli-artifacts:4.21 AS origincli - -RUN case $(uname -m) in \ - x86_64) cp /usr/share/openshift/linux_amd64/oc.rhel9 /tmp/oc ;; \ - aarch64) cp /usr/share/openshift/linux_arm64/oc.rhel9 /tmp/oc ;; \ - ppc64le) cp /usr/share/openshift/linux_ppc64le/oc.rhel9 /tmp/oc ;; \ - s390x) cp /usr/share/openshift/linux_s390x/oc /tmp/oc ;; \ - *) echo "Unsupported architecture"; exit 1 ;; \ -esac - FROM registry.redhat.io/ubi9/ubi-minimal:9.7 RUN INSTALL_PKGS=" \ - cpio \ - rsync \ - file \ - xz \ + openssl \ " && \ microdnf install -y $INSTALL_PKGS && \ rpm -V $INSTALL_PKGS && \ @@ -34,10 +20,9 @@ RUN INSTALL_PKGS=" \ chmod og+w /tmp/ocp-clo COPY --from=builder /opt/app-root/src/bin/cluster-logging-operator /usr/bin/ +COPY --from=builder /opt/app-root/src/bin/must-gather /usr/bin/ -COPY --from=builder /opt/app-root/src/must-gather/collection-scripts/* /usr/bin/ - -COPY --from=origincli /tmp/oc /usr/bin/oc +RUN ln -s /usr/bin/must-gather /usr/bin/gather USER 1000 diff --git a/Makefile b/Makefile index a10c351602..ee43674f66 100644 --- a/Makefile +++ b/Makefile @@ -87,12 +87,18 @@ bin/forwarder-generator: bin/cluster-logging-operator: go build $(BUILD_OPTS) -o $@ ./cmd +bin/must-gather: + go build $(BUILD_OPTS) -o $@ ./must-gather/cmd + +.PHONY: must-gather +must-gather: bin/must-gather + .PHONY: openshift-client openshift-client: @type -p oc > /dev/null || bash hack/get-openshift-client.sh .PHONY: build -build: bin/cluster-logging-operator +build: bin/cluster-logging-operator bin/must-gather .PHONY: build-debug build-debug: diff --git a/must-gather/cmd/main.go b/must-gather/cmd/main.go new file mode 100644 index 0000000000..7c0c123b23 --- /dev/null +++ b/must-gather/cmd/main.go @@ -0,0 +1,62 @@ +package main + +import ( + "context" + "flag" + "fmt" + "io" + "os" + "path/filepath" + + "github.com/openshift/cluster-logging-operator/must-gather" +) + +func main() { + var ( + destDir string + loggingNamespace string + logFileName string + ) + + flag.StringVar(&destDir, "dest-dir", "/must-gather", "Destination directory for collecting must-gather data") + flag.StringVar(&loggingNamespace, "logging-namespace", "openshift-logging", "Namespace where cluster logging operator is deployed") + flag.StringVar(&logFileName, "log-file", "gather-debug.log", "Name of the debug log file") + flag.Parse() + + // Ensure destination directory exists + if err := os.MkdirAll(destDir, 0755); err != nil { + fmt.Fprintf(os.Stderr, "Failed to create destination directory: %v\n", err) + os.Exit(1) + } + + // Set up logging + logFilePath := filepath.Join(destDir, logFileName) + logFile, err := os.Create(logFilePath) + if err != nil { + fmt.Fprintf(os.Stderr, "Failed to create log file: %v\n", err) + os.Exit(1) + } + defer func() { + if err := logFile.Close(); err != nil { + fmt.Fprintf(os.Stderr, "Failed to close log file: %v\n", err) + } + }() + + // Use multi-writer to write to both file and stdout + multiWriter := io.MultiWriter(os.Stdout, logFile) + + // Create and run the gather + gather, err := mustgather.NewGather(destDir, loggingNamespace, logFileName, multiWriter) + if err != nil { + fmt.Fprintf(os.Stderr, "Failed to create gather: %v\n", err) + os.Exit(1) + } + + ctx := context.Background() + if err := gather.Run(ctx); err != nil { + fmt.Fprintf(os.Stderr, "Must-gather failed: %v\n", err) + os.Exit(1) + } + + fmt.Println("Must-gather completed successfully") +} diff --git a/must-gather/collection-scripts/common b/must-gather/collection-scripts/common deleted file mode 100755 index 88c7ab120f..0000000000 --- a/must-gather/collection-scripts/common +++ /dev/null @@ -1,35 +0,0 @@ -#!/bin/bash - - -get_timestamp(){ - date '+%Y-%m-%d %H:%M:%S' -} - -log(){ - echo "$(get_timestamp) ${*}" -} - -get_env() { - log "BEGIN get_env ..." - local pod=$1 - local env_file=$2/$pod - local ns=${3:-$NAMESPACE} - local pattern=${4:-"Dockerfile-.*logging*"} - log ---- Env for $pod - containers=$(oc -n $ns get po $pod -o jsonpath='{.spec.containers[*].name}') - for container in $containers - do - log ----- Inspecting container $container - dockerfile=$(oc -n $ns exec $pod -c $container -- ls /root/buildinfo | grep $pattern)|| - if [ -n "$dockerfile" ] - then - log ----- Getting buildInfo - echo Image info: $dockerfile > $env_file - oc -n $ns exec $pod -c $container -- grep -o "\"build-date\"=\"[^[:blank:]]*\"" /root/buildinfo/$dockerfile >> $env_file || log "---- Unable to get build date" - fi - log ----- Getting environment variables - echo -- Environment Variables >> $env_file - oc -n $ns exec $pod -c $container -- env | sort >> $env_file - done - log "END get_env ..." -} diff --git a/must-gather/collection-scripts/gather b/must-gather/collection-scripts/gather deleted file mode 100755 index 8c6cc5c220..0000000000 --- a/must-gather/collection-scripts/gather +++ /dev/null @@ -1,145 +0,0 @@ -#!/bin/bash - -set -euo pipefail - -# Store PIDs of all the subprocesses -pids=() - -echo -e "..... Cluster Logging must-gather script started .....\n" - -SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) -source ${SCRIPT_DIR}/common -BASE_COLLECTION_PATH="${1:-/must-gather}" -BASE_COLLECTION_PATH=$(cd "$(dirname -- $BASE_COLLECTION_PATH)" >/dev/null; pwd -P)/$(basename -- "$BASE_COLLECTION_PATH") - -LOGGING_NS="${2:-openshift-logging}" -LOGFILE_NAME="${3:-gather-debug.log}" -LOGFILE_PATH="${BASE_COLLECTION_PATH}/${LOGFILE_NAME}" # must-gather/gather-debug.log - -mkdir -p "${BASE_COLLECTION_PATH}" -cd $BASE_COLLECTION_PATH -log "must-gather logs are located at: '${LOGFILE_PATH}'" - -mkdir ${BASE_COLLECTION_PATH}/cache-dir||: -export KUBECACHEDIR=${BASE_COLLECTION_PATH}/cache-dir - -# namespaces -namespace_resources=(openshift-operator-lifecycle-manager) - -# cluster logging operator namespace -namespace_resources+=($LOGGING_NS) - -# elasticsearch operator namespace -namespace_resources+=(openshift-operators-redhat) - -# uiplugin namespace -namespace_resources+=(openshift-operators) - -# multi-forwarder namespaces -for kind in $(oc get crd -A -o custom-columns=:.metadata.name | grep clusterlogforwarder); do - namespaces=$(oc get $kind -A -o custom-columns=:.metadata.namespace | sort -u) - for multi in ${namespaces} ; do - # add to the list of namespaces - if [ "$multi" != "$LOGGING_NS" ] ; then - namespace_resources+=($multi) - log "Adding namespace '$multi' to cluster resources list" | tee -a "${LOGFILE_PATH}" - fi - - # get collector resources from the namespace - log "Inspecting collector resources in namespace '$multi'" | tee -a "${LOGFILE_PATH}" - ${SCRIPT_DIR}/gather_collection_resources "$BASE_COLLECTION_PATH" "$multi" 2>&1 >> "${LOGFILE_PATH}" - done -done - -# cluster-scoped resources -cluster_resources+=(nodes) -cluster_resources+=(clusterroles) -cluster_resources+=(clusterrolebindings) -cluster_resources+=(persistentvolumes) -cluster_resources+=(clusterversion) -cluster_resources+=(machineconfigpool) -cluster_resources+=(customresourcedefinitions) - -log "- BEGIN inspecting cluster resources and namespaces..." | tee -a "${LOGFILE_PATH}" - -for cr in "${cluster_resources[@]}" ; do - log "-- BEGIN inspecting cluster resource ${cr} ..." | tee -a "${LOGFILE_PATH}" - oc adm inspect --cache-dir=${KUBECACHEDIR} --dest-dir="${BASE_COLLECTION_PATH}" "${cr}" >> "${LOGFILE_PATH}" 2>&1 & - pids+=($!) -done - -for ns in "${namespace_resources[@]}" ; do - log "-- BEGIN inspecting namespace ${ns} ..." | tee -a "${LOGFILE_PATH}" - oc adm inspect --cache-dir=${KUBECACHEDIR} --dest-dir="${BASE_COLLECTION_PATH}" "ns/${ns}" >> "${LOGFILE_PATH}" 2>&1 & - pids+=($!) -done -log "- END inspecting cluster resources..." | tee -a "${LOGFILE_PATH}" - -# namespace-scoped resources -resources="pods,roles,rolebindings,configmaps,serviceaccounts,events,installplans,subscriptions,clusterserviceversions,logfilemetricexporter" - -log "BEGIN inspecting namespaced resources ..." | tee -a "${LOGFILE_PATH}" - -for ns in ${namespace_resources[@]} ; do - # grab all our namespaces -- openshift-logging, openshift-operator-lifecycle-manager, openshift-operators-redhat - # should also include any multi-forwarder namespaces found above - log "-- BEGIN inspecting ${ns}/${resources} ..." | tee -a "${LOGFILE_PATH}" - oc adm inspect --cache-dir=${KUBECACHEDIR} --dest-dir="${BASE_COLLECTION_PATH}" -n "$ns" "$resources" 2>&1 | tee -a "${LOGFILE_PATH}" & - pids+=($!) - -done -log "END inspecting namespaced resources ..." | tee -a "${LOGFILE_PATH}" - -# if the uiplugin is installed, collect it and the console CO -uiplugin_crd=$(oc get crd -A -o custom-columns=:.metadata.name | grep uiplugin) || true -if [ "$uiplugin_crd" != "" ] ; then - uiplugin_found="$(oc get uiplugin --ignore-not-found --no-headers)" || true - if [ "$uiplugin_found" != "" ] ; then - log "BEGIN gathering uiplugin and console resources ..." | tee -a "${LOGFILE_PATH}" - oc adm inspect --cache-dir=${KUBECACHEDIR} --dest-dir="${BASE_COLLECTION_PATH}" uiplugin >> "${LOGFILE_PATH}" 2>&1 & - pids+=($!) - oc adm inspect --cache-dir=${KUBECACHEDIR} --dest-dir="${BASE_COLLECTION_PATH}" co/console 2>&1 | tee -a "${LOGFILE_PATH}" & - pids+=($!) - log "END gathering uiplugin and console resources ..." | tee -a "${LOGFILE_PATH}" - else - log "UIPlugin not configured" 2>&1 | tee -a "${LOGFILE_PATH}" - fi -else - log "UIPlugin not installed" 2>&1 | tee -a "${LOGFILE_PATH}" -fi - -log "BEGIN gathering alerts ..." | tee -a "${LOGFILE_PATH}" -${SCRIPT_DIR}/gather_monitoring "$BASE_COLLECTION_PATH" 2>&1 | tee -a "${LOGFILE_PATH}" & -pids+=($!) - -default_clo_found="$(oc -n "$LOGGING_NS" get deployment cluster-logging-operator --ignore-not-found --no-headers)" - -if [ "$default_clo_found" != "" ] ; then - log "BEGIN gathering default CLO resources ..." | tee -a "${LOGFILE_PATH}" - ${SCRIPT_DIR}/gather_cluster_logging_operator_resources "$BASE_COLLECTION_PATH" "$LOGGING_NS" 2>&1 >> "${LOGFILE_PATH}" - log "END gathering default CLO resources ..." | tee -a "${LOGFILE_PATH}" -else - log "Skipping collection inspection. No default CLO found" 2>&1 | tee -a "${LOGFILE_PATH}" -fi - -loki_crd=$(oc get crd -A -o custom-columns=:.metadata.name | grep lokistack) || true - -if [ "$loki_crd" != "" ] ; then - found_lokistack="$(oc -n $LOGGING_NS get lokistack.loki.grafana.com --ignore-not-found --no-headers)" - if [ "$found_lokistack" != "" ] ; then - - log "BEGIN gathering lokistack resources ..." | tee -a "${LOGFILE_PATH}" - ${SCRIPT_DIR}/gather_logstore_resources "$BASE_COLLECTION_PATH" "lokistack" 2>&1 >> "${LOGFILE_PATH}" - log "END gathering logstorage resources ..." | tee -a "${LOGFILE_PATH}" - else - log "Skipping logstorage inspection. No deployment found" 2>&1 | tee -a "${LOGFILE_PATH}" - fi -fi - -# Check if PID array has any values, if so, wait for them to finish -if [ ${#pids[@]} -ne 0 ]; then - log "Waiting on subprocesses to finish execution." - wait "${pids[@]}" -fi - -exit 0 diff --git a/must-gather/collection-scripts/gather_cluster_logging_operator_resources b/must-gather/collection-scripts/gather_cluster_logging_operator_resources deleted file mode 100755 index fd4dfc278c..0000000000 --- a/must-gather/collection-scripts/gather_cluster_logging_operator_resources +++ /dev/null @@ -1,40 +0,0 @@ -#!/bin/bash -set -euo pipefail -SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) -source ${SCRIPT_DIR}/common - -# Expect base collection path as an argument -BASE_COLLECTION_PATH=$1 -NAMESPACE=${2:-openshift-logging} - -log "BEGIN from namespace: $NAMESPACE ..." - -# Use PWD as base path if no argument is passed -if [ "${BASE_COLLECTION_PATH}" = "" ]; then - BASE_COLLECTION_PATH=$(pwd) -fi - -CLO_COLLECTION_PATH="$BASE_COLLECTION_PATH/cluster-logging" -# Adding namespace folders to allow for multi-logging -clo_folder="$CLO_COLLECTION_PATH/clo" - -log "Creating namespace directory: $clo_folder" -mkdir -p "$clo_folder" - -# We only need these from the openshift-logging namespace -if [ $NAMESPACE == "openshift-logging" ]; then - log "Gathering data for 'cluster-logging-operator' from namespace: $NAMESPACE" - - pods=$(oc -n $NAMESPACE get pods -l name=cluster-logging-operator -o jsonpath='{.items[*].metadata.name}') - for pod in $pods - do - log "Inspecting $pod" - get_env $pod $clo_folder $NAMESPACE "Dockerfile-.*operator*" - done -fi - -log "Gathering 'version' from logging namespace: $NAMESPACE" -csv_name="$(oc -n $NAMESPACE get csv -o name | grep -E 'cluster(-?)logging')" -oc -n $NAMESPACE get "${csv_name}" -o jsonpath='{.spec.displayName}{"/must-gather\n"}{.spec.version}' --cache-dir=${KUBECACHEDIR} > "${clo_folder}/version" - -log "END from namespace: $NAMESPACE ..." diff --git a/must-gather/collection-scripts/gather_collection_resources b/must-gather/collection-scripts/gather_collection_resources deleted file mode 100755 index 82771b01b4..0000000000 --- a/must-gather/collection-scripts/gather_collection_resources +++ /dev/null @@ -1,55 +0,0 @@ -#!/bin/bash - -set -euo pipefail - -SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) -source ${SCRIPT_DIR}/common - -# Expect base collection path as an argument -BASE_COLLECTION_PATH=$1 - -# Use PWD as base path if no argument is passed -if [ "${BASE_COLLECTION_PATH}" = "" ]; then - BASE_COLLECTION_PATH=$(pwd) -fi - -NAMESPACE=${2:-openshift-logging} - -CLO_COLLECTION_PATH="$BASE_COLLECTION_PATH/cluster-logging" -# Adding namespace folders to allow for multi-logging -collector_folder="$CLO_COLLECTION_PATH/namespaces/$NAMESPACE" - -log "- BEGIN for namespace: $NAMESPACE ..." - -log "-- Exporting ClusterLogForwarder.observability.openshift.io resources" -crs="$(oc get clusterLogForwarder.observability.openshift.io -n $NAMESPACE -o custom-columns=:.metadata.name --ignore-not-found)" - -# get name of collector from CR name -for collector in ${crs}; do - log "-- Gathering data for ClusterLogForwarder: $collector" - mkdir -p $collector_folder - oc adm inspect --cache-dir=${KUBECACHEDIR} --dest-dir="${BASE_COLLECTION_PATH}" -n $NAMESPACE clusterLogForwarders.observability.openshift.io 2>&1 - - # find daemonset - log "--- Describe Daemonset ds/$collector" - ds_describe=$(oc -n $NAMESPACE describe ds/$collector --cache-dir=${KUBECACHEDIR} 2>&1)|| - if [ "$?" == "0" ]; then - echo "$ds_describe" > $collector_folder/$collector.describe - fi - - # gathering collector pods - pods="$(oc -n $NAMESPACE get pods -lapp.kubernetes.io/instance=$collector -lapp.kubernetes.io/component=collector -o custom-columns=:.metadata.name --ignore-not-found)" - for pod in $pods - do - log "--- Describe collector pod: $pod" - oc -n $NAMESPACE describe pod/$pod > $collector_folder/$pod.describe --cache-dir=${KUBECACHEDIR} 2>&1 - done - - config_name="$collector-config" - log "-- Gathering $config_name#vector.toml from namespace: $NAMESPACE" - data=$(oc -n $NAMESPACE get "configmap/$config_name" -o jsonpath='{.data.vector\.toml}' --ignore-not-found) - if [ "$data" != "" ] ; then - echo "$data" > $collector_folder/configmap_${config_name}_vector.toml 2>&1 - fi -done -log "- END for namespace: $NAMESPACE ..." diff --git a/must-gather/collection-scripts/gather_logstore_resources b/must-gather/collection-scripts/gather_logstore_resources deleted file mode 100755 index b3e9b7b573..0000000000 --- a/must-gather/collection-scripts/gather_logstore_resources +++ /dev/null @@ -1,117 +0,0 @@ -#!/bin/bash -SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) -source ${SCRIPT_DIR}/common - -log "BEGIN gather_logstore_resources ..." - -# Expect base collection path as an argument -BASE_COLLECTION_PATH=$1 -LOG_STORE_TYPE=$2 - -# Use PWD as base path if no argument is passed -if [ "${BASE_COLLECTION_PATH}" = "" ]; then - BASE_COLLECTION_PATH=$(pwd) -fi - -NAMESPACE=${3:-openshift-logging} - -CLO_COLLECTION_PATH="$BASE_COLLECTION_PATH/cluster-logging" -es_folder="$CLO_COLLECTION_PATH/es" - -list_es_storage() { - local pod=$1 - local mountPath=$(oc -n $NAMESPACE get pod $pod -o jsonpath='{.spec.containers[0].volumeMounts[?(@.name=="elasticsearch-storage")].mountPath}') - echo "-- Persistence files" >> $es_folder/$pod - - oc -n $NAMESPACE --cache-dir=${KUBECACHEDIR} exec -c elasticsearch $pod -- ls -lR $mountPath >> $es_folder/$pod - - echo "-- Persistence storage size " >> $es_folder/$pod-storage - - oc -n $NAMESPACE --cache-dir=${KUBECACHEDIR} exec -c elasticsearch $pod -- df -h $mountPath >> $es_folder/$pod-storage - -} - -get_elasticsearch_status() { - local comp=$1 - local pod=${2:-""} - - if [ -z "$pod" ] ; then - echo "Skipping elasticsearch status because no pod was found for $1" - return - fi - - local cluster_folder=$es_folder/cluster-$comp - mkdir -p $cluster_folder - - curl_es='curl -s --max-time 20 --key /etc/elasticsearch/secret/admin-key --cert /etc/elasticsearch/secret/admin-cert --cacert /etc/elasticsearch/secret/admin-ca https://localhost:9200' - local cat_items=(health nodes aliases thread_pool) - for cat_item in "${cat_items[@]}" - do - oc -n $NAMESPACE --cache-dir=${KUBECACHEDIR} exec -c elasticsearch $pod -- $curl_es/_cat/$cat_item?v &> $cluster_folder/${cat_item}.cat - done - - oc -n $NAMESPACE --cache-dir=${KUBECACHEDIR} exec -c elasticsearch $pod -- $curl_es/_nodes/hot_threads &> $cluster_folder/hot_threads.txt - oc -n $NAMESPACE --cache-dir=${KUBECACHEDIR} exec -c elasticsearch $pod -- $curl_es/_cat/indices?v\&bytes=m &> $cluster_folder/indices.cat - oc -n $NAMESPACE --cache-dir=${KUBECACHEDIR} exec -c elasticsearch $pod -- $curl_es/_cat/indices?h=i,creation.date,creation.date.string,store.size,pri.store.size > $cluster_folder/indices_size.cat - oc -n $NAMESPACE --cache-dir=${KUBECACHEDIR} exec -c elasticsearch $pod -- $curl_es/_search?sort=@timestamp:desc\&pretty > $cluster_folder/latest_documents.json - oc -n $NAMESPACE --cache-dir=${KUBECACHEDIR} exec -c elasticsearch $pod -- $curl_es/_nodes/?pretty > $cluster_folder/nodes_state.json - oc -n $NAMESPACE --cache-dir=${KUBECACHEDIR} exec -c elasticsearch $pod -- $curl_es/_nodes/stats?pretty > $cluster_folder/nodes_stats.json - local health=$(oc -n $NAMESPACE --cache-dir=${KUBECACHEDIR} exec -c elasticsearch $pod -- $curl_es/_cat/health?h=status) - if [ -z "$health" ] - then - log "Unable to get health from $1" - elif [ $health != "green" ] - then - log "Gathering additional cluster information Cluster status is $health" - - cat_items=(recovery shards pending_tasks) - for cat_item in "${cat_items[@]}" - do - oc -n $NAMESPACE --cache-dir=${KUBECACHEDIR} exec -c elasticsearch $pod -- $curl_es/_cat/$cat_item?v --cache-dir=${KUBECACHEDIR} &> $cluster_folder/${cat_item}.cat - done - - oc -n $NAMESPACE --cache-dir=${KUBECACHEDIR} exec -c elasticsearch $pod -- $curl_es/_cat/shards?h=index,shard,prirep,state,unassigned.reason,unassigned.description | grep UNASSIGNED &> $cluster_folder/unassigned_shards.cat - fi -} - -get_elasticsearch_cr() { - oc adm inspect --cache-dir=${KUBECACHEDIR} --dest-dir="${BASE_COLLECTION_PATH}" -n $NAMESPACE elasticsearch.logging.openshift.io elasticsearch -} - -get_lokistack_cr() { - oc adm inspect --cache-dir=${KUBECACHEDIR} --dest-dir="${BASE_COLLECTION_PATH}" -n $NAMESPACE lokistacks.loki.grafana.com -} - -log "Gathering data for logstore component" -if [ "$LOG_STORE_TYPE" = "elasticsearch" ] ; then - log "-- Checking Elasticsearch health" - mkdir -p $es_folder - - es_pods=$(oc -n $NAMESPACE get pods -l component=elasticsearch -o jsonpath='{.items[*].metadata.name}') - for pod in $es_pods - do - log "---- Elasticsearch pod: $pod" - get_env $pod $es_folder "$NAMESPACE" - list_es_storage $pod - done - - anypod="" - for comp in "elasticsearch" - do - echo "-- Getting Elasticsearch cluster info from logging-${comp} pod" - anypod=$(oc -n $NAMESPACE --cache-dir=${KUBECACHEDIR} get pod --selector="component=${comp}" --no-headers | grep Running | awk '{print$1}' | tail -1) - get_elasticsearch_status ${comp} ${anypod} - done - - log "-- Gather Elasticsearch CR" - get_elasticsearch_cr -fi - -if [ "$LOG_STORE_TYPE" = "lokistack" ] ; then - log "Gathering Lokistack resources" - - log "-- Gather Lokistack CR" - get_lokistack_cr -fi - -log "END gather_logstore_resources ..." diff --git a/must-gather/collection-scripts/gather_monitoring b/must-gather/collection-scripts/gather_monitoring deleted file mode 100755 index 149bde18d6..0000000000 --- a/must-gather/collection-scripts/gather_monitoring +++ /dev/null @@ -1,62 +0,0 @@ -#!/bin/bash - -# based on gather_monitoring script from standard must-gather - -# safeguards -set -o nounset -set -o errexit -set -o pipefail - -# global readonly constants -# expect base collection path as an argument -declare -r BASE_COLLECTION_PATH=$1 -declare -r MONITORING_PATH="${BASE_COLLECTION_PATH}/monitoring" - -source "$(dirname "$0")"/monitoring_common.sh - -# init initializes global variables that need to be computed. -# E.g. get token of the default ServiceAccount -init() { - mkdir -p "${MONITORING_PATH}" - - readarray -t PROM_PODS < <( - oc get pods -n openshift-monitoring -l prometheus=k8s \ - --no-headers -o custom-columns=":metadata.name" - ) -} - -# prom_get makes http GET requests to prometheus /api/v1/$object and stores -# the stdout and stderr results -prom_get() { - local object="$1"; shift - local path="${1:-$object}"; shift || true - local pod - pod=$(get_first_ready_prom_pod) - - local result_path="$MONITORING_PATH/prometheus/$path" - mkdir -p "$(dirname "$result_path")" - - echo "INFO: Getting ${object} from ${pod}" - oc exec "${pod}" \ - -c prometheus \ - -n openshift-monitoring \ - -- /bin/bash -c "curl -sG http://localhost:9090/api/v1/${object}" \ - > "${result_path}.json" \ - 2> "${result_path}.stderr" -} - -monitoring_gather(){ - init - - echo "INFO: Found ${#PROM_PODS[@]} replicas - ${PROM_PODS[*]}" - - # begin gathering - # NOTE || true ignores failures - - prom_get rules || true - - # force disk flush to ensure that all data gathered are written - sync -} - -monitoring_gather diff --git a/must-gather/collection-scripts/monitoring_common.sh b/must-gather/collection-scripts/monitoring_common.sh deleted file mode 100755 index 45c184c2f2..0000000000 --- a/must-gather/collection-scripts/monitoring_common.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash - -# based on monitoring_common.sh script from standard must-gather - -# safeguards -set -o nounset -set -o errexit -set -o pipefail - -get_first_ready_prom_pod() { - readarray -t READY_PROM_PODS < <( - oc get pods -n openshift-monitoring -l prometheus=k8s --field-selector=status.phase==Running \ - --no-headers -o custom-columns=":metadata.name" - ) - echo "${READY_PROM_PODS[0]}" -} diff --git a/must-gather/gather.go b/must-gather/gather.go new file mode 100644 index 0000000000..ba8d507781 --- /dev/null +++ b/must-gather/gather.go @@ -0,0 +1,171 @@ +package mustgather + +import ( + "context" + "fmt" + "io" + "path/filepath" + "sync" + "time" + + "github.com/openshift/cluster-logging-operator/must-gather/internal/api" + "github.com/openshift/cluster-logging-operator/must-gather/internal/client" + "github.com/openshift/cluster-logging-operator/must-gather/internal/cluster" + "github.com/openshift/cluster-logging-operator/must-gather/internal/collection" + "github.com/openshift/cluster-logging-operator/must-gather/internal/logstore/lokistack" + "github.com/openshift/cluster-logging-operator/must-gather/internal/metrics" + "github.com/openshift/cluster-logging-operator/must-gather/internal/namespace" + "github.com/openshift/cluster-logging-operator/must-gather/internal/ui" +) + +var ( + // Standard namespaces + standardNamespaces = []string{ + "openshift-operator-lifecycle-manager", + "openshift-operators-redhat", + "openshift-operators", + "openshift-monitoring", // Contains Prometheus pods needed by monitoring collector + } +) + +// Gather is the main must-gather orchestrator +type Gather struct { + config *api.Config + client *client.Client + logger api.Logger +} + +// NewGather creates a new must-gather orchestrator +func NewGather(baseCollectionPath, loggingNamespace, logFileName string, logWriter io.Writer) (*Gather, error) { + logger := api.NewLogger(logWriter) + k8sClient, err := client.NewClient(logger) + if err != nil { + return nil, fmt.Errorf("failed to create Kubernetes client: %w", err) + } + + // Set up paths + absPath, err := filepath.Abs(baseCollectionPath) + if err != nil { + return nil, fmt.Errorf("failed to get absolute path: %w", err) + } + + config := &api.Config{ + DestDir: api.NewPath(absPath), + LoggingNamespace: loggingNamespace, + LogFileName: logFileName, + Logger: logWriter, + } + + return &Gather{ + config: config, + client: k8sClient, + logger: logger, + }, nil +} + +// Run executes the must-gather collection +func (g *Gather) Run(ctx context.Context) error { + g.logger.Log("..... Cluster Logging must-gather script started .....") + g.logger.Log("must-gather logs are located at: '%s'", filepath.Join(g.config.DestDir.String(), g.config.LogFileName)) + + // Ensure base collection path exists + if err := g.config.DestDir.MkdirAll(); err != nil { + return err + } + + // Create collectors + collectors := g.createCollectors() + + // Run collectors concurrently + results := g.runCollectors(ctx, collectors) + + // Log results + g.logResults(results) + + // Check for failures and aggregate errors + var failures []error + for _, result := range results { + if result.Error != nil { + failures = append(failures, fmt.Errorf("%s: %w", result.CollectorName, result.Error)) + } + } + + if len(failures) > 0 { + return fmt.Errorf("one or more collectors failed: %v", failures) + } + + return nil +} + +// createCollectors creates all collectors needed for the gathering +func (g *Gather) createCollectors() []api.Collector { + collectors := make([]api.Collector, 0) + + // Cluster-scoped resources collector + collectors = append(collectors, cluster.NewCollector(g.client, g.logger, g.config.DestDir)) + + // Namespace collectors + collectors = append(collectors, namespace.NewCollector(g.client, g.logger, standardNamespaces, g.config.DestDir)) + + // Log Collection collector + collectors = append(collectors, collection.NewCollector(g.client, g.logger, g.config.LoggingNamespace, g.config.DestDir)) + + // UIPlugin collector (checks for installation internally) + collectors = append(collectors, ui.NewUIPluginCollector(g.client, g.logger, g.config.DestDir)) + + // Monitoring collector + collectors = append(collectors, metrics.NewCollector(g.client, g.logger, g.config.DestDir)) + + // LogStore collector (checks for LokiStack installation internally) + collectors = append(collectors, lokistack.NewCollector(g.client, g.logger, g.config.LoggingNamespace, g.config.DestDir)) + + return collectors +} + +// runCollectors runs all collectors concurrently +func (g *Gather) runCollectors(ctx context.Context, collectors []api.Collector) []api.Result { + var wg sync.WaitGroup + resultsChan := make(chan api.Result, len(collectors)) + + for _, collector := range collectors { + wg.Add(1) + go func(c api.Collector) { + defer wg.Done() + + start := time.Now() + // Call Collect with no GVRs to use defaults + err := c.Collect(ctx) + duration := time.Since(start) + + resultsChan <- api.Result{ + CollectorName: c.Name(), + Error: err, + Duration: duration, + } + }(collector) + } + + // Wait for all collectors to finish + wg.Wait() + close(resultsChan) + + // Collect results + results := make([]api.Result, 0, len(collectors)) + for result := range resultsChan { + results = append(results, result) + } + + return results +} + +// logResults logs the results of all collectors +func (g *Gather) logResults(results []api.Result) { + g.logger.Log("=== Must-gather collection complete ===") + for _, result := range results { + if result.Error != nil { + g.logger.Log("FAILED: %s (took %v): %v", result.CollectorName, result.Duration, result.Error) + } else { + g.logger.Log("SUCCESS: %s (took %v)", result.CollectorName, result.Duration) + } + } +} diff --git a/must-gather/internal/api/logger.go b/must-gather/internal/api/logger.go new file mode 100644 index 0000000000..847cab8762 --- /dev/null +++ b/must-gather/internal/api/logger.go @@ -0,0 +1,51 @@ +package api + +import ( + "fmt" + "io" + "os" + "time" +) + +// DefaultLogger provides timestamped logging similar to the bash script +type DefaultLogger struct { + writer io.Writer +} + +// NewLogger creates a new logger that writes to the given writer +func NewLogger(w io.Writer) Logger { + return &DefaultLogger{writer: w} +} + +// Log writes a timestamped log message +func (l *DefaultLogger) Log(format string, args ...interface{}) { + timestamp := time.Now().Format("2006-01-02 15:04:05") + message := fmt.Sprintf(format, args...) + if _, err := fmt.Fprintf(l.writer, "%s %s\n", timestamp, message); err != nil { + // Fallback to stderr if primary writer fails + fmt.Fprintf(os.Stderr, "LOGGER ERROR: failed to write log: %v\nOriginal message: %s %s\n", err, timestamp, message) + } +} + +// Begin logs a BEGIN message and returns a function that logs the corresponding END message +func (l *DefaultLogger) Begin(format string, args ...interface{}) func() { + l.Log("BEGIN "+format, args...) + return func() { + l.Log("END "+format, args...) + } +} + +// Warn logs a warning message with WARN prefix +func (l *DefaultLogger) Warn(format string, args ...interface{}) { + l.Log("WARN: "+format, args...) +} + +// Info logs an informational message with INFO prefix +func (l *DefaultLogger) Info(format string, args ...interface{}) { + l.Log("INFO: "+format, args...) +} + +// Logf is an alias for Log for convenience +func (l *DefaultLogger) Logf(format string, args ...interface{}) { + l.Log(format, args...) +} diff --git a/must-gather/internal/api/path.go b/must-gather/internal/api/path.go new file mode 100644 index 0000000000..6dba32f6f7 --- /dev/null +++ b/must-gather/internal/api/path.go @@ -0,0 +1,52 @@ +package api + +import ( + "fmt" + "os" + "path/filepath" + + "k8s.io/apimachinery/pkg/runtime/schema" +) + +type Path struct { + string +} + +func NewPath(parts ...string) Path { + return Path{filepath.Join(parts...)} +} + +func (p Path) Add(parts ...string) Path { + parts = append([]string{p.string}, parts...) + p.string = filepath.Join(parts...) + return p +} + +func (p Path) ForResource(gvr schema.GroupVersionResource) Path { + p.string = filepath.Join(p.string, gvr.Group, gvr.Resource) + return p +} + +func (p Path) String() string { + return p.string +} + +func (p Path) MkdirAll() error { + if err := os.MkdirAll(p.string, 0755); err != nil { + return fmt.Errorf("failed to create directory %s: %w", p.string, err) + } + return nil +} + +func (p Path) WriteFile(data []byte) error { + // Create parent directory if it doesn't exist + dir := filepath.Dir(p.string) + if err := os.MkdirAll(dir, 0755); err != nil { + return fmt.Errorf("failed to create directory %s: %w", dir, err) + } + + if err := os.WriteFile(p.String(), data, 0644); err != nil { + return fmt.Errorf("failed to write file %s: %w", p, err) + } + return nil +} diff --git a/must-gather/internal/api/path_test.go b/must-gather/internal/api/path_test.go new file mode 100644 index 0000000000..d53ac1ede9 --- /dev/null +++ b/must-gather/internal/api/path_test.go @@ -0,0 +1,34 @@ +package api_test + +import ( + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "github.com/openshift/cluster-logging-operator/must-gather/internal/api" + "k8s.io/apimachinery/pkg/runtime/schema" +) + +var _ = Describe("Path", func() { + + It("#NewPath should return a path of the args", func() { + Expect(api.NewPath("foo", "bar").String()).To(Equal("foo/bar")) + }) + + It("#Add should return the part to the path", func() { + Expect(api.NewPath("foo", "bar").Add("xyz").String()).To(Equal("foo/bar/xyz")) + }) + + Context("#ForResource", func() { + + var ( + path = api.NewPath("/root") + ) + + It("should use the group and resource", func() { + Expect(path.ForResource(schema.GroupVersionResource{Group: "apps", Version: "v1", Resource: "deployments"}).String()).To(Equal("/root/apps/deployments")) + }) + + It("should use only the resource when there is no group", func() { + Expect(path.ForResource(schema.GroupVersionResource{Group: "", Version: "v1", Resource: "deployments"}).String()).To(Equal("/root/deployments")) + }) + }) +}) diff --git a/must-gather/internal/api/suite_test.go b/must-gather/internal/api/suite_test.go new file mode 100644 index 0000000000..bb09f8ecba --- /dev/null +++ b/must-gather/internal/api/suite_test.go @@ -0,0 +1,13 @@ +package api_test + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +func TestSuite(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "must-gather/internal/api suite") +} diff --git a/must-gather/internal/api/types.go b/must-gather/internal/api/types.go new file mode 100644 index 0000000000..da0532c2d5 --- /dev/null +++ b/must-gather/internal/api/types.go @@ -0,0 +1,49 @@ +package api + +import ( + "context" + "io" + "time" + + "k8s.io/apimachinery/pkg/runtime/schema" +) + +// Logger interface for logging operations +type Logger interface { + Log(format string, args ...interface{}) + Begin(format string, args ...interface{}) func() + Warn(format string, args ...interface{}) + Info(format string, args ...interface{}) +} + +// Config holds the configuration for must-gather collection +type Config struct { + // DestDir is the root directory where all collected data will be stored + DestDir Path + + // LoggingNamespace is the namespace where cluster logging operator is deployed + LoggingNamespace string + + // LogFileName is the name of the debug log file + LogFileName string + + // Logger is where log output should be written + Logger io.Writer +} + +// Collector defines the interface for all must-gather collectors +type Collector interface { + // Collect performs the collection and returns an error if collection fails + // Accepts optional GroupVersionResource parameters to collect specific resources + Collect(ctx context.Context, gvrs ...schema.GroupVersionResource) error + + // Name returns the name of this collector + Name() string +} + +// Result represents the result of a collection operation +type Result struct { + CollectorName string + Error error + Duration time.Duration +} diff --git a/must-gather/internal/client/client.go b/must-gather/internal/client/client.go new file mode 100644 index 0000000000..4d98c42c2b --- /dev/null +++ b/must-gather/internal/client/client.go @@ -0,0 +1,265 @@ +package client + +import ( + "context" + "fmt" + "io" + + "github.com/openshift/cluster-logging-operator/must-gather/internal/api" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/client-go/dynamic" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/kubernetes/scheme" + "k8s.io/client-go/rest" + "k8s.io/client-go/tools/clientcmd" + "k8s.io/client-go/tools/remotecommand" + "sigs.k8s.io/yaml" +) + +// Client wraps Kubernetes client functionality for must-gather operations +type Client struct { + Clientset *kubernetes.Clientset + DynamicClient dynamic.Interface + config *rest.Config + logger api.Logger +} + +// NewClient creates a new Kubernetes client for must-gather operations +func NewClient(logger api.Logger) (*Client, error) { + config, err := clientcmd.NewNonInteractiveDeferredLoadingClientConfig( + clientcmd.NewDefaultClientConfigLoadingRules(), + &clientcmd.ConfigOverrides{}, + ).ClientConfig() + if err != nil { + return nil, fmt.Errorf("failed to load kubeconfig: %w", err) + } + + Clientset, err := kubernetes.NewForConfig(config) + if err != nil { + return nil, fmt.Errorf("failed to create Clientset: %w", err) + } + + DynamicClient, err := dynamic.NewForConfig(config) + if err != nil { + return nil, fmt.Errorf("failed to create dynamic client: %w", err) + } + + return &Client{ + Clientset: Clientset, + DynamicClient: DynamicClient, + config: config, + logger: logger, + }, nil +} + +// GetPods returns a list of pods matching the label selector +func (c *Client) GetPods(ctx context.Context, namespace string, labelSelector string) ([]corev1.Pod, error) { + podList, err := c.Clientset.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{ + LabelSelector: labelSelector, + }) + if err != nil { + return nil, fmt.Errorf("failed to list pods: %w", err) + } + return podList.Items, nil +} + +// GetNamespaces returns all namespaces +func (c *Client) GetNamespaces(ctx context.Context) ([]string, error) { + nsList, err := c.Clientset.CoreV1().Namespaces().List(ctx, metav1.ListOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to list namespaces: %w", err) + } + + namespaces := make([]string, len(nsList.Items)) + for i, ns := range nsList.Items { + namespaces[i] = ns.Name + } + return namespaces, nil +} + +// ExecInPod executes a command in a pod container and returns the output +func (c *Client) ExecInPod(ctx context.Context, namespace, pod, container string, command []string) (string, error) { + req := c.Clientset.CoreV1().RESTClient().Post(). + Resource("pods"). + Name(pod). + Namespace(namespace). + SubResource("exec") + + req.VersionedParams(&corev1.PodExecOptions{ + Container: container, + Command: command, + Stdout: true, + Stderr: true, + }, scheme.ParameterCodec) + + exec, err := remotecommand.NewSPDYExecutor(c.config, "POST", req.URL()) + if err != nil { + return "", fmt.Errorf("failed to create executor: %w", err) + } + + var stdout, stderr io.Writer + stdoutBuf := &writeBuffer{} + stderrBuf := &writeBuffer{} + stdout = stdoutBuf + stderr = stderrBuf + + err = exec.StreamWithContext(ctx, remotecommand.StreamOptions{ + Stdout: stdout, + Stderr: stderr, + }) + + if err != nil { + return "", fmt.Errorf("exec failed: %w, stderr: %s", err, stderrBuf.String()) + } + + return stdoutBuf.String(), nil +} + +// GetResource gets a resource and writes it as YAML to the destination file +func (c *Client) GetResource(ctx context.Context, gvr schema.GroupVersionResource, namespace, name string, destPath api.Path) error { + var resource runtime.Object + var err error + + if namespace != "" { + resource, err = c.DynamicClient.Resource(gvr).Namespace(namespace).Get(ctx, name, metav1.GetOptions{}) + } else { + resource, err = c.DynamicClient.Resource(gvr).Get(ctx, name, metav1.GetOptions{}) + } + + if err != nil { + return fmt.Errorf("failed to get resource: %w", err) + } + + return c.WriteResourceToFile(resource, destPath) +} + +// ListResources lists resources and writes them to destination directory +// Each resource is written as a separate file, similar to oc adm inspect +func (c *Client) ListResources(ctx context.Context, gvr schema.GroupVersionResource, namespace string, destDir api.Path, opts metav1.ListOptions) error { + var listObj *unstructured.UnstructuredList + var err error + + if namespace != "" { + listObj, err = c.DynamicClient.Resource(gvr).Namespace(namespace).List(ctx, opts) + } else { + listObj, err = c.DynamicClient.Resource(gvr).List(ctx, opts) + } + + if err != nil { + return fmt.Errorf("failed to list resources: %w", err) + } + + // Only create directory if we have items to write + if len(listObj.Items) == 0 { + return nil + } + + // Create destination directory + if err := destDir.MkdirAll(); err != nil { + return err + } + + // Write each resource as a separate file + var writeErrors []error + for _, item := range listObj.Items { + name := item.GetName() + if name == "" { + name = item.GetGenerateName() + } + + itemPath := destDir.Add(fmt.Sprintf("%s.yaml", name)) + if err := c.WriteResourceToFile(&item, itemPath); err != nil { + c.logger.Warn("Failed to write %s: %v", name, err) + writeErrors = append(writeErrors, fmt.Errorf("failed to write %s: %w", name, err)) + continue + } + } + + if len(writeErrors) > 0 { + return fmt.Errorf("%d resource(s) failed to write: %v", len(writeErrors), writeErrors) + } + + return nil +} + +// WriteResourceToFile writes a Kubernetes resource as YAML to a file +func (c *Client) WriteResourceToFile(resource runtime.Object, destPath api.Path) error { + // Sanitize secrets before writing + sanitizedResource := c.sanitizeResource(resource) + + // Marshal to YAML + yamlBytes, err := yaml.Marshal(sanitizedResource) + if err != nil { + return fmt.Errorf("failed to marshal resource to YAML: %w", err) + } + + // Write to file (WriteFile creates parent directory automatically) + return destPath.WriteFile(yamlBytes) +} + +// sanitizeResource redacts secret data values +func (c *Client) sanitizeResource(resource runtime.Object) runtime.Object { + // Check if this is a Secret + if unstructuredObj, ok := resource.(*unstructured.Unstructured); ok { + if unstructuredObj.GetKind() == "Secret" { + // Make a deep copy to avoid modifying the original + sanitized := unstructuredObj.DeepCopy() + + // Redact all data values (base64 encoded) + if data, found, _ := unstructured.NestedMap(sanitized.Object, "data"); found { + for key, val := range data { + length := 0 + if strVal, ok := val.(string); ok { + length = len(strVal) + } + data[key] = fmt.Sprintf("REDACTED length=%d", length) + } + unstructured.SetNestedMap(sanitized.Object, data, "data") + } + + // Redact all stringData values (plain text) + if stringData, found, _ := unstructured.NestedMap(sanitized.Object, "stringData"); found { + for key, val := range stringData { + length := 0 + if strVal, ok := val.(string); ok { + length = len(strVal) + } + stringData[key] = fmt.Sprintf("REDACTED length=%d", length) + } + unstructured.SetNestedMap(sanitized.Object, stringData, "stringData") + } + + return sanitized + } + } + + return resource +} + +// GetDynamicClient returns the underlying dynamic client +func (c *Client) GetDynamicClient() dynamic.Interface { + return c.DynamicClient +} + +// GetClientset returns the underlying Clientset +func (c *Client) GetClientset() *kubernetes.Clientset { + return c.Clientset +} + +// writeBuffer is a simple buffer that implements io.Writer +type writeBuffer struct { + data []byte +} + +func (w *writeBuffer) Write(p []byte) (n int, err error) { + w.data = append(w.data, p...) + return len(p), nil +} + +func (w *writeBuffer) String() string { + return string(w.data) +} diff --git a/must-gather/internal/cluster/collector.go b/must-gather/internal/cluster/collector.go new file mode 100644 index 0000000000..6529e3b3d0 --- /dev/null +++ b/must-gather/internal/cluster/collector.go @@ -0,0 +1,91 @@ +package cluster + +import ( + "context" + "sync" + + "github.com/openshift/cluster-logging-operator/must-gather/internal/api" + "github.com/openshift/cluster-logging-operator/must-gather/internal/client" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime/schema" +) + +const ( + ArtifactRoot = "cluster-scoped-resources" + GroupConfig = "config.openshift.io" + + groupRbac = "rbac.authorization.k8s.io" +) + +// Collector collects cluster-scoped resources +type Collector struct { + client *client.Client + logger api.Logger + destDir api.Path +} + +// NewCollector creates a new cluster resource collector +func NewCollector(c *client.Client, logger api.Logger, destDir api.Path) *Collector { + return &Collector{ + client: c, + logger: logger, + destDir: destDir, + } +} + +// Name returns the name of this collector +func (c *Collector) Name() string { + return "ClusterCollector" +} + +// Collect performs the collection of cluster-scoped resources +func (c *Collector) Collect(ctx context.Context, gvrs ...schema.GroupVersionResource) error { + defer c.logger.Begin("inspecting cluster resources...")() + + // Use provided GVRs or default cluster-scoped resources + clusterResources := gvrs + if len(clusterResources) == 0 { + // Default cluster-scoped resources to collect (matching /tmp/foo reference) + clusterResources = []schema.GroupVersionResource{ + // Core resources + {Group: "", Version: "v1", Resource: "nodes"}, + + // RBAC + {Group: groupRbac, Version: "v1", Resource: "clusterroles"}, + {Group: groupRbac, Version: "v1", Resource: "clusterrolebindings"}, + + // API Extensions + {Group: "apiextensions.k8s.io", Version: "v1", Resource: "customresourcedefinitions"}, + + // OpenShift Config + {Group: GroupConfig, Version: "v1", Resource: "clusterversions"}, + } + } + + basePath := api.NewPath(c.destDir.String(), ArtifactRoot) + + var wg sync.WaitGroup + for _, gvr := range clusterResources { + wg.Add(1) + go func(g schema.GroupVersionResource) { + defer wg.Done() + defer c.logger.Begin("-- inspecting cluster resource %s ...", g.Resource)() + + // Use "core" for core resources (empty group) to match reference structure + gvr := g + if gvr.Group == "" { + gvr.Group = "core" + } + + resourcePath := basePath.ForResource(gvr) + + if err := c.client.ListResources(ctx, g, "", resourcePath, metav1.ListOptions{}); err != nil { + c.logger.Warn("Failed to collect %s: %v", g.Resource, err) + } + }(gvr) + } + + wg.Wait() + + return nil +} diff --git a/must-gather/internal/collection/collection_collector.go b/must-gather/internal/collection/collection_collector.go new file mode 100644 index 0000000000..2ed6b32f0d --- /dev/null +++ b/must-gather/internal/collection/collection_collector.go @@ -0,0 +1,227 @@ +package collection + +import ( + "context" + "fmt" + "os" + "path/filepath" + "regexp" + "sort" + "strings" + + "github.com/openshift/cluster-logging-operator/must-gather/internal/api" + "github.com/openshift/cluster-logging-operator/must-gather/internal/namespace" + + "github.com/openshift/cluster-logging-operator/must-gather/internal/client" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime/schema" +) + +var ( + clfGVR = schema.GroupVersionResource{ + Group: "observability.openshift.io", + Version: "v1", + Resource: "clusterlogforwarders", + } + lfmeGVE = schema.GroupVersionResource{ + Group: "logging.openshift.io", + Version: "v1alpha1", + Resource: "logfilemetricexporters", + } +) + +// Collector collects cluster logging operator resources +type Collector struct { + client *client.Client + logger api.Logger + namespace string + destDir api.Path +} + +// NewCollector creates a new CLO resource collector +func NewCollector(c *client.Client, logger api.Logger, loggingNamespace string, destDir api.Path) *Collector { + return &Collector{ + client: c, + logger: logger, + namespace: loggingNamespace, + destDir: destDir, + } +} + +// Name returns the name of this collector +func (c *Collector) Name() string { + return "Log Collection" +} + +// Collect performs the collection of CLO resources +func (c *Collector) Collect(ctx context.Context, gvrs ...schema.GroupVersionResource) error { + defer c.logger.Begin("gather cluster-logging-operator from namespace: %s ...", c.namespace)() + + // Collect operator-specific artifacts (only if operator is deployed) + operatorFound, err := c.CollectForOperator(ctx) + if err != nil { + c.logger.Warn("failed to collect operator artifacts in namespace %s: %v", c.namespace, err) + } + + // Discover namespaces with ClusterLogForwarders (independent of operator presence) + namespaces, err := c.discoverNamespaces(ctx) + if err != nil { + return err + } + + // If no namespaces found and operator not present, nothing to collect + if len(namespaces) == 0 && !operatorFound { + c.logger.Info("No cluster-logging-operator or ClusterLogForwarders found, skipping collection") + return nil + } + + // Collect namespace resources with collection-specific GVRs + gvrs = append(gvrs, clfGVR, lfmeGVE) + nsCollector := namespace.NewCollector(c.client, c.logger, namespaces, c.destDir) + return nsCollector.Collect(ctx, gvrs...) +} + +// discoverNamespaces discovers all relevant namespaces for collection +func (c *Collector) discoverNamespaces(ctx context.Context) ([]string, error) { + namespaceSet := make(map[string]bool) + + // Always include the logging namespace itself + namespaceSet[c.namespace] = true + + // List all ClusterLogForwarders across all namespaces + clfListUnstructured, err := c.client.DynamicClient.Resource(clfGVR).List(ctx, metav1.ListOptions{}) + if err != nil { + c.logger.Warn("Failed to list ClusterLogForwarders: %v", err) + } else { + for _, item := range clfListUnstructured.Items { + ns := item.GetNamespace() + if ns != "" { + namespaceSet[ns] = true + if ns != c.namespace { + c.logger.Log("Adding namespace '%s' to cluster resources list", ns) + } + } + } + } + + // Convert set to slice + namespaces := make([]string, 0, len(namespaceSet)) + for ns := range namespaceSet { + namespaces = append(namespaces, ns) + } + + return namespaces, nil +} + +func (c *Collector) CollectForOperator(ctx context.Context) (bool, error) { + defer c.logger.Begin(" from namespace: %s ...", c.namespace)() + + // Try to get CLO pods with standard Kubernetes label first + pods, err := c.client.GetPods(ctx, c.namespace, "app.kubernetes.io/name=cluster-logging-operator") + if err != nil { + c.logger.Warn("Failed to get CLO pods with standard label: %v", err) + return false, nil + } + + // If not found with standard label, try legacy label + if len(pods) == 0 { + pods, err = c.client.GetPods(ctx, c.namespace, "name=cluster-logging-operator") + if err != nil { + c.logger.Warn("Failed to get CLO pods with legacy label: %v", err) + return false, nil + } + } + + // Return early if no operator pods found with either label + if len(pods) == 0 { + c.logger.Info("No cluster-logging-operator pods found in namespace %s, skipping operator-specific collection", c.namespace) + return false, nil + } + + cloFolder := filepath.Join(c.destDir.String(), "namespaces", c.namespace, "core", "pods") + if err := os.MkdirAll(cloFolder, 0755); err != nil { + return true, fmt.Errorf("failed to create CLO folder: %w", err) + } + + c.logger.Log("Gathering data for 'cluster-logging-operator' from namespace: %s", c.namespace) + for _, pod := range pods { + c.logger.Log("Inspecting %s", pod.Name) + if err := c.getEnv(ctx, c.namespace, pod.Name, cloFolder, "Dockerfile-.*operator*"); err != nil { + c.logger.Warn("Failed to get env for pod %s: %v", pod.Name, err) + } + } + + return true, nil +} + +// getEnv gets environment variables and build info from a pod +func (c *Collector) getEnv(ctx context.Context, namespace, podName, destFolder, dockerfilePattern string) error { + defer c.logger.Begin("get_env ...")() + c.logger.Log("---- Env for %s", podName) + + destDir := filepath.Join(destFolder, podName) + envFile := filepath.Join(destDir, "env.txt") + var output strings.Builder + + // Get pod to find containers + pods, err := c.client.Clientset.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{ + FieldSelector: fmt.Sprintf("metadata.name=%s", podName), + }) + if err != nil || len(pods.Items) == 0 { + return fmt.Errorf("failed to get pod: %w", err) + } + + pod := pods.Items[0] + + for _, container := range pod.Spec.Containers { + c.logger.Log("----- Inspecting container %s", container.Name) + + // Try to get dockerfile info + lsOutput, err := c.client.ExecInPod(ctx, namespace, podName, container.Name, []string{"ls", "/root/buildinfo"}) + if err == nil { + // Find dockerfile matching pattern + re := regexp.MustCompile(dockerfilePattern) + lines := strings.Split(lsOutput, "\n") + for _, line := range lines { + if re.MatchString(line) { + c.logger.Log("----- Getting buildInfo") + output.WriteString(fmt.Sprintf("Image info: %s\n", line)) + + // Get build date + buildDateOutput, err := c.client.ExecInPod(ctx, namespace, podName, container.Name, + []string{"grep", "-o", "\"build-date\"=\"[^[:blank:]]*\"", "/root/buildinfo/" + line}) + if err == nil { + output.WriteString(buildDateOutput) + } else { + c.logger.Log("---- Unable to get build date") + } + break + } + } + } + + // Get environment variables + c.logger.Log("----- Getting environment variables") + output.WriteString("-- Environment Variables\n") + + envOutput, err := c.client.ExecInPod(ctx, namespace, podName, container.Name, []string{"env"}) + if err == nil { + envLines := strings.Split(envOutput, "\n") + sort.Strings(envLines) + for _, line := range envLines { + if line != "" { + output.WriteString(line + "\n") + } + } + } + } + + if err := os.MkdirAll(destDir, 0755); err != nil { + return fmt.Errorf("failed to create directory %s: %w", destDir, err) + } + if err := os.WriteFile(envFile, []byte(output.String()), 0644); err != nil { + return fmt.Errorf("failed to write env file: %w", err) + } + + return nil +} diff --git a/must-gather/internal/logstore/lokistack/collector.go b/must-gather/internal/logstore/lokistack/collector.go new file mode 100644 index 0000000000..da41fd6cf9 --- /dev/null +++ b/must-gather/internal/logstore/lokistack/collector.go @@ -0,0 +1,75 @@ +package lokistack + +import ( + "context" + "fmt" + + "github.com/openshift/cluster-logging-operator/must-gather/internal/api" + "github.com/openshift/cluster-logging-operator/must-gather/internal/client" + kerrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime/schema" +) + +// Collector collects LokiStack resources +type Collector struct { + client *client.Client + logger api.Logger + namespace string + destDir api.Path +} + +// NewCollector creates a new LokiStack collector +func NewCollector(c *client.Client, logger api.Logger, namespace string, destDir api.Path) *Collector { + return &Collector{ + client: c, + logger: logger, + namespace: namespace, + destDir: destDir, + } +} + +// Name returns the name of this collector +func (l *Collector) Name() string { + return "LogStoreCollector" +} + +// Collect performs the collection of LokiStack resources +func (l *Collector) Collect(ctx context.Context, gvrs ...schema.GroupVersionResource) error { + defer l.logger.Begin("gather_logstore_resources ...")() + + lokiGVR := schema.GroupVersionResource{ + Group: "loki.grafana.com", + Version: "v1", + Resource: "lokistacks", + } + + // Check if LokiStack is installed (cluster-wide check) + lokiList, err := l.client.DynamicClient.Resource(lokiGVR).List(ctx, metav1.ListOptions{}) + if err != nil { + // Only skip if CRD doesn't exist (NotFound or NoKindMatchError) + // Return other errors (RBAC, API issues, etc.) to caller + if kerrors.IsNotFound(err) || meta.IsNoMatchError(err) { + l.logger.Info("LokiStack CRD not available, skipping logstore collection") + return nil + } + return fmt.Errorf("failed to check for LokiStack resources: %w", err) + } + + if len(lokiList.Items) == 0 { + l.logger.Info("No LokiStack resources found in any namespace, skipping logstore collection") + return nil + } + + l.logger.Log("Gathering Lokistack resources") + l.logger.Log("-- Gather Lokistack CR") + + // Write to namespace directory like other resources + lokiFolder := l.destDir.Add("namespaces", l.namespace).ForResource(lokiGVR) + if err := l.client.ListResources(ctx, lokiGVR, l.namespace, lokiFolder, metav1.ListOptions{}); err != nil { + return fmt.Errorf("failed to collect LokiStack CR: %w", err) + } + + return nil +} diff --git a/must-gather/internal/metrics/collector.go b/must-gather/internal/metrics/collector.go new file mode 100644 index 0000000000..720afdf762 --- /dev/null +++ b/must-gather/internal/metrics/collector.go @@ -0,0 +1,111 @@ +package metrics + +import ( + "context" + "fmt" + + "github.com/openshift/cluster-logging-operator/must-gather/internal/api" + "github.com/openshift/cluster-logging-operator/must-gather/internal/client" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/runtime/schema" +) + +// Collector collects Prometheus rules and alerts +type Collector struct { + client *client.Client + logger api.Logger + destDir api.Path +} + +// NewCollector creates a new monitoring collector +func NewCollector(c *client.Client, logger api.Logger, destDir api.Path) *Collector { + return &Collector{ + client: c, + logger: logger, + destDir: destDir, + } +} + +// Name returns the name of this collector +func (m *Collector) Name() string { + return "MonitoringCollector" +} + +// Collect performs the collection of monitoring resources +func (m *Collector) Collect(ctx context.Context, _ ...schema.GroupVersionResource) error { + defer m.logger.Begin("gathering alerts ...")() + + monitoringPath := m.destDir.Add("monitoring") + if err := monitoringPath.MkdirAll(); err != nil { + return err + } + + // Get Prometheus pods + promPods, err := m.client.GetPods(ctx, "openshift-monitoring", "prometheus=k8s") + if err != nil { + m.logger.Warn("Failed to get Prometheus pods: %v", err) + return nil + } + + m.logger.Info("Found %d Prometheus replicas", len(promPods)) + + // Get first ready pod + readyPod := m.getFirstReadyPromPod(promPods) + if readyPod == "" { + m.logger.Warn("No ready Prometheus pod found") + return nil + } + + // Get Prometheus rules + m.logger.Info("Getting rules from %s", readyPod) + if err := m.promGet(ctx, readyPod, "rules", monitoringPath); err != nil { + m.logger.Warn("Failed to get Prometheus rules: %v", err) + } + + return nil +} + +// promGet makes HTTP GET requests to prometheus /api/v1/ +func (m *Collector) promGet(ctx context.Context, pod, object string, monitoringPath api.Path) error { + resultPath := monitoringPath.Add("prometheus") + if err := resultPath.MkdirAll(); err != nil { + return err + } + + // Execute curl command in Prometheus pod + cmd := []string{"/bin/bash", "-c", + fmt.Sprintf("curl -sG http://localhost:9090/api/v1/%s", object)} + + output, err := m.client.ExecInPod(ctx, "openshift-monitoring", pod, "prometheus", cmd) + if err != nil { + // Write error to error.log file + if writeErr := resultPath.Add("error.log").WriteFile([]byte(err.Error())); writeErr != nil { + m.logger.Warn("Failed to write error log: %v", writeErr) + } + return err + } + + // Write output to json file + jsonFile := resultPath.Add(fmt.Sprintf("%s.json", object)) + return jsonFile.WriteFile([]byte(output)) +} + +// getFirstReadyPromPod returns the first ready Prometheus pod +func (m *Collector) getFirstReadyPromPod(pods []corev1.Pod) string { + for _, pod := range pods { + if pod.Status.Phase == corev1.PodRunning { + // Check if all containers are ready + allReady := true + for _, containerStatus := range pod.Status.ContainerStatuses { + if !containerStatus.Ready { + allReady = false + break + } + } + if allReady { + return pod.Name + } + } + } + return "" +} diff --git a/must-gather/internal/namespace/collector.go b/must-gather/internal/namespace/collector.go new file mode 100644 index 0000000000..5b4f84ba55 --- /dev/null +++ b/must-gather/internal/namespace/collector.go @@ -0,0 +1,293 @@ +package namespace + +import ( + "context" + "fmt" + "io" + "os" + "sync" + + "github.com/openshift/cluster-logging-operator/must-gather/internal/api" + "github.com/openshift/cluster-logging-operator/must-gather/internal/client" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime/schema" +) + +const ( + // Maximum concurrent goroutines for namespace collection + maxConcurrentNamespaces = 5 + // Maximum concurrent goroutines for resource collection per namespace + maxConcurrentResources = 10 + // Maximum concurrent goroutines for pod log collection per namespace + maxConcurrentPods = 10 +) + +const ( + groupApps = "apps" + groupK8sOvn = "k8s.ovn.org" + groupMonitoring = "monitoring.coreos.com" + groupOperators = "operators.coreos.com" + groupRbac = "rbac.authorization.k8s.io" + + v1 = "v1" + v2 = "v2" + v1Alpha1 = "v1alpha1" +) + +// Collector collects namespace-scoped resources +type Collector struct { + client *client.Client + logger api.Logger + namespaces []string + destDir api.Path +} + +// NewCollector creates a new namespace resource collector +func NewCollector(c *client.Client, logger api.Logger, namespaces []string, destDir api.Path) *Collector { + return &Collector{ + client: c, + logger: logger, + namespaces: namespaces, + destDir: destDir, + } +} + +// Name returns the name of this collector +func (n *Collector) Name() string { + return "Collector" +} + +// Collect performs the collection of namespace-scoped resources +func (n *Collector) Collect(ctx context.Context, gvrs ...schema.GroupVersionResource) error { + defer n.logger.Begin("inspecting namespaced resources...")() + + // Define namespace-scoped resources to collect (matching oc adm inspect behavior) + + namespacedResources := []schema.GroupVersionResource{ + // Core resources + {Group: "", Version: v1, Resource: "pods"}, + {Group: "", Version: v1, Resource: "services"}, + {Group: "", Version: v1, Resource: "configmaps"}, + {Group: "", Version: v1, Resource: "secrets"}, + {Group: "", Version: v1, Resource: "serviceaccounts"}, + {Group: "", Version: v1, Resource: "events"}, + {Group: "", Version: v1, Resource: "endpoints"}, + {Group: "", Version: v1, Resource: "persistentvolumeclaims"}, + {Group: "", Version: v1, Resource: "replicationcontrollers"}, + + // Apps + {Group: groupApps, Version: v1, Resource: "deployments"}, + {Group: groupApps, Version: v1, Resource: "daemonsets"}, + {Group: groupApps, Version: v1, Resource: "statefulsets"}, + {Group: groupApps, Version: v1, Resource: "replicasets"}, + + // RBAC + {Group: groupRbac, Version: v1, Resource: "roles"}, + {Group: groupRbac, Version: v1, Resource: "rolebindings"}, + + // Networking + {Group: "networking.k8s.io", Version: v1, Resource: "networkpolicies"}, + {Group: "discovery.k8s.io", Version: v1, Resource: "endpointslices"}, + + // Autoscaling + {Group: "autoscaling", Version: v2, Resource: "horizontalpodautoscalers"}, + + // Policy + {Group: "policy", Version: v1, Resource: "poddisruptionbudgets"}, + + // OpenShift Monitoring + {Group: groupMonitoring, Version: v1, Resource: "servicemonitors"}, + {Group: groupMonitoring, Version: v1, Resource: "podmonitors"}, + {Group: groupMonitoring, Version: v1, Resource: "prometheusrules"}, + + // OVN Kubernetes + {Group: groupK8sOvn, Version: v1, Resource: "egressfirewalls"}, + {Group: groupK8sOvn, Version: v1, Resource: "egressqoses"}, + {Group: groupK8sOvn, Version: v1, Resource: "userdefinednetworks"}, + + // Operators + {Group: groupOperators, Version: v1Alpha1, Resource: "installplans"}, + {Group: groupOperators, Version: v1Alpha1, Resource: "subscriptions"}, + {Group: groupOperators, Version: v1Alpha1, Resource: "clusterserviceversions"}, + } + + // Append any additional GVRs provided as arguments + namespacedResources = append(namespacedResources, gvrs...) + + var wg sync.WaitGroup + // Semaphore to limit concurrent namespace processing + namespaceSem := make(chan struct{}, maxConcurrentNamespaces) + + namespacesPath := n.destDir.Add("namespaces") + for _, ns := range n.namespaces { + wg.Add(1) + namespaceSem <- struct{}{} // Acquire semaphore + go func(namespace string) { + defer wg.Done() + defer func() { <-namespaceSem }() // Release semaphore + defer n.logger.Begin("-- inspecting namespace %s ...", namespace)() + + // First collect the namespace itself + nsGVR := schema.GroupVersionResource{Group: "", Version: v1, Resource: "namespaces"} + nsDir := namespacesPath.Add(namespace) + //nsDir := n.destDir. filepath.Join(n.destDir.String(), "namespaces", namespace) + + if err := n.client.GetResource(ctx, nsGVR, "", namespace, nsDir.Add("namespace.yaml")); err != nil { + n.logger.Warn("Failed to collect namespace %s: %v", namespace, err) + return + } + + // Collect resources in the namespace in parallel + var resourceWg sync.WaitGroup + // Semaphore to limit concurrent resource collection + resourceSem := make(chan struct{}, maxConcurrentResources) + for _, gvr := range namespacedResources { + resourceWg.Add(1) + resourceSem <- struct{}{} // Acquire semaphore + go func(g schema.GroupVersionResource) { + defer resourceWg.Done() + defer func() { <-resourceSem }() // Release semaphore + + // Use "core" for core resources (empty group) to match reference structure + resourceDir := nsDir + group := g.Group + if group == "" { + resourceDir = resourceDir.Add("core") + } + if err := n.client.ListResources(ctx, g, namespace, resourceDir.ForResource(g), metav1.ListOptions{}); err != nil { + // Some resources may not exist in all namespaces, just log and continue + n.logger.Info("Skipped %s in namespace %s: %v", g.Resource, namespace, err) + } + }(gvr) + } + resourceWg.Wait() + + // Collect pod logs for all pods in the namespace + n.logger.Log("-- Collecting pod logs for namespace %s ...", namespace) + if err := n.collectPodLogs(ctx, namespace, nsDir); err != nil { + n.logger.Warn("Failed to collect pod logs for namespace %s: %v", namespace, err) + } + }(ns) + } + + wg.Wait() + + return nil +} + +// collectPodLogs collects logs for all pods in a namespace +func (n *Collector) collectPodLogs(ctx context.Context, namespace string, nsDir api.Path) error { + // Get all pods in the namespace + pods, err := n.client.Clientset.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{}) + if err != nil { + return fmt.Errorf("failed to list pods: %w", err) + } + + var wg sync.WaitGroup + // Semaphore to limit concurrent pod log collection + podSem := make(chan struct{}, maxConcurrentPods) + for _, pod := range pods.Items { + wg.Add(1) + podSem <- struct{}{} // Acquire semaphore + go func(p corev1.Pod) { + defer wg.Done() + defer func() { <-podSem }() // Release semaphore + + podDir := nsDir.Add("core", "pods", p.Name) + + // Save pod YAML + podYamlPath := podDir.Add(fmt.Sprintf("%s.yaml", p.Name)) + if err := n.client.WriteResourceToFile(&p, podYamlPath); err != nil { + n.logger.Warn("Failed to save pod YAML for %s: %v", p.Name, err) + } + + // Collect logs for each container + for _, container := range p.Spec.Containers { + containerDir := podDir.Add(container.Name, "logs") + + // Collect current logs + if err := n.collectContainerLog(ctx, namespace, p.Name, container.Name, containerDir, "current.log", false); err != nil { + n.logger.Info("Failed to get current logs for pod %s container %s: %v", p.Name, container.Name, err) + } + + // Collect previous logs (from restarts) + if err := n.collectContainerLog(ctx, namespace, p.Name, container.Name, containerDir, "previous.log", true); err != nil { + // Previous logs may not exist if the container hasn't restarted, don't log as warning + continue + } + } + + // Collect logs for init containers if any + for _, container := range p.Spec.InitContainers { + containerDir := podDir.Add(container.Name, "logs") + + // Collect current logs + if err := n.collectContainerLog(ctx, namespace, p.Name, container.Name, containerDir, "current.log", false); err != nil { + n.logger.Info("Failed to get current logs for init container %s in pod %s: %v", container.Name, p.Name, err) + } + + // Collect previous logs (from restarts) + if err := n.collectContainerLog(ctx, namespace, p.Name, container.Name, containerDir, "previous.log", true); err != nil { + // Previous logs may not exist if the container hasn't restarted + continue + } + } + }(pod) + } + + wg.Wait() + return nil +} + +// collectContainerLog collects a single container log +func (n *Collector) collectContainerLog(ctx context.Context, namespace, podName, containerName string, destDir api.Path, filename string, previous bool) error { + logOpts := &corev1.PodLogOptions{ + Container: containerName, + Previous: previous, + } + + req := n.client.Clientset.CoreV1().Pods(namespace).GetLogs(podName, logOpts) + logs, err := req.Stream(ctx) + if err != nil { + return err + } + defer func() { + if err := logs.Close(); err != nil { + n.logger.Warn("Failed to close log stream for pod %s container %s: %v", podName, containerName, err) + } + }() + + // Create destination directory + if err := destDir.MkdirAll(); err != nil { + return err + } + + // Create destination file + logFilePath := destDir.Add(filename) + logFile, err := os.Create(logFilePath.String()) + if err != nil { + return fmt.Errorf("failed to create log file: %w", err) + } + defer func() { + if err := logFile.Close(); err != nil { + n.logger.Warn("Failed to close log file %s: %v", logFilePath.String(), err) + } + }() + + // Stream logs directly to file without buffering in memory + bytesWritten, err := io.Copy(logFile, logs) + if err != nil { + return fmt.Errorf("failed to write logs: %w", err) + } + + // If no data was written, remove the empty file + if bytesWritten == 0 { + // Close file before removing (ignore error since we're deleting anyway) + _ = logFile.Close() + // Remove empty file (ignore error since file may not exist) + _ = os.Remove(logFilePath.String()) + } + + return nil +} diff --git a/must-gather/internal/ui/uiplugin_collector.go b/must-gather/internal/ui/uiplugin_collector.go new file mode 100644 index 0000000000..87ddd494ab --- /dev/null +++ b/must-gather/internal/ui/uiplugin_collector.go @@ -0,0 +1,87 @@ +package ui + +import ( + "context" + "fmt" + + "github.com/openshift/cluster-logging-operator/must-gather/internal/api" + "github.com/openshift/cluster-logging-operator/must-gather/internal/client" + "github.com/openshift/cluster-logging-operator/must-gather/internal/cluster" + kerrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime/schema" +) + +var ( + uipluginGVR = schema.GroupVersionResource{ + Group: "console.openshift.io", + Version: "v1", + Resource: "uiplugins", + } +) + +// UIPluginCollector collects UIPlugin and Console resources +type UIPluginCollector struct { + client *client.Client + logger api.Logger + destDir api.Path +} + +// NewUIPluginCollector creates a new UIPlugin collector +func NewUIPluginCollector(c *client.Client, logger api.Logger, destDir api.Path) *UIPluginCollector { + return &UIPluginCollector{ + client: c, + logger: logger, + destDir: destDir, + } +} + +// Name returns the name of this collector +func (u *UIPluginCollector) Name() string { + return "UIPluginCollector" +} + +// Collect performs the collection of UIPlugin resources +func (u *UIPluginCollector) Collect(ctx context.Context, gvrs ...schema.GroupVersionResource) error { + defer u.logger.Begin("gathering uiplugin and console resources ...")() + + // Check if UIPlugin is installed + uiPluginList, err := u.client.DynamicClient.Resource(uipluginGVR).List(ctx, metav1.ListOptions{}) + if err != nil { + // Only skip if CRD doesn't exist (NotFound or NoKindMatchError) + // Return other errors (RBAC, API issues, etc.) to caller + if kerrors.IsNotFound(err) || meta.IsNoMatchError(err) { + u.logger.Info("UIPlugin CRD not available, skipping uiplugin collection") + return nil + } + return fmt.Errorf("failed to check for UIPlugin resources: %w", err) + } + + if len(uiPluginList.Items) == 0 { + u.logger.Info("No UIPlugin resources found, skipping uiplugin collection") + return nil + } + + destDir := u.destDir.Add(cluster.ArtifactRoot).ForResource(uipluginGVR) + + // Collect UIPlugin resources + if err := u.client.ListResources(ctx, uipluginGVR, "", destDir, metav1.ListOptions{}); err != nil { + return fmt.Errorf("failed to collect UIPlugin resources: %w", err) + } + + // Collect Console ClusterOperator + coGVR := schema.GroupVersionResource{ + Group: cluster.GroupConfig, + Version: "v1", + Resource: "clusteroperators", + } + + consoleDestDir := u.destDir.Add(cluster.ArtifactRoot).ForResource(coGVR) + + if err := u.client.GetResource(ctx, coGVR, "", "console", consoleDestDir.Add("console.yaml")); err != nil { + u.logger.Warn("Failed to collect console ClusterOperator: %v", err) + } + + return nil +} diff --git a/test/e2e/logforwarding/lokistack/forward_to_lokistack_test.go b/test/e2e/logforwarding/lokistack/forward_to_lokistack_test.go index 798fac0991..a083984afd 100644 --- a/test/e2e/logforwarding/lokistack/forward_to_lokistack_test.go +++ b/test/e2e/logforwarding/lokistack/forward_to_lokistack_test.go @@ -133,6 +133,7 @@ var _ = Describe("[ClusterLogForwarder] Forward to Lokistack", func() { found, err = lokistackReceiver.HasInfrastructureLogs(serviceAccount.Name, framework.DefaultWaitForLogsTimeout) Expect(err).To(BeNil()) Expect(found).To(BeTrue()) + Fail(">>>>> REMOVE ME <<< TESTING MUST-GATHER") }) It("should send logs to lokistack when dataModel is not spec'd", func() {