Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/connectedk8s/HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

Release History
===============
1.10.13
+++++++
* Added telemetry for pre-onboarding diagnostic results, including diagnostics execution failures.

1.10.12
+++++++
* Removed deprecated '--app-id' and '--app-secret' RBAC parameters from the extension.
Expand Down
11 changes: 10 additions & 1 deletion src/connectedk8s/azext_connectedk8s/_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,11 @@
PublicKey_Export_Fault_Type = "publickey-export-error"
PrivateKey_Export_Fault_Type = "privatekey-export-error"
Install_HelmRelease_Fault_Type = "helm-release-install-error"
Install_Prediagnostics_Fault_Type = "prediagnostics-failure"
Install_Prediagnostics_Job_Execution_Error_Fault_Type = (
"prediagnostics-job-execution-error"
)
Post_Diagnostic_Precheck_Fault_Type = "post-diagnostic-precheck-failure"
Delete_HelmRelease_Fault_Type = "helm-release-delete-error"
Check_PodStatus_Fault_Type = "check-pod-status-error"
Kubernetes_Connectivity_FaultType = "kubernetes-cluster-connection-error"
Expand Down Expand Up @@ -418,7 +423,7 @@

# Connect Precheck Diagnoser constants
Cluster_Diagnostic_Checks_Job_Registry_Path = (
"azurearck8s/helmchart/stable/clusterdiagnosticchecks:1.31.2"
"azurearck8s/helmchart/stable/clusterdiagnosticchecks:1.33.0"
)
Cluster_Diagnostic_Checks_Helm_Install_Failed_Fault_Type = (
"Error while installing cluster diagnostic checks helm release"
Expand Down Expand Up @@ -475,6 +480,10 @@
"Outbound network connectivity check failed for Cluster Connect"
)
DNS_Check_Result_String = "DNS Result:"
Entra_Connectivity_Check_Result_String = (
"Entra Authentication Endpoint Connectivity Check Result"
)
CRD_Ownership_Check_Failed_String = "Check Failed: CRD"
AZ_CLI_ADAL_TO_MSAL_MIGRATE_VERSION = "2.30.0"
CLIENT_PROXY_VERSION = "1.3.033581"
CLIENT_PROXY_FOLDER = ".clientproxy"
Expand Down
270 changes: 260 additions & 10 deletions src/connectedk8s/azext_connectedk8s/_precheckutils.py

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions src/connectedk8s/azext_connectedk8s/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,9 @@ def check_cluster_DNS(
if (
"NXDOMAIN" in formatted_dns_log
or "connection timed out" in formatted_dns_log
or "no servers could be reached" in formatted_dns_log
or "communications error" in formatted_dns_log
or "timed out" in formatted_dns_log
):
logger.warning(
"Error: We found an issue with the DNS resolution on your cluster. For details about debugging DNS "
Expand Down
34 changes: 34 additions & 0 deletions src/connectedk8s/azext_connectedk8s/custom.py
Original file line number Diff line number Diff line change
Expand Up @@ -367,13 +367,20 @@ def create_connectedk8s(
filepath_with_timestamp, storage_space_available, 1
)

if precheckutils.diagnoser_output:
print("\n--- Pre-onboarding Diagnostic Check Results ---")
for line in precheckutils.diagnoser_output:
print(line.rstrip())
print("--- End of Diagnostic Check Results ---\n")

if storage_space_available is False:
logger.warning(
"There is no storage space available on your device and hence not saving cluster "
"diagnostic check logs on your device"
)

except Exception as e:
precheckutils.send_prediagnostic_job_execution_error_telemetry(reason=str(e))
ex_msg = f"An exception occured while trying to execute pre-onboarding diagnostic checks : {e}"
summ_msg = f"An exception occured while trying to execute pre-onboarding diagnostic checks : {e}"
telemetry.set_exception(
Expand Down Expand Up @@ -401,6 +408,23 @@ def create_connectedk8s(
and not azure_local_disconnected
and not lowbandwidth
):
precheck_failure_summary = precheckutils.get_precheck_failure_summary()
precheck_failure_summary_msg = (
f" Details: {precheck_failure_summary}" if precheck_failure_summary else ""
)
if precheckutils.prediagnostic_job_execution_status == "Completed" or (
precheckutils.prediagnostic_job_execution_status == "NotCompleted"
and precheckutils.prediagnostic_dns_check != "NotApplicable"
):
precheckutils.send_prediagnostic_check_failure_telemetry(
precheckutils.prediagnostic_dns_check,
precheckutils.prediagnostic_outbound_check,
)
elif precheckutils.prediagnostic_job_execution_status not in (
"Completed",
"NotCompleted",
):
precheckutils.send_prediagnostic_job_execution_error_telemetry()
if storage_space_available:
logger.warning(
"The pre-check result logs logs have been saved at this path: "
Expand All @@ -418,6 +442,7 @@ def create_connectedk8s(
"meet the prerequisites - "
+ consts.Doc_Onboarding_PreRequisites_Url
+ " and try onboarding again."
+ precheck_failure_summary_msg
)
raise ValidationError(err_msg)

Expand All @@ -430,6 +455,7 @@ def create_connectedk8s(
err_msg = (
"One or more pre-onboarding diagnostic checks failed and hence not proceeding with "
"cluster onboarding. Please resolve them and try onboarding again."
+ precheck_failure_summary_msg
)
raise ValidationError(err_msg)

Expand All @@ -450,6 +476,10 @@ def create_connectedk8s(
fault_type=consts.Linux_Node_Not_Exists,
summary="Couldn't find any node on the kubernetes cluster with the OS 'linux'",
)
precheckutils.send_post_diagnostic_precheck_failure_telemetry(
check_name="LinuxNodeExists",
reason="Couldn't find any node on the kubernetes cluster with the OS 'linux'",
)
logger.warning(
"Please ensure that this Kubernetes cluster has any nodes with OS 'linux', for scheduling the "
"Arc-Agents onto and connecting to Azure. Learn more at %s",
Expand All @@ -468,6 +498,10 @@ def create_connectedk8s(
fault_type=consts.Cannot_Create_ClusterRoleBindings_Fault_Type,
summary=summ_msg,
)
precheckutils.send_post_diagnostic_precheck_failure_telemetry(
check_name="ClusterRoleBindings",
reason=ex_msg,
)
err_msg = (
"Your credentials doesn't have permission to create clusterrolebindings on this "
"kubernetes cluster. Please check your permissions."
Expand Down
Loading
Loading