Skip to content

Commit bc24385

Browse files
CONSOLE-4950: Add high availability section to Configuration
1 parent 9e23d5b commit bc24385

13 files changed

Lines changed: 1167 additions & 6 deletions

File tree

frontend/packages/console-app/locales/en/console-app.json

Lines changed: 40 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,36 @@
352352
"Unpin": "Unpin",
353353
"Remove from navigation?": "Remove from navigation?",
354354
"Remove": "Remove",
355+
"auto-reboot": "auto-reboot",
356+
"machine replacement": "machine replacement",
357+
"template remediation": "template remediation",
358+
"{{prefix}}: {{remediation}}; Drain: {{timeout}} timeout": "{{prefix}}: {{remediation}}; Drain: {{timeout}} timeout",
359+
"{{prefix}}: {{remediation}}": "{{prefix}}: {{remediation}}",
360+
"{{minMinutes}}-{{maxMinutes}} min": "{{minMinutes}}-{{maxMinutes}} min",
361+
"Details": "Details",
362+
"Unable to load high availability details": "Unable to load high availability details",
363+
"Ready": "Ready",
364+
"Unavailable": "Unavailable",
365+
"Remediation": "Remediation",
366+
"Estimated recovery time": "Estimated recovery time",
367+
"Machine/Node health checks": "Machine/Node health checks",
368+
"Unable to load health checks": "Unable to load health checks",
369+
"Scope": "Scope",
370+
"Selector": "Selector",
371+
"Unhealthy conditions": "Unhealthy conditions",
372+
"Last triggered": "Last triggered",
373+
"No matching MachineHealthChecks or NodeHealthChecks": "No matching MachineHealthChecks or NodeHealthChecks",
374+
"High availability": "High availability",
375+
"SNR - Self Node Remediation": "SNR - Self Node Remediation",
376+
"FAR - Fence Agent Remediation": "FAR - Fence Agent Remediation",
377+
"MDR - Metal3-driven Remediation": "MDR - Metal3-driven Remediation",
378+
"Unknown remediation": "Unknown remediation",
379+
"Node remediation agents": "Node remediation agents",
380+
"Unable to load remediation agents": "Unable to load remediation agents",
381+
"Triggered by": "Triggered by",
382+
"Config object": "Config object",
383+
"Last action": "Last action",
384+
"No matching remediation actions": "No matching remediation actions",
355385
"Host addresses": "Host addresses",
356386
"Management": "Management",
357387
"NICs": "NICs",
@@ -363,7 +393,6 @@
363393
"Powering off": "Powering off",
364394
"Powering on": "Powering on",
365395
"Off": "Off",
366-
"Details": "Details",
367396
"Address": "Address",
368397
"Firmware": "Firmware",
369398
"Power state": "Power state",
@@ -501,7 +530,6 @@
501530
"This count is based on your access permissions and might not include all virtual machines.": "This count is based on your access permissions and might not include all virtual machines.",
502531
"{{formattedCores}} cores / {{totalCores}} cores": "{{formattedCores}} cores / {{totalCores}} cores",
503532
"Node": "Node",
504-
"Ready": "Ready",
505533
"Not Ready": "Not Ready",
506534
"Discovered": "Discovered",
507535
"control-plane": "control-plane",
@@ -524,6 +552,16 @@
524552
"Certificate approval required": "Certificate approval required",
525553
"An error occurred. Please try again": "An error occurred. Please try again",
526554
"No new Pods or workloads will be placed on this Node until it's marked as schedulable.": "No new Pods or workloads will be placed on this Node until it's marked as schedulable.",
555+
"NodeHealthCheck": "NodeHealthCheck",
556+
"NodeHealthChecks": "NodeHealthChecks",
557+
"All machines": "All machines",
558+
"MachineSet {{name}}": "MachineSet {{name}}",
559+
"Machine role {{role}}": "Machine role {{role}}",
560+
"Selected machines": "Selected machines",
561+
"Cluster-wide": "Cluster-wide",
562+
"Node role {{role}}": "Node role {{role}}",
563+
"Node roles {{roles}}": "Node roles {{roles}}",
564+
"Selected nodes": "Selected nodes",
527565
"Unable to load VirtualMachines": "Unable to load VirtualMachines",
528566
"Identity providers": "Identity providers",
529567
"Mapping method": "Mapping method",
@@ -568,7 +606,6 @@
568606
"Min available": "Min available",
569607
"Max unavailable": "Max unavailable",
570608
"Allowed disruption": "Allowed disruption",
571-
"Selector": "Selector",
572609
"Label query over pods whose evictions are managed by the disruption budget. Anull selector will match no pods, while an empty ({}) selector will select all pods within the namespace.": "Label query over pods whose evictions are managed by the disruption budget. Anull selector will match no pods, while an empty ({}) selector will select all pods within the namespace.",
573610
"Resource is already covered by another PodDisruptionBudget": "Resource is already covered by another PodDisruptionBudget",
574611
"Availability requirement value": "Availability requirement value",

frontend/packages/console-app/src/components/nodes/configuration/NodeConfiguration.tsx

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import type { FC } from 'react';
22
import type { NodeKind } from '@console/dynamic-plugin-sdk/src';
33
import { CONFIG_PAGE_ID, NodeSubNavPage } from '../NodeSubNavPage';
4+
import HighAvailability from './high-availability/HighAvailability';
45
import Machine from './machine/Machine';
56
import NodeStorage from './node-storage/NodeStorage';
67
import OperatingSystem from './OperatingSystem';
@@ -31,6 +32,13 @@ const standardPages = [
3132
component: Machine,
3233
priority: 40,
3334
},
35+
{
36+
tabId: 'high-availability',
37+
// t('console-app~High availability')
38+
nameKey: 'console-app~High availability',
39+
component: HighAvailability,
40+
priority: 30,
41+
},
3442
];
3543

3644
export const NodeConfiguration: FC<NodeConfigurationProps> = ({ obj }) => (

frontend/packages/console-app/src/components/nodes/configuration/__tests__/NodeConfiguration.spec.tsx

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,7 @@ describe('NodeConfiguration', () => {
200200
page: {
201201
tabId: 'low-priority-tab',
202202
name: 'Low Priority',
203-
priority: 30,
203+
priority: 10,
204204
},
205205
component: jest.fn(() => 'LowPriorityComponent'),
206206
},
@@ -232,6 +232,7 @@ describe('NodeConfiguration', () => {
232232
'Storage',
233233
'Operating system',
234234
'Machine',
235+
'High availability',
235236
'Low Priority',
236237
]);
237238
});
Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
import type { FC } from 'react';
2+
import { useMemo } from 'react';
3+
import {
4+
DescriptionList,
5+
DescriptionListDescription,
6+
DescriptionListGroup,
7+
DescriptionListTerm,
8+
Skeleton,
9+
Title,
10+
} from '@patternfly/react-core';
11+
import { useTranslation } from 'react-i18next';
12+
import Status from '@console/dynamic-plugin-sdk/src/app/components/status/Status';
13+
import type { MachineHealthCheckKind } from '@console/internal/module/k8s';
14+
import { DASH } from '@console/shared/src/constants/ui';
15+
import {
16+
computeRemediationTimeBoundsFromRefs,
17+
dedupeRemediationTemplateRefs,
18+
FALLBACK_REMEDIATION_BOUNDS,
19+
getRemediationTemplateRefsFromHealthChecks,
20+
useRemediationResourcesForEstimatedRecovery,
21+
} from '../../utils/estimatedRecoveryRemediation';
22+
import type { NodeHealthCheckKind } from '../../utils/HealthCheckUtils';
23+
import { formatTimeoutForDisplay, getMaxTimeoutFromConditions } from '../../utils/utils';
24+
25+
/** Props accepted by the high availability `Details` section. */
type DetailsProps = {
  /**
   * MachineHealthChecks to summarize. Only the first entry feeds the
   * "Remediation" text; all entries feed the recovery-time estimate.
   * Presumably pre-filtered by the caller to those matching the node — confirm.
   */
  matchingMachineHealthChecks: MachineHealthCheckKind[];
  /**
   * NodeHealthChecks to summarize. The first entry is used for the
   * "Remediation" text only when no MachineHealthCheck is present.
   */
  matchingNodeHealthChecks: NodeHealthCheckKind[];
  /** While true, every value in the section is rendered as a skeleton. */
  isLoading: boolean;
  /** When truthy, an error message is rendered instead of the description list. */
  loadError?: unknown;
};
31+
32+
// Recovery estimate model from HA guidance:
// 50s node heartbeat detection + health-check timeout + remediation time + ~15s workload restart.
const NODE_HEARTBEAT_DETECTION_SECONDS = 50;
const WORKLOAD_RESTART_SECONDS = 15;

// A MachineHealthCheck carrying this annotation with this value is reported as "auto-reboot".
const REMEDIATION_STRATEGY_ANNOTATION = 'machine.openshift.io/remediation-strategy';
const EXTERNAL_BAREMETAL_STRATEGY = 'external-baremetal';

/**
 * "Details" section of the high availability view.
 *
 * Renders three fields derived from the supplied health checks:
 * - Status: "Ready" when the health checks yield at least one remediation
 *   template reference, otherwise "Unavailable".
 * - Remediation: a one-line summary built from the first MachineHealthCheck,
 *   or from the first NodeHealthCheck when there is no MachineHealthCheck.
 * - Estimated recovery time: a "min-max min" range, or a dash when no timeout
 *   can be derived from the unhealthy conditions.
 *
 * Each value shows a skeleton until both the caller's data and the remediation
 * resources are loaded. A truthy `loadError` replaces the list with an error message.
 */
const Details: FC<DetailsProps> = (props) => {
  const { matchingMachineHealthChecks, matchingNodeHealthChecks, isLoading, loadError } = props;
  const { t } = useTranslation();
  const remediationResources = useRemediationResourcesForEstimatedRecovery();
  const { snrConfigs, farTemplates } = remediationResources;

  // Values are shown only once the caller's data and the remediation resources are both in.
  const detailsReady = remediationResources.loaded && !isLoading;

  const isHighAvailability = useMemo(() => {
    const templateRefs = getRemediationTemplateRefsFromHealthChecks(
      matchingMachineHealthChecks,
      matchingNodeHealthChecks,
    );
    return templateRefs.length > 0;
  }, [matchingMachineHealthChecks, matchingNodeHealthChecks]);

  const remediationDisplay = useMemo(() => {
    const machineCheck = matchingMachineHealthChecks[0];
    const nodeCheck = matchingNodeHealthChecks[0];
    // A MachineHealthCheck takes precedence over a NodeHealthCheck.
    const primaryCheck = machineCheck || nodeCheck;
    if (!primaryCheck) {
      return DASH;
    }
    const prefix = machineCheck ? 'MHC' : 'NHC';

    const describeRemediation = () => {
      if (!machineCheck) {
        return t('console-app~template remediation');
      }
      const strategy = machineCheck.metadata?.annotations?.[REMEDIATION_STRATEGY_ANNOTATION];
      if (strategy === EXTERNAL_BAREMETAL_STRATEGY) {
        return t('console-app~auto-reboot');
      }
      return t('console-app~machine replacement');
    };
    const remediation = describeRemediation();

    const timeoutSeconds = getMaxTimeoutFromConditions(
      primaryCheck.spec?.unhealthyConditions ?? [],
    );
    // Truthiness check: a falsy result (e.g. undefined or 0) omits the timeout suffix.
    if (!timeoutSeconds) {
      return t('console-app~{{prefix}}: {{remediation}}', { prefix, remediation });
    }
    return t('console-app~{{prefix}}: {{remediation}}; Drain: {{timeout}} timeout', {
      prefix,
      remediation,
      timeout: formatTimeoutForDisplay(timeoutSeconds),
    });
  }, [matchingMachineHealthChecks, matchingNodeHealthChecks, t]);

  const estimatedRecoveryTimeDisplay = useMemo(() => {
    const machineConditions = matchingMachineHealthChecks.flatMap(
      (mhc) => mhc.spec?.unhealthyConditions ?? [],
    );
    const nodeConditions = matchingNodeHealthChecks.flatMap(
      (nhc) => nhc.spec?.unhealthyConditions ?? [],
    );
    const timeoutSeconds = getMaxTimeoutFromConditions([...machineConditions, ...nodeConditions]);
    // Unlike the remediation summary, only `undefined` means "no estimate" here.
    if (timeoutSeconds === undefined) {
      return undefined;
    }

    const templateRefs = dedupeRemediationTemplateRefs(
      getRemediationTemplateRefsFromHealthChecks(
        matchingMachineHealthChecks,
        matchingNodeHealthChecks,
      ),
    );
    const bounds =
      computeRemediationTimeBoundsFromRefs(templateRefs, snrConfigs, farTemplates) ??
      FALLBACK_REMEDIATION_BOUNDS;

    // Everything except the remediation step itself.
    const fixedSeconds =
      NODE_HEARTBEAT_DETECTION_SECONDS + timeoutSeconds + WORKLOAD_RESTART_SECONDS;
    const toWholeMinutes = (seconds: number) => Math.ceil(seconds / 60);

    // Never report less than one minute, and never let the upper bound drop below the lower one.
    const minMinutes = Math.max(1, toWholeMinutes(fixedSeconds + bounds.minSeconds));
    const maxMinutes = Math.max(minMinutes, toWholeMinutes(fixedSeconds + bounds.maxSeconds));
    return t('console-app~{{minMinutes}}-{{maxMinutes}} min', { minMinutes, maxMinutes });
  }, [matchingMachineHealthChecks, matchingNodeHealthChecks, snrConfigs, farTemplates, t]);

  const heading = (
    <Title headingLevel="h3" className="co-section-heading">
      <span>{t('console-app~Details')}</span>
    </Title>
  );

  if (loadError) {
    return (
      <>
        {heading}
        {t('console-app~Unable to load high availability details')}
      </>
    );
  }

  return (
    <>
      {heading}
      <DescriptionList
        className="pf-v6-u-ml-lg"
        columnModifier={{ default: '3Col' }}
        isInlineGrid
      >
        <DescriptionListGroup>
          <DescriptionListTerm>{t('console-app~Status')}</DescriptionListTerm>
          <DescriptionListDescription>
            {detailsReady ? (
              <Status
                status={isHighAvailability ? 'Ready' : 'Unknown'}
                title={isHighAvailability ? t('console-app~Ready') : t('console-app~Unavailable')}
              />
            ) : (
              <Skeleton width="120px" />
            )}
          </DescriptionListDescription>
        </DescriptionListGroup>
        <DescriptionListGroup>
          <DescriptionListTerm>{t('console-app~Remediation')}</DescriptionListTerm>
          <DescriptionListDescription>
            {detailsReady ? remediationDisplay : <Skeleton width="220px" />}
          </DescriptionListDescription>
        </DescriptionListGroup>
        <DescriptionListGroup>
          <DescriptionListTerm>{t('console-app~Estimated recovery time')}</DescriptionListTerm>
          <DescriptionListDescription>
            {detailsReady ? estimatedRecoveryTimeDisplay ?? DASH : <Skeleton width="90px" />}
          </DescriptionListDescription>
        </DescriptionListGroup>
      </DescriptionList>
    </>
  );
};
178+
179+
export default Details;

0 commit comments

Comments
 (0)