Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
523620f
Prevent addition of duplicate PF rules on scale up and no rules left …
Pearl1594 Mar 11, 2024
3579806
fix missing dependency injection
Pearl1594 Mar 28, 2024
f228c7a
NSX: Fix concurrency issues on port forwarding rules deletion (#37)
nvazquez Apr 10, 2024
a899648
CKS: Externalize control and worker node setup wait time and installa…
Pearl1594 Apr 10, 2024
968235a
NSX: Add shared network support (#41)
Pearl1594 Apr 10, 2024
bcc8ff2
NSX: Fix number of physical networks for Guest traffic checks and lef…
nvazquez Apr 12, 2024
f44b8aa
fix logger
Pearl1594 Apr 12, 2024
e79d771
NSX: Handle CheckHealthCommand to avoid host disconnection and errors…
nvazquez May 6, 2024
4e57cc9
Merge branch 'main' of https://github.com/apache/cloudstack into nsx-…
Pearl1594 Jun 5, 2024
0163da4
fix logger
Pearl1594 Jun 5, 2024
0903ba6
Merge branch 'main' of https://github.com/apache/cloudstack into nsx-…
Pearl1594 Jul 22, 2024
2726987
Update UDP active monitor to ICMP
Pearl1594 May 20, 2024
c393973
Fix NPE on restarting VPC with additional public IPs
Pearl1594 Jun 17, 2024
587f6fe
NSX / VPC: Reuse Source NAT IP from systemVM range on restarts
Pearl1594 Jun 21, 2024
459c23b
CKS: Public IP not found for VPC networks
Pearl1594 Jul 5, 2024
89aa0a5
Externalize retries and inverval for NSX segment deletion (#67)
nvazquez May 28, 2024
9733984
remove unused import
Pearl1594 Jul 22, 2024
9c1df1d
remove duplicate imports
Pearl1594 Jul 23, 2024
7e0007b
Merge branch 'main' of https://github.com/apache/cloudstack into nsx-…
Pearl1594 Aug 14, 2024
d2c386c
remove unused import
Pearl1594 Aug 14, 2024
0a31b13
revert externalizing cks settings
Pearl1594 Aug 14, 2024
70f70b2
fix test
Pearl1594 Aug 14, 2024
665b698
Refactor log messages
nvazquez Aug 26, 2024
738f934
Address comments
nvazquez Aug 26, 2024
66ee5da
Merge branch 'main' into nsx-integration-fixes
nvazquez Sep 5, 2024
09159af
Fix issue caused due to forward merge: 90fe1d
nvazquez Sep 5, 2024
d1a6038
Merge branch 'main' into nsx-integration-fixes
yadvr Sep 6, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -79,4 +79,8 @@ public long getResourceId() {
/**
 * Reports the current value of this object's {@code display} field.
 *
 * @return the stored {@code display} flag
 */
public boolean isDisplay() {
    return this.display;
}

/**
 * Overwrites this object's {@code value} field with the supplied string.
 * No validation is performed here; the argument is stored as given.
 *
 * @param value the new value to store
 */
public void setValue(String value) {
this.value = value;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1984,7 +1984,11 @@ public ConfigKey<?>[] getConfigKeys() {
KubernetesClusterUpgradeTimeout,
KubernetesClusterUpgradeRetries,
KubernetesClusterExperimentalFeaturesEnabled,
KubernetesMaxClusterSize
KubernetesMaxClusterSize,
KubernetesControlNodeInstallAttemptWait,
KubernetesControlNodeInstallReattempts,
KubernetesWorkerNodeInstallAttemptWait,
KubernetesWorkerNodeInstallReattempts
};
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,30 @@ public interface KubernetesClusterService extends PluggableService, Configurable
true,
ConfigKey.Scope.Account,
KubernetesServiceEnabled.key());
static final ConfigKey<Long> KubernetesControlNodeInstallAttemptWait = new ConfigKey<Long>("Advanced", Long.class,
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think these settings shouldn't be part of this PR, but rather of #9102.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe this just externalizes the values already present in the CKS scripts, so it wouldn't cause any issues. Let me know if you still want me to revert it.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMO we should fix only NSX-related issues in this PR; the externalized settings can be added in the other PR.

"cloud.kubernetes.control.node.install.attempt.wait.duration",
"15",
"Time in seconds for the installation process to wait before it re-attempts",
true,
KubernetesServiceEnabled.key());
/**
 * Setting {@code cloud.kubernetes.control.node.install.reattempt.count}.
 * Its value is read when the control node configs are generated and is substituted for the
 * {@code {{ k8s.install.reattempts.count }}} placeholder, which the setup script assigns to
 * {@code MAX_OFFLINE_INSTALL_ATTEMPTS}.
 * NOTE(review): "100" appears to be the default value and matches the number previously
 * hard-coded in the control node script; confirm against the ConfigKey constructor.
 */
static final ConfigKey<Long> KubernetesControlNodeInstallReattempts = new ConfigKey<Long>("Advanced", Long.class,
"cloud.kubernetes.control.node.install.reattempt.count",
"100",
"Number of times the offline installation of K8S will be re-attempted",
true,
KubernetesServiceEnabled.key());
/**
 * Setting {@code cloud.kubernetes.worker.node.install.attempt.wait.duration}: per its
 * description, the time in seconds the installation process waits before it re-attempts.
 * Its value is read when the worker node config is generated and is substituted for the
 * {@code {{ k8s.install.wait.time }}} placeholder.
 * Declared {@code static final} explicitly so it matches the sibling settings; interface
 * fields are implicitly static, so this does not change behaviour.
 * NOTE(review): "30" appears to be the default value and matches the number previously
 * hard-coded in the worker node script; confirm against the ConfigKey constructor.
 */
static final ConfigKey<Long> KubernetesWorkerNodeInstallAttemptWait = new ConfigKey<Long>("Advanced", Long.class,
        "cloud.kubernetes.worker.node.install.attempt.wait.duration",
        "30",
        "Time in seconds for the installation process to wait before it re-attempts",
        true,
        KubernetesServiceEnabled.key());
/**
 * Setting {@code cloud.kubernetes.worker.node.install.reattempt.count}.
 * Its value is read when the worker node config is generated and is substituted for the
 * {@code {{ k8s.install.reattempts.count }}} placeholder, which the setup script assigns to
 * {@code MAX_OFFLINE_INSTALL_ATTEMPTS}.
 * NOTE(review): "40" appears to be the default value and matches the number previously
 * hard-coded in the worker node script; confirm against the ConfigKey constructor.
 */
static final ConfigKey<Long> KubernetesWorkerNodeInstallReattempts = new ConfigKey<Long>("Advanced", Long.class,
"cloud.kubernetes.worker.node.install.reattempt.count",
"40",
"Number of times the offline installation of K8S will be re-attempted",
true,
KubernetesServiceEnabled.key());

KubernetesCluster findById(final Long id);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@

import javax.inject.Inject;

import com.cloud.kubernetes.cluster.KubernetesClusterService;
import com.cloud.network.rules.FirewallManager;
import com.cloud.offering.NetworkOffering;
import com.cloud.offerings.dao.NetworkOfferingDao;
import org.apache.cloudstack.api.ApiConstants;
Expand Down Expand Up @@ -136,6 +138,8 @@ public class KubernetesClusterResourceModifierActionWorker extends KubernetesClu
@Inject
protected RulesService rulesService;
@Inject
protected FirewallManager firewallManager;
@Inject
protected PortForwardingRulesDao portForwardingRulesDao;
@Inject
protected ResourceManager resourceManager;
Expand Down Expand Up @@ -171,6 +175,11 @@ private String getKubernetesNodeConfig(final String joinIp, final boolean ejectI
final String joinIpKey = "{{ k8s_control_node.join_ip }}";
final String clusterTokenKey = "{{ k8s_control_node.cluster.token }}";
final String ejectIsoKey = "{{ k8s.eject.iso }}";
final String installWaitTime = "{{ k8s.install.wait.time }}";
final String installReattemptsCount = "{{ k8s.install.reattempts.count }}";

final Long waitTime = KubernetesClusterService.KubernetesWorkerNodeInstallAttemptWait.value();
final Long reattempts = KubernetesClusterService.KubernetesWorkerNodeInstallReattempts.value();
String pubKey = "- \"" + configurationDao.getValue("ssh.publickey") + "\"";
String sshKeyPair = kubernetesCluster.getKeyPair();
if (StringUtils.isNotEmpty(sshKeyPair)) {
Expand All @@ -183,7 +192,8 @@ private String getKubernetesNodeConfig(final String joinIp, final boolean ejectI
k8sNodeConfig = k8sNodeConfig.replace(joinIpKey, joinIp);
k8sNodeConfig = k8sNodeConfig.replace(clusterTokenKey, KubernetesClusterUtil.generateClusterToken(kubernetesCluster));
k8sNodeConfig = k8sNodeConfig.replace(ejectIsoKey, String.valueOf(ejectIso));

k8sNodeConfig = k8sNodeConfig.replace(installWaitTime, String.valueOf(waitTime));
k8sNodeConfig = k8sNodeConfig.replace(installReattemptsCount, String.valueOf(reattempts));
k8sNodeConfig = updateKubeConfigWithRegistryDetails(k8sNodeConfig);

return k8sNodeConfig;
Expand Down Expand Up @@ -551,9 +561,12 @@ protected void removePortForwardingRules(final IpAddress publicIp, final Network
for (PortForwardingRuleVO pfRule : pfRules) {
if (startPort <= pfRule.getSourcePortStart() && pfRule.getSourcePortStart() <= endPort) {
portForwardingRulesDao.remove(pfRule.getId());
logger.trace("Marking PF rule " + pfRule + " with Revoke state");
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this log message is a bit of a regression, does it really need to change?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, refactored

pfRule.setState(FirewallRule.State.Revoke);

}
}
rulesService.applyPortForwardingRules(publicIp.getId(), account);
firewallManager.applyRules(pfRules, false, true);
}

protected void removeLoadBalancingRule(final IpAddress publicIp, final Network network,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,9 @@ private String getKubernetesControlNodeConfig(final String controlNodeIp, final
final String clusterToken = "{{ k8s_control_node.cluster.token }}";
final String clusterInitArgsKey = "{{ k8s_control_node.cluster.initargs }}";
final String ejectIsoKey = "{{ k8s.eject.iso }}";
final String installWaitTime = "{{ k8s.install.wait.time }}";
final String installReattemptsCount = "{{ k8s.install.reattempts.count }}";

final List<String> addresses = new ArrayList<>();
addresses.add(controlNodeIp);
if (!serverIp.equals(controlNodeIp)) {
Expand All @@ -150,6 +153,8 @@ private String getKubernetesControlNodeConfig(final String controlNodeIp, final
final String tlsClientCert = CertUtils.x509CertificateToPem(certificate.getClientCertificate());
final String tlsPrivateKey = CertUtils.privateKeyToPem(certificate.getPrivateKey());
final String tlsCaCert = CertUtils.x509CertificatesToPem(certificate.getCaCertificates());
final Long waitTime = KubernetesClusterService.KubernetesControlNodeInstallAttemptWait.value();
final Long reattempts = KubernetesClusterService.KubernetesControlNodeInstallReattempts.value();
k8sControlNodeConfig = k8sControlNodeConfig.replace(apiServerCert, tlsClientCert.replace("\n", "\n "));
k8sControlNodeConfig = k8sControlNodeConfig.replace(apiServerKey, tlsPrivateKey.replace("\n", "\n "));
k8sControlNodeConfig = k8sControlNodeConfig.replace(caCert, tlsCaCert.replace("\n", "\n "));
Expand All @@ -161,6 +166,8 @@ private String getKubernetesControlNodeConfig(final String controlNodeIp, final
pubKey += "\n - \"" + sshkp.getPublicKey() + "\"";
}
}
k8sControlNodeConfig = k8sControlNodeConfig.replace(installWaitTime, String.valueOf(waitTime));
k8sControlNodeConfig = k8sControlNodeConfig.replace(installReattemptsCount, String.valueOf(reattempts));
k8sControlNodeConfig = k8sControlNodeConfig.replace(sshPubKey, pubKey);
k8sControlNodeConfig = k8sControlNodeConfig.replace(clusterToken, KubernetesClusterUtil.generateClusterToken(kubernetesCluster));
String initArgs = "";
Expand Down Expand Up @@ -241,6 +248,11 @@ private String getKubernetesAdditionalControlNodeConfig(final String joinIp, fin
final String sshPubKey = "{{ k8s.ssh.pub.key }}";
final String clusterHACertificateKey = "{{ k8s_control_node.cluster.ha.certificate.key }}";
final String ejectIsoKey = "{{ k8s.eject.iso }}";
final String installWaitTime = "{{ k8s.install.wait.time }}";
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The same applies to all of the CKS logic that uses these settings' values.

final String installReattemptsCount = "{{ k8s.install.reattempts.count }}";

final Long waitTime = KubernetesClusterService.KubernetesControlNodeInstallAttemptWait.value();
final Long reattempts = KubernetesClusterService.KubernetesControlNodeInstallReattempts.value();
String pubKey = "- \"" + configurationDao.getValue("ssh.publickey") + "\"";
String sshKeyPair = kubernetesCluster.getKeyPair();
if (StringUtils.isNotEmpty(sshKeyPair)) {
Expand All @@ -249,6 +261,8 @@ private String getKubernetesAdditionalControlNodeConfig(final String joinIp, fin
pubKey += "\n - \"" + sshkp.getPublicKey() + "\"";
}
}
k8sControlNodeConfig = k8sControlNodeConfig.replace(installWaitTime, String.valueOf(waitTime));
k8sControlNodeConfig = k8sControlNodeConfig.replace(installReattemptsCount, String.valueOf(reattempts));
k8sControlNodeConfig = k8sControlNodeConfig.replace(sshPubKey, pubKey);
k8sControlNodeConfig = k8sControlNodeConfig.replace(joinIpKey, joinIp);
k8sControlNodeConfig = k8sControlNodeConfig.replace(clusterTokenKey, KubernetesClusterUtil.generateClusterToken(kubernetesCluster));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,14 @@ write_files:
ATTEMPT_ONLINE_INSTALL=false
setup_complete=false

OFFLINE_INSTALL_ATTEMPT_SLEEP=15
MAX_OFFLINE_INSTALL_ATTEMPTS=100
OFFLINE_INSTALL_ATTEMPT_SLEEP={{ k8s.install.wait.time }}
MAX_OFFLINE_INSTALL_ATTEMPTS={{ k8s.install.reattempts.count }}
if [[ -z $OFFLINE_INSTALL_ATTEMPT_SLEEP || $OFFLINE_INSTALL_ATTEMPT_SLEEP -eq 0 ]]; then
OFFLINE_INSTALL_ATTEMPT_SLEEP=15
fi
if [[ -z $MAX_OFFLINE_INSTALL_ATTEMPTS || $MAX_OFFLINE_INSTALL_ATTEMPTS -eq 0 ]]; then
MAX_OFFLINE_INSTALL_ATTEMPTS=100
fi
offline_attempts=1
MAX_SETUP_CRUCIAL_CMD_ATTEMPTS=3
EJECT_ISO_FROM_OS={{ k8s.eject.iso }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,14 @@ write_files:
ATTEMPT_ONLINE_INSTALL=false
setup_complete=false

OFFLINE_INSTALL_ATTEMPT_SLEEP=15
MAX_OFFLINE_INSTALL_ATTEMPTS=100
OFFLINE_INSTALL_ATTEMPT_SLEEP={{ k8s.install.wait.time }}
MAX_OFFLINE_INSTALL_ATTEMPTS={{ k8s.install.reattempts.count }}
if [[ -z $OFFLINE_INSTALL_ATTEMPT_SLEEP || $OFFLINE_INSTALL_ATTEMPT_SLEEP -eq 0 ]]; then
OFFLINE_INSTALL_ATTEMPT_SLEEP=15
fi
if [[ -z $MAX_OFFLINE_INSTALL_ATTEMPTS || $MAX_OFFLINE_INSTALL_ATTEMPTS -eq 0 ]]; then
MAX_OFFLINE_INSTALL_ATTEMPTS=100
fi
offline_attempts=1
MAX_SETUP_CRUCIAL_CMD_ATTEMPTS=3
EJECT_ISO_FROM_OS={{ k8s.eject.iso }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,14 @@ write_files:
ATTEMPT_ONLINE_INSTALL=false
setup_complete=false

OFFLINE_INSTALL_ATTEMPT_SLEEP=30
MAX_OFFLINE_INSTALL_ATTEMPTS=40
OFFLINE_INSTALL_ATTEMPT_SLEEP={{ k8s.install.wait.time }}
MAX_OFFLINE_INSTALL_ATTEMPTS={{ k8s.install.reattempts.count }}
if [[ -z $OFFLINE_INSTALL_ATTEMPT_SLEEP || $OFFLINE_INSTALL_ATTEMPT_SLEEP -eq 0 ]]; then
OFFLINE_INSTALL_ATTEMPT_SLEEP=30
fi
if [[ -z $MAX_OFFLINE_INSTALL_ATTEMPTS || $MAX_OFFLINE_INSTALL_ATTEMPTS -eq 0 ]]; then
MAX_OFFLINE_INSTALL_ATTEMPTS=40
fi
offline_attempts=1
MAX_SETUP_CRUCIAL_CMD_ATTEMPTS=3
EJECT_ISO_FROM_OS={{ k8s.eject.iso }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@
import com.cloud.agent.api.Command;

public class NsxAnswer extends Answer {

private boolean objectExists;

/**
 * Creates an answer for the given command by forwarding all three arguments
 * unchanged to the {@code Answer} superclass constructor.
 *
 * @param command the command this answer responds to
 * @param success the success flag passed to the superclass
 * @param details the details string passed to the superclass
 */
public NsxAnswer(final Command command, final boolean success, final String details) {
super(command, success, details);
}
Expand All @@ -28,4 +31,11 @@ public NsxAnswer(final Command command, final Exception e) {
super(command, e);
}

/**
 * Reports the flag most recently stored through {@link #setObjectExists(boolean)}.
 *
 * @return the current value of the {@code objectExists} field
 */
public boolean isObjectExistent() {
    return this.objectExists;
}

/**
 * Stores the given flag in the {@code objectExists} field, which is returned by
 * {@link #isObjectExistent()}.
 *
 * @param objectExisted the flag value to store
 */
public void setObjectExists(boolean objectExisted) {
this.objectExists = objectExisted;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@

import com.cloud.agent.IAgentControl;
import com.cloud.agent.api.Answer;
import com.cloud.agent.api.CheckHealthAnswer;
import com.cloud.agent.api.CheckHealthCommand;
import com.cloud.agent.api.Command;
import com.cloud.agent.api.PingCommand;
import com.cloud.agent.api.ReadyAnswer;
Expand Down Expand Up @@ -102,6 +104,8 @@ public PingCommand getCurrentStatus(long id) {
public Answer executeRequest(Command cmd) {
if (cmd instanceof ReadyCommand) {
return executeRequest((ReadyCommand) cmd);
} else if (cmd instanceof CheckHealthCommand) {
return executeRequest((CheckHealthCommand) cmd);
} else if (cmd instanceof DeleteNsxTier1GatewayCommand) {
return executeRequest((DeleteNsxTier1GatewayCommand) cmd);
} else if (cmd instanceof DeleteNsxSegmentCommand) {
Expand Down Expand Up @@ -293,6 +297,10 @@ private Answer executeRequest(ReadyCommand cmd) {
return new ReadyAnswer(cmd);
}

/**
 * Handles a {@link CheckHealthCommand} by asking the NSX API client whether the
 * NSX controller is active and wrapping that result in a {@link CheckHealthAnswer}.
 *
 * @param cmd the health-check command being answered
 * @return a {@link CheckHealthAnswer} carrying the controller-active flag
 */
private Answer executeRequest(CheckHealthCommand cmd) {
    final boolean controllerActive = nsxApiClient.isNsxControllerActive();
    return new CheckHealthAnswer(cmd, controllerActive);
}

private Answer executeRequest(CreateNsxTier1GatewayCommand cmd) {
String tier1GatewayName = NsxControllerUtils.getTier1GatewayName(cmd.getDomainId(), cmd.getAccountId(), cmd.getZoneId(), cmd.getNetworkResourceId(), cmd.isResourceVpc());
boolean sourceNatEnabled = cmd.isSourceNatEnabled();
Expand Down Expand Up @@ -385,16 +393,21 @@ private NsxAnswer executeRequest(CreateNsxPortForwardRuleCommand cmd) {
cmd.getNetworkResourceId(), cmd.isResourceVpc());
try {
String privatePort = cmd.getPrivatePort();
String service = privatePort.contains("-") ? nsxApiClient.getServicePath(ruleName, privatePort, cmd.getProtocol(), null, null) :
nsxApiClient.getNsxInfraServices(ruleName, privatePort, cmd.getProtocol(), null, null);
logger.debug(String.format("Checking if rule %s exists on Tier 1 Gateway: %s", ruleName, tier1GatewayName));
Comment thread
nvazquez marked this conversation as resolved.
Outdated
if (nsxApiClient.doesPfRuleExist(ruleName, tier1GatewayName)) {
logger.debug(String.format("Port forward rule for port: %s exits on NSX, not adding it again", privatePort));
return new NsxAnswer(cmd, true, null);
String msg = String.format("Port forward rule for port: %s (%s) exits on NSX, not adding it again", ruleName, privatePort);
logger.debug(msg);
NsxAnswer answer = new NsxAnswer(cmd, true, msg);
answer.setObjectExists(true);
return answer;
}
String service = privatePort.contains("-") ? nsxApiClient.getServicePath(ruleName, privatePort, cmd.getProtocol(), null, null) :
nsxApiClient.getNsxInfraServices(ruleName, privatePort, cmd.getProtocol(), null, null);
nsxApiClient.createPortForwardingRule(ruleName, tier1GatewayName, cmd.getNetworkResourceName(), cmd.getPublicIp(),
cmd.getVmIp(), cmd.getPublicPort(), service);
} catch (Exception e) {
logger.error(String.format("Failed to add NSX port forward rule %s for network: %s", ruleName, cmd.getNetworkResourceName()));
String msg = String.format("Failed to add NSX port forward rule %s for network: %s", ruleName, cmd.getNetworkResourceName());
logger.error(msg, e);
return new NsxAnswer(cmd, new CloudRuntimeException(e.getMessage()));
}
return new NsxAnswer(cmd, true, null);
Expand All @@ -415,8 +428,9 @@ private NsxAnswer executeRequest(DeleteNsxNatRuleCommand cmd) {
nsxApiClient.deleteNatRule(cmd.getService(), cmd.getPrivatePort(), cmd.getProtocol(),
cmd.getNetworkResourceName(), tier1GatewayName, ruleName);
} catch (Exception e) {
logger.error(String.format("Failed to add NSX static NAT rule %s for network: %s", ruleName, cmd.getNetworkResourceName()));
return new NsxAnswer(cmd, new CloudRuntimeException(e.getMessage()));
String msg = String.format("Failed to delete NSX rule %s for network %s: due to %s", ruleName, cmd.getNetworkResourceName(), e.getMessage());
logger.error(msg, e);
return new NsxAnswer(cmd, new CloudRuntimeException(msg));
}
return new NsxAnswer(cmd, true, null);
}
Expand Down
Loading