Skip to content

Commit c258e68

Browse files
authored
Merge branch 'main' into update_ingest_nas
2 parents 9adef68 + 4d74885 commit c258e68

5 files changed

Lines changed: 118 additions & 23 deletions

File tree

Management-Utilities/fsx-ontap-aws-cli-scripts/create_fsxn_filesystem

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -221,14 +221,13 @@ if [ $size -lt $minSize ]; then
221221
usage
222222
fi
223223

224-
echo aws fsx create-file-system --output=json --file-system-type ONTAP --storage-capacity $size --subnet-ids $subnetID1 $subnetID2 --storage-type SSD --tags "Key=Name,Value=$fileSystemName" $securityGroupOption --ontap-configuration '{
224+
aws fsx create-file-system --output=json --file-system-type ONTAP --storage-capacity $size --subnet-ids $subnetID1 $subnetID2 --storage-type SSD --tags "Key=Name,Value=$fileSystemName" $securityGroupOption --ontap-configuration '{
225225
"PreferredSubnetId": "'$subnetID1'",
226226
'$endpointips'
227227
"DeploymentType": "'$azType'",
228228
"HAPairs": '$numPairs',
229-
"ThroughputCapacityPerHAPair": '$throughput'}' --region=$region
230-
# "ThroughputCapacityPerHAPair": '$throughput'}' --region=$region > $tmpout 2>&1
231-
exit
229+
"ThroughputCapacityPerHAPair": '$throughput'}' --region=$region > $tmpout 2>&1
230+
232231
if [ $? != "0" ]; then
233232
echo "Failed to create FSxN file system." 1>&2
234233
cat $tmpout 1>&2

Management-Utilities/fsx-ontap-aws-cli-scripts/list_fsxn_svms

Lines changed: 33 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
# o Region
1919
# o File system ID
2020
# o File System Name - optional
21+
# o The management and NAS IP address of the SVM - optional
2122
# o SVM ID
2223
# o SVM Name
2324
################################################################################
@@ -27,9 +28,10 @@
2728
################################################################################
2829
usage () {
2930
cat 1>&2 <<EOF
30-
Usage $(basename $0) [-r region] [-a] [-n] [-i fileSystemID] [-f fileSystemName]
31+
Usage $(basename $0) [-r region] [-a] [-n] [-p] [-i fileSystemID] [-f fileSystemName]
3132
Where: -a means all regions
3233
-n means to include file systems name
34+
-p means to include the management and NAS IP address
3335
-i means to only include SVMs that reside under the FSxN file system with the fileSystemID.
3436
-f means to only include SVMs that reside under the FSxN file system with the file system name.
3537
EOF
@@ -55,15 +57,18 @@ fi
5557
# Process command line arguments.
5658
allRegions=false
5759
includeFsName=false
60+
includeIp=false
5861
region=$(aws configure list | egrep '^.*egion ' | awk '{print $2}')
59-
while getopts "hanr:i:f:" option; do
62+
while getopts "hanr:i:f:p" option; do
6063
case "$option" in
6164
r) region="$OPTARG"
6265
;;
6366
a) allRegions=true
6467
;;
6568
n) includeFsName=true
6669
;;
70+
p) includeIp=true
71+
;;
6772
i) fileSystemID="$OPTARG"
6873
;;
6974
f) fileSystemName="$OPTARG"
@@ -99,27 +104,41 @@ if [ "$allRegions" = "true" ]; then
99104
else
100105
regions=($region)
101106
fi
102-
103-
if [ ! -z "$fileSystemName" ]; then
104-
fileSystemID=$(aws fsx describe-file-systems --output=json --region=$region 2> /dev/null | jq -r '.FileSystems[] | if((.Tags[] | select(.Key == "Name") .Value) == "'"${fileSystemName}"'") then .FileSystemId else empty end' 2> /dev/null)
105-
if [ -z "$fileSystemID" ]; then
106-
echo "Error, failed to find a file system with the name '$fileSystemName'. Maybe in a different region?" 1>&2
107-
exit 1
108-
fi
109-
fi
110107
#
111108
# Loop on all the regions.
112109
for region in ${regions[*]}; do
110+
if [ ! -z "$fileSystemName" ]; then
111+
fileSystemID=$(aws fsx describe-file-systems --output=json --region=$region 2> /dev/null | jq -r '.FileSystems[] | if((.Tags[] | select(.Key == "Name") .Value) == "'"${fileSystemName}"'") then .FileSystemId else empty end' 2> /dev/null)
112+
if [ -z "$fileSystemID" ]; then
113+
if [ "$allRegions" != "true" ]; then
114+
echo "Error, failed to find a file system with the name '$fileSystemName'. Maybe in a different region?" 1>&2
115+
exit 1
116+
else
117+
#
118+
# If there isn't a file system with that name in this region, then just skip this region.
119+
continue
120+
fi
121+
fi
122+
fi
123+
113124
if [ -z "$fileSystemID" ]; then
114-
aws fsx describe-storage-virtual-machines --region=$region | jq -r '.StorageVirtualMachines[] | .FileSystemId + "," + .StorageVirtualMachineId + "," + .Name' | sort > $tmpout
125+
filter=""
126+
else
127+
filter="--filter Name=file-system-id,Values=$fileSystemID"
128+
fi
129+
aws fsx describe-storage-virtual-machines --region=$region $filter | jq -r '.StorageVirtualMachines[] | "\(.FileSystemId),\(.StorageVirtualMachineId),\(.Endpoints.Nfs.IpAddresses[0]),\(.Name)"' | sort > $tmpout
130+
if [ $includeIp == "true" ]; then
131+
ipFmt="%16s"
132+
ipHeader="IP"
115133
else
116-
aws fsx describe-storage-virtual-machines --region=$region | jq -r '.StorageVirtualMachines[] | if(.FileSystemId == "'$fileSystemID'") then .FileSystemId + "," + .StorageVirtualMachineId + "," + .Name else empty end' | sort > $tmpout
134+
ipFmt="%0s"
135+
ipHeader=""
117136
fi
118137

119138
if [ $includeFsName == "true" ]; then
120139
aws fsx describe-file-systems --region=$region | jq -r '.FileSystems[] | .FileSystemId + "," + (.Tags[] | select(.Key == "Name") .Value)' > $tmpout2
121-
awk -F, -v region=$region 'BEGIN {first=1; maxNameLen=0; while(getline < "'$tmpout2'") {fss[$1]=$2; if(length($2) > maxNameLen) {maxNameLen=length($2)}}; maxNameLen +=2; formatStr="%12s %20s%-"maxNameLen"s %23s %s\n"}; {if(first) {printf "\n"; printf formatStr, "Region", "FileSystem ID", "(Name)", "SVM ID", "SVM Name"; first=0}; name="("fss[$1]")"; printf formatStr, region, $1, name, $2, $3}' < $tmpout
140+
awk -F, -v region=$region -v ipFmt=$ipFmt -v ipHeader=$ipHeader 'BEGIN {first=1; maxNameLen=0; while(getline < "'$tmpout2'") {fss[$1]=$2; if(length($2) > maxNameLen) {maxNameLen=length($2)}}; maxNameLen +=2; formatStr="%12s %20s%-"maxNameLen"s %23s "ipFmt" %s\n"}; {if(first) {printf "\n"; printf formatStr, "Region", "FileSystem ID", "(Name)", "SVM ID", ipHeader, "SVM Name"; first=0}; if(ipHeader != "IP") {ip=""} else {ip=$3}; name="("fss[$1]")"; printf formatStr, region, $1, name, $2, ip, $4}' < $tmpout
122141
else
123-
awk -F, -v region=$region 'BEGIN {first=1; formatStr="%12s %23s %23s %s\n"}; {if(first) {printf "\n"; printf formatStr, "Region", "FileSystem ID", "SVM ID", "SVM Name"; first=0}; printf formatStr, region, $1, $2, $3}' < $tmpout
142+
awk -F, -v region=$region -v ipFmt=$ipFmt -v ipHeader=$ipHeader 'BEGIN {first=1; formatStr="%12s %23s %23s "ipFmt" %s\n"}; {if(first) {printf "\n"; printf formatStr, "Region", "FileSystem ID", "SVM ID", ipHeader, "SVM Name"; first=0}; if(ipHeader != "IP") {ip=""} else {ip=$3}; printf formatStr, region, $1, $2, ip, $4}' < $tmpout
124143
fi
125144
done

Monitoring/monitor-ontap-services/README.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,15 @@ Here is an itemized list of the services that this program can monitor:
1616
- If the file system is running off its partner node (i.e. is running in failover mode).
1717
- If any of the network interfaces are down.
1818
- Any EMS message. Filtering is provided to allow you to only be alerted on the EMS messages you care about.
19-
- If a SnapMirror relationship hasn't been updated in a specified amount of time.
19+
- If any of the vservers are down.
20+
- If any of the protocol (NFS & CIFS) servers within a vserver are down.
21+
- If a SnapMirror relationship hasn't been updated within either a specified amount of time or as a percentage of time since its last scheduled update.
2022
- If a SnapMirror update has stalled.
2123
- If a SnapMirror relationship is in a "non-healthy" state.
2224
- If the aggregate is over a certain percentage full. You can set two thresholds (Warning and Critical).
2325
- If a volume is over a certain percentage full. You can set two thresholds (Warning and Critical).
2426
- If a volume is using more than a specified percentage of its inodes. You can set two thresholds (Warning and Critical).
27+
- If a volume is offline.
2528
- If any quotas are over a certain percentage full. You can be alerted on both soft and hard limits.
2629

2730
## Architecture

Monitoring/monitor-ontap-services/cloudformation.yaml

Lines changed: 56 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,9 +43,13 @@ Metadata:
4343
- volumeUtilizationCriticalAlert
4444
- volumeFileUtilizationWarnAlert
4545
- volumeFileUtilizationCriticalAlert
46+
- volumeOfflineAlert
4647
- softQuotaUtilizationAlert
4748
- hardQuotaUtilizationAlert
4849
- inodesQuotaUtilizationAlert
50+
- vserverStateAlert
51+
- vserverNFSProtocolStateAlert
52+
- vserverCIFSProtocolStateAlert
4953

5054
Parameters:
5155
OntapAdminSever:
@@ -220,6 +224,12 @@ Parameters:
220224
Type: Number
221225
Default: 95
222226

227+
volumeOfflineAlert:
228+
Description: "Alert when a volume goes offline."
229+
Type: String
230+
AllowedValues: ["true", "false"]
231+
Default: "true"
232+
223233
softQuotaUtilizationAlert:
224234
Description: "Alert when a soft quota exceeds this threshold in percentage."
225235
Type: Number
@@ -235,6 +245,24 @@ Parameters:
235245
Type: Number
236246
Default: 80
237247

248+
vserverStateAlert:
249+
Description: "Alert when a vserver goes offline."
250+
Type: String
251+
AllowedValues: ["true", "false"]
252+
Default: "true"
253+
254+
vserverNFSProtocolStateAlert:
255+
Description: "Alert when a vserver's NFS protocol goes offline."
256+
Type: String
257+
AllowedValues: ["true", "false"]
258+
Default: "true"
259+
260+
vserverCIFSProtocolStateAlert:
261+
Description: "Alert when a vserver's CIFS protocol goes offline."
262+
Type: String
263+
AllowedValues: ["true", "false"]
264+
Default: "true"
265+
238266
Conditions:
239267
CreateSecretsManagerEndpoint: !Equals [!Ref createSecretsManagerEndpoint, "true"]
240268
CreateSNSEndpoint: !Equals [!Ref createSNSEndpoint, "true"]
@@ -464,9 +492,13 @@ Resources:
464492
initialVolumeUtilizationCriticalAlert: !Ref volumeUtilizationCriticalAlert
465493
initialVolumeFileUtilizationWarnAlert: !Ref volumeFileUtilizationWarnAlert
466494
initialVolumeFileUtilizationCriticalAlert: !Ref volumeFileUtilizationCriticalAlert
495+
initialVolumeOfflineAlert: !Ref volumeOfflineAlert
467496
initialSoftQuotaUtilizationAlert: !Ref softQuotaUtilizationAlert
468497
initialHardQuotaUtilizationAlert: !Ref hardQuotaUtilizationAlert
469498
initialInodesQuotaUtilizationAlert: !Ref inodesQuotaUtilizationAlert
499+
initialVserverStateAlert: !Ref vserverStateAlert
500+
initialVserverNFSProtocolStateAlert: !Ref vserverNFSProtocolStateAlert
501+
initialVserverCIFSProtocolStateAlert: !Ref vserverCIFSProtocolStateAlert
470502
Code:
471503
ZipFile: |
472504
#!/bin/python3
@@ -489,8 +521,8 @@ Resources:
489521
# "matching conditions." It is intended to be run as a Lambda function, but
490522
# can be run as a standalone program.
491523
#
492-
# Version: v2.13
493-
# Date: 2025-04-28-16:26:21
524+
# Version: v2.14
525+
# Date: 2025-04-29-12:53:45
494526
################################################################################
495527
496528
import json
@@ -1733,7 +1765,8 @@ Resources:
17331765
{"name": "ems", "rules": []},
17341766
{"name": "snapmirror", "rules": []},
17351767
{"name": "storage", "rules": []},
1736-
{"name": "quota", "rules": []}
1768+
{"name": "quota", "rules": []},
1769+
{"name": "vserver", "rules": []}
17371770
]}
17381771
#
17391772
# Now, add rules based on the environment variables.
@@ -1797,6 +1830,11 @@ Resources:
17971830
value = int(value)
17981831
if value > 0:
17991832
conditions["services"][getServiceIndex("storage", conditions)]["rules"].append({"volumeCriticalFilesPercentUsed": value})
1833+
elif name == "initialVolumeOfflineAlert":
1834+
if value == "true":
1835+
conditions["services"][getServiceIndex("storage", conditions)]["rules"].append({"offline": True})
1836+
else:
1837+
conditions["services"][getServiceIndex("storage", conditions)]["rules"].append({"offline": False})
18001838
elif name == "initialSoftQuotaUtilizationAlert":
18011839
value = int(value)
18021840
if value > 0:
@@ -1809,6 +1847,21 @@ Resources:
18091847
value = int(value)
18101848
if value > 0:
18111849
conditions["services"][getServiceIndex("quota", conditions)]["rules"].append({"maxQuotaInodesPercentUsed": value})
1850+
elif name == "initialVserverStateAlert":
1851+
if value == "true":
1852+
conditions["services"][getServiceIndex("vserver", conditions)]["rules"].append({"vserverState": True})
1853+
else:
1854+
conditions["services"][getServiceIndex("vserver", conditions)]["rules"].append({"vserverState": False})
1855+
elif name == "initialVserverNFSProtocolStateAlert":
1856+
if value == "true":
1857+
conditions["services"][getServiceIndex("vserver", conditions)]["rules"].append({"nfsProtocolState": True})
1858+
else:
1859+
conditions["services"][getServiceIndex("vserver", conditions)]["rules"].append({"nfsProtocolState": False})
1860+
elif name == "initialVserverCIFSProtocolStateAlert":
1861+
if value == "true":
1862+
conditions["services"][getServiceIndex("vserver", conditions)]["rules"].append({"cifsProtocolState": True})
1863+
else:
1864+
conditions["services"][getServiceIndex("vserver", conditions)]["rules"].append({"cifsProtocolState": False})
18121865
18131866
return conditions
18141867

Monitoring/monitor-ontap-services/monitor_ontap_services.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1262,7 +1262,8 @@ def buildDefaultMatchingConditions():
12621262
{"name": "ems", "rules": []},
12631263
{"name": "snapmirror", "rules": []},
12641264
{"name": "storage", "rules": []},
1265-
{"name": "quota", "rules": []}
1265+
{"name": "quota", "rules": []},
1266+
{"name": "vserver", "rules": []}
12661267
]}
12671268
#
12681269
# Now, add rules based on the environment variables.
@@ -1326,6 +1327,11 @@ def buildDefaultMatchingConditions():
13261327
value = int(value)
13271328
if value > 0:
13281329
conditions["services"][getServiceIndex("storage", conditions)]["rules"].append({"volumeCriticalFilesPercentUsed": value})
1330+
elif name == "initialVolumeOfflineAlert":
1331+
if value == "true":
1332+
conditions["services"][getServiceIndex("storage", conditions)]["rules"].append({"offline": True})
1333+
else:
1334+
conditions["services"][getServiceIndex("storage", conditions)]["rules"].append({"offline": False})
13291335
elif name == "initialSoftQuotaUtilizationAlert":
13301336
value = int(value)
13311337
if value > 0:
@@ -1338,6 +1344,21 @@ def buildDefaultMatchingConditions():
13381344
value = int(value)
13391345
if value > 0:
13401346
conditions["services"][getServiceIndex("quota", conditions)]["rules"].append({"maxQuotaInodesPercentUsed": value})
1347+
elif name == "initialVserverStateAlert":
1348+
if value == "true":
1349+
conditions["services"][getServiceIndex("vserver", conditions)]["rules"].append({"vserverState": True})
1350+
else:
1351+
conditions["services"][getServiceIndex("vserver", conditions)]["rules"].append({"vserverState": False})
1352+
elif name == "initialVserverNFSProtocolStateAlert":
1353+
if value == "true":
1354+
conditions["services"][getServiceIndex("vserver", conditions)]["rules"].append({"nfsProtocolState": True})
1355+
else:
1356+
conditions["services"][getServiceIndex("vserver", conditions)]["rules"].append({"nfsProtocolState": False})
1357+
elif name == "initialVserverCIFSProtocolStateAlert":
1358+
if value == "true":
1359+
conditions["services"][getServiceIndex("vserver", conditions)]["rules"].append({"cifsProtocolState": True})
1360+
else:
1361+
conditions["services"][getServiceIndex("vserver", conditions)]["rules"].append({"cifsProtocolState": False})
13411362

13421363
return conditions
13431364

0 commit comments

Comments
 (0)