Skip to content

Commit 3a108de

Browse files
committed
Adapt DALI to nvImageCodec 0.8.0
- Bump required nvImageCodec version range from 0.7.x to 0.8.x
- Update nvimgcodec download URL and hash to 0.8.0.22
- Update nvimgcodecCodeStreamCreateFromHostMem call to match the new 0.8.0 API signature (added nullable parameter)
- Rework qa/TL1_decoder_perf/test.sh to collect nsys profiles on failure for easier debugging
1 parent c056c4e commit 3a108de

4 files changed

Lines changed: 127 additions & 87 deletions

File tree

cmake/Dependencies.common.cmake

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -297,8 +297,8 @@ endif()
297297
##################################################################
298298
set(DALI_INSTALL_REQUIRES_NVIMGCODEC "")
299299
if(BUILD_NVIMAGECODEC)
300-
set(NVIMGCODEC_MIN_VERSION "0.7.0")
301-
set(NVIMGCODEC_MAX_VERSION "0.8.0")
300+
set(NVIMGCODEC_MIN_VERSION "0.8.0")
301+
set(NVIMGCODEC_MAX_VERSION "0.9.0")
302302
message(STATUS "nvImageCodec - requires version >=${NVIMGCODEC_MIN_VERSION}, <${NVIMGCODEC_MAX_VERSION}")
303303
if (WITH_DYNAMIC_NVIMGCODEC)
304304
message(STATUS "nvImageCodec - dynamic load")
@@ -315,8 +315,8 @@ if(BUILD_NVIMAGECODEC)
315315
include(FetchContent)
316316
FetchContent_Declare(
317317
nvimgcodec_headers
318-
URL https://developer.download.nvidia.com/compute/nvimgcodec/redist/nvimgcodec/linux-x86_64/nvimgcodec-linux-x86_64-0.7.0.11-archive.tar.xz
319-
URL_HASH SHA512=0777af0a41500de7aaeffb6966b3da20271f807c6af106307b9759854c082d5b6f850c0455b011b8978fc5954514bb46dbd5da0904d471309adf9fdfbaf7dd98
318+
URL https://developer.download.nvidia.com/compute/nvimgcodec/redist/nvimgcodec/linux-x86_64/nvimgcodec-linux-x86_64-0.8.0.22-archive.tar.xz
319+
URL_HASH SHA512=2a400f75c619a10c3dbcd298a83ef3307f6e08453b2cfb5040f6b22c64c7be0ac4552a2a80ed057afe7657cf0bb8cc2d54cdccf8bc50ffdf34cfd05b45082978
320320
)
321321
FetchContent_Populate(nvimgcodec_headers)
322322
set(nvimgcodec_INCLUDE_DIR "${nvimgcodec_headers_SOURCE_DIR}/${CUDA_VERSION_MAJOR}/include")

conda/third_party/dali_nvimagecodec/recipe/meta.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,15 +13,15 @@
1313
# limitations under the License.
1414

1515

16-
{% set build_version = "0.7.0" %}
16+
{% set build_version = "0.8.0" %}
1717

1818
package:
1919
name: nvidia-nvimagecodec-cuda{{ environ.get('CUDA_VERSION', '') | replace(".","") }}
2020
version: {{ build_version }}
2121

2222
source:
2323
git_url: https://github.com/NVIDIA/nvImageCodec.git
24-
git_rev: v0.7.0
24+
git_rev: v0.8.0
2525

2626
build:
2727
number: 0

dali/operators/imgcodec/util/nvimagecodec_types.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ NvImageCodecCodeStream NvImageCodecCodeStream::FromHostMem(nvimgcodecInstance_t
4444
const void *data, size_t length) {
4545
NvImageCodecCodeStream ret;
4646
CHECK_NVIMGCODEC(nvimgcodecCodeStreamCreateFromHostMem(
47-
instance, &ret.handle_, static_cast<const unsigned char*>(data), length));
47+
instance, &ret.handle_, static_cast<const unsigned char*>(data), length, nullptr));
4848
return ret;
4949
}
5050

qa/TL1_decoder_perf/test.sh

Lines changed: 120 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -3,113 +3,153 @@
33
pip_packages='numpy'
44
target_dir=./internal_tools
55

6+
# One-time pre-step: install nsys (NVIDIA Nsight Systems) if not present
7+
do_once() {
8+
apt update && apt install -y --no-install-recommends gnupg
9+
echo "deb http://developer.download.nvidia.com/devtools/repos/ubuntu$(source /etc/lsb-release && echo "$DISTRIB_RELEASE" | tr -d .)/$(dpkg --print-architecture) /" \
10+
| tee /etc/apt/sources.list.d/nvidia-devtools.list
11+
apt-key adv --fetch-keys http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub
12+
apt update && apt install -y nsight-systems-cli
13+
}
14+
615
LOG1="dali_legacy.log"
716
LOG2="dali_nvimgcodec.log"
817
LOG1_TP="dali_legacy_new_tp.log"
918
LOG2_TP="dali_nvimgcodec_new_tp.log"
1019
LOG1_NDD="dali_ndd_legacy.log"
1120
LOG2_NDD="dali_ndd_nvimgcodec.log"
21+
LOG2_32STREAMS="dali_nvimgcodec_32streams.log"
22+
LOG2_NDD_32STREAMS="dali_ndd_nvimgcodec_32streams.log"
23+
1224
function CLEAN_AND_EXIT {
1325
rm -rf ${LOG1}
1426
rm -rf ${LOG2}
1527
rm -rf ${LOG1_TP}
1628
rm -rf ${LOG2_TP}
1729
rm -rf ${LOG1_NDD}
1830
rm -rf ${LOG2_NDD}
31+
rm -rf ${LOG2_32STREAMS}
32+
rm -rf ${LOG2_NDD_32STREAMS}
1933
exit $1
2034
}
2135

36+
# Run a single benchmark; if NSYS_REP is set, wrap with nsys and write that profile.
37+
run_bench() {
38+
local log_file="$1"
39+
shift
40+
if [ -n "${NSYS_REP}" ]; then
41+
nsys profile -o "${NSYS_REP}" --stats=true "$@" | tee "${log_file}"
42+
else
43+
"$@" | tee "${log_file}"
44+
fi
45+
}
46+
47+
# SPEC:DA-11356-002_v04 - run all benchmarks (optionally with nsys when NSYS_REP is set per run).
48+
run_all_benchmarks() {
49+
if [ "$(uname -p)" == "x86_64" ]; then
50+
# Hopper
51+
TASKSET="taskset --cpu-list 0-127"
52+
BENCH_ARGS="--width_hint 6000 --height_hint 6000 -b 408 -d 0 -g gpu -w 100 -t 100000 -i ${DALI_EXTRA_PATH}/db/single/jpeg -j 70 --hw_load 0.12"
53+
else
54+
# GraceHopper
55+
TASKSET="taskset --cpu-list 0-71"
56+
BENCH_ARGS="--width_hint 6000 --height_hint 6000 -b 408 -d 0 -g gpu -w 100 -t 100000 -i ${DALI_EXTRA_PATH}/db/single/jpeg -j 72 --hw_load 0.11"
57+
fi
58+
run_bench "${LOG1}" ${TASKSET} python hw_decoder_bench.py ${BENCH_ARGS} -p rn50
59+
run_bench "${LOG2}" ${TASKSET} python hw_decoder_bench.py ${BENCH_ARGS} -p rn50 --experimental_decoder
60+
DALI_USE_NEW_THREAD_POOL=1 run_bench "${LOG1_TP}" ${TASKSET} python hw_decoder_bench.py ${BENCH_ARGS} -p rn50
61+
DALI_USE_NEW_THREAD_POOL=1 run_bench "${LOG2_TP}" ${TASKSET} python hw_decoder_bench.py ${BENCH_ARGS} -p rn50 --experimental_decoder
62+
run_bench "${LOG1_NDD}" ${TASKSET} python hw_decoder_bench.py ${BENCH_ARGS} -p ndd_rn50
63+
run_bench "${LOG2_NDD}" ${TASKSET} python hw_decoder_bench.py ${BENCH_ARGS} -p ndd_rn50 --experimental_decoder
64+
NVIMGCODEC_DEFAULT_NUM_CUDA_STREAMS=32 run_bench "${LOG2_32STREAMS}" ${TASKSET} python hw_decoder_bench.py ${BENCH_ARGS} -p rn50 --experimental_decoder
65+
NVIMGCODEC_DEFAULT_NUM_CUDA_STREAMS=32 run_bench "${LOG2_NDD_32STREAMS}" ${TASKSET} python hw_decoder_bench.py ${BENCH_ARGS} -p ndd_rn50 --experimental_decoder
66+
}
67+
2268
test_body() {
23-
# SPEC:DA-11356-002_v04
24-
if [ "$(uname -p)" == "x86_64" ]; then
25-
# Hopper
26-
MIN_PERF=19000;
27-
MIN_PERF2=18000; # TODO(janton): target is to be 19000 as well
28-
MIN_PERF_NDD=14000;
29-
MIN_PERF2_NDD=14000; # TODO(janton): remove this second value.
30-
# use taskset to avoid inefficient data migration between cores we don't want to use
31-
taskset --cpu-list 0-127 python hw_decoder_bench.py --width_hint 6000 --height_hint 6000 -b 408 -d 0 -g gpu -w 100 -t 100000 -i ${DALI_EXTRA_PATH}/db/single/jpeg -p rn50 -j 70 --hw_load 0.12 | tee ${LOG1}
32-
taskset --cpu-list 0-127 python hw_decoder_bench.py --width_hint 6000 --height_hint 6000 -b 408 -d 0 -g gpu -w 100 -t 100000 -i ${DALI_EXTRA_PATH}/db/single/jpeg -p rn50 -j 70 --hw_load 0.12 --experimental_decoder | tee ${LOG2}
33-
DALI_USE_NEW_THREAD_POOL=1 taskset --cpu-list 0-127 python hw_decoder_bench.py --width_hint 6000 --height_hint 6000 -b 408 -d 0 -g gpu -w 100 -t 100000 -i ${DALI_EXTRA_PATH}/db/single/jpeg -p rn50 -j 70 --hw_load 0.12 | tee ${LOG1_TP}
34-
DALI_USE_NEW_THREAD_POOL=1 taskset --cpu-list 0-127 python hw_decoder_bench.py --width_hint 6000 --height_hint 6000 -b 408 -d 0 -g gpu -w 100 -t 100000 -i ${DALI_EXTRA_PATH}/db/single/jpeg -p rn50 -j 70 --hw_load 0.12 --experimental_decoder | tee ${LOG2_TP}
35-
taskset --cpu-list 0-127 python hw_decoder_bench.py --width_hint 6000 --height_hint 6000 -b 408 -d 0 -g gpu -w 100 -t 100000 -i ${DALI_EXTRA_PATH}/db/single/jpeg -p ndd_rn50 -j 70 --hw_load 0.12 | tee ${LOG1_NDD}
36-
taskset --cpu-list 0-127 python hw_decoder_bench.py --width_hint 6000 --height_hint 6000 -b 408 -d 0 -g gpu -w 100 -t 100000 -i ${DALI_EXTRA_PATH}/db/single/jpeg -p ndd_rn50 -j 70 --hw_load 0.12 --experimental_decoder | tee ${LOG2_NDD}
69+
if [ "$(uname -p)" == "x86_64" ]; then
70+
MIN_PERF=19000
71+
MIN_PERF2=18000 # TODO(janton): target is to be 19000 as well
72+
MIN_PERF_NDD=14000
73+
MIN_PERF2_NDD=14000 # TODO(janton): remove this second value.
74+
else
75+
MIN_PERF=29000
76+
MIN_PERF2=29000 # TODO(janton): remove this second value.
77+
MIN_PERF_NDD=20000
78+
MIN_PERF2_NDD=20000 # TODO(janton): remove this second value.
79+
fi
3780

38-
else
39-
# GraceHopper
40-
MIN_PERF=29000;
41-
MIN_PERF2=29000; # TODO(janton): remove this second value.
42-
MIN_PERF_NDD=20000;
43-
MIN_PERF2_NDD=20000; # TODO(janton): remove this second value.
44-
# use taskset to avoid inefficient data migration between cores we don't want to use
45-
taskset --cpu-list 0-71 python hw_decoder_bench.py --width_hint 6000 --height_hint 6000 -b 408 -d 0 -g gpu -w 100 -t 100000 -i ${DALI_EXTRA_PATH}/db/single/jpeg -p rn50 -j 72 --hw_load 0.11 | tee ${LOG1}
46-
taskset --cpu-list 0-71 python hw_decoder_bench.py --width_hint 6000 --height_hint 6000 -b 408 -d 0 -g gpu -w 100 -t 100000 -i ${DALI_EXTRA_PATH}/db/single/jpeg -p rn50 -j 72 --hw_load 0.11 --experimental_decoder | tee ${LOG2}
47-
DALI_USE_NEW_THREAD_POOL=1 taskset --cpu-list 0-71 python hw_decoder_bench.py --width_hint 6000 --height_hint 6000 -b 408 -d 0 -g gpu -w 100 -t 100000 -i ${DALI_EXTRA_PATH}/db/single/jpeg -p rn50 -j 72 --hw_load 0.11 | tee ${LOG1_TP}
48-
DALI_USE_NEW_THREAD_POOL=1 taskset --cpu-list 0-71 python hw_decoder_bench.py --width_hint 6000 --height_hint 6000 -b 408 -d 0 -g gpu -w 100 -t 100000 -i ${DALI_EXTRA_PATH}/db/single/jpeg -p rn50 -j 72 --hw_load 0.11 --experimental_decoder | tee ${LOG2_TP}
49-
taskset --cpu-list 0-71 python hw_decoder_bench.py --width_hint 6000 --height_hint 6000 -b 408 -d 0 -g gpu -w 100 -t 100000 -i ${DALI_EXTRA_PATH}/db/single/jpeg -p ndd_rn50 -j 72 --hw_load 0.11 | tee ${LOG1_NDD}
50-
taskset --cpu-list 0-71 python hw_decoder_bench.py --width_hint 6000 --height_hint 6000 -b 408 -d 0 -g gpu -w 100 -t 100000 -i ${DALI_EXTRA_PATH}/db/single/jpeg -p ndd_rn50 -j 72 --hw_load 0.11 --experimental_decoder | tee ${LOG2_NDD}
51-
fi
81+
# First run: all benchmarks without nsys
82+
unset NSYS_REP
83+
run_all_benchmarks
5284

53-
# Regex Explanation:
54-
# Total Throughput: : Matches the literal string "Total Throughput: ".
55-
# \K: Resets the start of the match, so anything before \K is not included in the output.
56-
# [0-9]+(\.[0-9]+)?: Matches the number, with an optional decimal part.
57-
# (?= frames/sec): ensures " frames/sec" follows the number, but doesn't include it.
58-
extract_perf() {
59-
log_file="$1"
60-
grep -oP 'Total Throughput: \K[0-9]+(\.[0-9]+)?(?= frames/sec)' "${log_file}"
61-
}
85+
# Regex: extract "Total Throughput: X frames/sec" -> X
86+
extract_perf() {
87+
grep -oP 'Total Throughput: \K[0-9]+(\.[0-9]+)?(?= frames/sec)' "$1"
88+
}
6289

90+
perf_check() {
91+
local value=$(extract_perf "$1")
92+
local min_value=$2
93+
local percent=${3:-0}
94+
local tolerance=$(awk -v p="$percent" 'BEGIN{print p/100}')
95+
echo "$value $min_value" | awk -v tol="$tolerance" '{
96+
lower = $2 * (1 - tol);
97+
if ($1 >= lower) {print "OK"} else {print "FAIL"}
98+
}'
99+
}
63100

64-
perf_check() {
65-
# Checks if the extracted performance value from the specified log file
66-
# is within a given percentage tolerance of a minimum threshold.
101+
PERF_RESULT1=$(perf_check "${LOG1}" "$MIN_PERF")
102+
PERF_RESULT2=$(perf_check "${LOG2}" "$MIN_PERF2")
103+
PERF_RESULT1_NDD=$(perf_check "${LOG1_NDD}" "$MIN_PERF_NDD")
104+
PERF_RESULT2_NDD=$(perf_check "${LOG2_NDD}" "$MIN_PERF2_NDD")
105+
PERF_RESULT3=$(perf_check "${LOG2}" "$(extract_perf "${LOG1}")" 5)
106+
PERF_RESULT3_NDD=$(perf_check "${LOG2_NDD}" "$(extract_perf "${LOG1_NDD}")" 5)
107+
PERF_RESULT1_TP=$(perf_check "${LOG1_TP}" "$(extract_perf "${LOG1}")" 2)
108+
PERF_RESULT2_TP=$(perf_check "${LOG2_TP}" "$(extract_perf "${LOG2}")" 2)
109+
PERF_RESULT2_32STREAMS=$(perf_check "${LOG2_32STREAMS}" "$MIN_PERF2")
110+
PERF_RESULT2_NDD_32STREAMS=$(perf_check "${LOG2_NDD_32STREAMS}" "$MIN_PERF2_NDD")
111+
PERF_RESULT3_32STREAMS=$(perf_check "${LOG2_32STREAMS}" "$(extract_perf "${LOG1}")" 5)
112+
PERF_RESULT3_NDD_32STREAMS=$(perf_check "${LOG2_NDD_32STREAMS}" "$(extract_perf "${LOG1_NDD}")" 5)
67113

68-
# Args:
69-
# $1: The log file to extract the throughput value from.
70-
# $2: The minimum threshold value to compare against.
71-
# $3: (Optional) Percent tolerance. If specified, allows value to be
72-
# within $2 * (1 - percent/100). Defaults to 0.
114+
echo "PERF_RESULT1=${PERF_RESULT1}"
115+
echo "PERF_RESULT2=${PERF_RESULT2}"
116+
echo "PERF_RESULT3=${PERF_RESULT3}"
117+
echo "PERF_RESULT1_TP=${PERF_RESULT1_TP}"
118+
echo "PERF_RESULT2_TP=${PERF_RESULT2_TP}"
119+
echo "PERF_RESULT1_NDD=${PERF_RESULT1_NDD}"
120+
echo "PERF_RESULT2_NDD=${PERF_RESULT2_NDD}"
121+
echo "PERF_RESULT3_NDD=${PERF_RESULT3_NDD}"
122+
echo "PERF_RESULT2_32STREAMS=${PERF_RESULT2_32STREAMS}"
123+
echo "PERF_RESULT2_NDD_32STREAMS=${PERF_RESULT2_NDD_32STREAMS}"
124+
echo "PERF_RESULT3_32STREAMS=${PERF_RESULT3_32STREAMS}"
125+
echo "PERF_RESULT3_NDD_32STREAMS=${PERF_RESULT3_NDD_32STREAMS}"
73126

74-
# Returns:
75-
# Prints "OK" if value >= min_value*(1-tolerance), "FAIL" otherwise.
127+
# don't check experimental decoder performance with dynamic mode (PERF_RESULT2_NDD, PERF_RESULT3_NDD)
128+
if [[ "$PERF_RESULT1" == "OK" && "$PERF_RESULT2" == "OK" && "$PERF_RESULT1_TP" == "OK" && "$PERF_RESULT2_TP" == "OK" && "$PERF_RESULT3" == "OK" && "$PERF_RESULT1_NDD" == "OK" && "$PERF_RESULT2_32STREAMS" == "OK" && "$PERF_RESULT3_32STREAMS" == "OK" && "$PERF_RESULT2_NDD_32STREAMS" == "OK" && "$PERF_RESULT3_NDD_32STREAMS" == "OK" ]]; then
129+
CLEAN_AND_EXIT 0
130+
fi
76131

77-
local value=$(extract_perf "$1")
78-
local min_value=$2
79-
local percent=${3:-0}
80-
# Check if value is within percent% of min_value below
81-
local tolerance=$(awk -v p="$percent" 'BEGIN{print p/100}')
82-
echo "$value $min_value" | awk -v tol="$tolerance" '{
83-
lower = $2 * (1 - tol);
84-
if ($1 >= lower) {print "OK"} else {print "FAIL"}
85-
}'
86-
}
87-
PERF_RESULT1=$(perf_check "${LOG1}" "$MIN_PERF")
88-
PERF_RESULT2=$(perf_check "${LOG2}" "$MIN_PERF2")
89-
PERF_RESULT1_NDD=$(perf_check "${LOG1_NDD}" "$MIN_PERF_NDD")
90-
PERF_RESULT2_NDD=$(perf_check "${LOG2_NDD}" "$MIN_PERF2_NDD")
91-
PERF_RESULT3=$(perf_check "${LOG2}" "$(extract_perf "${LOG1}")" 5)
92-
PERF_RESULT3_NDD=$(perf_check "${LOG2_NDD}" "$(extract_perf "${LOG1_NDD}")" 5)
93-
PERF_RESULT1_TP=$(perf_check "${LOG1_TP}" "$(extract_perf "${LOG1}")" 2)
94-
PERF_RESULT2_TP=$(perf_check "${LOG2_TP}" "$(extract_perf "${LOG2}")" 2)
132+
# On failure: re-run all benchmarks with nsys and save profiles to core_artifacts
133+
echo "Performance check failed; re-running all benchmarks with nsys profiling..."
134+
ARTIFACTS_DIR="${topdir}/core_artifacts"
135+
mkdir -p "${ARTIFACTS_DIR}"
95136

96-
echo "PERF_RESULT1=${PERF_RESULT1}"
97-
echo "PERF_RESULT2=${PERF_RESULT2}"
98-
echo "PERF_RESULT3=${PERF_RESULT3}"
99-
echo "PERF_RESULT1_TP=${PERF_RESULT1_TP}"
100-
echo "PERF_RESULT2_TP=${PERF_RESULT2_TP}"
101-
echo "PERF_RESULT1_NDD=${PERF_RESULT1_NDD}"
102-
echo "PERF_RESULT2_NDD=${PERF_RESULT2_NDD}"
103-
echo "PERF_RESULT3_NDD=${PERF_RESULT3_NDD}"
137+
if [ "$(uname -p)" == "x86_64" ]; then
138+
TASKSET="taskset --cpu-list 0-127"
139+
BENCH_ARGS="--width_hint 6000 --height_hint 6000 -b 408 -d 0 -g gpu -w 100 -t 100000 -i ${DALI_EXTRA_PATH}/db/single/jpeg -j 70 --hw_load 0.12"
140+
else
141+
TASKSET="taskset --cpu-list 0-71"
142+
BENCH_ARGS="--width_hint 6000 --height_hint 6000 -b 408 -d 0 -g gpu -w 100 -t 100000 -i ${DALI_EXTRA_PATH}/db/single/jpeg -j 72 --hw_load 0.11"
143+
fi
144+
NSYS_REP="decoder_perf_legacy.nsys-rep" run_bench "${LOG1}" ${TASKSET} python hw_decoder_bench.py ${BENCH_ARGS} -p rn50
145+
NSYS_REP="decoder_perf_nvimgcodec.nsys-rep" run_bench "${LOG2}" ${TASKSET} python hw_decoder_bench.py ${BENCH_ARGS} -p rn50 --experimental_decoder
146+
NSYS_REP="decoder_perf_ndd_legacy.nsys-rep" run_bench "${LOG1_NDD}" ${TASKSET} python hw_decoder_bench.py ${BENCH_ARGS} -p ndd_rn50
147+
NSYS_REP="decoder_perf_ndd_nvimgcodec.nsys-rep" run_bench "${LOG2_NDD}" ${TASKSET} python hw_decoder_bench.py ${BENCH_ARGS} -p ndd_rn50 --experimental_decoder
104148

105-
# if [[ "$PERF_RESULT1" == "OK" && "$PERF_RESULT2" == "OK" && "$PERF_RESULT3" == "OK" && "$PERF_RESULT1_NDD" == "OK" && "$PERF_RESULT2_NDD" == "OK" && "$PERF_RESULT3_NDD" == "OK" ]]; then
106-
# don't check experimental decoder performance with dynamic mode
107-
if [[ "$PERF_RESULT1" == "OK" && "$PERF_RESULT2" == "OK" && "$PERF_RESULT1_TP" == "OK" && "$PERF_RESULT2_TP" == "OK" && "$PERF_RESULT3" == "OK" && "$PERF_RESULT1_NDD" == "OK" ]]; then
108-
CLEAN_AND_EXIT 0
109-
else
149+
cp -f *.nsys-rep "${ARTIFACTS_DIR}/" 2>/dev/null || true
150+
echo "nsys profiles saved to ${ARTIFACTS_DIR}"
110151
CLEAN_AND_EXIT 1
111-
fi
112152
}
113153
pushd ../..
114154
source ./qa/test_template.sh
115-
popd
155+
popd

0 commit comments

Comments
 (0)