Skip to content

Commit 4237d17

Browse files
committed
remove unused env var
1 parent 855eb93 commit 4237d17

2 files changed

Lines changed: 2 additions & 4 deletions

File tree

.ci/scripts/test_model_e2e.sh

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -355,8 +355,6 @@ EOF
355355
;;
356356
qwen3_5_moe)
357357
RUNNER_ARGS="$RUNNER_ARGS --tokenizer_path ${MODEL_DIR}/$TOKENIZER_FILE --prompt 'What is the capital of France?' --max_new_tokens 128 --temperature 0 --cuda_graph"
358-
# CUDA graph capture requires cudaMallocAsync backend for stream-ordered allocations
359-
export PYTORCH_CUDA_ALLOC_CONF=backend:cudaMallocAsync
360358
;;
361359
voxtral_realtime)
362360
RUNNER_ARGS="--model_path ${MODEL_DIR}/model.pte --tokenizer_path ${MODEL_DIR}/$TOKENIZER_FILE --preprocessor_path ${MODEL_DIR}/$PREPROCESSOR --audio_path ${MODEL_DIR}/$AUDIO_FILE --temperature 0"

examples/models/qwen3_5_moe/main.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -133,8 +133,8 @@ int main(int argc, char** argv) {
133133

134134
// Use prefill method for T>=2, decode method for T=1
135135
// (prefill was exported with min seq_len=2)
136-
std::string run_method = prefill_method;
137-
if (dual_method && num_prompt_tokens == 1) {
136+
std::string run_method = "prefill";
137+
if (num_prompt_tokens == 1) {
138138
run_method = "decode";
139139
}
140140

0 commit comments

Comments
 (0)