@@ -489,22 +489,25 @@ jobs:
489489 name : " gemma3-1b"
490490 use-custom : [false, true]
491491 qconfig : ["4w", "nvfp4"]
492+ runner : ["macos-14-xlarge"]
492493 include :
493494 - model :
494495 id : " google/gemma-4-E2B-it"
495496 name : " gemma4-e2b"
496497 use-custom : true
497- qconfig : " bf16"
498+ qconfig : " 4w"
499+ runner : " macos-15-xlarge"
498500 - model :
499501 id : " google/gemma-4-E2B-it"
500502 name : " gemma4-e2b"
501503 use-custom : false
502- qconfig : " bf16"
504+ qconfig : " 4w"
505+ runner : " macos-15-xlarge"
503506 uses : pytorch/test-infra/.github/workflows/macos_job.yml@main
504507 secrets : inherit
505508 with :
506509 job-name : test-mlx-llm-${{ matrix.model.name }}${{ matrix.use-custom && '-custom' || '' }}-${{ matrix.qconfig }}
507- runner : macos-14-xlarge
510+ runner : ${{ matrix.runner }}
508511 python-version : " 3.12"
509512 submodules : recursive
510513 ref : ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
@@ -532,21 +535,6 @@ jobs:
532535 QEMBEDDING_ARGS=""
533536 fi
534537
535- # 8w currently requires an explicit group size (default 0 fails export
536- # on torchao::dequantize_affine missing out variant).
537- GROUP_SIZE_ARGS=""
538- if [ "${QCONFIG}" = "8w" ]; then
539- GROUP_SIZE_ARGS="--qlinear-group-size 32"
540- fi
541-
542- # "bf16" means no linear quantization — drop --qlinear/--qembedding.
543- QLINEAR_ARGS="--qlinear ${QCONFIG}"
544- if [ "${QCONFIG}" = "bf16" ]; then
545- QLINEAR_ARGS=""
546- QEMBEDDING_ARGS=""
547- GROUP_SIZE_ARGS=""
548- fi
549-
550538 echo "::group::Install ExecuTorch and configure MLX build"
551539 ${CONDA_RUN} python install_executorch.py > /dev/null
552540 ${CONDA_RUN} cmake --preset mlx-release
@@ -573,8 +561,7 @@ jobs:
573561 --model-id "${MODEL_ID}" \
574562 ${MODEL_REVISION:+--revision "${MODEL_REVISION}"} \
575563 --output /tmp/${MODEL_NAME}.pte \
576- ${QLINEAR_ARGS} \
577- ${GROUP_SIZE_ARGS} \
564+ --qlinear ${QCONFIG} \
578565 ${QEMBEDDING_ARGS} \
579566 ${CUSTOM_ARGS}
580567 echo "::endgroup::"
0 commit comments