# executorch-examples/.github/workflows/llm-android.yml at d2b8e7e71c12e0a6c6e3b9708dadf49a2f0412d0 · meta-pytorch/executorch-examples · GitHub

# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Runs the LlamaDemo Android instrumentation tests on an emulator with a
# selectable model preset (see the workflow_dispatch inputs).
name: LlamaDemo Android

on:
  # Pull requests targeting main that touch the Android LLM demo or this
  # workflow file itself.
  pull_request:
    branches:
      - main
    paths:
      - 'llm/android/**'
      - '.github/workflows/llm-android.yml'

  # Manual runs: pick one of the built-in presets, or "custom" together with
  # explicit model and tokenizer URLs.
  workflow_dispatch:
    inputs:
      model_preset:
        description: 'Model preset to use'
        type: choice
        required: true
        default: stories
        options:
          - stories
          - llama
          - qwen3
          - custom
      custom_pte_url:
        description: 'Custom URL for model .pte file (only used when model_preset is custom)'
        type: string
        required: false
      custom_tokenizer_url:
        description: 'Custom URL for tokenizer file (only used when model_preset is custom)'
        type: string
        required: false

# The workflow token only needs read access to repository contents.
permissions:
  contents: read

jobs:
  # Single job: boot an Android emulator and run the LlamaDemo connected tests.
  instrumentation-test:
    name: Instrumentation Test LlamaDemo
    runs-on: 8-core-ubuntu
    env:
      # Emulator image selection; also part of the AVD cache key.
      API_LEVEL: '34'
      ARCH: x86_64
      # Shared by the snapshot-creation step and the test step.
      EMULATOR_OPTIONS: >-
        -no-window -gpu swiftshader_indirect -noaudio
        -no-boot-anim -camera-back none
      # Written into each AVD's config.ini as hw.ramSize by a later step.
      RAM_SIZE: '16384'

    steps:
      # Fetch the repository so the Gradle project under llm/android/LlamaDemo
      # is available to the emulator steps.
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          # No step in this job pushes or fetches after checkout, so do not
          # leave the token in .git/config where PR-controlled build scripts
          # could read it.
          persist-credentials: false

      # Record which model configuration this run uses in the job summary.
      # Inputs are passed through environment variables rather than being
      # interpolated into the script text, so a crafted input value cannot be
      # executed as shell code.
      - name: Write job summary
        env:
          MODEL_PRESET: ${{ inputs.model_preset || 'stories' }}
          CUSTOM_PTE_URL: ${{ inputs.custom_pte_url }}
          CUSTOM_TOKENIZER_URL: ${{ inputs.custom_tokenizer_url }}
        run: |
          {
            echo "## Test Configuration"
            echo "| Parameter | Value |"
            echo "|-----------|-------|"
            printf '| Model Preset | `%s` |\n' "$MODEL_PRESET"
            # The custom URLs are only meaningful for the custom preset.
            if [ "$MODEL_PRESET" = "custom" ]; then
              printf '| Custom PTE URL | `%s` |\n' "$CUSTOM_PTE_URL"
              printf '| Custom Tokenizer URL | `%s` |\n' "$CUSTOM_TOKENIZER_URL"
            fi
          } >> "$GITHUB_STEP_SUMMARY"

      # Install a udev rule that sets the kvm device to group "kvm" with mode
      # 0666, then reload the rules and re-trigger the device so the rule takes
      # effect (presumably so the emulator can use KVM acceleration).
      - name: Enable KVM group perms
        run: |
          KVM_RULE='KERNEL=="kvm", GROUP="kvm", MODE="0666", OPTIONS+="static_node=kvm"'
          KVM_RULE_FILE=/etc/udev/rules.d/99-kvm4all.rules
          printf '%s\n' "$KVM_RULE" | sudo tee "$KVM_RULE_FILE"
          sudo udevadm control --reload-rules
          sudo udevadm trigger --name-match=kvm

      # Install the Temurin distribution of JDK 17 for the Gradle build.
      - name: Set up JDK 17
        uses: actions/setup-java@v4
        with:
          distribution: temurin
          # Quoted so the version is read as a string, not a number.
          java-version: '17'

      # Prepare Gradle for the ./gradlew invocation in the test step. No inputs
      # are passed, so the action runs with its default configuration.
      - name: Setup Gradle
        uses: gradle/actions/setup-gradle@v4

      # Restore a previously created AVD (and adb files) when one exists for
      # this API level / arch / RAM / disk combination. The id is referenced by
      # the next step to decide whether an AVD still has to be created.
      - name: AVD cache
        id: avd-cache
        uses: actions/cache@v4
        with:
          key: avd-${{ env.API_LEVEL }}-${{ env.ARCH }}-ram${{ env.RAM_SIZE }}-disk16G
          path: |
            ~/.android/avd/*
            ~/.android/adb*

      # Cache miss only: run the emulator action once with a trivial script so
      # that an AVD exists for the cache step to save.
      - name: Create AVD and generate snapshot for caching
        if: ${{ steps.avd-cache.outputs.cache-hit != 'true' }}
        uses: reactivecircus/android-emulator-runner@v2
        with:
          arch: ${{ env.ARCH }}
          api-level: ${{ env.API_LEVEL }}
          emulator-options: ${{ env.EMULATOR_OPTIONS }}
          force-avd-creation: true
          disable-animations: false
          working-directory: llm/android/LlamaDemo
          script: 'echo "Generated AVD snapshot for caching."'

      # Force every AVD's config.ini to use RAM_SIZE (job-level env) megabytes
      # of RAM and a 16G data partition. Each key is replaced when an active
      # "key=value" line exists and appended otherwise. Patterns are anchored
      # and dots escaped so that commented-out keys, keys written with spaces
      # around "=", or keys that merely contain the name are handled correctly
      # instead of leaving the setting silently unchanged.
      - name: Configure AVD RAM and disk
        run: |
          AVD_DIR="$HOME/.android/avd"
          for config in "$AVD_DIR"/*.avd/config.ini; do
            # An unmatched glob stays as the literal pattern; skip it.
            [ -f "$config" ] || continue
            echo "Updating config in $config"

            # Update RAM
            if grep -Eq '^[[:space:]]*hw\.ramSize[[:space:]]*=' "$config"; then
              sed -i -E "s/^[[:space:]]*hw\.ramSize[[:space:]]*=.*/hw.ramSize=${RAM_SIZE}/" "$config"
            else
              echo "hw.ramSize=${RAM_SIZE}" >> "$config"
            fi

            # Update disk size to 16GB for large models
            if grep -Eq '^[[:space:]]*disk\.dataPartition\.size[[:space:]]*=' "$config"; then
              sed -i -E 's/^[[:space:]]*disk\.dataPartition\.size[[:space:]]*=.*/disk.dataPartition.size=16G/' "$config"
            else
              echo "disk.dataPartition.size=16G" >> "$config"
            fi
          done

      # Download the model (.pte) and tokenizer for the selected preset into
      # /tmp/llama_models on the runner, and export their filenames as
      # MODEL_FILE / TOKENIZER_FILE for later steps via GITHUB_ENV.
      - name: Download model files
        env:
          MODEL_PRESET: ${{ inputs.model_preset || 'stories' }}
          CUSTOM_PTE_URL: ${{ inputs.custom_pte_url }}
          CUSTOM_TOKENIZER_URL: ${{ inputs.custom_tokenizer_url }}
        run: |
          mkdir -p /tmp/llama_models

          # Determine URLs based on preset; anything unrecognized uses stories.
          case "$MODEL_PRESET" in
            llama)
              PTE_URL="https://huggingface.co/executorch-community/Llama-3.2-1B-ET/resolve/main/llama3_2-1B.pte"
              TOKENIZER_URL="https://huggingface.co/executorch-community/Llama-3.2-1B-ET/resolve/main/tokenizer.model"
              ;;
            qwen3)
              PTE_URL="https://huggingface.co/pytorch/Qwen3-4B-INT8-INT4/resolve/main/model.pte"
              TOKENIZER_URL="https://huggingface.co/pytorch/Qwen3-4B-INT8-INT4/resolve/main/tokenizer.json"
              ;;
            custom)
              PTE_URL="$CUSTOM_PTE_URL"
              TOKENIZER_URL="$CUSTOM_TOKENIZER_URL"
              ;;
            *)
              PTE_URL="https://ossci-android.s3.amazonaws.com/executorch/stories/snapshot-20260114/stories110M.pte"
              TOKENIZER_URL="https://ossci-android.s3.amazonaws.com/executorch/stories/snapshot-20260114/tokenizer.model"
              ;;
          esac

          # The custom URLs are optional workflow inputs; fail with a clear
          # message rather than letting curl fail on an empty URL.
          if [ -z "$PTE_URL" ] || [ -z "$TOKENIZER_URL" ]; then
            echo "::error::model_preset '$MODEL_PRESET' requires both custom_pte_url and custom_tokenizer_url"
            exit 1
          fi

          # Derive local filenames from the URL path only: drop any ?query or
          # #fragment so e.g. "model.pte?download=true" is saved as "model.pte".
          PTE_FILE=$(basename "${PTE_URL%%[?#]*}")
          TOKENIZER_FILE=$(basename "${TOKENIZER_URL%%[?#]*}")

          # Both files land in the same directory; identical names would make
          # the tokenizer overwrite the model.
          if [ "$PTE_FILE" = "$TOKENIZER_FILE" ]; then
            echo "::error::model and tokenizer URLs resolve to the same filename '$PTE_FILE'"
            exit 1
          fi

          echo "Downloading model: $PTE_URL"
          curl -fL --retry 3 --progress-bar -o "/tmp/llama_models/$PTE_FILE" "$PTE_URL"

          echo "Downloading tokenizer: $TOKENIZER_URL"
          curl -fL --retry 3 --progress-bar -o "/tmp/llama_models/$TOKENIZER_FILE" "$TOKENIZER_URL"

          echo "Downloaded files:"
          ls -lh /tmp/llama_models/

          # Export filenames for later steps
          {
            echo "MODEL_FILE=$PTE_FILE"
            echo "TOKENIZER_FILE=$TOKENIZER_FILE"
          } >> "$GITHUB_ENV"

      # Run the connected (on-emulator) tests against the pre-downloaded model.
      # The AVD is reused (force-avd-creation: false) and -no-snapshot-save is
      # prepended to the shared emulator options. MODEL_FILE and TOKENIZER_FILE
      # are read from the environment; the "Download model files" step writes
      # them to GITHUB_ENV.
      - name: Run instrumentation tests
        uses: reactivecircus/android-emulator-runner@v2
        env:
          MODEL_PRESET: ${{ inputs.model_preset || 'stories' }}
        with:
          api-level: ${{ env.API_LEVEL }}
          arch: ${{ env.ARCH }}
          force-avd-creation: false
          emulator-options: -no-snapshot-save ${{ env.EMULATOR_OPTIONS }}
          disable-animations: true
          working-directory: llm/android/LlamaDemo
          # Script outline:
          #   1. Recreate /data/local/tmp/llama on the device and push every
          #      file from /tmp/llama_models into it.
          #   2. Clear logcat, then capture it to /tmp/logcat.txt in the
          #      background, remembering the background PID.
          #   3. Run ./gradlew connectedCheck with the model download skipped
          #      and the preset / model / tokenizer names passed as properties;
          #      keep its exit status in TEST_EXIT_CODE.
          #   4. Stop the logcat capture, print the model configuration and
          #      LLAMA_RESPONSE lines found in it, and write the response text
          #      (without the BEGIN_RESPONSE / END_RESPONSE marker lines) to
          #      /tmp/response.txt for the summary step.
          #   5. Remove the model directory from the device and exit with the
          #      saved Gradle status so post-processing cannot mask a failure.
          # NOTE(review): some versions of android-emulator-runner run each
          # line of `script` in its own `sh -c`; confirm the pinned version
          # keeps shell state ($!, $?, the for loop, the backslash
          # continuations) across lines, otherwise move this into a script file.
          script: |
            adb shell rm -rf /data/local/tmp/llama
            adb shell mkdir -p /data/local/tmp/llama
            echo "=== Pushing pre-downloaded model files to device ==="
            for file in /tmp/llama_models/*; do
              echo "Pushing $(basename "$file")..."
              adb push "$file" /data/local/tmp/llama/
            done
            echo "=== Model directory contents ==="
            adb shell ls -la /data/local/tmp/llama/
            adb logcat -c && adb logcat > /tmp/logcat.txt &
            LOGCAT_PID=$!
            ./gradlew connectedCheck -PskipModelDownload=true \
              -PmodelPreset="$MODEL_PRESET" \
              -Pandroid.testInstrumentationRunnerArguments.modelFile="$MODEL_FILE" \
              -Pandroid.testInstrumentationRunnerArguments.tokenizerFile="$TOKENIZER_FILE"
            TEST_EXIT_CODE=$?
            echo "=== Model directory after Gradle ==="
            adb shell ls -la /data/local/tmp/llama/
            kill $LOGCAT_PID || true
            echo "=== Model configuration used by test ==="
            grep "UIWorkflowTest.*Using model" /tmp/logcat.txt || echo "Model config not found in logcat"
            echo "=== Searching for LLAMA_RESPONSE in logcat ==="
            grep "LLAMA_RESPONSE" /tmp/logcat.txt || echo "No LLAMA_RESPONSE found in logcat"
            grep "LLAMA_RESPONSE" /tmp/logcat.txt | sed 's/.*LLAMA_RESPONSE: //' | grep -v "BEGIN_RESPONSE\|END_RESPONSE" > /tmp/response.txt || true
            echo "=== Response file contents ==="
            cat /tmp/response.txt || echo "Response file empty or not created"
            adb shell rm -rf /data/local/tmp/llama
            exit $TEST_EXIT_CODE

      # Append the model's response (written to /tmp/response.txt by the test
      # step) to the job summary. Runs even when the tests failed. The test
      # step creates the file even when no response lines were found, so an
      # empty file is reported explicitly instead of as an empty code block;
      # a missing file (test step never reached that point) adds nothing.
      - name: Add model response to summary
        if: always()
        run: |
          if [ -s /tmp/response.txt ]; then
            {
              echo ""
              echo "## Model Response"
              echo '```'
              cat /tmp/response.txt
              echo '```'
            } >> "$GITHUB_STEP_SUMMARY"
          elif [ -f /tmp/response.txt ]; then
            {
              echo ""
              echo "## Model Response"
              echo "No model response was captured from logcat."
            } >> "$GITHUB_STEP_SUMMARY"
          fi

      # Keep the captured device log for a week, regardless of whether earlier
      # steps passed or failed.
      - name: Upload logcat
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          path: /tmp/logcat.txt
          name: logcat
          retention-days: 7