Skip to content

Commit 5b9154f

Browse files
authored
Add model response output to GitHub job summary and support multiple model presets (#150)
Add model response extraction from instrumentation tests to GitHub job summary. Support configurable model presets (stories, llama, qwen3, custom) with proper filename handling. Pass model filenames to instrumentation tests via Gradle arguments for preset-aware testing.
1 parent c77bcfe commit 5b9154f

6 files changed

Lines changed: 279 additions & 45 deletions

File tree

.github/workflows/llm-android.yml

Lines changed: 63 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ jobs:
8383
path: |
8484
~/.android/avd/*
8585
~/.android/adb*
86-
key: avd-${{ env.API_LEVEL }}-${{ env.ARCH }}-ram${{ env.RAM_SIZE }}
86+
key: avd-${{ env.API_LEVEL }}-${{ env.ARCH }}-ram${{ env.RAM_SIZE }}-disk16G
8787

8888
- name: Create AVD and generate snapshot for caching
8989
if: steps.avd-cache.outputs.cache-hit != 'true'
@@ -97,37 +97,88 @@ jobs:
9797
working-directory: llm/android/LlamaDemo
9898
script: echo "Generated AVD snapshot for caching."
9999

100-
- name: Configure AVD RAM
100+
- name: Configure AVD RAM and disk
101101
run: |
102102
AVD_DIR="$HOME/.android/avd"
103103
for config in "$AVD_DIR"/*.avd/config.ini; do
104104
if [ -f "$config" ]; then
105-
echo "Updating RAM in $config"
105+
echo "Updating config in $config"
106+
# Update RAM
106107
sed -i 's/hw.ramSize=.*/hw.ramSize=${{ env.RAM_SIZE }}/' "$config" || true
107108
grep -q "hw.ramSize" "$config" || echo "hw.ramSize=${{ env.RAM_SIZE }}" >> "$config"
109+
# Update disk size to 16GB for large models
110+
sed -i 's/disk.dataPartition.size=.*/disk.dataPartition.size=16G/' "$config" || true
111+
grep -q "disk.dataPartition.size" "$config" || echo "disk.dataPartition.size=16G" >> "$config"
108112
fi
109113
done
110114
111-
- name: Run instrumentation tests
112-
uses: reactivecircus/android-emulator-runner@v2
115+
- name: Download model files
113116
env:
114117
MODEL_PRESET: ${{ inputs.model_preset || 'stories' }}
115118
CUSTOM_PTE_URL: ${{ inputs.custom_pte_url }}
116119
CUSTOM_TOKENIZER_URL: ${{ inputs.custom_tokenizer_url }}
120+
run: |
121+
mkdir -p /tmp/llama_models
122+
123+
# Determine URLs based on preset
124+
case "$MODEL_PRESET" in
125+
llama)
126+
PTE_URL="https://huggingface.co/executorch-community/Llama-3.2-1B-ET/resolve/main/llama3_2-1B.pte"
127+
TOKENIZER_URL="https://huggingface.co/executorch-community/Llama-3.2-1B-ET/resolve/main/tokenizer.model"
128+
;;
129+
qwen3)
130+
PTE_URL="https://huggingface.co/pytorch/Qwen3-4B-INT8-INT4/resolve/main/model.pte"
131+
TOKENIZER_URL="https://huggingface.co/pytorch/Qwen3-4B-INT8-INT4/resolve/main/tokenizer.json"
132+
;;
133+
custom)
134+
PTE_URL="$CUSTOM_PTE_URL"
135+
TOKENIZER_URL="$CUSTOM_TOKENIZER_URL"
136+
;;
137+
*)
138+
PTE_URL="https://ossci-android.s3.amazonaws.com/executorch/stories/snapshot-20260114/stories110M.pte"
139+
TOKENIZER_URL="https://ossci-android.s3.amazonaws.com/executorch/stories/snapshot-20260114/tokenizer.model"
140+
;;
141+
esac
142+
143+
PTE_FILE=$(basename "$PTE_URL")
144+
TOKENIZER_FILE=$(basename "$TOKENIZER_URL")
145+
146+
echo "Downloading model: $PTE_URL"
147+
curl -fL --progress-bar -o "/tmp/llama_models/$PTE_FILE" "$PTE_URL"
148+
149+
echo "Downloading tokenizer: $TOKENIZER_URL"
150+
curl -fL --progress-bar -o "/tmp/llama_models/$TOKENIZER_FILE" "$TOKENIZER_URL"
151+
152+
echo "Downloaded files:"
153+
ls -lh /tmp/llama_models/
154+
155+
# Export filenames for later steps
156+
echo "MODEL_FILE=$PTE_FILE" >> $GITHUB_ENV
157+
echo "TOKENIZER_FILE=$TOKENIZER_FILE" >> $GITHUB_ENV
158+
159+
- name: Run instrumentation tests
160+
uses: reactivecircus/android-emulator-runner@v2
161+
env:
162+
MODEL_PRESET: ${{ inputs.model_preset || 'stories' }}
117163
with:
118164
api-level: ${{ env.API_LEVEL }}
119165
arch: ${{ env.ARCH }}
120166
force-avd-creation: false
121167
emulator-options: -no-snapshot-save ${{ env.EMULATOR_OPTIONS }}
122168
disable-animations: true
123169
working-directory: llm/android/LlamaDemo
124-
script: |
125-
adb shell rm -rf /data/local/tmp/llama
126-
adb shell mkdir -p /data/local/tmp/llama
127-
adb logcat -c && adb logcat > /tmp/logcat.txt &
128-
LOGCAT_PID=$!
129-
if [ "$MODEL_PRESET" = "custom" ]; then GRADLE_ARGS="-PmodelPreset=$MODEL_PRESET -PcustomPteUrl=$CUSTOM_PTE_URL -PcustomTokenizerUrl=$CUSTOM_TOKENIZER_URL"; else GRADLE_ARGS="-PmodelPreset=$MODEL_PRESET"; fi
130-
./gradlew connectedCheck $GRADLE_ARGS; TEST_EXIT_CODE=$?; kill $LOGCAT_PID || true; exit $TEST_EXIT_CODE
170+
script: bash ./scripts/run-ci-tests.sh "$MODEL_PRESET" "$MODEL_FILE" "$TOKENIZER_FILE"
171+
172+
- name: Add model response to summary
173+
if: always()
174+
run: |
175+
if [ -f /tmp/response.txt ]; then
176+
echo "" >> $GITHUB_STEP_SUMMARY
177+
echo "## Model Response" >> $GITHUB_STEP_SUMMARY
178+
echo '```' >> $GITHUB_STEP_SUMMARY
179+
cat /tmp/response.txt >> $GITHUB_STEP_SUMMARY
180+
echo '```' >> $GITHUB_STEP_SUMMARY
181+
fi
131182
132183
- name: Upload logcat
133184
if: always()

llm/android/LlamaDemo/README.md

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -228,15 +228,39 @@ adb push tokenizer.model /data/local/tmp/llama
228228

229229
### Running Tests
230230

231-
Run all instrumentation tests:
231+
The easiest way to run instrumentation tests is using model presets, which automatically download the model and tokenizer files:
232+
232233
```sh
233-
./gradlew connectedAndroidTest
234+
# Run with stories model (default, smallest and fastest)
235+
./gradlew connectedCheck -PmodelPreset=stories
236+
237+
# Run with Llama 3.2 1B model
238+
./gradlew connectedCheck -PmodelPreset=llama
239+
240+
# Run with Qwen3 4B model
241+
./gradlew connectedCheck -PmodelPreset=qwen3
242+
243+
# Run with custom model URLs
244+
./gradlew connectedCheck -PmodelPreset=custom \
245+
-PcustomPteUrl=https://example.com/model.pte \
246+
-PcustomTokenizerUrl=https://example.com/tokenizer.model
247+
248+
# Skip model download (use existing files on device)
249+
./gradlew connectedCheck -PmodelPreset=stories -PskipModelDownload=true
234250
```
235251

252+
Available presets:
253+
| Preset | Model | Description |
254+
|--------|-------|-------------|
255+
| `stories` | stories110M | Tiny model for quick testing |
256+
| `llama` | Llama 3.2 1B | Production-quality Llama model |
257+
| `qwen3` | Qwen3 4B | Qwen3 model with INT8/INT4 quantization |
258+
| `custom` | User-provided | Specify custom URLs for model and tokenizer |
259+
236260
Run a specific test class:
237261
```sh
238-
./gradlew connectedAndroidTest -Pandroid.testInstrumentationRunnerArguments.class=com.example.executorchllamademo.SanityCheck
239-
./gradlew connectedAndroidTest -Pandroid.testInstrumentationRunnerArguments.class=com.example.executorchllamademo.UIWorkflowTest
262+
./gradlew connectedCheck -PmodelPreset=stories -Pandroid.testInstrumentationRunnerArguments.class=com.example.executorchllamademo.SanityCheck
263+
./gradlew connectedCheck -PmodelPreset=stories -Pandroid.testInstrumentationRunnerArguments.class=com.example.executorchllamademo.UIWorkflowTest
240264
```
241265

242266
## Reporting Issues

llm/android/LlamaDemo/app/build.gradle.kts

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ plugins {
1212
}
1313

1414
// Model files configuration for instrumentation tests
15-
// Supported presets: stories, llama, custom
15+
// Supported presets: stories, llama, qwen3, custom
1616
val modelPreset: String = (project.findProperty("modelPreset") as? String) ?: "stories"
1717

1818
// Preset configurations
@@ -62,6 +62,14 @@ fun execCmdWithExitCode(vararg args: String): Pair<Int, String> {
6262
return Pair(exitCode, output)
6363
}
6464

65+
// Streaming version that shows output in real-time (for long-running commands).
// The child process is started with inheritIO(), so it shares this process's
// stdin/stdout/stderr and none of its output is captured; the call blocks until
// the command exits and returns its exit code.
// NOTE(review): when the build runs inside the Gradle daemon, inherited output
// may go to the daemon's streams rather than the client console — confirm the
// progress output is actually visible in CI logs.
66+
fun execCmdStreaming(vararg args: String): Int {
67+
val process = ProcessBuilder(*args)
68+
.inheritIO()
69+
.start()
70+
return process.waitFor()
71+
}
72+
6573
tasks.register("pushModelFiles") {
6674
description = "Download model files and push to connected Android device if not present"
6775
group = "verification"
@@ -84,17 +92,17 @@ tasks.register("pushModelFiles") {
8492
tokenizerUrl = customTokenizerUrl ?: throw GradleException("customTokenizerUrl is required when modelPreset is 'custom'")
8593
verifyChecksum = false
8694
} else {
87-
val preset = modelPresets[modelPreset] ?: throw GradleException("Unknown model preset: $modelPreset. Valid options: stories, llama, custom")
95+
val preset = modelPresets[modelPreset] ?: throw GradleException("Unknown model preset: $modelPreset. Valid options: ${modelPresets.keys.joinToString(", ")}, custom")
8896
val baseUrl = preset["baseUrl"] as String
8997
pteUrl = "$baseUrl/${preset["pteFile"]}"
9098
tokenizerUrl = "$baseUrl/${preset["tokenizerFile"]}"
9199
verifyChecksum = preset["verifyChecksum"] as Boolean
92100
}
93101

94-
// Files to download: source URL -> target name on device
102+
// Files to download: source URL -> target name on device (keep original filenames)
95103
val filesToDownload = mapOf(
96-
pteUrl to "model.pte",
97-
tokenizerUrl to "tokenizer.model"
104+
pteUrl to pteUrl.substringAfterLast("/"),
105+
tokenizerUrl to tokenizerUrl.substringAfterLast("/")
98106
)
99107

100108
// Check if adb is available
@@ -130,13 +138,11 @@ tasks.register("pushModelFiles") {
130138
val localPath = "$tempDir/$targetName"
131139
val devicePath = "$deviceModelDir/$targetName"
132140

133-
// Download file
141+
// Download file with progress indicator
134142
logger.lifecycle("Downloading from $sourceUrl...")
135-
val (dlCode, dlOutput) = execCmdWithExitCode(
136-
"curl", "-fL", "-o", localPath, sourceUrl
137-
)
143+
val dlCode = execCmdStreaming("curl", "-fL", "--progress-bar", "-o", localPath, sourceUrl)
138144
if (dlCode != 0) {
139-
throw GradleException("Failed to download from $sourceUrl: $dlOutput")
145+
throw GradleException("Failed to download from $sourceUrl")
140146
}
141147

142148
// Verify checksum if enabled and available (only for stories preset)
@@ -173,11 +179,11 @@ tasks.register("pushModelFiles") {
173179
}
174180
}
175181

176-
// Push to device
182+
// Push to device with progress
177183
logger.lifecycle("Pushing $targetName to device...")
178-
val (pushCode, pushOutput) = execCmdWithExitCode(adbPath, "push", localPath, devicePath)
184+
val pushCode = execCmdStreaming(adbPath, "push", localPath, devicePath)
179185
if (pushCode != 0) {
180-
throw GradleException("Failed to push $targetName to device: $pushOutput")
186+
throw GradleException("Failed to push $targetName to device")
181187
}
182188
logger.lifecycle("Successfully pushed $targetName")
183189
}

llm/android/LlamaDemo/app/src/androidTest/java/com/example/executorchllamademo/SanityCheck.java

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,28 +11,52 @@
1111
import static org.junit.Assert.assertEquals;
1212
import static org.junit.Assert.assertFalse;
1313

14+
import android.os.Bundle;
1415
import androidx.test.ext.junit.runners.AndroidJUnit4;
16+
import androidx.test.platform.app.InstrumentationRegistry;
1517
import java.io.File;
1618
import java.util.ArrayList;
1719
import java.util.List;
20+
import org.junit.Before;
1821
import org.junit.Test;
1922
import org.junit.runner.RunWith;
2023
import org.pytorch.executorch.extension.llm.LlmCallback;
2124
import org.pytorch.executorch.extension.llm.LlmModule;
2225

26+
/**
27+
* Sanity check test for model loading and generation.
28+
*
29+
* Model filenames can be configured via instrumentation arguments:
30+
* - modelFile: name of the .pte file (default: stories110M.pte)
31+
* - tokenizerFile: name of the tokenizer file (default: tokenizer.model)
32+
*/
2333
@RunWith(AndroidJUnit4.class)
2434
public class SanityCheck implements LlmCallback {
2535

2636
private static final String RESOURCE_PATH = "/data/local/tmp/llama/";
27-
private static final String TOKENIZER_PATH = "tokenizer.model";
28-
private static final String MODEL_PATH = "model.pte";
37+
38+
// Default filenames (stories preset)
39+
private static final String DEFAULT_MODEL_FILE = "stories110M.pte";
40+
private static final String DEFAULT_TOKENIZER_FILE = "tokenizer.model";
41+
42+
private String modelFile;
43+
private String tokenizerFile;
2944

3045
private final List<String> results = new ArrayList<>();
3146

47+
/**
 * Resolves the model and tokenizer filenames before each test.
 *
 * <p>Reads the {@code modelFile} and {@code tokenizerFile} instrumentation arguments, falling
 * back to {@code DEFAULT_MODEL_FILE} and {@code DEFAULT_TOKENIZER_FILE} (the stories preset)
 * when an argument is absent, and logs the chosen names under the {@code SanityCheck} tag.
 */
@Before
48+
public void setUp() {
49+
// Read model filenames from instrumentation arguments
50+
Bundle args = InstrumentationRegistry.getArguments();
51+
modelFile = args.getString("modelFile", DEFAULT_MODEL_FILE);
52+
tokenizerFile = args.getString("tokenizerFile", DEFAULT_TOKENIZER_FILE);
53+
android.util.Log.i("SanityCheck", "Using model: " + modelFile + ", tokenizer: " + tokenizerFile);
54+
}
55+
3256
@Test
3357
public void testLoadAndGenerate() {
34-
String tokenizerPath = RESOURCE_PATH + TOKENIZER_PATH;
35-
File model = new File(RESOURCE_PATH + MODEL_PATH);
58+
String tokenizerPath = RESOURCE_PATH + tokenizerFile;
59+
File model = new File(RESOURCE_PATH + modelFile);
3660
LlmModule mModule = new LlmModule(model.getPath(), tokenizerPath, 0.8f);
3761

3862
int loadResult = mModule.load();

0 commit comments

Comments
 (0)