From cb8dfce3f7a264b609745f9145b680d0ad8ee569 Mon Sep 17 00:00:00 2001 From: "Jiaxiao (mossaka) Zhou" Date: Thu, 12 Feb 2026 23:30:26 +0000 Subject: [PATCH] fix: harden one-shot-token binary against ELF reconnaissance Addresses githubnext/gh-aw-security#279 where cleartext token names were discoverable in the .rodata section via strings/objdump. Changes: - XOR-obfuscate default token names so they don't appear as cleartext in .rodata (decoded at runtime during initialization) - Add -fvisibility=hidden to hide all internal symbols; only getenv and secure_getenv are exported via visibility("default") attribute - Add -s linker flag and strip --strip-unneeded post-build to remove symbol table, debug sections, and build metadata - Add encode-tokens.sh script to regenerate obfuscated byte arrays when default token list changes - Add *.so to .gitignore - Add binutils to Dockerfile BUILD_PKGS for strip command - Update build.sh with verification that no cleartext token names remain in the compiled binary Co-Authored-By: Claude Opus 4.6 (1M context) --- .gitignore | 1 + containers/agent/Dockerfile | 7 +- containers/agent/one-shot-token/README.md | 30 ++++- containers/agent/one-shot-token/build.sh | 19 ++- .../agent/one-shot-token/encode-tokens.sh | 54 +++++++++ .../agent/one-shot-token/one-shot-token.c | 113 +++++++++++++----- 6 files changed, 184 insertions(+), 40 deletions(-) create mode 100755 containers/agent/one-shot-token/encode-tokens.sh diff --git a/.gitignore b/.gitignore index ab3b7078f..4c25f55f8 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,7 @@ coverage/ .nyc_output/ *.swp *.swo +*.so *~ .idea/ diff --git a/containers/agent/Dockerfile b/containers/agent/Dockerfile index 6a74a1556..b08a9ea2d 100644 --- a/containers/agent/Dockerfile +++ b/containers/agent/Dockerfile @@ -68,14 +68,17 @@ RUN chmod +x /usr/local/bin/setup-iptables.sh /usr/local/bin/entrypoint.sh /usr/ # Build one-shot-token LD_PRELOAD library for single-use token access # This prevents tokens 
from being read multiple times (e.g., by malicious code) +# Build flags: -fvisibility=hidden hides internal symbols, -s strips at link time COPY one-shot-token/one-shot-token.c /tmp/one-shot-token.c RUN set -eux; \ - BUILD_PKGS="gcc libc6-dev"; \ + BUILD_PKGS="gcc libc6-dev binutils"; \ apt-get update && \ ( apt-get install -y --no-install-recommends $BUILD_PKGS || \ (rm -rf /var/lib/apt/lists/* && apt-get update && \ apt-get install -y --no-install-recommends $BUILD_PKGS) ) && \ - gcc -shared -fPIC -O2 -Wall -o /usr/local/lib/one-shot-token.so /tmp/one-shot-token.c -ldl -lpthread && \ + gcc -shared -fPIC -fvisibility=hidden -O2 -Wall -s \ + -o /usr/local/lib/one-shot-token.so /tmp/one-shot-token.c -ldl -lpthread && \ + strip --strip-unneeded /usr/local/lib/one-shot-token.so && \ rm /tmp/one-shot-token.c && \ apt-get remove -y $BUILD_PKGS && \ apt-get autoremove -y && \ diff --git a/containers/agent/one-shot-token/README.md b/containers/agent/one-shot-token/README.md index db2d21c01..fbe14e2a6 100644 --- a/containers/agent/one-shot-token/README.md +++ b/containers/agent/one-shot-token/README.md @@ -158,13 +158,14 @@ In chroot mode, the library must be accessible from within the chroot (host file ### In Docker (automatic) -The Dockerfile compiles the library during image build: +The Dockerfile compiles the library during image build with hardened flags: ```dockerfile -RUN gcc -shared -fPIC -O2 -Wall \ +RUN gcc -shared -fPIC -fvisibility=hidden -O2 -Wall -s \ -o /usr/local/lib/one-shot-token.so \ /tmp/one-shot-token.c \ - -ldl -lpthread + -ldl -lpthread && \ + strip --strip-unneeded /usr/local/lib/one-shot-token.so ``` ### Locally (for testing) @@ -175,6 +176,24 @@ RUN gcc -shared -fPIC -O2 -Wall \ This produces `one-shot-token.so` in the current directory. 
+### Binary Hardening + +The build applies several hardening measures to reduce reconnaissance value: + +- **XOR-obfuscated token names**: Default token names are stored as XOR-encoded byte arrays + and decoded at runtime. This prevents extraction via `strings` or `objdump -s -j .rodata`. +- **Hidden symbol visibility**: `-fvisibility=hidden` hides all internal symbols by default. + Only `getenv` and `secure_getenv` are exported (required for LD_PRELOAD interposition). +- **Stripped binary**: `-s` flag and `strip --strip-unneeded` remove the symbol table, + debug sections, and build metadata. + +To regenerate the obfuscated byte arrays after editing the `TOKENS` list in `encode-tokens.sh`: + +```bash +./encode-tokens.sh +# Paste the output into one-shot-token.c, replacing the OBFUSCATED_DEFAULTS section +``` + ## Testing ### Basic Test (Default Tokens) @@ -291,6 +310,7 @@ This library is one layer in AWF's security model: ## Files -- `one-shot-token.c` - Library source code -- `build.sh` - Local build script +- `one-shot-token.c` - Library source code (token names are XOR-obfuscated) +- `build.sh` - Local build script (includes hardening flags and verification) +- `encode-tokens.sh` - Generates XOR-encoded byte arrays for default token names - `README.md` - This documentation diff --git a/containers/agent/one-shot-token/build.sh b/containers/agent/one-shot-token/build.sh index 79227f8fe..337ac7ab8 100644 --- a/containers/agent/one-shot-token/build.sh +++ b/containers/agent/one-shot-token/build.sh @@ -10,19 +10,26 @@ OUTPUT_FILE="${SCRIPT_DIR}/one-shot-token.so" echo "[build] Compiling one-shot-token.so..." 
-# Compile as a shared library with position-independent code +# Compile as a shared library with hardened build flags: # -shared: create a shared library # -fPIC: position-independent code (required for shared libs) +# -fvisibility=hidden: hide all symbols by default (only getenv/secure_getenv +# are exported via __attribute__((visibility("default")))) # -ldl: link with libdl for dlsym # -lpthread: link with pthread for mutex # -O2: optimize for performance # -Wall -Wextra: enable warnings +# -s: strip symbol table and relocation info at link time gcc -shared -fPIC \ - -O2 -Wall -Wextra \ + -fvisibility=hidden \ + -O2 -Wall -Wextra -s \ -o "${OUTPUT_FILE}" \ "${SOURCE_FILE}" \ -ldl -lpthread +# Remove remaining unneeded symbols (debug sections, build metadata) +strip --strip-unneeded "${OUTPUT_FILE}" + echo "[build] Successfully built: ${OUTPUT_FILE}" # Verify it's a valid shared library @@ -32,3 +39,11 @@ else echo "[build] ERROR: Output is not a valid shared object" exit 1 fi + +# Verify hardening: token names should NOT appear in binary +if strings -a "${OUTPUT_FILE}" | grep -qE '(COPILOT_GITHUB_TOKEN|OPENAI_API_KEY|ANTHROPIC_API_KEY)'; then + echo "[build] WARNING: Cleartext token names still present in binary" + exit 1 +else + echo "[build] Verified: no cleartext token names in binary" +fi diff --git a/containers/agent/one-shot-token/encode-tokens.sh b/containers/agent/one-shot-token/encode-tokens.sh new file mode 100755 index 000000000..e59645d96 --- /dev/null +++ b/containers/agent/one-shot-token/encode-tokens.sh @@ -0,0 +1,54 @@ +#!/bin/bash +# Generate XOR-obfuscated byte arrays for default token names. +# Run this script whenever the default token list changes, then paste +# the output into one-shot-token.c (replacing the OBFUSCATED_DEFAULTS section). +# +# The obfuscation prevents token names from appearing as cleartext strings +# in the .rodata section of the compiled binary. 
This is NOT cryptographic +# security -- a determined attacker can reverse the XOR. The goal is to +# defeat casual reconnaissance via strings(1) / objdump. + +set -euo pipefail + +KEY=0x5A + +TOKENS=( + "COPILOT_GITHUB_TOKEN" + "GITHUB_TOKEN" + "GH_TOKEN" + "GITHUB_API_TOKEN" + "GITHUB_PAT" + "GH_ACCESS_TOKEN" + "OPENAI_API_KEY" + "OPENAI_KEY" + "ANTHROPIC_API_KEY" + "CLAUDE_API_KEY" + "CODEX_API_KEY" +) + +echo "/* --- BEGIN GENERATED OBFUSCATED DEFAULTS (key=0x$(printf '%02X' $KEY)) --- */" +echo "/* Re-generate with: containers/agent/one-shot-token/encode-tokens.sh */" +echo "#define NUM_DEFAULT_TOKENS ${#TOKENS[@]}" +echo "" + +for i in "${!TOKENS[@]}"; do + token="${TOKENS[$i]}" + printf "static const unsigned char OBF_%d[] = { " "$i" + for ((j=0; j<${#token}; j++)); do + byte=$(printf '%d' "'${token:$j:1}") + encoded=$((byte ^ KEY)) + if ((j > 0)); then + printf ", " + fi + printf "0x%02x" "$encoded" + done + printf " }; /* length=%d */\n" "${#token}" +done + +echo "" +echo "static const struct obf_entry OBFUSCATED_DEFAULTS[${#TOKENS[@]}] = {" +for i in "${!TOKENS[@]}"; do + echo " { OBF_${i}, sizeof(OBF_${i}) }," +done +echo "};" +echo "/* --- END GENERATED OBFUSCATED DEFAULTS --- */" diff --git a/containers/agent/one-shot-token/one-shot-token.c b/containers/agent/one-shot-token/one-shot-token.c index 3b8cda82b..4d343329e 100644 --- a/containers/agent/one-shot-token/one-shot-token.c +++ b/containers/agent/one-shot-token/one-shot-token.c @@ -10,7 +10,12 @@ * AWF_ONE_SHOT_TOKENS - Comma-separated list of token names to protect * If not set, uses built-in defaults * - * Compile: gcc -shared -fPIC -o one-shot-token.so one-shot-token.c -ldl + * Build hardening: + * Default token names are XOR-obfuscated to prevent cleartext extraction + * via strings(1) or objdump. Internal symbols use hidden visibility. + * Binary should be stripped after compilation (see build.sh / Dockerfile). 
+ * + * Compile: gcc -shared -fPIC -fvisibility=hidden -o one-shot-token.so one-shot-token.c -ldl -lpthread * Usage: LD_PRELOAD=/path/to/one-shot-token.so ./your-program */ @@ -22,26 +27,70 @@ #include #include -/* Default sensitive token environment variable names */ -static const char *DEFAULT_SENSITIVE_TOKENS[] = { - /* GitHub tokens */ - "COPILOT_GITHUB_TOKEN", - "GITHUB_TOKEN", - "GH_TOKEN", - "GITHUB_API_TOKEN", - "GITHUB_PAT", - "GH_ACCESS_TOKEN", - /* OpenAI tokens */ - "OPENAI_API_KEY", - "OPENAI_KEY", - /* Anthropic/Claude tokens */ - "ANTHROPIC_API_KEY", - "CLAUDE_API_KEY", - /* Codex tokens */ - "CODEX_API_KEY", - NULL +/* -------------------------------------------------------------------------- + * Obfuscated default token names + * + * Token names are XOR-encoded so they do not appear as cleartext in the + * .rodata section. This is NOT cryptographic protection -- a determined + * attacker can reverse the XOR. The goal is to prevent trivial discovery + * via `strings`, `objdump -s -j .rodata`, or similar reconnaissance. 
+ * + * Re-generate with: containers/agent/one-shot-token/encode-tokens.sh + * -------------------------------------------------------------------------- */ + +#define OBF_KEY 0x5A + +/* Entry in the obfuscated defaults table */ +struct obf_entry { + const unsigned char *data; + size_t len; }; +/* --- BEGIN GENERATED OBFUSCATED DEFAULTS (key=0x5A) --- */ +/* Re-generate with: containers/agent/one-shot-token/encode-tokens.sh */ +#define NUM_DEFAULT_TOKENS 11 + +static const unsigned char OBF_0[] = { 0x19, 0x15, 0x0a, 0x13, 0x16, 0x15, 0x0e, 0x05, 0x1d, 0x13, 0x0e, 0x12, 0x0f, 0x18, 0x05, 0x0e, 0x15, 0x11, 0x1f, 0x14 }; /* length=20 */ +static const unsigned char OBF_1[] = { 0x1d, 0x13, 0x0e, 0x12, 0x0f, 0x18, 0x05, 0x0e, 0x15, 0x11, 0x1f, 0x14 }; /* length=12 */ +static const unsigned char OBF_2[] = { 0x1d, 0x12, 0x05, 0x0e, 0x15, 0x11, 0x1f, 0x14 }; /* length=8 */ +static const unsigned char OBF_3[] = { 0x1d, 0x13, 0x0e, 0x12, 0x0f, 0x18, 0x05, 0x1b, 0x0a, 0x13, 0x05, 0x0e, 0x15, 0x11, 0x1f, 0x14 }; /* length=16 */ +static const unsigned char OBF_4[] = { 0x1d, 0x13, 0x0e, 0x12, 0x0f, 0x18, 0x05, 0x0a, 0x1b, 0x0e }; /* length=10 */ +static const unsigned char OBF_5[] = { 0x1d, 0x12, 0x05, 0x1b, 0x19, 0x19, 0x1f, 0x09, 0x09, 0x05, 0x0e, 0x15, 0x11, 0x1f, 0x14 }; /* length=15 */ +static const unsigned char OBF_6[] = { 0x15, 0x0a, 0x1f, 0x14, 0x1b, 0x13, 0x05, 0x1b, 0x0a, 0x13, 0x05, 0x11, 0x1f, 0x03 }; /* length=14 */ +static const unsigned char OBF_7[] = { 0x15, 0x0a, 0x1f, 0x14, 0x1b, 0x13, 0x05, 0x11, 0x1f, 0x03 }; /* length=10 */ +static const unsigned char OBF_8[] = { 0x1b, 0x14, 0x0e, 0x12, 0x08, 0x15, 0x0a, 0x13, 0x19, 0x05, 0x1b, 0x0a, 0x13, 0x05, 0x11, 0x1f, 0x03 }; /* length=17 */ +static const unsigned char OBF_9[] = { 0x19, 0x16, 0x1b, 0x0f, 0x1e, 0x1f, 0x05, 0x1b, 0x0a, 0x13, 0x05, 0x11, 0x1f, 0x03 }; /* length=14 */ +static const unsigned char OBF_10[] = { 0x19, 0x15, 0x1e, 0x1f, 0x02, 0x05, 0x1b, 0x0a, 0x13, 0x05, 0x11, 0x1f, 0x03 }; /* length=13 */ + 
+static const struct obf_entry OBFUSCATED_DEFAULTS[11] = { + { OBF_0, sizeof(OBF_0) }, + { OBF_1, sizeof(OBF_1) }, + { OBF_2, sizeof(OBF_2) }, + { OBF_3, sizeof(OBF_3) }, + { OBF_4, sizeof(OBF_4) }, + { OBF_5, sizeof(OBF_5) }, + { OBF_6, sizeof(OBF_6) }, + { OBF_7, sizeof(OBF_7) }, + { OBF_8, sizeof(OBF_8) }, + { OBF_9, sizeof(OBF_9) }, + { OBF_10, sizeof(OBF_10) }, +}; +/* --- END GENERATED OBFUSCATED DEFAULTS --- */ + +/** + * Decode an obfuscated entry into a newly allocated string. + * Returns NULL on allocation failure. + */ +static char *decode_obf(const struct obf_entry *entry) { + char *decoded = malloc(entry->len + 1); + if (decoded == NULL) return NULL; + for (size_t i = 0; i < entry->len; i++) { + decoded[i] = (char)(entry->data[i] ^ OBF_KEY); + } + decoded[entry->len] = '\0'; + return decoded; +} + /* Maximum number of tokens we can track (for static allocation). This limit * balances memory usage with practical needs - 100 tokens should be more than * sufficient for any reasonable use case while keeping memory overhead low. 
*/ @@ -106,7 +155,7 @@ static void init_token_list(void) { /* Get the configuration from environment */ const char *config = real_getenv("AWF_ONE_SHOT_TOKENS"); - + if (config != NULL && config[0] != '\0') { /* Parse comma-separated token list using strtok_r for thread safety */ char *config_copy = strdup(config); @@ -120,7 +169,7 @@ static void init_token_list(void) { while (token != NULL && num_tokens < MAX_TOKENS) { /* Trim leading whitespace */ while (*token && isspace((unsigned char)*token)) token++; - + /* Trim trailing whitespace (only if string is non-empty) */ size_t token_len = strlen(token); if (token_len > 0) { @@ -163,11 +212,11 @@ static void init_token_list(void) { return; } } - + /* Use default token list (when AWF_ONE_SHOT_TOKENS is unset, empty, or parsed to zero tokens) */ - /* Note: num_tokens should be 0 when we reach here */ - for (int i = 0; DEFAULT_SENSITIVE_TOKENS[i] != NULL && num_tokens < MAX_TOKENS; i++) { - sensitive_tokens[num_tokens] = strdup(DEFAULT_SENSITIVE_TOKENS[i]); + /* Decode obfuscated defaults at runtime */ + for (int i = 0; i < NUM_DEFAULT_TOKENS && num_tokens < MAX_TOKENS; i++) { + sensitive_tokens[num_tokens] = decode_obf(&OBFUSCATED_DEFAULTS[i]); if (sensitive_tokens[num_tokens] == NULL) { fprintf(stderr, "[one-shot-token] ERROR: Failed to allocate memory for default token name\n"); /* Clean up previously allocated tokens */ @@ -212,16 +261,16 @@ static int get_token_index(const char *name) { */ static const char *format_token_value(const char *value) { static char formatted[8]; /* "abcd..." + null terminator */ - + if (value == NULL) { return "NULL"; } - + size_t len = strlen(value); if (len == 0) { return "(empty)"; } - + if (len <= 4) { /* If 4 chars or less, just show it all with ... */ snprintf(formatted, sizeof(formatted), "%s...", value); @@ -229,7 +278,7 @@ static const char *format_token_value(const char *value) { /* Show first 4 chars + ... 
*/ snprintf(formatted, sizeof(formatted), "%.4s...", value); } - + return formatted; } @@ -245,6 +294,7 @@ static const char *format_token_value(const char *value) { * * For all other variables: passes through to real getenv */ +__attribute__((visibility("default"))) char *getenv(const char *name) { init_real_getenv(); @@ -279,7 +329,7 @@ char *getenv(const char *name) { /* Unset the variable from the environment so /proc/self/environ is cleared */ unsetenv(name); - fprintf(stderr, "[one-shot-token] Token %s accessed and cached (value: %s)\n", + fprintf(stderr, "[one-shot-token] Token %s accessed and cached (value: %s)\n", name, format_token_value(token_cache[token_idx])); result = token_cache[token_idx]; @@ -309,6 +359,7 @@ char *getenv(const char *name) { * * For all other variables: passes through to real secure_getenv (or getenv if unavailable) */ +__attribute__((visibility("default"))) char *secure_getenv(const char *name) { init_real_secure_getenv(); init_real_getenv(); @@ -343,7 +394,7 @@ char *secure_getenv(const char *name) { /* Unset the variable from the environment so /proc/self/environ is cleared */ unsetenv(name); - fprintf(stderr, "[one-shot-token] Token %s accessed and cached (value: %s) (via secure_getenv)\n", + fprintf(stderr, "[one-shot-token] Token %s accessed and cached (value: %s) (via secure_getenv)\n", name, format_token_value(token_cache[token_idx])); result = token_cache[token_idx];