# Source: .github/workflows/semantic_analysis.yml (BlackVectorOps/semantic_firewall, branch main)
# Workflow definition: one job ("analysis") triggered by activity on main.
name: Semantic Analysis

# Run for pushes to main and for pull requests that target main.
on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

# Restrict the workflow token to read-only access to repository contents.
permissions:
  contents: read

jobs:
  analysis:
    name: Run Analysis
    runs-on: ubuntu-latest

    steps:
      - name: Checkout Code
        uses: actions/checkout@v4
        with:
          # Do not keep the token in the local git configuration after checkout.
          persist-credentials: false
          # 0 requests the full history instead of a shallow clone; later steps
          # compare HEAD against an older base commit.
          fetch-depth: 0

      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          # Turn on the action's built-in dependency cache.
          cache: true
          # Quoted so the version stays a string rather than being read as a float.
          go-version: "1.24"

      - name: Build SFW
        # The tool is compiled from this checkout (./cmd/sfw) instead of being
        # downloaded, so the executable used later comes from the workspace source.
        run: |
          SFW_BIN="bin/sfw"
          mkdir -p bin
          CGO_ENABLED=0 go build -o "$SFW_BIN" ./cmd/sfw
          chmod +x "$SFW_BIN"
          # Best-effort smoke test: try --version first, then help.
          # A failure of both is tolerated and does not fail the step.
          if ! "./$SFW_BIN" --version; then
            "./$SFW_BIN" help || true
          fi

      - name: Vendor Dependencies
        # Copy module dependencies into the workspace (./vendor) so they can be
        # reached from inside the sandbox, which mounts only the workspace and
        # not GOMODCACHE (/home/runner/go/pkg/mod).
        run: |
          go mod vendor

      - name: Determine Mode
        id: mode
        # Exposes the step output "mode": BLOCKER for pull requests, check for
        # every other event.
        run: |
          case "${{ github.event_name }}" in
            pull_request) selected_mode="BLOCKER" ;;
            *)            selected_mode="check" ;;
          esac
          echo "mode=$selected_mode" >> "$GITHUB_OUTPUT"

      - name: Prepare Analysis Environment
        id: prep
        # Resolves the base/head commits to compare, checks the base commit out
        # into a worktree inside the workspace, and pre-computes the diff.
        # Outputs: worktree_dir, has_go_files.
        env:
          # Event data is passed through the environment instead of being
          # interpolated into the script text, so it is never parsed as shell.
          EVENT_NAME: ${{ github.event_name }}
          PR_BASE: ${{ github.event.pull_request.base.ref }}
          PUSH_BEFORE: ${{ github.event.before }}
        run: |
          # 1. Determine Refs
          HEAD_REF="HEAD"
          if [[ "$EVENT_NAME" == "pull_request" ]]; then
            if [[ -n "$PR_BASE" ]]; then
              BASE_REF="origin/$PR_BASE"
            else
              # Fallback for local testing with act (PR context not fully populated)
              echo "::warning::PR base ref not available. Falling back to HEAD~1."
              BASE_REF="HEAD~1"
            fi
          else
            # Push event. HEAD is the tip of the pushed branch, so the merge-base
            # with origin/main is normally HEAD itself and would yield an empty
            # diff. Prefer the commit the branch pointed at before the push.
            ZERO_SHA="0000000000000000000000000000000000000000"
            if [[ -n "$PUSH_BEFORE" && "$PUSH_BEFORE" != "$ZERO_SHA" ]] \
               && git cat-file -e "${PUSH_BEFORE}^{commit}" 2>/dev/null; then
              BASE_REF="$PUSH_BEFORE"
            else
              # "before" is missing, all zeros, or not present locally:
              # fall back to the merge-base.
              git fetch origin main --depth=100 2>/dev/null || true
              BASE_REF=$(git merge-base origin/main HEAD 2>/dev/null || echo "HEAD~1")
              # Never compare HEAD with itself.
              if [[ "$(git rev-parse "$BASE_REF" 2>/dev/null || true)" == "$(git rev-parse HEAD)" ]]; then
                BASE_REF="HEAD~1"
              fi
            fi
          fi

          echo "::notice::Base ref: $BASE_REF, Head ref: $HEAD_REF"

          # 2. Setup Base Worktree
          # We create the worktree in the workspace so it is mounted into the sandbox.
          WORKTREE_DIR=".sfw_base"
          # Clean up previous run if exists. Removing the directory alone leaves
          # stale worktree metadata behind, so prune it before adding again.
          rm -rf "$WORKTREE_DIR"
          git worktree prune
          git worktree add --detach "$WORKTREE_DIR" "$BASE_REF"

          echo "worktree_dir=$WORKTREE_DIR" >> "$GITHUB_OUTPUT"

          # 3. Pre-calculate Diff (CRITICAL HARDENING)
          # We run git on the host (where it works) and save the status to a file.
          # The sandbox will simply read this file, avoiding "dubious ownership" errors.
          # -z emits NUL-terminated fields so unusual file names survive intact.
          git diff -z --name-status "$BASE_REF" "$HEAD_REF" > diff_stream.bin

          # Check for Go files to fail fast if none exist
          if git diff --name-only "$BASE_REF" "$HEAD_REF" | grep -q '\.go$'; then
             echo "has_go_files=true" >> "$GITHUB_OUTPUT"
          else
             echo "has_go_files=false" >> "$GITHUB_OUTPUT"
          fi

      # apparmor-utils supplies aa-exec, which sfw's internal sandbox uses for
      # its AppArmor bypass.
      - name: Install Sandbox Dependencies
        run: |
          sudo apt-get update \
            && sudo apt-get install -y apparmor-utils

      - name: Run Semantic Analysis
        # Walks the pre-computed diff (diff_stream.bin), runs `sfw diff` on every
        # changed Go file that exists on both sides, and writes a markdown report
        # to scan_report.md. Fails when the tool errors on any file, or when
        # MODE is BLOCKER and a file's semantic match is below 100%.
        env:
          MODE: ${{ steps.mode.outputs.mode }}
          WORKTREE_DIR: ${{ steps.prep.outputs.worktree_dir }}
          HAS_GO: ${{ steps.prep.outputs.has_go_files }}
          # Use vendored dependencies - sandbox can't access GOMODCACHE
          GOFLAGS: "-mod=vendor"
          GOPROXY: "off"
        run: |
          set -euo pipefail

          REPORT_FILE="scan_report.md"
          # sfw's stderr is kept apart from its stdout so diagnostics cannot
          # corrupt the JSON that is parsed below.
          SFW_STDERR="${RUNNER_TEMP:-/tmp}/sfw_stderr.log"

          # Truncate instead of append so a pre-existing file never leaks into the report.
          echo "## Semantic Analysis Report ($MODE)" > "$REPORT_FILE"
          echo "" >> "$REPORT_FILE"

          if [ "$HAS_GO" != "true" ]; then
              # Written before the table header so it renders as a paragraph,
              # not as a malformed table row.
              echo "No Go files changed." >> "$REPORT_FILE"
              exit 0
          fi

          echo "| File | Status | Match % |" >> "$REPORT_FILE"
          echo "| :--- | :--- | :--- |" >> "$REPORT_FILE"

          ERROR_COUNT=0
          LOGIC_FAIL=0

          # Process the pre-calculated diff stream.
          # Records are NUL-separated: "<status>\0<path>\0", or for renames and
          # copies "<status>\0<old path>\0<new path>\0".
          while IFS= read -r -d '' status; do
            case "$status" in
              R*|C*)
                 IFS= read -r -d '' old_path
                 IFS= read -r -d '' new_path
                 OLD_FILE_REF="$old_path"
                 NEW_FILE_REF="$new_path"
                 ;;
              *)
                 IFS= read -r -d '' path
                 OLD_FILE_REF="$path"
                 NEW_FILE_REF="$path"
                 ;;
            esac

            # Only entries where at least one side is a Go source file are analysed.
            if [[ "$NEW_FILE_REF" != *.go ]] && [[ "$OLD_FILE_REF" != *.go ]]; then continue; fi

            # New side lives in the workspace, old side in the base worktree.
            if [ -f "$NEW_FILE_REF" ]; then
                NEW_FILE="$NEW_FILE_REF"
            else
                NEW_FILE=""
            fi

            OLD_FILE="$WORKTREE_DIR/$OLD_FILE_REF"
            if [ ! -f "$OLD_FILE" ]; then
                OLD_FILE=""
            fi

            if [[ -z "$NEW_FILE" ]] && [[ -z "$OLD_FILE" ]]; then continue; fi

            if [[ -z "$OLD_FILE" ]]; then
                echo "| \`$NEW_FILE_REF\` | New File | N/A |" >> "$REPORT_FILE"
                continue
            fi

            if [[ -z "$NEW_FILE" ]]; then
                echo "| \`$OLD_FILE_REF\` | Deleted | N/A |" >> "$REPORT_FILE"
                continue
            fi

            # Execute SFW diff (sfw handles its own sandboxing internally).
            # stdin is redirected from /dev/null: without it the command inherits
            # the loop's stdin (diff_stream.bin) and could swallow the remaining
            # records.
            if ! OUTPUT=$(./bin/sfw diff "$OLD_FILE" "$NEW_FILE" 2>"$SFW_STDERR" </dev/null); then
                echo "::error::sfw failed to process $NEW_FILE_REF"
                # Show the tool's own diagnostics in the job log.
                cat "$SFW_STDERR" >&2 || true
                ERROR_COUNT=$((ERROR_COUNT + 1))
                continue
            fi

            # Validate JSON
            if ! jq -e . >/dev/null 2>&1 <<<"$OUTPUT"; then
                echo "::error::Invalid JSON output for $NEW_FILE_REF"
                cat "$SFW_STDERR" >&2 || true
                ERROR_COUNT=$((ERROR_COUNT + 1))
                continue
            fi

            # Missing summary fields are treated as 0.
            PCT=$(jq -r '.summary.semantic_match_pct // 0' <<<"$OUTPUT")
            MODIFIED=$(jq -r '.summary.modified // 0' <<<"$OUTPUT")
            # The comparison is done in jq so non-integer percentages are handled.
            IS_BELOW_100=$(jq -r 'if (.summary.semantic_match_pct // 0) < 100 then "true" else "false" end' <<<"$OUTPUT")

            if [ "$IS_BELOW_100" = "true" ]; then
                STATUS_ICON="Modified ($MODIFIED)"
                echo "| \`$NEW_FILE_REF\` | $STATUS_ICON | **$PCT%** |" >> "$REPORT_FILE"

                if [ "$MODE" = "BLOCKER" ]; then
                    echo "::error file=$NEW_FILE_REF::Logic change detected in safe refactor! ($PCT%)"
                    LOGIC_FAIL=1
                fi
            else
                STATUS_ICON="Preserved"
                echo "| \`$NEW_FILE_REF\` | $STATUS_ICON | **$PCT%** |" >> "$REPORT_FILE"
            fi

          done < diff_stream.bin

          # Tool failures take precedence over logic-change failures.
          if [ "$ERROR_COUNT" -gt 0 ]; then
              echo "" >> "$REPORT_FILE"
              echo "**CI FAILED**: Tool execution failures detected." >> "$REPORT_FILE"
              exit 1
          fi

          if [ "$LOGIC_FAIL" -eq 1 ]; then
              echo "" >> "$REPORT_FILE"
              echo "**CI FAILED**: Logic changed in 'semantic-safe' PR." >> "$REPORT_FILE"
              exit 1
          fi

      - name: Publish Analysis Report
        # always(): publish whatever report exists even if an earlier step failed.
        if: always()
        # Appends scan_report.md (written by the analysis step) to the job summary.
        run: |
          report="scan_report.md"
          # Nothing to publish when the analysis never produced a report.
          [ -f "$report" ] || exit 0
          cat "$report" >> "$GITHUB_STEP_SUMMARY"

      - name: Cleanup Worktree
        # always(): remove the base worktree even if an earlier step failed.
        if: always()
        env:
          WORKTREE_DIR: ${{ steps.prep.outputs.worktree_dir }}
        run: |
          # Skip when the directory does not exist (this includes an empty
          # WORKTREE_DIR value).
          [ -d "$WORKTREE_DIR" ] || exit 0
          # Ask git to remove the worktree first; delete the directory directly
          # if that fails.
          if ! git worktree remove --force "$WORKTREE_DIR" 2>/dev/null; then
             rm -rf "$WORKTREE_DIR"
          fi