# Source: JohT/code-graph-analysis-pipeline, file .github/workflows/public-analyze-code-graph.yml (branch: main)

# This is the public version of the code graph analysis workflow that can be used by other projects.
name: Code Graph Analysis

on:
  workflow_call:
    inputs:
      # Name of the analysis (used for folder names inside the workflow).
      analysis-name:
        type: string
        required: true
        description: >
          The name of the project to analyze.
          Example: MyProject-1.0.0
      # What to analyze: uploaded artifacts, Maven artifacts, uploaded sources and/or a repository.
      artifacts-upload-name:
        type: string
        required: false
        default: ''
        description: >
          The name of the artifacts uploaded with 'actions/upload-artifact'
          containing the content of the 'artifacts' directory for the analysis.
          Use it to analyze Java JARs, WARs, EARs, etc.
      maven-artifacts:
        type: string
        required: false
        default: ''
        description: >
          Comma-separated list of Maven coordinates (groupId:artifactId:version)
          to download from Maven Central for the analysis.
          Example: 'org.apache.commons:commons-lang3:3.12.0,com.google.guava:guava:31.1-jre'
      sources-upload-name:
        type: string
        required: false
        default: ''
        description: >
          The name of the sources uploaded with 'actions/upload-artifact'
          containing the content of the 'source' directory for the analysis.
          Also supports sub-folders for multiple source code bases.
          Please use 'include-hidden-files: true' if you also want to upload the git history.
          Note: JavaScript dependencies will NOT be installed automatically when using this option. This needs to be done before uploading the sources.
      source-repository:
        type: string
        required: false
        default: ''
        description: >
          The URL of the source repository to analyze. For now, only GitHub repositories are supported.
          This can be used instead of 'sources-upload-name' to directly analyze a repository without uploading artifacts first.
          It can also be used in addition to 'sources-upload-name' to analyze both uploaded sources and a repository.
          If specified, JavaScript dependencies will be installed automatically if a package.json file is found in the repository.
      source-repository-branch:
        type: string
        required: false
        default: ''
        description: >
          The branch, tag or SHA of the source repository to checkout.
          Default: default branch of the repository
      source-repository-history-only:
        type: string
        required: false
        default: 'false'
        description: >
          Whether to clone the source repository as a bare repository ("true") or not ("false", default).
          Bare repositories do not have a working directory and are useful for git history analysis when the sources are not needed.
      # How to analyze: pipeline version, script arguments and tuning options.
      ref:
        type: string
        required: false
        default: ''
        description: >
          The branch, tag or SHA of the code-graph-analysis-pipeline to checkout.
          Default: "main"
      analysis-arguments:
        type: string
        required: false
        default: '--profile Neo4j-latest-low-memory'
        description: >
          The arguments to pass to the analysis script.
          Default: '--profile Neo4j-latest-low-memory'
      domain:
        type: string
        required: false
        default: ''
        description: >
          The name of an analysis domain to run.
          Must match a subdirectory name in the 'domains/' directory
          (e.g. 'anomaly-detection', 'external-dependencies').
          When set, only that domain's report scripts run;
          core reports from 'scripts/reports/' and other domains are skipped.
          Can be combined with 'analysis-arguments' to further narrow the reports.
          Default: '' (all domains and reports run unchanged)
      typescript-scan-heap-memory:
        type: string
        required: false
        default: '4096'
        description: >
          The heap memory size in MB to use for the TypeScript code scans (default=4096).
          This value is only used for the TypeScript code scans and is ignored for other scans.
      retention-days:
        type: number
        required: false
        default: 5
        description: >
          The number of days to keep the uploaded artifacts.
          Default: 5
      jupyter-pdf:
        type: string
        required: false
        default: 'true'
        description: >
          Enable PDF generation for Jupyter Notebooks ("true") or disable it ("false").
      use-venv_virtual_python_environment:
        type: string
        required: false
        default: 'false'
        description: >
          Use venv for virtual Python environments instead of Conda ("true") or not ("false", default).
    outputs:
      # Forwards the artifact name that the job "analyze-code-graph" publishes as its output.
      uploaded-analysis-results:
        value: ${{ jobs.analyze-code-graph.outputs.uploaded-analysis-results-artifact-name }}
        description: >
          The name of the artifact uploaded with 'actions/upload-artifact'
          containing all analysis results.

jobs:
  analyze-code-graph:
    # The runner image is taken from the matrix so that it is declared in exactly one place.
    runs-on: ${{ matrix.os }}
    outputs:
      # Re-exports the artifact name computed by the step "set-analysis-results-artifact-name".
      uploaded-analysis-results-artifact-name: ${{ steps.set-analysis-results-artifact-name.outputs.uploaded-analysis-results-artifact-name }}
    strategy:
      matrix:
        include:
          # Version numbers are quoted so that YAML keeps them as strings.
          # An unquoted 3.10, for example, would be read as the number 3.1.
          - os: ubuntu-22.04
            java: '21'
            python: '3.12'
            miniforge: 24.9.0-0
    steps:
      - name: Assure that either artifacts-upload-name or maven-artifacts or sources-upload-name or source-repository is set
        # Stops the job right away when none of the four possible analysis inputs has been provided.
        if: ${{ inputs.artifacts-upload-name == '' && inputs.maven-artifacts == '' && inputs.sources-upload-name == '' && inputs.source-repository == '' }}
        run: |
          echo "Please specify either the input parameter 'artifacts-upload-name' or 'maven-artifacts' or 'sources-upload-name' or 'source-repository'."
          exit 1
      - name: Verify analysis-name only consists of characters safe for folder names
        # The input is handed over as an environment variable instead of being expanded
        # into the script text by the workflow expression syntax. Otherwise a crafted value
        # could run shell commands before the check below gets the chance to reject it.
        env:
          ANALYSIS_NAME: ${{ inputs.analysis-name }}
        run: |
          if [[ ! "${ANALYSIS_NAME}" =~ ^[A-Za-z0-9._-]+$ ]]; then
            echo "The analysis-name '${ANALYSIS_NAME}' contains invalid characters. Only alphanumeric characters, dots (.), underscores (_) and hyphens (-) are allowed."
            exit 1
          fi
      - name: Assemble ENVIRONMENT_INFO
        # Publishes the tool versions of the matrix as one environment variable for later steps.
        run: |
          echo "ENVIRONMENT_INFO=java-${{ matrix.java }}-python-${{ matrix.python }}-miniforge-${{ matrix.miniforge }}" >> "$GITHUB_ENV"

      - name: (Code Analysis Setup) Checkout code-graph-analysis-pipeline
        # Checks out the analysis pipeline repository (not the calling repository) into the workspace.
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
        with:
          repository: JohT/code-graph-analysis-pipeline
          ref: ${{ inputs.ref }}
          fetch-tags: true
          persist-credentials: false

      - name: (Java Setup) Java Development Kit (JDK) ${{ matrix.java }}
        # Installs the Temurin JDK in the version given by the matrix.
        uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654 # v5
        with:
          java-version: ${{ matrix.java }}
          distribution: 'temurin'

      # "Setup Python" could be skipped if jupyter notebook analysis-results aren't needed or .venv is used.
      # However, since this is a reusable workflow, we always do it here.
      - name: (Python Setup) Use version ${{ matrix.python }} with Conda package manager Miniforge
        id: prepare-conda-environment
        # Only runs when Conda (and not venv) has been selected by the caller.
        if: ${{ inputs.use-venv_virtual_python_environment == 'false' }}
        uses: conda-incubator/setup-miniconda@fc2d68f6413eb2d87b895e92f8584b5b94a10167 # v3
        with:
          miniforge-version: ${{ matrix.miniforge }}
          python-version: ${{ matrix.python }}
          environment-file: ./conda-environment.yml
          activate-environment: codegraph
          auto-activate-base: false
          show-channel-urls: true

      - name: (Python Setup) Use version ${{ matrix.python }} with venv environment management module
        # Counterpart of the Conda step above: only runs when venv has been selected by the caller.
        if: ${{ inputs.use-venv_virtual_python_environment == 'true' }}
        # NOTE(review): referenced by tag, whereas most other actions in this job are pinned to a commit SHA.
        uses: actions/setup-python@v6
        with:
          python-version: ${{ matrix.python }}
          cache: 'pip'

      - name: (Python Setup) Conda environment info
        # Writes the Conda installation details and configuration to the job log.
        if: ${{ inputs.use-venv_virtual_python_environment == 'false' }}
        shell: bash -el {0}
        run: |
          conda info
          conda list
          conda config --show-sources
          conda config --show

      - name: (Code Analysis Setup) Setup Cache Analysis Downloads
        # Caches the download directory. The key changes with the runner OS and with any shell script.
        uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5
        with:
          key: ${{ runner.os }}-${{ hashFiles('**/*.sh') }}
          path: ./temp/downloads

      - name: (Code Analysis Setup) Generate Neo4j Initial Password
        id: generate-neo4j-initial-password
        # Takes 12 random printable, non-space characters, masks them in the log
        # and exposes them as the step output "neo4j-initial-password".
        run: |
          neo4j_password="$(LC_ALL=C tr -dc '[:graph:]' </dev/urandom | head -c 12)"
          echo "::add-mask::${neo4j_password}"
          echo "neo4j-initial-password=${neo4j_password}" >> "$GITHUB_OUTPUT"

      - name: (Code Analysis Setup) Initialize Analysis
        # Runs the init script of the checked out pipeline with the analysis name as its only argument.
        # The generated Neo4j password is provided through the environment.
        run: |
          ./init.sh ${{ inputs.analysis-name }}
        env:
          NEO4J_INITIAL_PASSWORD: ${{ steps.generate-neo4j-initial-password.outputs.neo4j-initial-password }}

      - name: (Code Analysis Setup) Download sources for analysis
        # Only runs when the caller named an uploaded sources artifact.
        if: ${{ inputs.sources-upload-name != '' }}
        uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7
        with:
          path: temp/${{ inputs.analysis-name }}/source/${{ inputs.analysis-name }}
          name: ${{ inputs.sources-upload-name }}

      - name: (Code Analysis Setup) Clone source repository for analysis
        if: inputs.source-repository != ''
        working-directory: temp/${{ inputs.analysis-name }}
        # Caller-provided values are handed over as environment variables instead of being
        # expanded into the command line, so that quotes, "$" or backticks inside them
        # are treated as plain data and cannot be executed by the shell.
        env:
          SOURCE_REPOSITORY: ${{ inputs.source-repository }}
          SOURCE_REPOSITORY_BRANCH: ${{ inputs.source-repository-branch }}
          SOURCE_REPOSITORY_HISTORY_ONLY: ${{ inputs.source-repository-history-only }}
          ANALYSIS_NAME: ${{ inputs.analysis-name }}
        run: |
          ./../../scripts/cloneGitRepository.sh \
            --url "${SOURCE_REPOSITORY}" \
            --branch "${SOURCE_REPOSITORY_BRANCH}" \
            --history-only "${SOURCE_REPOSITORY_HISTORY_ONLY}" \
            --target "source/${ANALYSIS_NAME}"

      - name: (Code Analysis JavaScript Setup) Detect node version file .nvmrc in cloned source repository
        if: ${{ inputs.source-repository != '' }}
        working-directory: temp/${{ inputs.analysis-name }}/source/${{ inputs.analysis-name }}
        # Records in the job environment whether the cloned repository contains a ".nvmrc" file.
        run: |
          if [ -f ".nvmrc" ]; then
            echo "nodeVersionFileDetected=true" >> "$GITHUB_ENV"
          else
            echo "nodeVersionFileDetected=false" >> "$GITHUB_ENV"
          fi
      - name: (Code Analysis JavaScript Setup) Detect pnpm project in cloned source repository
        if: ${{ inputs.source-repository != '' }}
        working-directory: temp/${{ inputs.analysis-name }}/source/${{ inputs.analysis-name }}
        # Records in the job environment whether the cloned repository contains a "pnpm-lock.yaml" file.
        run: |
          if [ -f "pnpm-lock.yaml" ]; then
            echo "pnpmDetected=true" >> "$GITHUB_ENV"
          else
            echo "pnpmDetected=false" >> "$GITHUB_ENV"
          fi
      - name: (Code Analysis JavaScript Setup) Setup Node.js with version in .nvmrc for cloned source repository
        if: ${{ inputs.source-repository != '' && env.nodeVersionFileDetected == 'true' }}
        # NOTE(review): referenced by tag, whereas most other actions in this job are pinned to a commit SHA.
        uses: actions/setup-node@v6.3.0
        with:
          node-version-file: temp/${{ inputs.analysis-name }}/source/${{ inputs.analysis-name }}/.nvmrc
      - name: (Code Analysis JavaScript Setup) Setup Node.js (long-term support version fallback, no .nvmrc) for cloned source repository
        if: ${{ inputs.source-repository != '' && env.nodeVersionFileDetected != 'true' }}
        # NOTE(review): referenced by tag, whereas most other actions in this job are pinned to a commit SHA.
        uses: actions/setup-node@v6.3.0
        with:
          node-version: 'lts/*'
      - name: (Code Analysis JavaScript Setup) Setup pnpm for cloned source repository
        if: ${{ inputs.source-repository != '' && env.pnpmDetected == 'true' }}
        uses: pnpm/action-setup@41ff72655975bd51cab0327fa583b6e92b6d3061 # v4.2.0
        with:
          run_install: false
          package_json_file: temp/${{ inputs.analysis-name }}/source/${{ inputs.analysis-name }}/package.json
      - name: (Code Analysis JavaScript Setup) Install JavaScript dependencies in cloned source repository if needed
        if: ${{ inputs.source-repository != '' }}
        working-directory: temp/${{ inputs.analysis-name }}
        run: |
          ./../../scripts/installJavaScriptDependencies.sh

      - name: (Code Analysis Setup) Download artifacts for analysis
        # Only runs when the caller named an uploaded artifacts archive.
        if: ${{ inputs.artifacts-upload-name != '' }}
        uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7
        with:
          path: temp/${{ inputs.analysis-name }}/artifacts
          name: ${{ inputs.artifacts-upload-name }}

      - name: (Code Analysis Setup) Download Maven artifacts for analysis
        if: inputs.maven-artifacts != ''
        working-directory: temp/${{ inputs.analysis-name }}
        # The coordinates list is handed over as an environment variable instead of being
        # expanded into the command line, so that special characters inside it are
        # treated as plain data and cannot be executed by the shell.
        env:
          MAVEN_ARTIFACTS: ${{ inputs.maven-artifacts }}
        run: ./../../scripts/downloadMavenArtifacts.sh "${MAVEN_ARTIFACTS}"

      - name: (Debug) Log folder structure of temp directory
        # Only runs when debug logging is enabled for the workflow run (runner.debug is '1').
        if: runner.debug == '1'
        working-directory: temp
        # Prints the directories below "temp" as a simple text tree:
        # "ls -R" lists recursively, "grep" keeps only the directory header lines (ending with ":"),
        # and "sed" strips that colon, replaces every parent path segment by "--",
        # indents each line by three spaces and turns the first "-" of a line into "|".
        run: |
          ls -R | grep ":$" | sed -e 's/:$//' -e 's/[^-][^\/]*\//--/g' -e 's/^/   /' -e 's/-/|/'
      - name: Assemble DOMAIN_ARGUMENT
        # Stores "--domain <name> " (with trailing space) in "domainAnalysisArgument" when a domain
        # was given and an empty value otherwise. The value is computed by the expression and handed
        # to the shell as an environment variable, so the caller input never becomes script text here.
        env:
          DOMAIN_ANALYSIS_ARGUMENT: ${{ inputs.domain != '' && format('--domain {0} ', inputs.domain) || '' }}
        run: echo "domainAnalysisArgument=${DOMAIN_ANALYSIS_ARGUMENT}" >> "$GITHUB_ENV"
      - name: (Code Analysis) Analyze ${{ inputs.analysis-name }}
        working-directory: temp/${{ inputs.analysis-name }}
        # Shell type can be skipped if jupyter notebook analysis-results (and therefore conda) aren't needed
        shell: bash -el {0}
        env:
          NEO4J_INITIAL_PASSWORD: ${{ steps.generate-neo4j-initial-password.outputs.neo4j-initial-password }}
          ENABLE_JUPYTER_NOTEBOOK_PDF_GENERATION: ${{ inputs.jupyter-pdf }}
          IMPORT_GIT_LOG_DATA_IF_SOURCE_IS_PRESENT: "" # Options: "none", "aggregated", "full". default = "plugin" or ""
          PREPARE_CONDA_ENVIRONMENT: "false" # Had already been done in step with id "prepare-conda-environment".
          USE_VIRTUAL_PYTHON_ENVIRONMENT_VENV: ${{ inputs.use-venv_virtual_python_environment }}
          # Set through the step environment instead of being written in front of the command,
          # so that the caller input is never interpreted as shell syntax.
          TYPESCRIPT_SCAN_HEAP_MEMORY: ${{ inputs.typescript-scan-heap-memory }}
        # NOTE(review): the domain argument and the analysis arguments are still expanded into the
        # command line, presumably so that the shell splits them into separate options.
        # Callers should therefore only pass trusted values for them.
        run: |
          ./../../scripts/analysis/analyze.sh ${{ env.domainAnalysisArgument }}${{ inputs.analysis-arguments }}

      - name: Set artifact name for uploaded analysis results
        id: set-analysis-results-artifact-name
        # Derives the artifact name from ENVIRONMENT_INFO and exposes it as a step output,
        # which the upload steps and the job output refer to.
        run: |
          echo "uploaded-analysis-results-artifact-name=code-analysis-results-${{ env.ENVIRONMENT_INFO }}" >> "$GITHUB_OUTPUT"

      # Upload successful analysis-results as the main output artifact.
      # Skipped when the analysis arguments contain "--explore".
      - name: (Code Analysis Results) Archive successful analysis-results
        if: ${{ success() && !contains(inputs.analysis-arguments, '--explore') }}
        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6
        with:
          name: ${{ steps.set-analysis-results-artifact-name.outputs.uploaded-analysis-results-artifact-name }}
          path: ./temp/${{ inputs.analysis-name }}/reports/*
          retention-days: ${{ inputs.retention-days }}
          if-no-files-found: error

      # Upload logs if analysis results had been skipped ("--explore" analysis option).
      # Uses the same artifact name as the regular results upload; only one of the two steps
      # runs because their conditions exclude each other.
      - name: (Code Analysis Results) Archive runtime logs (analysis-results skipped by --explore)
        if: success() && contains(inputs.analysis-arguments, '--explore')
        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6
        with:
          name: ${{ steps.set-analysis-results-artifact-name.outputs.uploaded-analysis-results-artifact-name }}
          path: ./temp/${{ inputs.analysis-name }}/runtime/*
          if-no-files-found: error
          retention-days: ${{ inputs.retention-days }}


      # Upload logs and unfinished analysis-results in case of an error for troubleshooting
      - name: (Code Analysis Results) Archive failed run with logs and unfinished analysis-results
        if: failure()
        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6
        with:
          name: code-analysis-logs-${{ env.ENVIRONMENT_INFO }}
          path: |
            ./temp/**/runtime/*
            ./temp/**/reports/*
          # Honors the "retention-days" input like the other artifact uploads of this job.
          retention-days: ${{ inputs.retention-days }}

      # Upload Database Export
      # Only possible after an export with "./../../scripts/analysis/analyze.sh --report DatabaseCsvExport"
      # Won't be done here because of performance and security concerns
      #- name: Archive exported database
      #  uses: actions/upload-artifact@v3
      #  with:
      #    name: typescript-code-analysis-database-export-${{ matrix.java }}-python-${{ matrix.python }}-miniforge-${{ matrix.miniforge }}
      #    path: ./temp/**/import
      #    if-no-files-found: error
      #    retention-days: 5