# Source: JohT/code-graph-analysis-pipeline, .github/workflows/public-analyze-code-graph.yml at commit 3d9d1a66cb3240563e1dce910a7bd2b105dfcaba (GitHub)
# This is the public version of the code graph analysis workflow that can be used by other projects.
# It is a reusable workflow: it only declares the "workflow_call" trigger, so it runs when another workflow calls it.
name: Code Graph Analysis

on:
  workflow_call:
    # At least one of 'artifacts-upload-name', 'maven-artifacts', 'sources-upload-name'
    # or 'source-repository' needs to be set. The first step of the job fails otherwise.
    # Switches like 'jupyter-pdf' are declared as strings and therefore expect "true" or "false" as text.
    inputs:
      analysis-name:
        description: >
          The name of the project to analyze.
          Example: MyProject-1.0.0
        required: true
        type: string
      artifacts-upload-name:
        description: >
          The name of the artifacts uploaded with 'actions/upload-artifact'
          containing the content of the 'artifacts' directory for the analysis.
          Use it to analyze Java JARs, WARs, EARs, etc.
        required: false
        type: string
        default: ''
      maven-artifacts:
        description: >
          Comma-separated list of Maven coordinates (groupId:artifactId:version)
          to download from Maven Central for the analysis.
          Example: 'org.apache.commons:commons-lang3:3.12.0,com.google.guava:guava:31.1-jre'
        required: false
        type: string
        default: ''
      sources-upload-name:
        description: >
          The name of the sources uploaded with 'actions/upload-artifact'
          containing the content of the 'source' directory for the analysis.
          Also supports sub-folders for multiple source code bases.
          Please use 'include-hidden-files: true' if you also want to upload the git history.
        required: false
        type: string
        default: ''
      source-repository:
        description: >
          The URL of the source repository to analyze. For now, only GitHub repositories are supported.
          This can be used instead of 'sources-upload-name' to directly analyze a repository without uploading artifacts first.
          It can also be used in addition to 'sources-upload-name' to analyze both uploaded sources and a repository.
        required: false
        type: string
        default: ''
      source-repository-branch:
        description: >
          The branch, tag or SHA of the source repository to checkout.
          Default: default branch of the repository
        required: false
        type: string
        default: ''
      source-repository-history-only:
        description: >
          Whether to clone the source repository as a bare repository ("true") or not ("false", default).
          Bare repositories do not have a working directory and are useful for git history analysis when the sources are not needed.
        required: false
        type: string
        default: 'false'
      # The empty default is passed on unchanged to 'actions/checkout', which then selects the reference itself.
      ref:
        description: >
          The branch, tag or SHA of the code-graph-analysis-pipeline to checkout.
          Default: empty, which lets 'actions/checkout' choose the default branch
          (or the triggering reference when called from within the code-graph-analysis-pipeline repository itself).
        required: false
        type: string
        default: ''
      analysis-arguments:
        description: >
          The arguments to pass to the analysis script.
          Default: '--profile Neo4j-latest-low-memory'
        required: false
        type: string
        default: '--profile Neo4j-latest-low-memory'
      typescript-scan-heap-memory:
        description: >
          The heap memory size in MB to use for the TypeScript code scans (default=4096).
          This value is only used for the TypeScript code scans and is ignored for other scans.
        required: false
        type: string
        default: '4096'
      retention-days:
        description: >
          The number of days to keep the uploaded artifacts.
          Default: 5
        required: false
        type: number
        default: 5
      jupyter-pdf:
        description: >
          Enable PDF generation for Jupyter Notebooks ("true") or disable it ("false").
        required: false
        type: string
        default: 'true'
      use-venv_virtual_python_environment:
        description: >
          Use venv for virtual Python environments instead of Conda ("true") or not ("false", default).
        required: false
        type: string
        default: 'false'
    # The output forwards the artifact name that the job assembles in its step "set-analysis-results-artifact-name".
    outputs:
      uploaded-analysis-results:
        description: >
          The name of the artifact uploaded with 'actions/upload-artifact'
          containing all analysis results.
        value: ${{ jobs.analyze-code-graph.outputs.uploaded-analysis-results-artifact-name }}

jobs:
  analyze-code-graph:
    # The runner image is taken from the single matrix entry below so that it is declared in one place only.
    runs-on: ${{ matrix.os }}
    outputs:
      uploaded-analysis-results-artifact-name: ${{ steps.set-analysis-results-artifact-name.outputs.uploaded-analysis-results-artifact-name }}
    # One fixed tool combination. Its values are also used to assemble ENVIRONMENT_INFO and the artifact names.
    strategy:
      matrix:
        include:
          - os: ubuntu-22.04
            java: 21
            # Versions are quoted so that they are always read as text (an unquoted 3.10 would be read as the number 3.1).
            python: '3.12'
            miniforge: '24.9.0-0'
    steps:
      # Stop right away when the caller did not provide anything that could be analyzed.
      - name: Assure that either artifacts-upload-name or maven-artifacts or sources-upload-name or source-repository is set
        if: >-
          inputs.artifacts-upload-name == '' &&
          inputs.maven-artifacts == '' &&
          inputs.sources-upload-name == '' &&
          inputs.source-repository == ''
        run: |
          echo "Please specify either the input parameter 'artifacts-upload-name' or 'maven-artifacts' or 'sources-upload-name' or 'source-repository'."
          exit 1
      # The name is handed over as an environment variable instead of being expanded into the script text.
      # That way the check itself cannot be bypassed or abused by a name that contains quotes or shell syntax.
      - name: Verify analysis-name only consists of characters safe for folder names
        env:
          INPUT_ANALYSIS_NAME: ${{ inputs.analysis-name }}
        run: |
          if [[ ! "$INPUT_ANALYSIS_NAME" =~ ^[A-Za-z0-9._-]+$ ]]; then
            echo "The analysis-name '$INPUT_ANALYSIS_NAME' contains invalid characters. Only alphanumeric characters, dots (.), underscores (_) and hyphens (-) are allowed."
            exit 1
          fi
      # Publishes ENVIRONMENT_INFO for the following steps. It is used there as a suffix for the artifact names.
      # The file path variable is quoted like in the other steps that write to a GitHub command file.
      - name: Assemble ENVIRONMENT_INFO
        run: echo "ENVIRONMENT_INFO=java-${{ matrix.java }}-python-${{ matrix.python }}-miniforge-${{ matrix.miniforge }}" >> "$GITHUB_ENV"

      # Checks out the analysis pipeline repository (fixed below), not the repository of the calling workflow.
      # The reference comes from the input "ref" and may be empty.
      - name: (Code Analysis Setup) Checkout code-graph-analysis-pipeline
        uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5
        with:
          repository: 'JohT/code-graph-analysis-pipeline'
          ref: ${{ inputs.ref }}
          fetch-tags: true
          persist-credentials: false

      # Installs the Temurin JDK in the version given by the matrix.
      - name: (Java Setup) Java Development Kit (JDK) ${{ matrix.java }}
        uses: actions/setup-java@dded0888837ed1f317902acf8a20df0ad188d165 # v5
        with:
          java-version: ${{ matrix.java }}
          distribution: temurin

      # "Setup Python" can be skipped if jupyter notebook analysis-results aren't needed
      # Only runs when venv is not requested. Creates the environment "codegraph" from ./conda-environment.yml.
      - name: (Python Setup) Use version ${{ matrix.python }} with Conda package manager Miniforge
        id: prepare-conda-environment
        if: inputs.use-venv_virtual_python_environment == 'false'
        uses: conda-incubator/setup-miniconda@505e6394dae86d6a5c7fbb6e3fb8938e3e863830 # v3
        with:
          miniforge-version: ${{ matrix.miniforge }}
          python-version: ${{ matrix.python }}
          environment-file: ./conda-environment.yml
          activate-environment: codegraph
          auto-activate-base: false
          show-channel-urls: true

      # Counterpart of the Conda setup: only runs when venv is requested.
      # NOTE(review): this is the only action in this workflow referenced by tag instead of a commit SHA.
      # Consider pinning it like the others.
      - name: (Python Setup) Use version ${{ matrix.python }} with venv environment management module
        if: inputs.use-venv_virtual_python_environment == 'true'
        uses: actions/setup-python@v6
        with:
          cache: pip
          python-version: ${{ matrix.python }}

      # Prints the Conda setup into the log. Skipped together with the Conda setup when venv is requested.
      # The login shell ("-l") is kept from the original step; presumably it is what makes "conda" available here.
      - name: (Python Setup) Conda environment info
        shell: bash -el {0}
        if: inputs.use-venv_virtual_python_environment == 'false'
        run: |
          conda info
          conda list
          conda config --show-sources
          conda config --show

      # Caches the download directory. The key changes whenever any shell script in the repository changes.
      - name: (Code Analysis Setup) Setup Cache Analysis Downloads
        uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4
        with:
          key: ${{ runner.os }}-${{ hashFiles('**/*.sh') }}
          path: ./temp/downloads

      # Creates a random password for this run and exposes it as the step output "neo4j-initial-password".
      # It consists of 12 bytes read from /dev/urandom, reduced to printable non-space ASCII characters ('[:graph:]').
      # The value is registered as a log mask before it is written to the step outputs.
      - name: (Code Analysis Setup) Generate Neo4j Initial Password
        id: generate-neo4j-initial-password
        run: |
          generated_password=$( LC_ALL=C tr -dc '[:graph:]' </dev/urandom | head -c 12; echo )
          echo "::add-mask::$generated_password"
          echo "neo4j-initial-password=$generated_password" >> "$GITHUB_OUTPUT"

      # Runs init.sh with the analysis name as its only argument.
      # The name is passed through an environment variable and quoted, so it always arrives as exactly one
      # argument and is never interpreted as shell code.
      - name: (Code Analysis Setup) Initialize Analysis
        env:
          NEO4J_INITIAL_PASSWORD: ${{ steps.generate-neo4j-initial-password.outputs.neo4j-initial-password }}
          INPUT_ANALYSIS_NAME: ${{ inputs.analysis-name }}
        run: ./init.sh "$INPUT_ANALYSIS_NAME"

      # Optional: fetches the previously uploaded sources into the "source" folder of the analysis directory.
      - name: (Code Analysis Setup) Download sources for analysis
        if: inputs.sources-upload-name != ''
        uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5
        with:
          path: temp/${{ inputs.analysis-name }}/source/${{ inputs.analysis-name }}
          name: ${{ inputs.sources-upload-name }}

      # Optional: clones the given repository into the "source" folder of the analysis directory.
      # All caller-provided values are passed through environment variables instead of being expanded into
      # the script text, so that a URL or branch name containing quotes, "$" or backticks is taken literally.
      - name: (Code Analysis Setup) Clone source repository for analysis
        if: inputs.source-repository != ''
        working-directory: temp/${{ inputs.analysis-name }}
        env:
          INPUT_ANALYSIS_NAME: ${{ inputs.analysis-name }}
          INPUT_SOURCE_REPOSITORY: ${{ inputs.source-repository }}
          INPUT_SOURCE_REPOSITORY_BRANCH: ${{ inputs.source-repository-branch }}
          INPUT_SOURCE_REPOSITORY_HISTORY_ONLY: ${{ inputs.source-repository-history-only }}
        run: |
          ./../../scripts/cloneGitRepository.sh \
            --url "$INPUT_SOURCE_REPOSITORY" \
            --branch "$INPUT_SOURCE_REPOSITORY_BRANCH" \
            --history-only "$INPUT_SOURCE_REPOSITORY_HISTORY_ONLY" \
            --target "source/$INPUT_ANALYSIS_NAME"

      # Optional: fetches the previously uploaded artifacts into the "artifacts" folder of the analysis directory.
      - name: (Code Analysis Setup) Download artifacts for analysis
        if: inputs.artifacts-upload-name != ''
        uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5
        with:
          path: temp/${{ inputs.analysis-name }}/artifacts
          name: ${{ inputs.artifacts-upload-name }}

      # Optional: passes the list of Maven coordinates as one argument to the download script.
      # The list is handed over through an environment variable so that its content is never parsed as shell code.
      - name: (Code Analysis Setup) Download Maven artifacts for analysis
        if: inputs.maven-artifacts != ''
        working-directory: temp/${{ inputs.analysis-name }}
        env:
          INPUT_MAVEN_ARTIFACTS: ${{ inputs.maven-artifacts }}
        run: ./../../scripts/downloadMavenArtifacts.sh "$INPUT_MAVEN_ARTIFACTS"

      # Only runs when debug logging is enabled for the workflow run (runner.debug is '1').
      # Prints the directories below "temp" as an indented tree:
      # "ls -R | grep" keeps the directory header lines (those ending with ":"),
      # sed then removes the trailing colon, replaces every parent path segment by "--",
      # indents the line by three spaces and turns the first "-" into "|".
      - name: (Debug) Log folder structure of temp directory
        if: runner.debug == '1'
        working-directory: temp
        run: |
          ls -R | grep ":$" | sed -e 's/:$//' -e 's/[^-][^\/]*\//--/g' -e 's/^/   /' -e 's/-/|/'

      # Runs the analysis script inside the analysis directory that was prepared by the steps above.
      - name: (Code Analysis) Analyze ${{ inputs.analysis-name }}
        working-directory: temp/${{ inputs.analysis-name }}
        # Shell type can be skipped if jupyter notebook analysis-results (and therefore conda) aren't needed
        shell: bash -el {0}
        env:
          NEO4J_INITIAL_PASSWORD: ${{ steps.generate-neo4j-initial-password.outputs.neo4j-initial-password }}
          ENABLE_JUPYTER_NOTEBOOK_PDF_GENERATION: ${{ inputs.jupyter-pdf }}
          IMPORT_GIT_LOG_DATA_IF_SOURCE_IS_PRESENT: "" # Options: "none", "aggregated", "full". default = "plugin" or ""
          PREPARE_CONDA_ENVIRONMENT: "false" # Had already been done in step with id "prepare-conda-environment".
          USE_VIRTUAL_PYTHON_ENVIRONMENT_VENV: ${{ inputs.use-venv_virtual_python_environment }}
          # Set here instead of as a prefix in the script so that the input value is never parsed as shell code.
          TYPESCRIPT_SCAN_HEAP_MEMORY: ${{ inputs.typescript-scan-heap-memory }}
        # The analysis arguments are deliberately still expanded into the command line:
        # the shell needs to split them into separate arguments (e.g. "--profile" and its value).
        # NOTE(review): this means the calling workflow must only pass trusted values for "analysis-arguments".
        run: |
          ./../../scripts/analysis/analyze.sh ${{ inputs.analysis-arguments }}

      # Provides the artifact name as a step output. It is used by the upload step below
      # and is forwarded as job and workflow output.
      # The file path variable is quoted like in the other steps that write to a GitHub command file.
      - name: Set artifact name for uploaded analysis results
        id: set-analysis-results-artifact-name
        run: echo "uploaded-analysis-results-artifact-name=code-analysis-results-${{ env.ENVIRONMENT_INFO }}" >> "$GITHUB_OUTPUT"

      # Publishes the generated reports as an artifact when all previous steps succeeded.
      # The step fails if the reports directory turns out to be empty.
      - name: (Code Analysis Results) Archive successful analysis-results
        if: success()
        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
        with:
          name: ${{ steps.set-analysis-results-artifact-name.outputs.uploaded-analysis-results-artifact-name }}
          path: ./temp/${{ inputs.analysis-name }}/reports/*
          retention-days: ${{ inputs.retention-days }}
          if-no-files-found: error


      # Upload logs and unfinished analysis-results in case of an error for troubleshooting.
      # The input "retention-days" is documented as applying to the uploaded artifacts,
      # so it is applied here in the same way as for the successful results.
      - name: (Code Analysis Results) Archive failed run with logs and unfinished analysis-results
        if: failure()
        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
        with:
          name: code-analysis-logs-${{ env.ENVIRONMENT_INFO }}
          path: |
            ./temp/**/runtime/*
            ./temp/**/reports/*
          retention-days: ${{ inputs.retention-days }}

      # Upload Database Export
      # Only possible after an export with "./../../scripts/analysis/analyze.sh --report DatabaseCsvExport"
      # Won't be done here because of performance and security concerns
      #- name: Archive exported database
      #  uses: actions/upload-artifact@v3
      #  with:
      #    name: typescript-code-analysis-database-export-${{ matrix.java }}-python-${{ matrix.python }}-miniforge-${{ matrix.miniforge }}
      #    path: ./temp/**/import
      #    if-no-files-found: error
      #    retention-days: 5