// Source: codeql-action/src/dependency-caching.ts at e51b6a9a520cc75c12cc3a2eaef8363f6845b5be (github/codeql-action)

import * as os from "os";
import { join } from "path";

import * as actionsCache from "@actions/cache";
import * as glob from "@actions/glob";

import { getTemporaryDirectory } from "./actions-util";
import { listActionsCaches } from "./api-client";
import { createCacheKeyHash, getTotalCacheSize } from "./caching-utils";
import { CodeQL } from "./codeql";
import { Config } from "./config-utils";
import { EnvVar } from "./environment";
import { Feature, FeatureEnablement } from "./feature-flags";
import { KnownLanguage, Language } from "./languages";
import { Logger } from "./logging";
import { getErrorMessage, getRequiredEnvParam } from "./util";

/**
 * Caching configuration for a particular language.
 *
 * A configuration describes which directories hold the dependencies that should be cached,
 * and which files in the repository determine the cache key.
 */
export interface CacheConfig {
  /**
   * Gets the paths of directories on the runner that should be included in the cache.
   *
   * @param codeql The CodeQL instance to use.
   * @param features Information about which FFs are enabled.
   * @returns The paths of the directories to store in, or restore from, the cache.
   */
  getDependencyPaths: (
    codeql: CodeQL,
    features: FeatureEnablement,
  ) => Promise<string[]>;
  /**
   * Gets an array of glob patterns for the paths of files whose contents affect which dependencies are used
   * by a project. This function also checks whether there are any matching files and returns
   * `undefined` if no files match.
   *
   * The glob patterns are intended to be used for cache keys, where we find all files which match these
   * patterns, calculate a hash for their contents, and use that hash as part of the cache key.
   *
   * @param codeql The CodeQL instance to use.
   * @param features Information about which FFs are enabled.
   * @returns The glob patterns to hash, or `undefined` if no files match them.
   */
  getHashPatterns: (
    codeql: CodeQL,
    features: FeatureEnablement,
  ) => Promise<string[] | undefined>;
}

/**
 * The first component of every dependency cache key. It is also the key prefix that is
 * used to list the existing dependency caches when determining cache usage.
 */
const CODEQL_DEPENDENCY_CACHE_PREFIX = "codeql-dependencies";
/** The version component of the cache key. It can be changed to invalidate old caches. */
const CODEQL_DEPENDENCY_CACHE_VERSION = 1;

/**
 * Determines the directory in which .jar files for the Java `build-mode: none`
 * extractor are stored.
 *
 * @returns The `codeql_java/repository` directory inside the directory returned by
 * `getTemporaryDirectory`.
 */
export function getJavaTempDependencyDir(): string {
  const temporaryDirectory = getTemporaryDirectory();
  return join(temporaryDirectory, "codeql_java", "repository");
}

/**
 * Lists the directories on the runner that make up the dependency cache for a Java analysis.
 *
 * This is deliberately a function rather than a constant: it relies on `getTemporaryDirectory`
 * (via `getJavaTempDependencyDir`), which would fail in tests if it were evaluated before
 * `RUNNER_TEMP` has been initialised.
 *
 * @returns The Maven repository, the Gradle caches, and the directory used by the CodeQL Java
 * `build-mode: none` extractor, in that order.
 */
export async function getJavaDependencyDirs(): Promise<string[]> {
  const homeDirectory = os.homedir();

  const mavenRepository = join(homeDirectory, ".m2", "repository");
  const gradleCaches = join(homeDirectory, ".gradle", "caches");
  const buildModeNoneRepository = getJavaTempDependencyDir();

  return [mavenRepository, gradleCaches, buildModeNoneRepository];
}

/**
 * Determines the directory in which dependencies for the C# `build-mode: none`
 * extractor are stored.
 *
 * @returns The `codeql_csharp/repository` directory inside the directory returned by
 * `getTemporaryDirectory`.
 */
export function getCsharpTempDependencyDir(): string {
  const temporaryDirectory = getTemporaryDirectory();
  return join(temporaryDirectory, "codeql_csharp", "repository");
}

/**
 * Lists the directories on the runner that make up the dependency cache for a C# analysis.
 *
 * The NuGet packages directory is always included. The directory used by the C#
 * `build-mode: none` extractor is only included if `Feature.CsharpCacheBuildModeNone`
 * is enabled.
 *
 * @param codeql The CodeQL instance to use.
 * @param features Information about which FFs are enabled.
 * @returns The paths of directories on the runner that should be included in a dependency cache
 * for a C# analysis.
 */
export async function getCsharpDependencyDirs(
  codeql: CodeQL,
  features: FeatureEnablement,
): Promise<string[]> {
  // Nuget
  const nugetPackages = join(os.homedir(), ".nuget", "packages");

  const cacheBuildModeNone = await features.getValue(
    Feature.CsharpCacheBuildModeNone,
    codeql,
  );

  return cacheBuildModeNone
    ? [nugetPackages, getCsharpTempDependencyDir()]
    : [nugetPackages];
}

/**
 * Verifies that at least one file matches `patterns`. The patterns are evaluated together,
 * so a match for any single pattern is sufficient.
 *
 * @param patterns The glob patterns to find matching files for.
 * @returns The unmodified `patterns` array if there is at least one matching file,
 * or `undefined` if nothing matches.
 */
export async function makePatternCheck(
  patterns: string[],
): Promise<string[] | undefined> {
  const globber = await makeGlobber(patterns);
  const matchingFiles = await globber.glob();

  return matchingFiles.length > 0 ? patterns : undefined;
}

/**
 * Glob patterns for C# lock files. These files contain accurate information about
 * dependencies, including the exact versions that the relevant package manager has
 * determined for the project. Using these gives us stable hashes unless the
 * dependencies change.
 */
export const CSHARP_BASE_PATTERNS = [
  // NuGet
  "**/packages.lock.json",
  // Paket
  "**/paket.lock",
];

/**
 * Glob patterns for C# files that are less accurate for use in cache key calculations,
 * because they:
 *
 * - Don't contain the exact versions used. They may only contain version ranges or none at all.
 * - Contain information unrelated to dependencies, which we don't care about.
 *
 * As a result, the hash we compute from these files may change, even if
 * the dependencies haven't changed.
 */
export const CSHARP_EXTRA_PATTERNS = [
  "**/*.csproj",
  "**/packages.config",
  "**/nuget.config",
];

/**
 * Determines which glob patterns should be used to calculate the cache key hash for a
 * C# dependency cache.
 *
 * `CSHARP_BASE_PATTERNS` are preferred and used whenever at least one file matches them.
 * Otherwise, `CSHARP_EXTRA_PATTERNS` serve as a fallback, but only if
 * `Feature.CsharpNewCacheKey` is enabled.
 *
 * @param codeql The CodeQL instance to use.
 * @param features Information about which FFs are enabled.
 * @returns A list of glob patterns to use for hashing, or `undefined` if no suitable
 * files were found.
 */
export async function getCsharpHashPatterns(
  codeql: CodeQL,
  features: FeatureEnablement,
): Promise<string[] | undefined> {
  const lockFilePatterns = await internal.makePatternCheck(
    CSHARP_BASE_PATTERNS,
  );
  if (lockFilePatterns !== undefined) {
    return lockFilePatterns;
  }

  const useExtraPatterns = await features.getValue(
    Feature.CsharpNewCacheKey,
    codeql,
  );
  if (!useExtraPatterns) {
    // No files matched `CSHARP_BASE_PATTERNS`, and the fallback patterns are not enabled.
    return undefined;
  }

  return internal.makePatternCheck(CSHARP_EXTRA_PATTERNS);
}

/**
 * The built-in caching configurations, keyed by language name. Languages without an entry
 * here have no dependency caching support.
 */
const defaultCacheConfigs: Record<string, CacheConfig> = {
  java: {
    getDependencyPaths: getJavaDependencyDirs,
    getHashPatterns: async () => {
      const javaPatterns = [
        // Maven
        "**/pom.xml",
        // Gradle
        "**/*.gradle*",
        "**/gradle-wrapper.properties",
        "buildSrc/**/Versions.kt",
        "buildSrc/**/Dependencies.kt",
        "gradle/*.versions.toml",
        "**/versions.properties",
      ];
      return internal.makePatternCheck(javaPatterns);
    },
  },
  csharp: {
    getDependencyPaths: getCsharpDependencyDirs,
    getHashPatterns: getCsharpHashPatterns,
  },
  go: {
    getDependencyPaths: async () => {
      const moduleCache = join(os.homedir(), "go", "pkg", "mod");
      return [moduleCache];
    },
    getHashPatterns: async () => {
      const goPatterns = ["**/go.sum"];
      return internal.makePatternCheck(goPatterns);
    },
  },
};

/**
 * Creates a globber for the given patterns.
 *
 * @param patterns The glob patterns, which are joined with newlines before being handed to `glob.create`.
 * @returns The globber created for the combined patterns.
 */
async function makeGlobber(patterns: string[]): Promise<glob.Globber> {
  const combinedPatterns = patterns.join("\n");
  return glob.create(combinedPatterns);
}

/** Enumerates possible outcomes of trying to restore a dependency cache. */
export enum CacheHitKind {
  /** We were unable to calculate a hash for the key, so no restore was attempted. */
  NoHash = "no-hash",
  /** No cache was found for the primary key or any of the restore keys. */
  Miss = "miss",
  /** The primary cache key matched. */
  Exact = "exact",
  /** A restore key matched, i.e. the key of the restored cache differs from the primary key. */
  Partial = "partial",
}

/** Represents results of trying to restore a dependency cache for a language. */
export interface DependencyCacheRestoreStatus {
  /** The language that this status is for. */
  language: Language;
  /** The outcome of the attempt to restore the cache. */
  hit_kind: CacheHitKind;
  /** How long restoring the cache took, in milliseconds. Only reported when a cache was restored. */
  download_duration_ms?: number;
}

/**
 * The restore status report: an array of `DependencyCacheRestoreStatus` objects for each
 * analysed language with a caching configuration.
 */
export type DependencyCacheRestoreStatusReport = DependencyCacheRestoreStatus[];

/** Represents the results of `downloadDependencyCaches`. */
export interface DownloadDependencyCachesResult {
  /** The status report for telemetry, with one entry per language that has a caching configuration. */
  statusReport: DependencyCacheRestoreStatusReport;
  /**
   * An array of cache keys that we have restored and therefore know to exist.
   * These are the keys of the caches that were actually restored, which are not
   * necessarily the primary keys that were requested.
   */
  restoredKeys: string[];
}

/**
 * Calls `cacheConfig.getHashPatterns` and writes a log message if the result indicates that
 * there are no files from which a hash for the cache key could be calculated.
 *
 * @param codeql The CodeQL instance to use.
 * @param features Information about which FFs are enabled.
 * @param language The language the `CacheConfig` is for. Only used in the log message.
 * @param cacheConfig The caching configuration to call `getHashPatterns` on.
 * @param checkType Whether the patterns are checked for a download or an upload. Only used in the log message.
 * @param logger The logger to write the log message to if there are no matching files.
 * @returns An array of glob patterns to use for hashing files, or `undefined` if there are no matching files.
 */
export async function checkHashPatterns(
  codeql: CodeQL,
  features: FeatureEnablement,
  language: Language,
  cacheConfig: CacheConfig,
  checkType: "download" | "upload",
  logger: Logger,
): Promise<string[] | undefined> {
  const hashPatterns = await cacheConfig.getHashPatterns(codeql, features);

  if (hashPatterns !== undefined) {
    return hashPatterns;
  }

  logger.info(
    `Skipping ${checkType} of dependency cache for ${language} as we cannot calculate a hash for the cache key.`,
  );
  return undefined;
}

/**
 * Tries to restore a dependency cache for each of the languages being analyzed.
 *
 * Languages without a caching configuration are skipped and do not appear in the status
 * report. Languages for which no cache key hash can be calculated are reported with
 * `CacheHitKind.NoHash`. For all other languages, a restore is attempted using the full
 * cache key as the primary key and the cache key prefix as the only restore key.
 *
 * @param codeql The CodeQL instance to use.
 * @param features Information about which FFs are enabled.
 * @param languages The languages being analyzed.
 * @param logger A logger to record some informational messages to.
 *
 * @returns The status report for telemetry, along with the keys of the caches that were restored.
 */
export async function downloadDependencyCaches(
  codeql: CodeQL,
  features: FeatureEnablement,
  languages: Language[],
  logger: Logger,
): Promise<DownloadDependencyCachesResult> {
  const statusReport: DependencyCacheRestoreStatusReport = [];
  const restoredKeys: string[] = [];

  for (const language of languages) {
    const cacheConfig = defaultCacheConfigs[language];

    if (cacheConfig === undefined) {
      logger.info(
        `Skipping download of dependency cache for ${language} as we have no caching configuration for it.`,
      );
      continue;
    }

    // Make sure that there are files to hash, so that the hash in the cache key isn't
    // an empty string.
    const patterns = await checkHashPatterns(
      codeql,
      features,
      language,
      cacheConfig,
      "download",
      logger,
    );
    if (patterns === undefined) {
      statusReport.push({ language, hit_kind: CacheHitKind.NoHash });
      continue;
    }

    const primaryKey = await cacheKey(codeql, features, language, patterns);
    const keyPrefix = await cachePrefix(codeql, features, language);
    const restoreKeys: string[] = [keyPrefix];
    const restoreKeyList = restoreKeys.join(", ");

    logger.info(
      `Downloading cache for ${language} with key ${primaryKey} and restore keys ${restoreKeyList}`,
    );

    // The measured duration includes the time taken to determine the dependency paths.
    const startedAt = performance.now();
    const dependencyPaths = await cacheConfig.getDependencyPaths(
      codeql,
      features,
    );
    const hitKey = await actionsCache.restoreCache(
      dependencyPaths,
      primaryKey,
      restoreKeys,
    );
    const download_duration_ms = Math.round(performance.now() - startedAt);

    if (hitKey === undefined) {
      statusReport.push({ language, hit_kind: CacheHitKind.Miss });
      logger.info(`No suitable cache found for ${language}.`);
      continue;
    }

    logger.info(`Cache hit on key ${hitKey} for ${language}.`);

    // The hit is only exact if the key of the restored cache is the primary key.
    // Otherwise, the cache was found via the restore key.
    const hit_kind =
      hitKey === primaryKey ? CacheHitKind.Exact : CacheHitKind.Partial;

    statusReport.push({ language, hit_kind, download_duration_ms });
    restoredKeys.push(hitKey);
  }

  return { statusReport, restoredKeys };
}

/** Enumerates possible outcomes for storing caches. */
export enum CacheStoreResult {
  /** We were unable to calculate a hash for the key, so no upload was attempted. */
  NoHash = "no-hash",
  /** There is nothing to store in the cache, i.e. the total size of the files to cache is zero. */
  Empty = "empty",
  /**
   * There already exists a cache with the key we are trying to store. This is the case if we
   * restored a cache with that key earlier, or if the key is already in use when saving.
   */
  Duplicate = "duplicate",
  /** The cache was stored successfully. */
  Stored = "stored",
}

/** Represents results of trying to upload a dependency cache for a language. */
export interface DependencyCacheUploadStatus {
  /** The language that this status is for. */
  language: Language;
  /** The outcome of the attempt to store the cache. */
  result: CacheStoreResult;
  /** The size of the files that were stored, in bytes. Only reported when the cache was stored. */
  upload_size_bytes?: number;
  /** How long storing the cache took, in milliseconds. Only reported when the cache was stored. */
  upload_duration_ms?: number;
}

/**
 * The upload status report: an array of `DependencyCacheUploadStatus` objects for each
 * analysed language with a caching configuration.
 */
export type DependencyCacheUploadStatusReport = DependencyCacheUploadStatus[];

/**
 * Attempts to store caches for the languages that were analyzed.
 *
 * Languages without a caching configuration are skipped and do not appear in the status
 * report. For every other language, the result is one of:
 *
 * - `CacheStoreResult.NoHash` if no hash for the cache key could be calculated.
 * - `CacheStoreResult.Duplicate` if a cache with the same key was restored earlier, or if
 *   the key is already in use when trying to save the cache.
 * - `CacheStoreResult.Empty` if there are no files to store.
 * - `CacheStoreResult.Stored` if the cache was saved.
 *
 * @param codeql The CodeQL instance to use.
 * @param features Information about which FFs are enabled.
 * @param config The configuration for this workflow.
 * @param logger A logger to record some informational messages to.
 *
 * @returns An array of `DependencyCacheUploadStatus` objects for each analysed language with a caching configuration.
 * @throws Any error from saving the cache other than `ReserveCacheError`.
 */
export async function uploadDependencyCaches(
  codeql: CodeQL,
  features: FeatureEnablement,
  config: Config,
  logger: Logger,
): Promise<DependencyCacheUploadStatusReport> {
  const status: DependencyCacheUploadStatusReport = [];
  for (const language of config.languages) {
    const cacheConfig = defaultCacheConfigs[language];

    if (cacheConfig === undefined) {
      logger.info(
        `Skipping upload of dependency cache for ${language} as we have no caching configuration for it.`,
      );
      continue;
    }

    // Check that we can find files to calculate the hash for the cache key from, so we don't end up
    // with an empty string.
    const patterns = await checkHashPatterns(
      codeql,
      features,
      language,
      cacheConfig,
      "upload",
      logger,
    );
    if (patterns === undefined) {
      status.push({ language, result: CacheStoreResult.NoHash });
      continue;
    }

    // Now that we have verified that there are suitable files, compute the hash for the cache key.
    const key = await cacheKey(codeql, features, language, patterns);

    // Check that we haven't previously restored this exact key. If a cache with this key
    // already exists in the Actions Cache, performing the next steps is pointless as the cache
    // will not get overwritten. We can therefore skip the expensive work of measuring the size
    // of the cache contents and attempting to upload it if we know that the cache already exists.
    if (config.dependencyCachingRestoredKeys.includes(key)) {
      status.push({ language, result: CacheStoreResult.Duplicate });
      continue;
    }

    // Resolve the directories to cache exactly once, so that the size we measure below and
    // the contents we upload afterwards always refer to the same set of paths.
    const dependencyPaths = await cacheConfig.getDependencyPaths(
      codeql,
      features,
    );

    // Calculate the size of the files that we would store in the cache. We use this to determine whether the
    // cache should be saved or not. For example, if there are no files to store, then we skip creating the
    // cache. In the future, we could also:
    // - Skip uploading caches with a size below some threshold: this makes sense for avoiding the overhead
    //   of storing and restoring small caches, but does not help with alert wobble if a package repository
    //   cannot be reached in a given run.
    // - Skip uploading caches with a size above some threshold: this could be a concern if other workflows
    //   use the cache quota that we compete with. In that case, we do not wish to use up all of the quota
    //   with the dependency caches. For this, we could use the Cache API to check whether other workflows
    //   are using the quota and how full it is.
    const size = await getTotalCacheSize(dependencyPaths, logger, true);

    // Skip uploading an empty cache.
    if (size === 0) {
      status.push({ language, result: CacheStoreResult.Empty });
      logger.info(
        `Skipping upload of dependency cache for ${language} since it is empty.`,
      );
      continue;
    }

    logger.info(
      `Uploading cache of size ${size} for ${language} with key ${key}...`,
    );

    try {
      const start = performance.now();
      await actionsCache.saveCache(dependencyPaths, key);
      const upload_duration_ms = Math.round(performance.now() - start);

      status.push({
        language,
        result: CacheStoreResult.Stored,
        upload_size_bytes: Math.round(size),
        upload_duration_ms,
      });
    } catch (error) {
      // `ReserveCacheError` indicates that the cache key is already in use, which means that a
      // cache with that key already exists or is in the process of being uploaded by another
      // workflow. We can ignore this.
      if (error instanceof actionsCache.ReserveCacheError) {
        logger.info(
          `Not uploading cache for ${language}, because ${key} is already in use.`,
        );
        logger.debug(error.message);

        status.push({ language, result: CacheStoreResult.Duplicate });
      } else {
        // Propagate other errors upwards.
        throw error;
      }
    }
  }

  return status;
}

/**
 * Builds the full cache key for the specified language by appending a hash of the files
 * matching `patterns` to the cache key prefix for that language.
 *
 * @param codeql The CodeQL instance to use.
 * @param features Information about which FFs are enabled.
 * @param language The language being analyzed.
 * @param patterns The file patterns to hash.
 *
 * @returns A cache key capturing information about the project(s) being analyzed in the specified language.
 */
export async function cacheKey(
  codeql: CodeQL,
  features: FeatureEnablement,
  language: Language,
  patterns: string[],
): Promise<string> {
  const combinedPatterns = patterns.join("\n");
  const contentHash = await glob.hashFiles(combinedPatterns);
  const keyPrefix = await cachePrefix(codeql, features, language);

  return keyPrefix + contentHash;
}

/**
 * Computes a cache key prefix which identifies the set of enabled, experimental features that
 * the cache contents depend on for the given language. This avoids restoring caches whose
 * contents depended on experimental features, if those experimental features are later disabled.
 *
 * Only C# currently has such features. For all other languages, and if none of the relevant
 * features are enabled, the prefix is the empty string.
 *
 * @param codeql The CodeQL instance.
 * @param features Information about enabled features.
 * @param language The language we are creating the key for.
 *
 * @returns A hash of the enabled features followed by `-`, or the empty string if there are none.
 */
export async function getFeaturePrefix(
  codeql: CodeQL,
  features: FeatureEnablement,
  language: Language,
): Promise<string> {
  // The experimental features which affect the cache contents for this language.
  const relevantFeatures: Feature[] =
    language === KnownLanguage.csharp
      ? [Feature.CsharpNewCacheKey, Feature.CsharpCacheBuildModeNone]
      : [];

  // Query the features one at a time, in order, and keep the ones that are enabled.
  const enabledFeatures: Feature[] = [];
  for (const feature of relevantFeatures) {
    if (await features.getValue(feature, codeql)) {
      enabledFeatures.push(feature);
    }
  }

  if (enabledFeatures.length === 0) {
    // No feature prefix.
    return "";
  }

  // Derive the prefix from a hash of the array of enabled features.
  return `${createCacheKeyHash(enabledFeatures)}-`;
}

/**
 * Builds the prefix of the cache key. Its components are separated by `-` and are, in order:
 * the CodeQL-specific prefix, the custom prefix from the `EnvVar.DEPENDENCY_CACHING_PREFIX`
 * environment variable (if set and non-empty), the feature prefix (if any) immediately followed
 * by a version number that can be changed to invalidate old caches, the runner's operating
 * system, and the specified language name. The result ends with a trailing `-`.
 *
 * @param codeql The CodeQL instance to use.
 * @param features Information about which FFs are enabled.
 * @param language The language being analyzed.
 * @returns The prefix that identifies what a cache is for.
 */
async function cachePrefix(
  codeql: CodeQL,
  features: FeatureEnablement,
  language: Language,
): Promise<string> {
  const runnerOs = getRequiredEnvParam("RUNNER_OS");
  const customPrefix = process.env[EnvVar.DEPENDENCY_CACHING_PREFIX];

  const segments: string[] = [CODEQL_DEPENDENCY_CACHE_PREFIX];
  if (customPrefix !== undefined && customPrefix.length > 0) {
    segments.push(customPrefix);
  }

  // The feature prefix is a hash identifying the experimental features that affect the
  // cache contents. It is either empty or already ends with a `-`, so it forms a single
  // segment together with the version number.
  const featurePrefix = await getFeaturePrefix(codeql, features, language);
  segments.push(
    `${featurePrefix}${CODEQL_DEPENDENCY_CACHE_VERSION}`,
    runnerOs,
    language,
  );

  // Assemble the cache key prefix.
  return `${segments.join("-")}-`;
}

/** Represents information about our overall cache usage for CodeQL dependency caches. */
export interface DependencyCachingUsageReport {
  /** The number of CodeQL dependency caches that were found. */
  count: number;
  /** The combined size of those caches, in bytes. */
  size_bytes: number;
}

/**
 * Tries to determine how many CodeQL dependency caches exist and how large they are in total.
 * Caches without a reported size count as having a size of zero.
 *
 * @param logger The logger to log a warning to if the information cannot be retrieved.
 * @returns The overall cache usage for CodeQL dependency caches, or `undefined` if we couldn't determine it.
 */
export async function getDependencyCacheUsage(
  logger: Logger,
): Promise<DependencyCachingUsageReport | undefined> {
  try {
    const caches = await listActionsCaches(CODEQL_DEPENDENCY_CACHE_PREFIX);

    let size_bytes = 0;
    for (const cache of caches) {
      size_bytes += cache.size_in_bytes ?? 0;
    }

    return { count: caches.length, size_bytes };
  } catch (err) {
    logger.warning(
      `Unable to retrieve information about dependency cache usage: ${getErrorMessage(err)}`,
    );
    return undefined;
  }
}

/**
 * Functions which other functions in this module call via this object rather than directly.
 * NOTE(review): presumably this indirection exists so that tests can replace
 * `makePatternCheck` with a stub — confirm against the tests.
 */
export const internal = {
  makePatternCheck,
};