diff --git a/.github/workflows/asimFileAndParserValidation.yml b/.github/workflows/asimFileAndParserValidation.yml
new file mode 100644
index 00000000000..eed16fc618b
--- /dev/null
+++ b/.github/workflows/asimFileAndParserValidation.yml
@@ -0,0 +1,748 @@
+# ============================================================================
+# New ASIM File and Parser Validation
+# ============================================================================
+# Validates pull requests that add a new ASIM parser. Checks performed:
+#   1. The PR carries the "ASIM" label.
+#   2. New files sit in the expected directory structure.
+#   3. Required companion files (unifying parsers, changelogs) are present.
+#   4. EquivalentBuiltInParser exists, follows the naming convention, and
+#      is listed in the unifying parser's Parsers list.
+#   5. ParserName exists, follows the naming convention, and is referenced
+#      in the unifying parser's ParserQuery.
+#   6. Parser.Version in the new and unifying parser YAML files has a
+#      corresponding entry in their CHANGELOG files.
+#   7. On synchronize events from forked PRs, the SafeToRun label is
+#      removed to prevent untrusted LLM execution.
+#   8. The KQL ParserQuery of the new ASim and vim parsers is analyzed by
+#      an LLM for performance and best practices.
+# ============================================================================
+name: New ASIM File and Parser Validation
+
+on:
+  pull_request:
+    types:
+      - labeled
+      - synchronize
+    branches:
+      - master
+
+permissions:
+  contents: read        # Read repo contents for checkout
+  pull-requests: write  # Post validation comments on PRs
+  models: read          # Access GitHub Models API for LLM analysis
+
+jobs:
+  validate-asim-files-parsers:
+    runs-on: ubuntu-latest
+    steps:
+      # ----------------------------------------------------------------
+      # Step 1: Check if the PR has the "ASIM" label.
+      #         If not, all subsequent steps are skipped.
+      # ----------------------------------------------------------------
+      - name: Check for ASIM label
+        id: check-label
+        uses: actions/github-script@b7fb2001b410c9390cbe9e2c7d5cab7eefb7b29c
+        with:
+          script: |
+            const labels = context.payload.pull_request.labels.map(l => l.name);
+            const hasLabel = labels.includes('ASIM');
+            core.info(hasLabel
+              ? 'PR has the ASIM label.'
+              : 'PR does not have the ASIM label. Skipping workflow.');
+            core.setOutput('has_label', hasLabel ? 'true' : 'false');
+
+      # ----------------------------------------------------------------
+      # Step 2: Fetch all files in the PR and categorize them.
+      #         Outputs: files, new_asim_yaml_files,
+      #                  has_new_asim_yaml_files
+      # ----------------------------------------------------------------
+      - name: Get changed files
+        if: steps.check-label.outputs.has_label == 'true'
+        id: changed-files
+        uses: actions/github-script@b7fb2001b410c9390cbe9e2c7d5cab7eefb7b29c
+        with:
+          script: |
+            // github.paginate keeps requesting pages until the listing is exhausted,
+            // so PRs with more than 100 files are fully covered.
+            const files = await github.paginate(github.rest.pulls.listFiles, {
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              pull_number: context.payload.pull_request.number,
+              per_page: 100
+            });
+            const allFiles = files.map(f => f.filename);
+            const newFiles = files.filter(f => f.status === 'added').map(f => f.filename);
+
+            // Filter for new YAML files under Parsers/ASim<Schema>/Parsers/
+            const parserPattern = /^Parsers\/ASim\w+\/Parsers\/.+\.yaml$/;
+            const newAsimYamlFiles = newFiles.filter(f => parserPattern.test(f));
+
+            core.info(`Changed files (${allFiles.length}):`);
+            allFiles.forEach(f => core.info(`  - ${f}`));
+            core.info(`New ASIM YAML files (${newAsimYamlFiles.length}):`);
+            newAsimYamlFiles.forEach(f => core.info(`  - ${f}`));
+            core.info(`New files (${newFiles.length}):`);
+            newFiles.forEach(f => core.info(`  - ${f}`));
+
+            core.setOutput('files', JSON.stringify(allFiles));
+            core.setOutput('new_asim_yaml_files', JSON.stringify(newAsimYamlFiles));
+            core.setOutput('has_new_asim_yaml_files', newAsimYamlFiles.length > 0 ? 'true' : 'false');
+
+      # ----------------------------------------------------------------
+      # Step 3: Validate the PR's file structure.
+      #   Expected for a new ASIM parser:
+      #     - 2 new YAML files in Parsers/ASim<Schema>/Parsers/
+      #     - ASim<Schema>.yaml and im<Schema>.yaml must be modified
+      #     - 2 new CHANGELOG .md files matching the new YAML filenames
+      #     - ASim<Schema>.md and im<Schema>.md in CHANGELOG/ must be modified
+      #   Posts a comment on the PR if validation fails.
+      #
+      #   File names come from the pull request and are therefore untrusted.
+      #   They are handed to the script through environment variables rather
+      #   than ${{ }} interpolation, so a crafted file name (for example one
+      #   containing a single quote) cannot inject JavaScript into the script.
+      # ----------------------------------------------------------------
+      - name: Validate new file paths
+        id: validate-paths
+        if: steps.check-label.outputs.has_label == 'true' && steps.changed-files.outputs.has_new_asim_yaml_files == 'true'
+        uses: actions/github-script@b7fb2001b410c9390cbe9e2c7d5cab7eefb7b29c
+        env:
+          NEW_ASIM_YAML_FILES: ${{ steps.changed-files.outputs.new_asim_yaml_files }}
+          ALL_FILES: ${{ steps.changed-files.outputs.files }}
+        with:
+          script: |
+            const newAsimYamlFiles = JSON.parse(process.env.NEW_ASIM_YAML_FILES);
+            const allFiles = JSON.parse(process.env.ALL_FILES);
+            const errors = [];
+
+            // 1. Validate new YAML files are in Parsers/ASim<Schema>/Parsers/
+            const parserPattern = /^Parsers\/(ASim\w+)\/Parsers\/(.+\.yaml)$/;
+
+            if (newAsimYamlFiles.length !== 2) {
+              errors.push(`Expected exactly 2 new YAML files in \`Parsers/ASim<Schema>/Parsers/\`, found ${newAsimYamlFiles.length}.`);
+            }
+
+            // 2. Extract schema from the directory name
+            const schemas = new Set(
+              newAsimYamlFiles.map(f => f.match(parserPattern)?.[1]).filter(Boolean)
+            );
+
+            if (schemas.size > 1) {
+              errors.push(`New YAML files span multiple schemas: ${[...schemas].join(', ')}. Expected a single schema.`);
+            }
+
+            const schema = [...schemas][0];
+            if (schema) {
+              const schemaName = schema.replace(/^ASim/, '');
+              core.info(`Detected schema: ${schemaName} (directory: ${schema})`);
+
+              // Every PR file that is not one of the new parser YAML files.
+              const newParserFiles = new Set(newAsimYamlFiles);
+              const modifiedFiles = allFiles.filter(f => !newParserFiles.has(f));
+
+              // 3. Check that the union parsers (ASim<Schema>.yaml and im<Schema>.yaml) are modified
+              const expectedEdited = [
+                `Parsers/${schema}/Parsers/${schema}.yaml`,
+                `Parsers/${schema}/Parsers/im${schemaName}.yaml`
+              ];
+              for (const expected of expectedEdited) {
+                if (!modifiedFiles.includes(expected)) {
+                  errors.push(`Expected modified file not found: ${expected}`);
+                }
+              }
+
+              // 4. Check that new CHANGELOG md files exist (same name as new yaml files).
+              //    Only the trailing ".yaml" is stripped, not the first occurrence.
+              const newYamlBasenames = newAsimYamlFiles
+                .map(f => f.match(parserPattern)?.[2])
+                .filter(Boolean)
+                .map(name => name.replace(/\.yaml$/, ''));
+              for (const name of newYamlBasenames) {
+                const expected = `Parsers/${schema}/CHANGELOG/${name}.md`;
+                if (!allFiles.includes(expected)) {
+                  errors.push(`Expected new CHANGELOG file not found: ${expected}`);
+                }
+              }
+
+              // 5. Check that ASim<Schema>.md and im<Schema>.md in CHANGELOG are modified
+              const expectedEditedChangelogs = [
+                `Parsers/${schema}/CHANGELOG/${schema}.md`,
+                `Parsers/${schema}/CHANGELOG/im${schemaName}.md`
+              ];
+              for (const expected of expectedEditedChangelogs) {
+                if (!modifiedFiles.includes(expected)) {
+                  errors.push(`Expected modified CHANGELOG file not found: ${expected}`);
+                }
+              }
+            }
+
+            if (errors.length === 0) {
+              core.info('All file validations passed.');
+              return;
+            }
+
+            // Log the errors first so they are visible in the run even when the
+            // PR comment cannot be posted (e.g. read-only token on forked PRs).
+            errors.forEach(e => core.error(e));
+
+            // Post a comment on the PR summarizing validation failures
+            const body = `## ASIM File Validation Failed\n\nThe following validation errors were found:\n\n${errors.map(e => '- ' + e).join('\n')}\n\n---\n*This comment was generated automatically by the ASIM File and Parser Validation workflow.*`;
+            try {
+              await github.rest.issues.createComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: context.payload.pull_request.number,
+                body: body
+              });
+            } catch (err) {
+              core.warning(`Could not post validation comment on the PR: ${err.message}`);
+            }
+            core.setFailed('PR file validation failed. See PR comment for details.');
+
+      # ----------------------------------------------------------------
+      # Steps 4-8: Content-based validation and LLM analysis.
+      #   Requires file validation to pass and ASIM YAML files to exist.
+      #   Step 4: Checkout the repository to read files from disk.
+      #   Step 5: Install js-yaml for YAML parsing.
+      #   Step 6: Validate parser YAML fields (EquivalentBuiltInParser,
+      #           ParserName, Parser.Version) and cross-reference
+      #           against unifying parsers and CHANGELOG entries.
+      #   Step 7: Check if the PR has the "SafeToRun" label.
+      #           Then remove it on forked PRs (synchronize events)
+      #           to prevent untrusted LLM execution on next push.
+      #   Step 8: Send ASim and vim parser KQL queries to an LLM
+      #           for performance and best practices review.
+      # ----------------------------------------------------------------
+      - name: Checkout repository
+        if: steps.validate-paths.outcome == 'success' && steps.changed-files.outputs.has_new_asim_yaml_files == 'true'
+        uses: actions/checkout@v4
+        with:
+          # The checked-out PR content is untrusted and is only read from disk;
+          # do not leave the job token behind in the local git configuration.
+          persist-credentials: false
+
+      - name: Install js-yaml
+        if: steps.validate-paths.outcome == 'success' && steps.changed-files.outputs.has_new_asim_yaml_files == 'true'
+        # --no-save:        leave the checked-out package.json/package-lock.json untouched.
+        # --ignore-scripts: never execute npm lifecycle scripts from the PR checkout.
+        # js-yaml@4:        from 4.0 onwards yaml.load() uses the safe schema by default.
+        run: npm install --no-save --ignore-scripts js-yaml@4
+
+      # ----------------------------------------------------------------
+      # Step 6: Validate parser YAML fields and CHANGELOG entries.
+      #   For each new parser YAML file:
+      #     - EquivalentBuiltInParser: must exist, follow naming format
+      #       (_ASim_<Schema>_<Product> or _Im_<Schema>_<Product>),
+      #       and be listed in the corresponding unifying parser's
+      #       Parsers array
+      #     - ParserName: must exist, follow naming format
+      #       (ASim<Schema><Product> or vim<Schema><Product>),
+      #       and be referenced in the corresponding unifying
+      #       parser's ParserQuery
+      #     - Parser.Version: must exist and have a matching
+      #       "Version X.X" entry in the corresponding CHANGELOG
+      #   For the unifying (edited) parsers (ASim<Schema>.yaml,
+      #   im<Schema>.yaml):
+      #     - Parser.Version to CHANGELOG validation
+      #
+      #   The three field checks are independent of each other: a missing
+      #   EquivalentBuiltInParser does not suppress the ParserName or
+      #   Parser.Version checks. A new parser file that cannot be read or
+      #   parsed is reported as a validation error, not just a warning.
+      #   File names are passed in through the environment (not ${{ }}
+      #   interpolation) because they are controlled by the PR author.
+      # ----------------------------------------------------------------
+      - name: Validate parser files and changelog versions
+        id: read-yaml
+        if: steps.validate-paths.outcome == 'success' && steps.changed-files.outputs.has_new_asim_yaml_files == 'true'
+        uses: actions/github-script@b7fb2001b410c9390cbe9e2c7d5cab7eefb7b29c
+        env:
+          NEW_ASIM_YAML_FILES: ${{ steps.changed-files.outputs.new_asim_yaml_files }}
+        with:
+          script: |
+            const fs = require('fs');
+            const yaml = require('js-yaml');
+            const newYamlFiles = JSON.parse(process.env.NEW_ASIM_YAML_FILES);
+            const parserPattern = /^(Parsers\/ASim\w+)\/Parsers\/(.+)\.yaml$/;
+            const errors = [];
+
+            // Escapes every regular-expression metacharacter in `text`.
+            const escapeRegExp = (text) => String(text).replace(/[.*+?^$(){}|[\]\\]/g, '\\$&');
+
+            // True when `identifier` occurs in `text` as a whole identifier, so that
+            // e.g. 'ExcludeASimDnsFoo' or ASimDnsFooBar do not count as ASimDnsFoo.
+            const mentionsIdentifier = (text, identifier) =>
+              new RegExp(`(?<!\\w)${escapeRegExp(identifier)}(?!\\w)`).test(text);
+
+            // True when the changelog has a "Version <version>" entry. The lookahead
+            // stops "Version 0.1" from matching "Version 0.10" or "Version 0.1.1".
+            const changelogHasVersion = (changelog, version) =>
+              new RegExp(`Version ${escapeRegExp(version)}(?!\\.?\\d)`).test(changelog);
+
+            // Checks that Parser.Version of a parsed parser YAML is present and is
+            // referenced in its CHANGELOG; problems are appended to `errors`.
+            const checkVersionInChangelog = (parsed, yamlPath, changelogPath, missingMessage) => {
+              const rawVersion = parsed?.Parser?.Version;
+              if (rawVersion === undefined || rawVersion === null || rawVersion === '') {
+                errors.push(missingMessage);
+                return;
+              }
+              // An unquoted version such as 0.1 is parsed by YAML as a number.
+              const version = String(rawVersion);
+              core.info(`  Parser.Version: ${version}`);
+
+              let changelogContent;
+              try {
+                changelogContent = fs.readFileSync(changelogPath, 'utf8');
+              } catch (err) {
+                errors.push(`Could not read CHANGELOG file: ${changelogPath} - ${err.message}`);
+                return;
+              }
+              if (changelogHasVersion(changelogContent, version)) {
+                core.info(`  Version ${version} found in ${changelogPath}`);
+              } else {
+                errors.push(`Version ${version} from ${yamlPath} is not referenced in ${changelogPath}`);
+              }
+            };
+
+            // Derive the schema directory and names from the first new parser file.
+            // Each "flavor" describes one parser family (ASim / vim) together with
+            // its unifying parser, which is loaded once and reused below.
+            const sampleMatch = newYamlFiles[0]?.match(parserPattern);
+            const flavors = [];
+            if (sampleMatch) {
+              const schemaDir = sampleMatch[1];                      // e.g. Parsers/ASimAuthentication
+              const schemaName = schemaDir.split('/')[1];            // e.g. ASimAuthentication
+              const shortSchema = schemaName.replace(/^ASim/, '');   // e.g. Authentication
+
+              flavors.push(
+                {
+                  filePrefix: 'ASim',
+                  label: 'ASim',
+                  unifyingName: schemaName,
+                  builtInPrefix: `_ASim_${shortSchema}_`,
+                  namePrefix: `ASim${shortSchema}`
+                },
+                {
+                  filePrefix: 'vim',
+                  label: 'im',
+                  unifyingName: `im${shortSchema}`,
+                  builtInPrefix: `_Im_${shortSchema}_`,
+                  namePrefix: `vim${shortSchema}`
+                }
+              );
+
+              for (const flavor of flavors) {
+                flavor.yamlPath = `${schemaDir}/Parsers/${flavor.unifyingName}.yaml`;
+                flavor.changelogPath = `${schemaDir}/CHANGELOG/${flavor.unifyingName}.md`;
+                try {
+                  flavor.parsed = yaml.load(fs.readFileSync(flavor.yamlPath, 'utf8'));
+                  flavor.loaded = true;
+                } catch (err) {
+                  flavor.loaded = false;
+                  errors.push(`Could not read ${flavor.label} unifying parser: ${flavor.yamlPath} - ${err.message}`);
+                }
+              }
+            }
+
+            for (const filePath of newYamlFiles) {
+              const match = filePath.match(parserPattern);
+              if (!match) {
+                errors.push(`Unexpected parser file path: ${filePath}`);
+                continue;
+              }
+              const fileName = match[2]; // e.g. ASimAuthenticationTestProduct
+
+              let parsed;
+              try {
+                parsed = yaml.load(fs.readFileSync(filePath, 'utf8'));
+              } catch (err) {
+                // An unreadable or malformed parser file must fail validation.
+                errors.push(`Could not read/parse file: ${filePath} - ${err.message}`);
+                continue;
+              }
+              core.info(`Read file: ${filePath}`);
+
+              // Files that start with neither prefix only get the existence checks.
+              const flavor = flavors.find(f => fileName.startsWith(f.filePrefix));
+
+              // --- Validate EquivalentBuiltInParser ---
+              const equivalentParser = parsed?.EquivalentBuiltInParser;
+              if (typeof equivalentParser !== 'string' || equivalentParser === '') {
+                errors.push(`Missing EquivalentBuiltInParser in: ${filePath}`);
+              } else {
+                core.info(`  EquivalentBuiltInParser: ${equivalentParser}`);
+                if (flavor) {
+                  if (!equivalentParser.startsWith(flavor.builtInPrefix)) {
+                    errors.push(`EquivalentBuiltInParser in ${filePath} must follow the format \`${flavor.builtInPrefix}<Product>\`, but found: ${equivalentParser}`);
+                  }
+                  // Must be listed in the unifying parser's Parsers list
+                  if (flavor.loaded) {
+                    const listedParsers = Array.isArray(flavor.parsed?.Parsers) ? flavor.parsed.Parsers : [];
+                    if (listedParsers.includes(equivalentParser)) {
+                      core.info(`  ${equivalentParser} found in ${flavor.unifyingName}.yaml Parsers list`);
+                    } else {
+                      errors.push(`${equivalentParser} from ${filePath} is not listed in ${flavor.yamlPath} Parsers list`);
+                    }
+                  }
+                }
+              }
+
+              // --- Validate ParserName ---
+              const parserName = parsed?.ParserName;
+              if (typeof parserName !== 'string' || parserName === '') {
+                errors.push(`Missing ParserName in: ${filePath}`);
+              } else if (flavor) {
+                // The name must extend the prefix with a product part.
+                if (!parserName.startsWith(flavor.namePrefix) || parserName === flavor.namePrefix) {
+                  errors.push(`ParserName in ${filePath} must follow the format \`${flavor.namePrefix}<Product>\`, but found: ${parserName}`);
+                }
+                // Must be referenced in the unifying parser's ParserQuery
+                if (flavor.loaded) {
+                  const unifyingQuery = typeof flavor.parsed?.ParserQuery === 'string' ? flavor.parsed.ParserQuery : '';
+                  if (mentionsIdentifier(unifyingQuery, parserName)) {
+                    core.info(`  ParserName ${parserName} found in ${flavor.unifyingName}.yaml ParserQuery`);
+                  } else {
+                    errors.push(`ParserName ${parserName} from ${filePath} is not referenced in ${flavor.yamlPath} ParserQuery`);
+                  }
+                }
+              }
+
+              // --- Validate Parser.Version against CHANGELOG ---
+              // CHANGELOG path: Parsers/ASim<Schema>/CHANGELOG/<parser file name>.md
+              checkVersionInChangelog(
+                parsed,
+                filePath,
+                `${match[1]}/CHANGELOG/${fileName}.md`,
+                `Missing Parser.Version in: ${filePath}`
+              );
+            }
+
+            // Also validate the unifying (edited) parsers: ASim<Schema>.yaml and im<Schema>.yaml
+            // These are existing files modified in the PR, not new files.
+            for (const flavor of flavors) {
+              if (!flavor.loaded) continue; // the read failure was already reported above
+              core.info(`Unifying parser ${flavor.yamlPath}`);
+              checkVersionInChangelog(
+                flavor.parsed,
+                flavor.yamlPath,
+                flavor.changelogPath,
+                `Missing Parser.Version in unifying parser: ${flavor.yamlPath}`
+              );
+            }
+
+            if (errors.length > 0) {
+              // Log first so the errors are visible even if commenting is not permitted.
+              errors.forEach(e => core.error(e));
+              const body = `## ASIM Parser Validation Failed\n\nThe following validation errors were found:\n\n${errors.map(e => '- ' + e).join('\n')}\n\n---\n*This comment was generated automatically by the ASIM File and Parser Validation workflow.*`;
+              try {
+                await github.rest.issues.createComment({
+                  owner: context.repo.owner,
+                  repo: context.repo.repo,
+                  issue_number: context.payload.pull_request.number,
+                  body: body
+                });
+              } catch (err) {
+                core.warning(`Could not post validation comment on the PR: ${err.message}`);
+              }
+              core.setFailed('Parser validation failed. See PR comment for details.');
+            }
+
+      # ----------------------------------------------------------------
+      # Step 7: Check if the PR has the "SafeToRun" label.
+      #         Required for the LLM analysis step to run.
+      #         The label list comes from the event payload, so on a
+      #         synchronize event it still contains a SafeToRun label that
+      #         was applied before the new push. A push to a forked PR is
+      #         therefore never treated as safe, even if the label is present.
+      # ----------------------------------------------------------------
+      - name: Check for SafeToRun label
+        id: check-safe-to-run
+        if: steps.validate-paths.outcome == 'success' && steps.changed-files.outputs.has_new_asim_yaml_files == 'true'
+        uses: actions/github-script@b7fb2001b410c9390cbe9e2c7d5cab7eefb7b29c
+        with:
+          script: |
+            const pullRequest = context.payload.pull_request;
+            const labels = pullRequest.labels.map(l => l.name);
+            const isForkPush = pullRequest.head?.repo?.fork === true
+              && context.payload.action === 'synchronize';
+
+            if (!labels.includes('SafeToRun')) {
+              core.info('PR does not have the SafeToRun label. Skipping LLM analysis.');
+              core.setOutput('is_safe', 'false');
+            } else if (isForkPush) {
+              core.info('PR has the SafeToRun label, but this run was triggered by a new push to a forked PR. Skipping LLM analysis.');
+              core.setOutput('is_safe', 'false');
+            } else {
+              core.info('PR has the SafeToRun label.');
+              core.setOutput('is_safe', 'true');
+            }
+
+      # ----------------------------------------------------------------
+      # Remove the "SafeToRun" label on forked PRs.
+      #   Forked PRs run with a read-only GITHUB_TOKEN and cannot
+      #   be trusted to run LLM analysis. Removing the label on
+      #   synchronize events ensures the SafeToRun check above
+      #   blocks them from proceeding on subsequent pushes.
+      # ----------------------------------------------------------------
+      - name: Remove SafeToRun label on forked PRs
+        if: github.event.pull_request.head.repo.fork == true && github.event.action == 'synchronize'
+        uses: actions/github-script@b7fb2001b410c9390cbe9e2c7d5cab7eefb7b29c
+        with:
+          script: |
+            // Nothing to do when the label is not on the PR.
+            const pullRequest = context.payload.pull_request;
+            const hasSafeToRun = pullRequest.labels.some(label => label.name === 'SafeToRun');
+            if (!hasSafeToRun) {
+              core.info('Forked PR detected — SafeToRun label not present.');
+              return;
+            }
+
+            core.info('Forked PR detected — removing SafeToRun label.');
+            // NOTE(review): with a read-only GITHUB_TOKEN this call is presumably
+            // rejected, in which case the step is marked as failed — confirm.
+            try {
+              await github.rest.issues.removeLabel({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: pullRequest.number,
+                name: 'SafeToRun'
+              });
+            } catch (err) {
+              core.setFailed(`Failed to remove SafeToRun label: ${err.message}`);
+            }
+
+      # ----------------------------------------------------------------
+      # Step 8: LLM-based analysis of the new parser KQL queries.
+      #   Extracts ParserQuery from the new ASim-prefixed and
+      #   vim-prefixed YAML files. Sends two sequential LLM calls:
+      #     1. ASim parser: reviewed for KQL performance, operator
+      #        efficiency, and ASIM best practices
+      #     2. vim parser: reviewed for filter parameter placement,
+      #        efficiency, and completeness (includes ParserParams)
+      #   Results are posted as separate PR comments.
+      #   Only runs if the PR has the "SafeToRun" label.
+      #   Never runs for a synchronize event on a forked PR: the label in
+      #   that event's payload was applied before the new, unreviewed push.
+      #   An LLM call is only made for a parser whose ParserQuery was found.
+      #   When both reviews run, the ASim prompt and its answer are sent
+      #   along with the vim prompt so the model can avoid repeating itself.
+      # ----------------------------------------------------------------
+      - name: Analyze parsers for ASIM best practices
+        if: steps.check-safe-to-run.outputs.is_safe == 'true' && !(github.event.pull_request.head.repo.fork == true && github.event.action == 'synchronize')
+        uses: actions/github-script@b7fb2001b410c9390cbe9e2c7d5cab7eefb7b29c
+        env:
+          GITHUB_TOKEN: ${{ github.token }}
+          # PR-controlled file names are passed via the environment, not via
+          # ${{ }} interpolation into the script, to rule out script injection.
+          NEW_ASIM_YAML_FILES: ${{ steps.changed-files.outputs.new_asim_yaml_files }}
+        with:
+          script: |
+            const fs = require('fs');
+            const yaml = require('js-yaml');
+            const newAsimYamlFiles = JSON.parse(process.env.NEW_ASIM_YAML_FILES);
+            const parserPattern = /^Parsers\/ASim\w+\/Parsers\/(.+)\.yaml$/;
+            const MODELS_ENDPOINT = 'https://models.github.ai/inference/chat/completions';
+            const MODEL = 'openai/gpt-5';
+
+            // New parser files whose base name starts with `prefix` ('ASim' or 'vim').
+            const filesWithPrefix = (prefix) => newAsimYamlFiles.filter(f => {
+              const match = f.match(parserPattern);
+              return match && match[1].startsWith(prefix);
+            });
+
+            // Reads ParserQuery (and ParserParams) from the given files. Returns
+            // null when none has a ParserQuery; with several, the last one wins.
+            const loadParserQuery = (files) => {
+              let found = null;
+              for (const filePath of files) {
+                try {
+                  const parsed = yaml.load(fs.readFileSync(filePath, 'utf8'));
+                  if (parsed && parsed.ParserQuery) {
+                    found = {
+                      file: filePath,
+                      query: parsed.ParserQuery,
+                      params: Array.isArray(parsed.ParserParams) ? parsed.ParserParams : []
+                    };
+                  } else {
+                    core.warning(`No ParserQuery found in: ${filePath}`);
+                  }
+                } catch (err) {
+                  core.warning(`Could not read/parse file: ${filePath} - ${err.message}`);
+                }
+              }
+              return found;
+            };
+
+            // Sends a chat conversation to GitHub Models and returns the reply text.
+            // Throws when the HTTP call fails or the reply carries no content.
+            const callModel = async (messages) => {
+              const response = await fetch(MODELS_ENDPOINT, {
+                method: 'POST',
+                headers: {
+                  'Authorization': `Bearer ${process.env.GITHUB_TOKEN}`,
+                  'Content-Type': 'application/json'
+                },
+                body: JSON.stringify({ model: MODEL, messages })
+              });
+              if (!response.ok) {
+                const errorText = await response.text();
+                throw new Error(`${response.status} ${errorText}`);
+              }
+              const result = await response.json();
+              const content = result?.choices?.[0]?.message?.content;
+              if (typeof content !== 'string' || content.trim() === '') {
+                throw new Error('the response did not contain any message content');
+              }
+              return content;
+            };
+
+            // Posts a PR comment. Best effort: the review is also written to the
+            // log, so a failure to comment only produces a warning.
+            const postComment = async (body) => {
+              try {
+                await github.rest.issues.createComment({
+                  owner: context.repo.owner,
+                  repo: context.repo.repo,
+                  issue_number: context.payload.pull_request.number,
+                  body: body
+                });
+              } catch (err) {
+                core.warning(`Could not post review comment on the PR: ${err.message}`);
+              }
+            };
+
+            const asimParserQuery = loadParserQuery(filesWithPrefix('ASim'));
+            const vimParserQuery = loadParserQuery(filesWithPrefix('vim'));
+
+            if (!asimParserQuery && !vimParserQuery) {
+              core.info('No ParserQuery fields found in new YAML files. Skipping LLM analysis.');
+              return;
+            }
+
+            core.info(`ASim parser: ${asimParserQuery?.file || 'not found'}`);
+            core.info(`vim parser: ${vimParserQuery?.file || 'not found'}`);
+
+            // Messages of the ASim review; sent along with the vim review as context.
+            const conversation = [];
+
+            if (asimParserQuery) {
+              const prompt = `You are a KQL performance and efficiency expert reviewing \
+            a new ASIM parser for the Azure-Sentinel repository. Your job is to check \
+            for efficiency and performance of the KQL query. There is already a separate \
+            schema and data tester for ASIM correctness, so focus only on performance \
+            and best practices.
+
+            Here is the ParserQuery from the new ASim-prefixed YAML file:
+
+            ### ${asimParserQuery.file}
+            \`\`\`kql
+            ${asimParserQuery.query}
+            \`\`\`
+
+            Please review the KQL query for the following:
+
+            1. **Filter → Parse → Map pattern**: Verify the query follows the correct \
+            ASIM parsing flow. Filtering should happen early on native columns before \
+            any parsing. Parsing should occur next, followed by field mapping.
+
+            2. **Field mapping operators**: Check that \`project-rename\` is used for \
+            direct column-to-field mappings, and \`extend\` is used for calculated or \
+            normalized fields. Flag any misuse (e.g., using \`extend\` where \
+            \`project-rename\` would suffice).
+
+            3. **No \`project-away\`**: The query must NOT use \`project-away\` to remove \
+            unmapped columns. It should use \`project\` instead, as \`project-away\` does \
+            not protect the parser from schema changes in the source data.
+
+            4. **\`pack\` parameter**: If the query uses \`AdditionalFields\`, verify that \
+            a \`pack: bool = false\` parameter is included. This allows users to choose \
+            whether to populate \`AdditionalFields\` or return an empty dynamic, \
+            improving performance for users who do not need the extra information.
+
+            5. **Parsing operator efficiency**: Check that high-performance parsing \
+            operators are used (\`split\`, \`parse-kv\`, \`parse\`) and that regular \
+            expressions are avoided where simpler operators would work.
+
+            6. **General KQL performance**: Flag any other inefficient patterns such as \
+            unnecessary \`let\` statements, redundant filters, expensive joins, or \
+            operations that could be reordered for better performance.
+
+            **Output format:**
+
+            First, provide an overall **Readiness Rating** out of 10 (where 10 means \
+            production-ready with no issues).
+
+            Then, return your findings as a markdown table with the following columns:
+
+            | # | Priority | Issue | Suggestion |
+            |---|----------|-------|------------|
+
+            Where:
+            - **Priority** is one of: 🔴 High, 🟡 Medium, 🟢 Low
+            - **Issue** is a concise description of the problem found
+            - **Suggestion** is a specific, actionable fix
+
+            If no issues are found for a category, do not include a row for it. \
+            If the query has no issues at all, return the table with a single row \
+            stating "No issues found" and a rating of 10/10.`;
+
+              let analysis;
+              try {
+                analysis = await callModel([{ role: 'user', content: prompt }]);
+              } catch (err) {
+                core.setFailed(`LLM API call failed: ${err.message}`);
+                return;
+              }
+              core.info('LLM Analysis:\n' + analysis);
+
+              // Post the LLM analysis as a PR comment
+              await postComment(`## ASIM Parser KQL Review\n\n${analysis}\n\n---\n*This review was generated automatically by the ASIM File and Parser Validation workflow using GitHub Models.*`);
+
+              conversation.push(
+                { role: 'user', content: prompt },
+                { role: 'assistant', content: analysis }
+              );
+            }
+
+            // Analyze the vim-prefixed parser query, passing the previous
+            // conversation as context so the LLM has awareness of the ASim review.
+            if (vimParserQuery) {
+              // Format ParserParams for the prompt
+              const paramsTable = vimParserQuery.params
+                .map(p => `| ${p?.Name} | ${p?.Type} | ${p?.Default} |`)
+                .join('\n');
+
+              // Only refer to an earlier review when one is really part of the conversation.
+              const vimIntro = conversation.length > 0
+                ? `Now review the vim (parameter/filtering) version of \
+            the same ASIM parser. This parser adds filtering parameters to improve \
+            query efficiency by reducing the number of rows processed early in the \
+            query pipeline.
+
+            You have already reviewed the ASim (parameter-less) version above. \
+            Do NOT repeat issues already identified in that review. Focus only on \
+            the filtering logic specific to this vim parser.`
+                : `You are a KQL performance and efficiency expert reviewing the vim \
+            (parameter/filtering) version of a new ASIM parser for the Azure-Sentinel \
+            repository. This parser adds filtering parameters to improve \
+            query efficiency by reducing the number of rows processed early in the \
+            query pipeline.
+
+            Focus only on the filtering logic specific to this vim parser.`;
+
+              const vimPrompt = `${vimIntro}
+
+            Here are the filtering parameters defined in ParserParams:
+
+            | Name | Type | Default |
+            |------|------|---------|
+            ${paramsTable}
+
+            These are the ONLY filter parameters available. The query should use \
+            these parameters to filter rows as early as possible.
+
+            **Important:** Some filter parameters may not have a matching column \
+            in the source data. In that case, the parser will simply check \
+            \`array_length(<parameter>) == 0\` (or equivalent) without actually \
+            filtering any rows. This is correct and expected — do NOT flag \
+            these as issues. Only flag a parameter as unused if it is completely \
+            absent from the query.
+
+            Here is the ParserQuery from the vim-prefixed YAML file:
+
+            ### ${vimParserQuery.file}
+            \`\`\`kql
+            ${vimParserQuery.query}
+            \`\`\`
+
+            Please review:
+            1. **Parameter placement**: Are the filtering parameters applied as \
+            early as possible in the query? Filters should be placed before any \
+            parsing or field calculations to avoid unnecessary computation on rows \
+            that will be filtered out.
+            2. **Filter efficiency**: Are the parameter-based filters using native \
+            columns and indexed fields where possible?
+            3. **Redundant computation**: Are there any calculated fields or parsing \
+            operations that occur before the parameter filters, when they could be \
+            moved after?
+            4. **Parameter completeness**: Are the filtering parameters comprehensive \
+            enough to allow efficient querying for common use cases?
+
+            **Output format:**
+
+            First, provide an overall **Readiness Rating** out of 10.
+
+            Then, return findings as a markdown table:
+
+            | # | Priority | Issue | Suggestion |
+            |---|----------|-------|------------|
+
+            Where Priority is one of: 🔴 High, 🟡 Medium, 🟢 Low.
+            Only include issues specific to the filtering/parameter logic. \
+            If no issues are found, return a single row stating "No issues found" \
+            and a rating of 10/10.`;
+
+              let vimAnalysis;
+              try {
+                vimAnalysis = await callModel([...conversation, { role: 'user', content: vimPrompt }]);
+              } catch (err) {
+                core.warning(`LLM API call for vim parser failed: ${err.message}`);
+                return;
+              }
+              core.info('vim Parser LLM Analysis:\n' + vimAnalysis);
+
+              await postComment(`## ASIM vim Parser KQL Review\n\n${vimAnalysis}\n\n---\n*This review was generated automatically by the ASIM File and Parser Validation workflow using GitHub Models.*`);
+            }