diff --git a/.github/workflows/actions_labeller.yml b/.github/workflows/actions_labeller.yml index 37da2603e..929426d5c 100644 --- a/.github/workflows/actions_labeller.yml +++ b/.github/workflows/actions_labeller.yml @@ -9,11 +9,11 @@ jobs: if: ${{ contains(github.event.discussion.category.name, 'Actions') }} runs-on: ubuntu-latest env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} # Global authentication for gh CLI + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} steps: - name: Get discussion body html id: get_discussion_body_html - env: + env: OWNER: ${{ github.repository_owner }} REPO: ${{ github.event.repository.name }} DISCUSSION_NUMBER: ${{ github.event.discussion.number }} @@ -30,9 +30,8 @@ jobs: echo 'DISCUSSION_BODY_HTML='$(jq -r '.data.repository.discussion.bodyHTML' discussion_data.json) >> $GITHUB_ENV echo 'DISCUSSION_ID='$(jq -r '.data.repository.discussion.id' discussion_data.json) >> $GITHUB_ENV - - - run: npm install jsdom + - run: npm install jsdom dompurify - name: Extract Title and Body Text id: extract_text @@ -48,7 +47,6 @@ jobs: const { DISCUSSION_BODY_HTML } = process.env; const fragment = JSDOM.fragment(DISCUSSION_BODY_HTML); let body = ''; - // Find all

<h3> and <p>

pairs const h3s = Array.from(fragment.querySelectorAll('h3')); h3s.forEach(h3 => { const heading = h3.textContent.trim(); @@ -59,7 +57,6 @@ jobs: body = p.textContent.trim(); } }); - // Remove leading/trailing quotes from body body = body.replace(/^['\"]+|['\"]+$/g, ''); const title = process.env.DISCUSSION_TITLE || ''; core.info(`Extracted title: ${title}`); @@ -69,6 +66,8 @@ jobs: - name: Extract Primary and Secondary Topic Areas id: extract_topics uses: actions/github-script@v6 + env: + DISCUSSION_BODY_HTML: ${{ env.DISCUSSION_BODY_HTML }} with: result-encoding: string script: | @@ -78,11 +77,9 @@ jobs: const fragment = JSDOM.fragment(DISCUSSION_BODY_HTML); let primary = ''; let secondary = ''; - // Find all

<h3> and <p>

pairs (form headings as h3, answers as p) const h3s = Array.from(fragment.querySelectorAll('h3')); h3s.forEach(h3 => { const heading = h3.textContent.trim(); - // Look for the next
<p>
sibling after each heading let p = h3.nextElementSibling; while (p && p.tagName !== 'P') p = p.nextElementSibling; if (!p) return; @@ -100,78 +97,87 @@ jobs: - name: Auto-label by keyword search id: auto_label_keywords uses: actions/github-script@v6 + env: + EXTRACT_TEXT_RESULT: ${{ steps.extract_text.outputs.result }} with: result-encoding: string script: | - // Keyphrase to label mapping + const jsdom = require('jsdom'); + const { JSDOM } = jsdom; + const createDOMPurify = require('dompurify'); + const window = (new JSDOM('')).window; + const DOMPurify = createDOMPurify(window); + const labelMap = [ { label: 'Workflow Deployment', keywords: [ - "deployment error", - "publish artifact", - "release failure", - "deployment target", - "github pages", - "deployment issue", - "release workflow", - "target environment" - ] + "deployment error", + "publish artifact", + "release failure", + "deployment target", + "github pages", + "deployment issue", + "release workflow", + "target environment" + ] }, { label: 'Workflow Configuration', keywords: [ - "yaml syntax", - "job dependency", - "setup error", - "workflow file", - "configuration issue", - "matrix strategy", - "define env", - "secret management", - "environment setup", - "config job" - ] + "yaml syntax", + "job dependency", + "setup error", + "workflow file", + "configuration issue", + "matrix strategy", + "define env", + "secret management", + "environment setup", + "config job" + ] }, { label: 'Schedule & Cron Jobs', keywords: [ - "cron job", - "scheduled workflow", - "timing issue", - "delay trigger", - "timezone error", - "periodic run", - "recurring schedule", - "interval workflow", - "scheduled trigger", - "cron expression" - ] + "cron job", + "scheduled workflow", + "timing issue", + "delay trigger", + "timezone error", + "periodic run", + "recurring schedule", + "interval workflow", + "scheduled trigger", + "cron expression" + ] }, { label: 'Metrics & Insights', keywords: [ - "usage metrics", - "performance 
trend", - "analytics graph", - "stats dashboard", - "timeseries graph", - "insight report", - "metric tracking", - "workflow analytics", - "performance metric", - "statistics report" - ] + "usage metrics", + "performance trend", + "analytics graph", + "stats dashboard", + "timeseries graph", + "insight report", + "metric tracking", + "workflow analytics", + "performance metric", + "statistics report" + ] } ]; const miscLabel = 'Misc'; let title = ''; let body = ''; try { - const parsed = JSON.parse(`${{ steps.extract_text.outputs.result }}`); - title = parsed.title || ''; - body = parsed.body || ''; - } catch (e) {} + const parsed = JSON.parse(process.env.EXTRACT_TEXT_RESULT); + title = DOMPurify.sanitize(parsed.title || '', { ALLOWED_TAGS: [], ALLOWED_ATTR: [] }).trim(); + body = DOMPurify.sanitize(parsed.body || '', { ALLOWED_TAGS: [], ALLOWED_ATTR: [] }).trim(); + } catch (e) { + core.error('Failed to parse or sanitize discussion text: ' + e.message); + } const text = (title + ' ' + body).toLowerCase(); let foundLabel = miscLabel; core.info(`Auto-label debug: text to match: '${text}'`); @@ -188,6 +194,7 @@ jobs: } core.info(`Auto-label debug: selected label: '${foundLabel}'`); return foundLabel; + - name: Fetch label ID for primary topic id: fetch_primary_label_id env: @@ -239,6 +246,7 @@ jobs: SECONDARY_LABEL_ID=$(jq -r '.data.repository.labels.edges[0]?.node?.id // empty' secondary_label_data.json) echo "SECONDARY_LABEL_ID=$SECONDARY_LABEL_ID" >> $GITHUB_ENV + - name: Fetch label ID for auto-label id: fetch_auto_label_id env: