diff --git a/.github/workflows/fetch-sfs-workflow.yml b/.github/workflows/fetch-sfs-workflow.yml
index 63cfefd..dcbfad9 100644
--- a/.github/workflows/fetch-sfs-workflow.yml
+++ b/.github/workflows/fetch-sfs-workflow.yml
@@ -11,6 +11,16 @@ on:
         required: false
         default: '30'
         type: string
+      enable_git_commit:
+        description: 'Aktivera git commits av markdown-filer'
+        required: false
+        default: true
+        type: boolean
+      branch_name:
+        description: 'Branch namn att använda (krävs om enable_git_commit är true)'
+        required: false
+        default: 'workflow-artifact-data'
+        type: string
 
 permissions:
   contents: write
@@ -38,41 +48,92 @@ jobs:
 
       - name: Fetch JSON from beta.rkrattsbaser.gov.se
         run: |
-          python downloaders/fetch_new_sfs_docs.py --days ${{ inputs.days || '1' }} --output sfs_json
+          python downloaders/fetch_new_sfs_docs.py --days ${{ inputs.days || '1' }} --output data/sfs_json
         env:
           PYTHONPATH: ${{ github.workspace }}
 
       - name: Process JSON files to Markdown files with Selex tags
         run: |
-          python sfs_processor.py --input sfs_json --output output/md --formats md-markers
+          python sfs_processor.py --input data/sfs_json --output data/md-markers --formats md-markers
         env:
           PYTHONPATH: ${{ github.workspace }}
 
+      - name: Upload JSON source files to Cloudflare R2 for permanent storage
+        run: |
+          # Upload only the JSON files to R2 for backup. Credentials and
+          # bucket details are read from the environment (see env below).
+          aws s3 sync data/sfs_json/ "s3://${R2_BUCKET_NAME}/sfs_json/" \
+            --endpoint-url "https://${R2_ACCOUNT_ID}.r2.cloudflarestorage.com" \
+            --content-type "application/json" \
+            --exclude "*" \
+            --include "*.json"
+        env:
+          # The AWS CLI picks these up directly, so no credentials are
+          # written to disk or interpolated into the shell script.
+          AWS_ACCESS_KEY_ID: ${{ secrets.CLOUDFLARE_R2_ACCESS_KEY_ID }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.CLOUDFLARE_R2_SECRET_ACCESS_KEY }}
+          AWS_DEFAULT_REGION: us-east-1
+          R2_BUCKET_NAME: ${{ secrets.CLOUDFLARE_R2_BUCKET_NAME }}
+          R2_ACCOUNT_ID: ${{ secrets.CLOUDFLARE_R2_ACCOUNT_ID }}
+
       - name: Configure Git
+        # The boolean input must not be compared with the string 'false';
+        # runs that are not manual dispatches have no inputs and always commit.
+        if: github.event_name != 'workflow_dispatch' || inputs.enable_git_commit
         run: |
           git config --local user.email "action@github.com"
           git config --local user.name "GitHub Action"
 
       - name: Commit and push changes
+        if: github.event_name != 'workflow_dispatch' || inputs.enable_git_commit
         id: commit_changes
         run: |
           # Get current branch
           CURRENT_BRANCH=$(git rev-parse --abbrev-ref HEAD)
-          
+
           # Create a new branch for commits if we have changes
-          git add output/md/ sfs_json/
+          git add data/md-markers/ data/sfs_json/
           if git diff --staged --quiet; then
             echo "Inga nya filer att committa"
             echo "has_changes=false" >> $GITHUB_OUTPUT
           else
-            # Create a unique branch name for the commits
-            TIMESTAMP=$(date +'%Y%m%d_%H%M%S')
-            COMMIT_BRANCH="sfs_updates_${TIMESTAMP}"
-            
-            # Create and switch to the new branch
-            git checkout -b "$COMMIT_BRANCH"
-            
-            git commit -m "Automatisk uppdatering av SFS-författningar $(date +'%Y-%m-%d')"
+            # Use fixed branch name from input (default: workflow-artifact-data).
+            # Inputs are empty when the run is not a manual dispatch, so fall back.
+            COMMIT_BRANCH="${{ inputs.branch_name || 'workflow-artifact-data' }}"
+
+            # Check if branch exists remotely. --exit-code makes ls-remote fail
+            # when the exact ref is missing, avoiding grep's substring matches.
+            if git ls-remote --exit-code --heads origin "refs/heads/$COMMIT_BRANCH" >/dev/null; then
+              echo "📥 Branch '$COMMIT_BRANCH' exists, checking out and merging with main..."
+              # Branch exists - fetch and checkout
+              git fetch origin "$COMMIT_BRANCH"
+              git checkout "$COMMIT_BRANCH"
+              BRANCH_EXISTED=true
+            else
+              echo "🆕 Branch '$COMMIT_BRANCH' doesn't exist, creating new..."
+              # Branch doesn't exist - create new
+              git checkout -b "$COMMIT_BRANCH"
+              BRANCH_EXISTED=false
+            fi
+
+            # Commit with multi-line message
+            git commit -m "Automatisk uppdatering av SFS-författningar $(date +'%Y-%m-%d')" \
+              -m "" \
+              -m "Inkluderar:" \
+              -m "- Käll-JSON (data/sfs_json/)" \
+              -m "- Markdown med selex-taggar (data/md-markers/)" \
+              -m "- Backup till Cloudflare R2"
+
+            # Merge latest main into the fixed branch to keep it updated. This
+            # runs after the commit because git refuses to merge while changes
+            # are staged. Best effort: a failed merge is reported and rolled
+            # back instead of being silently ignored; conflicts keep our data.
+            if [ "$BRANCH_EXISTED" = true ]; then
+              if ! git merge origin/main --no-edit --strategy-option ours; then
+                echo "::warning::Could not merge origin/main into '$COMMIT_BRANCH'"
+                git merge --abort 2>/dev/null || true
+              fi
+            fi
 
             echo "has_changes=true" >> $GITHUB_OUTPUT
             echo "commit_branch=${COMMIT_BRANCH}" >> $GITHUB_OUTPUT
@@ -105,24 +166,56 @@ jobs:
               echo "::error::Git push misslyckades: ${PUSH_ERROR}"
               exit 1
             fi
-            
+
+            # Manage PR lifecycle: close old, create new
+            # NOTE(review): gh needs GH_TOKEN in this step's env - confirm it is set.
+            echo "🔄 Hanterar PR för branch '$COMMIT_BRANCH'..."
+
+            # Close existing PR for this branch if it exists
+            EXISTING_PR=$(gh pr list --head "$COMMIT_BRANCH" --json number --jq '.[0].number' 2>/dev/null || echo "")
+            if [ -n "$EXISTING_PR" ]; then
+              echo "Stänger befintlig PR #$EXISTING_PR"
+              gh pr close "$EXISTING_PR" --comment "🔄 Stänger för att öppna ny PR med uppdaterad data från $(date +'%Y-%m-%d %H:%M')"
+            fi
+
+            # Create new PR
+            echo "📝 Skapar ny PR..."
+            gh pr create --base main --head "$COMMIT_BRANCH" \
+              --title "SFS Data Update - $(date +'%Y-%m-%d %H:%M')" \
+              --body "Automatisk uppdatering av SFS-data. Inkluderar Käll-JSON, Markdown med selex-taggar och backup till R2. Workflow: fetch-sfs-workflow, Branch: $COMMIT_BRANCH, Run: ${{ github.run_id }}" \
+              || echo "⚠️ PR may already exist"
+
             # Switch back to original branch
             git checkout "$CURRENT_BRANCH"
-            
-            echo "Commits gjorda i branch '${COMMIT_BRANCH}' istället för '${CURRENT_BRANCH}'"
+
+            echo "✅ Commits gjorda i branch '${COMMIT_BRANCH}'"
           fi
 
       - name: Trigger HTML export workflow
-        if: steps.commit_changes.outputs.has_changes == 'true' && steps.commit_changes.outputs.push_success == 'true'
+        # Run after a successful push, or when commits were switched off in a
+        # manual dispatch. The boolean input is tested directly, not as a string.
+        if: (steps.commit_changes.outputs.has_changes == 'true' && steps.commit_changes.outputs.push_success == 'true') || (github.event_name == 'workflow_dispatch' && !inputs.enable_git_commit)
+        env:
+          # Handed over via env instead of being interpolated into the script.
+          SOURCE_REF: ${{ inputs.branch_name || 'workflow-artifact-data' }}
         uses: actions/github-script@v6
         with:
           github-token: ${{ secrets.GITHUB_TOKEN }}
           script: |
+            // Always use the fixed branch name
+            const sourceRef = process.env.SOURCE_REF;
+
+            // Await the call so a rejected dispatch fails this step.
+            // NOTE(review): html-export-workflow.yml must declare a
+            // `source_ref` input for this request to be accepted - confirm.
-            github.rest.actions.createWorkflowDispatch({
+            await github.rest.actions.createWorkflowDispatch({
               owner: context.repo.owner,
               repo: context.repo.repo,
               workflow_id: 'html-export-workflow.yml',
-              ref: context.ref
+              ref: sourceRef,
+              inputs: {
+                source_ref: sourceRef
+              }
             })
 
       - name: Report push failure
diff --git a/.github/workflows/html-export-workflow.yml b/.github/workflows/html-export-workflow.yml
index 4629cfb..7540712 100644
--- a/.github/workflows/html-export-workflow.yml
+++ b/.github/workflows/html-export-workflow.yml
@@ -45,20 +45,53 @@ jobs:
           python -m pip install --upgrade pip
           pip install -r requirements.txt
 
+      - name: Get JSON source files (from git or R2)
+        run: |
+          # Try to use JSON files from git first
+          if [ -d "data/sfs_json" ] && [ -n "$(ls -A data/sfs_json 2>/dev/null)" ]; then
+            echo "✅ Found $(find data/sfs_json -name '*.json' | wc -l) JSON files in git"
+            echo "Using JSON files from git checkout"
+          else
+            echo "⚠️ No JSON files in git, downloading from Cloudflare R2..."
+
+            # Download all JSON files from R2. Credentials and bucket details
+            # are read from the environment (see env below).
+            mkdir -p data/sfs_json
+            aws s3 sync "s3://${R2_BUCKET_NAME}/sfs_json/" data/sfs_json/ \
+              --endpoint-url "https://${R2_ACCOUNT_ID}.r2.cloudflarestorage.com" \
+              --exclude "*" \
+              --include "*.json"
+
+            # Verify download
+            if [ ! -d "data/sfs_json" ] || [ -z "$(ls -A data/sfs_json)" ]; then
+              echo "::error::Failed to download JSON files from R2"
+              exit 1
+            fi
+            echo "✅ Downloaded $(find data/sfs_json -name '*.json' | wc -l) JSON files from R2"
+          fi
+        env:
+          # The AWS CLI picks these up directly, so no credentials are
+          # written to disk or interpolated into the shell script.
+          AWS_ACCESS_KEY_ID: ${{ secrets.CLOUDFLARE_R2_ACCESS_KEY_ID }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.CLOUDFLARE_R2_SECRET_ACCESS_KEY }}
+          AWS_DEFAULT_REGION: us-east-1
+          R2_BUCKET_NAME: ${{ secrets.CLOUDFLARE_R2_BUCKET_NAME }}
+          R2_ACCOUNT_ID: ${{ secrets.CLOUDFLARE_R2_ACCOUNT_ID }}
+
       - name: Generate HTML export
         run: |
           if [ -n "${{ inputs.filter }}" ]; then
-            python sfs_processor.py --input sfs_json --output output/html --formats html --filter "${{ inputs.filter }}"
+            python sfs_processor.py --input data/sfs_json --output output/html --formats html --filter "${{ inputs.filter }}"
           else
-            python sfs_processor.py --input sfs_json --output output/html --formats html
+            python sfs_processor.py --input data/sfs_json --output output/html --formats html
           fi
         env:
           PYTHONPATH: ${{ github.workspace }}
 
       - name: Regenerate index pages for HTML export
         run: |
-          python exporters/html/populate_index_pages.py --input sfs_json --output index.html --limit 30
-          python exporters/html/populate_index_pages.py --input sfs_json --output latest.html --limit 10
+          python exporters/html/populate_index_pages.py --input data/sfs_json --output index.html --limit 30
+          python exporters/html/populate_index_pages.py --input data/sfs_json --output latest.html --limit 10
         env:
           PYTHONPATH: ${{ github.workspace }}
 
diff --git a/.github/workflows/upcoming-changes-workflow.yml b/.github/workflows/upcoming-changes-workflow.yml
index 530a8e1..375fa1b 100644
--- a/.github/workflows/upcoming-changes-workflow.yml
+++ b/.github/workflows/upcoming-changes-workflow.yml
@@ -32,7 +32,7 @@ jobs:
       - name: Run upcoming changes script
         run: |
           # Kör scriptet på markdown-katalogen för att uppdatera kommande.yaml
-          python temporal/upcoming_changes.py output/md/
+          python temporal/upcoming_changes.py data/md-markers/
         env:
           PYTHONPATH: ${{ github.workspace }}
 
@@ -42,7 +42,7 @@ jobs:
           # Detta gör att vi kan hämta ikapp missade dagar om jobbet misslyckats
           eval $(python temporal/get_temporal_date_range.py)
           echo "Bearbetar temporal commits från $FROM_DATE till $TO_DATE"
-          python scripts/temporal_commits_batch.py output/md/ --from-date "$FROM_DATE" --to-date "$TO_DATE" --verbose
+          python scripts/temporal_commits_batch.py data/md-markers/ --from-date "$FROM_DATE" --to-date "$TO_DATE" --verbose
         env:
           PYTHONPATH: ${{ github.workspace }}
           GIT_GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
@@ -54,13 +54,31 @@ jobs:
 
       - name: Commit and push kommande.yaml updates
         run: |
-          # Lägg till den uppdaterade kommande.yaml filen
-          git add output/kommande.yaml
+          # Checkout the same fixed branch as fetch-sfs uses
+          WORKFLOW_BRANCH="workflow-artifact-data"  # Match default from fetch-sfs
+
+          # Fetch and checkout the fixed branch. --exit-code makes ls-remote
+          # fail when the exact ref is missing, so there is no grep substring
+          # match and no fetch error that needs to be suppressed.
+          echo "📥 Checking out fixed branch '$WORKFLOW_BRANCH'..."
+          if git ls-remote --exit-code --heads origin "refs/heads/$WORKFLOW_BRANCH" >/dev/null; then
+            git fetch origin "$WORKFLOW_BRANCH"
+            git checkout "$WORKFLOW_BRANCH"
+            echo "✅ Checked out existing branch '$WORKFLOW_BRANCH'"
+          else
+            git checkout -b "$WORKFLOW_BRANCH"
+            echo "🆕 Created new branch '$WORKFLOW_BRANCH'"
+          fi
+
+          # Make changes
+          # output/ is listed in .gitignore; -f stages the file even when it
+          # is not tracked yet.
+          git add -f output/kommande.yaml
           if git diff --staged --quiet; then
             echo "Inga ändringar i kommande.yaml"
           else
             git commit -m "Uppdatera kommande ändringar - $(date +'%Y-%m-%d')"
-            git push
-            echo "✅ Kommande.yaml har uppdaterats och pushats"
+            git push origin "$WORKFLOW_BRANCH"
+            echo "✅ Kommande.yaml uppdaterat på branch $WORKFLOW_BRANCH"
           fi
 
diff --git a/.gitignore b/.gitignore
index c66e179..6c85be1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,6 +12,11 @@ ___pycache__
 htmlcov/
 .pytest_cache/
 
-# Ignore output files
+# Ignore output files (generated, not committed)
 logs/
-output/
\ No newline at end of file
+output/
+
+# Data directory contains source files committed to git:
+# - data/sfs_json/ = source JSON from the API
+# - data/md-markers/ = Markdown with selex tags
+# (nothing to ignore here, committed to git + backed up to R2)