Skip to content

Commit ff60339

Browse files
committed
Refactor: Move to data/ directory structure with R2 backup
Changes:
- Reorganize file structure: data/sfs_json/ and data/md-markers/
- Add Cloudflare R2 backup for JSON files (redundancy)
- Make git commits configurable (enable_git_commit input)
- Add smart fallback in html-export (git first, R2 as backup)
- Remove gitignore exceptions for cleaner config

This provides better organization, permanent R2 backup, and flexibility to disable git commits when only R2 storage is needed.
1 parent 309a75d commit ff60339

4 files changed

Lines changed: 95 additions & 16 deletions

File tree

.github/workflows/fetch-sfs-workflow.yml

Lines changed: 50 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,16 @@ on:
1111
required: false
1212
default: '30'
1313
type: string
14+
enable_git_commit:
15+
description: 'Aktivera git commits av markdown-filer'
16+
required: false
17+
default: true
18+
type: boolean
19+
branch_name:
20+
description: 'Branch namn att använda (krävs om enable_git_commit är true)'
21+
required: false
22+
default: 'workflow-artifact-data'
23+
type: string
1424

1525
permissions:
1626
contents: write
@@ -38,29 +48,47 @@ jobs:
3848
3949
- name: Fetch JSON from beta.rkrattsbaser.gov.se
4050
run: |
41-
python downloaders/fetch_new_sfs_docs.py --days ${{ inputs.days || '1' }} --output sfs_json
51+
python downloaders/fetch_new_sfs_docs.py --days ${{ inputs.days || '1' }} --output data/sfs_json
4252
env:
4353
PYTHONPATH: ${{ github.workspace }}
4454

4555
- name: Process JSON files to Markdown files with Selex tags
4656
run: |
47-
python sfs_processor.py --input sfs_json --output output/md --formats md-markers
57+
python sfs_processor.py --input data/sfs_json --output data/md-markers --formats md-markers
4858
env:
4959
PYTHONPATH: ${{ github.workspace }}
5060

61+
- name: Upload JSON source files to Cloudflare R2 for permanent storage
62+
run: |
63+
aws configure set aws_access_key_id ${{ secrets.CLOUDFLARE_R2_ACCESS_KEY_ID }}
64+
aws configure set aws_secret_access_key ${{ secrets.CLOUDFLARE_R2_SECRET_ACCESS_KEY }}
65+
aws configure set region us-east-1
66+
aws configure set output json
67+
68+
# Upload all JSON files to R2 for backup
69+
aws s3 sync data/sfs_json/ s3://${{ secrets.CLOUDFLARE_R2_BUCKET_NAME }}/sfs_json/ \
70+
--endpoint-url https://${{ secrets.CLOUDFLARE_R2_ACCOUNT_ID }}.r2.cloudflarestorage.com \
71+
--content-type "application/json" \
72+
--exclude "*.md" \
73+
--include "*.json"
74+
env:
75+
AWS_DEFAULT_REGION: us-east-1
76+
5177
- name: Configure Git
78+
if: ${{ github.event_name != 'workflow_dispatch' || inputs.enable_git_commit }}  # boolean input: "!= 'false'" was always true; non-dispatch runs still commit
5279
run: |
5380
git config --local user.email "action@github.com"
5481
git config --local user.name "GitHub Action"
5582
5683
- name: Commit and push changes
84+
if: ${{ github.event_name != 'workflow_dispatch' || inputs.enable_git_commit }}  # must match the Configure Git condition so both steps are skipped together
5785
id: commit_changes
5886
run: |
5987
# Get current branch
6088
CURRENT_BRANCH=$(git rev-parse --abbrev-ref HEAD)
61-
89+
6290
# Create a new branch for commits if we have changes
63-
git add output/md/ sfs_json/
91+
git add data/md-markers/ data/sfs_json/
6492
if git diff --staged --quiet; then
6593
echo "Inga nya filer att committa"
6694
echo "has_changes=false" >> $GITHUB_OUTPUT
@@ -71,8 +99,14 @@ jobs:
7199
72100
# Create and switch to the new branch
73101
git checkout -b "$COMMIT_BRANCH"
74-
75-
git commit -m "Automatisk uppdatering av SFS-författningar $(date +'%Y-%m-%d')"
102+
103+
# Commit with multi-line message
104+
git commit -m "Automatisk uppdatering av SFS-författningar $(date +'%Y-%m-%d')" \
105+
-m "" \
106+
-m "Inkluderar:" \
107+
-m "- Käll-JSON (data/sfs_json/)" \
108+
-m "- Markdown med selex-taggar (data/md-markers/)" \
109+
-m "- Backup till Cloudflare R2"
76110
echo "has_changes=true" >> $GITHUB_OUTPUT
77111
echo "commit_branch=${COMMIT_BRANCH}" >> $GITHUB_OUTPUT
78112
@@ -113,16 +147,24 @@ jobs:
113147
fi
114148
115149
- name: Trigger HTML export workflow
116-
if: steps.commit_changes.outputs.has_changes == 'true' && steps.commit_changes.outputs.push_success == 'true'
150+
if: ${{ (github.event_name == 'workflow_dispatch' && !inputs.enable_git_commit) || (steps.commit_changes.outputs.has_changes == 'true' && steps.commit_changes.outputs.push_success == 'true') }}  # first clause: commits disabled via the boolean input (R2-only run); second: a commit was pushed
117151
uses: actions/github-script@v6
118152
with:
119153
github-token: ${{ secrets.GITHUB_TOKEN }}
120154
script: |
155+
const enableGitCommit = '${{ inputs.enable_git_commit }}' !== 'false';
156+
const sourceRef = enableGitCommit
157+
? '${{ steps.commit_changes.outputs.commit_branch }}'
158+
: '${{ inputs.branch_name }}';
159+
121160
github.rest.actions.createWorkflowDispatch({
122161
owner: context.repo.owner,
123162
repo: context.repo.repo,
124163
workflow_id: 'html-export-workflow.yml',
125-
ref: context.ref
164+
ref: sourceRef || context.ref,
165+
inputs: {
166+
source_ref: sourceRef || 'main'
167+
}
126168
})
127169
128170
- name: Report push failure

.github/workflows/html-export-workflow.yml

Lines changed: 36 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,20 +45,52 @@ jobs:
4545
python -m pip install --upgrade pip
4646
pip install -r requirements.txt
4747
48+
- name: Get JSON source files (from git or R2)
49+
run: |
50+
# Try to use JSON files from git first
51+
if [ -d "data/sfs_json" ] && [ -n "$(ls -A data/sfs_json 2>/dev/null)" ]; then
52+
echo "✅ Found $(find data/sfs_json -name '*.json' | wc -l) JSON files in git"
53+
echo "Using JSON files from git checkout"
54+
else
55+
echo "⚠️ No JSON files in git, downloading from Cloudflare R2..."
56+
57+
# Configure AWS CLI for R2
58+
aws configure set aws_access_key_id ${{ secrets.CLOUDFLARE_R2_ACCESS_KEY_ID }}
59+
aws configure set aws_secret_access_key ${{ secrets.CLOUDFLARE_R2_SECRET_ACCESS_KEY }}
60+
aws configure set region us-east-1
61+
aws configure set output json
62+
63+
# Download all JSON files from R2
64+
mkdir -p data/sfs_json
65+
aws s3 sync s3://${{ secrets.CLOUDFLARE_R2_BUCKET_NAME }}/sfs_json/ data/sfs_json/ \
66+
--endpoint-url https://${{ secrets.CLOUDFLARE_R2_ACCOUNT_ID }}.r2.cloudflarestorage.com \
67+
--exclude "*" \
68+
--include "*.json"
69+
70+
# Verify download
71+
if [ ! -d "data/sfs_json" ] || [ -z "$(ls -A data/sfs_json)" ]; then
72+
echo "::error::Failed to download JSON files from R2"
73+
exit 1
74+
fi
75+
echo "✅ Downloaded $(find data/sfs_json -name '*.json' | wc -l) JSON files from R2"
76+
fi
77+
env:
78+
AWS_DEFAULT_REGION: us-east-1
79+
4880
- name: Generate HTML export
4981
run: |
5082
if [ -n "${{ inputs.filter }}" ]; then
51-
python sfs_processor.py --input sfs_json --output output/html --formats html --filter "${{ inputs.filter }}"
83+
python sfs_processor.py --input data/sfs_json --output output/html --formats html --filter "${{ inputs.filter }}"
5284
else
53-
python sfs_processor.py --input sfs_json --output output/html --formats html
85+
python sfs_processor.py --input data/sfs_json --output output/html --formats html
5486
fi
5587
env:
5688
PYTHONPATH: ${{ github.workspace }}
5789

5890
- name: Regenerate index pages for HTML export
5991
run: |
60-
python exporters/html/populate_index_pages.py --input sfs_json --output index.html --limit 30
61-
python exporters/html/populate_index_pages.py --input sfs_json --output latest.html --limit 10
92+
python exporters/html/populate_index_pages.py --input data/sfs_json --output index.html --limit 30
93+
python exporters/html/populate_index_pages.py --input data/sfs_json --output latest.html --limit 10
6294
env:
6395
PYTHONPATH: ${{ github.workspace }}
6496

.github/workflows/upcoming-changes-workflow.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ jobs:
3232
- name: Run upcoming changes script
3333
run: |
3434
# Kör scriptet på markdown-katalogen för att uppdatera kommande.yaml
35-
python temporal/upcoming_changes.py output/md/
35+
python temporal/upcoming_changes.py data/md-markers/
3636
env:
3737
PYTHONPATH: ${{ github.workspace }}
3838

@@ -42,7 +42,7 @@ jobs:
4242
# Detta gör att vi kan hämta ikapp missade dagar om jobbet misslyckats
4343
eval $(python temporal/get_temporal_date_range.py)
4444
echo "Bearbetar temporal commits från $FROM_DATE till $TO_DATE"
45-
python scripts/temporal_commits_batch.py output/md/ --from-date "$FROM_DATE" --to-date "$TO_DATE" --verbose
45+
python scripts/temporal_commits_batch.py data/md-markers/ --from-date "$FROM_DATE" --to-date "$TO_DATE" --verbose
4646
env:
4747
PYTHONPATH: ${{ github.workspace }}
4848
GIT_GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}

.gitignore

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,11 @@ ___pycache__
1212
htmlcov/
1313
.pytest_cache/
1414

15-
# Ignore output files
15+
# Ignore output files (generated, not committed)
1616
logs/
17-
output/
17+
output/
18+
19+
# Data directory contains source files committed to git:
20+
# - data/sfs_json/ = Käll-JSON från API
21+
# - data/md-markers/ = Markdown med selex-taggar
22+
# (nothing to ignore here, committed to git + backed up to R2)

0 commit comments

Comments
 (0)