Skip to content

Commit bd64c16

Browse files
authored
Add script for uploading JSON files to Cloudflare R2 (#42)
## Summary

Adds a new script `scripts/upload_json_to_r2.py` for manually uploading SFS JSON files to the Cloudflare R2 `sfs-json` bucket.

## Changes

- New Python script for R2 upload operations
- Environment variable validation for R2 credentials
- AWS CLI configuration and management
- Bulk JSON file upload with proper headers (content-type, cache-control)
- Upload summary generation with metadata

## Usage

```bash
# Using default directory (../sfs-jsondata)
python scripts/upload_json_to_r2.py

# Using custom directory
python scripts/upload_json_to_r2.py --json-dir data/sfs_json
```

## Requirements

- AWS CLI installed (`pip install awscli`)
- R2 credentials configured in environment variables
1 parent f6a3f34 commit bd64c16

1 file changed

Lines changed: 241 additions & 0 deletions

File tree

scripts/upload_json_to_r2.py

Lines changed: 241 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,241 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Script för att ladda upp SFS JSON-filer till Cloudflare R2 (sfs-json bucket)
4+
"""
5+
6+
import os
7+
import sys
8+
import subprocess
9+
import datetime
10+
import argparse
11+
from pathlib import Path
12+
from dotenv import load_dotenv
13+
14+
# Ladda miljövariabler från .env-fil
15+
load_dotenv()
16+
17+
def check_required_env_vars():
    """Verify that every environment variable needed for R2 access is set.

    A variable counts as missing when it is unset or empty. When any are
    missing, their names are printed together with example ``export``
    commands.

    Returns:
        bool: True when all required variables are set, otherwise False.
    """
    names = (
        'CLOUDFLARE_R2_ACCESS_KEY_ID',
        'CLOUDFLARE_R2_SECRET_ACCESS_KEY',
        'CLOUDFLARE_R2_RAWDATA_BUCKET_NAME',
        'CLOUDFLARE_R2_ACCOUNT_ID',
    )
    absent = [name for name in names if not os.getenv(name)]

    # Happy path first: nothing missing means nothing more to report.
    if not absent:
        print("✓ Alla nödvändiga miljövariabler är konfigurerade")
        return True

    print("Error: Följande miljövariabler saknas:")
    for name in absent:
        print(f" - {name}")
    print("\nExempel på hur du sätter dem:")
    print("export CLOUDFLARE_R2_ACCESS_KEY_ID='your_access_key'")
    print("export CLOUDFLARE_R2_SECRET_ACCESS_KEY='your_secret_key'")
    print("export CLOUDFLARE_R2_RAWDATA_BUCKET_NAME='sfs-json'")
    print("export CLOUDFLARE_R2_ACCOUNT_ID='your_account_id'")
    return False
44+
45+
def configure_aws_cli():
    """Make the Cloudflare R2 credentials available to the AWS CLI.

    The credentials are exported as the standard ``AWS_*`` environment
    variables of the current process, so every ``aws`` child process started
    afterwards with a copy of ``os.environ`` picks them up.

    This deliberately avoids ``aws configure set``: that command rewrites the
    user's default profile on disk, puts the secret key on a command line,
    and yields file-based credentials that the CLI ranks below any
    ``AWS_ACCESS_KEY_ID`` already present in the environment.

    Returns:
        bool: True when the credentials were exported, False when either R2
        credential variable is unset or empty.
    """
    print("Konfigurerar AWS CLI för Cloudflare R2...")

    access_key = os.getenv('CLOUDFLARE_R2_ACCESS_KEY_ID')
    secret_key = os.getenv('CLOUDFLARE_R2_SECRET_ACCESS_KEY')
    if not access_key or not secret_key:
        print("Error vid konfiguration av AWS CLI: R2-autentiseringsuppgifter saknas")
        return False

    os.environ.update({
        'AWS_ACCESS_KEY_ID': access_key,
        'AWS_SECRET_ACCESS_KEY': secret_key,
        'AWS_DEFAULT_REGION': 'us-east-1',
        'AWS_DEFAULT_OUTPUT': 'json',
    })
    # NOTE(review): a session token left over from regular AWS usage would be
    # read together with the keys above; drop it so only the R2 key pair is used.
    os.environ.pop('AWS_SESSION_TOKEN', None)

    print("✓ AWS CLI konfigurerad")
    return True
65+
66+
def count_json_files(json_dir):
    """Return how many ``*.json`` entries sit directly inside *json_dir*.

    Only the top level of the directory is searched; subdirectories are not
    descended into.
    """
    matches = Path(json_dir).glob('*.json')
    return sum(1 for _ in matches)
70+
71+
def upload_json_files(json_dir):
    """Sync the ``*.json`` files in *json_dir* to the Cloudflare R2 bucket.

    Runs ``aws s3 sync`` against the R2 endpoint derived from
    ``CLOUDFLARE_R2_ACCOUNT_ID``, targeting the bucket named by
    ``CLOUDFLARE_R2_RAWDATA_BUCKET_NAME``. Everything except ``*.json`` is
    excluded, and each object is uploaded with a JSON content type and a
    one-hour public cache lifetime.

    Args:
        json_dir: Path of the local directory holding the JSON files.

    Returns:
        bool: True when the sync command exits successfully; False when
        *json_dir* is not an existing directory or the command fails.
    """
    bucket_name = os.getenv('CLOUDFLARE_R2_RAWDATA_BUCKET_NAME')
    account_id = os.getenv('CLOUDFLARE_R2_ACCOUNT_ID')
    endpoint_url = f"https://{account_id}.r2.cloudflarestorage.com"

    # is_dir() rather than exists(): a path to a regular file would pass an
    # existence check and only fail later, inside the sync command.
    if not Path(json_dir).is_dir():
        print(f"Error: Mappen {json_dir} finns inte.")
        return False

    # Number of top-level JSON files found locally; used for progress messages
    # only and not necessarily the number of objects the sync transfers.
    file_count = count_json_files(json_dir)
    print(f"Laddar upp JSON-filer från {json_dir} ({file_count} filer)...")

    cmd = [
        'aws', 's3', 'sync', f'{json_dir}/', f's3://{bucket_name}/',
        '--endpoint-url', endpoint_url,
        # Exclude everything, then re-include JSON, so only *.json is synced.
        '--exclude', '*',
        '--include', '*.json',
        '--cache-control', 'public, max-age=3600',
        '--content-type', 'application/json',
        '--cli-read-timeout', '0',
        '--cli-connect-timeout', '60'
    ]

    env = os.environ.copy()
    env['AWS_DEFAULT_REGION'] = 'us-east-1'

    try:
        print(f"Kör kommando: {' '.join(cmd)}")
        result = subprocess.run(cmd, env=env, check=True, capture_output=True, text=True)
        print(f"✓ JSON-filer uppladdade ({file_count} filer)")

        # Echo whatever the AWS CLI printed, if anything.
        if result.stdout.strip():
            print("AWS CLI output:")
            print(result.stdout.strip())

        if result.stderr.strip():
            print("AWS CLI stderr (debug info):")
            print(result.stderr.strip())

        return True
    except subprocess.CalledProcessError as e:
        print(f"Error vid uppladdning av JSON-filer: {e}")
        if e.stderr:
            print(f"Stderr: {e.stderr}")
        if e.stdout:
            print(f"Stdout: {e.stdout}")
        return False
122+
123+
def upload_summary(json_dir):
    """Create a short upload summary and store it in the bucket.

    The summary (timestamp, bucket name, number of JSON files in *json_dir*
    and the source directory) is written to a scratch file in the current
    working directory and copied to ``last-update.txt`` in the bucket with
    ``aws s3 cp``. The scratch file is removed afterwards whether or not the
    upload succeeds.

    Args:
        json_dir: Path of the local directory the JSON files were read from.

    Returns:
        bool: True when the copy command exits successfully, otherwise False.
    """
    print("Skapar och laddar upp sammanfattning...")

    bucket_name = os.getenv('CLOUDFLARE_R2_RAWDATA_BUCKET_NAME')
    account_id = os.getenv('CLOUDFLARE_R2_ACCOUNT_ID')
    endpoint_url = f"https://{account_id}.r2.cloudflarestorage.com"
    summary_path = 'json-upload-summary.txt'

    file_count = count_json_files(json_dir)

    # Build the summary text.
    summary_content = f"""JSON upload completed at {datetime.datetime.now().isoformat()}
Files uploaded to Cloudflare R2 bucket: {bucket_name}/
Number of JSON files: {file_count}
Source directory: {json_dir}
Upload performed locally via upload_json_to_r2.py script
"""

    # Write it to the scratch file that the CLI will upload.
    with open(summary_path, 'w', encoding='utf-8') as f:
        f.write(summary_content)

    cmd = [
        'aws', 's3', 'cp', summary_path, f's3://{bucket_name}/last-update.txt',
        '--endpoint-url', endpoint_url
    ]

    env = os.environ.copy()
    env['AWS_DEFAULT_REGION'] = 'us-east-1'

    try:
        print(f"Kör kommando: {' '.join(cmd)}")
        result = subprocess.run(cmd, env=env, check=True, capture_output=True, text=True)
        print("✓ Sammanfattning uppladdad")

        # Echo whatever the AWS CLI printed, if anything.
        if result.stdout.strip():
            print("AWS CLI output:")
            print(result.stdout.strip())
        if result.stderr.strip():
            print("AWS CLI stderr:")
            print(result.stderr.strip())

        return True
    except subprocess.CalledProcessError as e:
        print(f"Error vid uppladdning av sammanfattning: {e}")
        if e.stderr:
            print(f"Stderr: {e.stderr}")
        if e.stdout:
            print(f"Stdout: {e.stdout}")
        return False
    finally:
        # Clean up on every path so a failed upload does not leave the
        # scratch file behind in the working directory.
        os.remove(summary_path)
177+
178+
def main():
    """Parse the command line, validate the setup and run the upload.

    Steps, in order: parse ``--json-dir``; confirm the ``aws`` executable can
    be run; check the required environment variables; configure the AWS CLI;
    sync the JSON files; and, only when the sync succeeded, upload the
    summary file.

    Exits the process with status 1 as soon as a prerequisite check fails or
    when an upload step reports failure.
    """
    # Command-line arguments.
    parser = argparse.ArgumentParser(
        description='Ladda upp SFS JSON-filer till Cloudflare R2 (sfs-json bucket)',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Exempel:
python scripts/upload_json_to_r2.py --json-dir ../sfs-jsondata
python scripts/upload_json_to_r2.py --json-dir data/sfs_json
"""
    )
    parser.add_argument(
        '--json-dir',
        default='../sfs-jsondata',
        help='Sökväg till mappen med JSON-filer (standard: ../sfs-jsondata)'
    )

    args = parser.parse_args()

    print("=== Cloudflare R2 JSON Upload Script ===")
    print(f"Laddar upp från: {args.json_dir}")
    print(f"Bucket: {os.getenv('CLOUDFLARE_R2_RAWDATA_BUCKET_NAME', 'sfs-json')}")
    print()

    # Make sure the AWS CLI is installed and on PATH.
    try:
        subprocess.run(['aws', '--version'], check=True, capture_output=True)
    except (subprocess.CalledProcessError, FileNotFoundError):
        print("Error: AWS CLI är inte installerat eller inte tillgängligt i PATH")
        print("Installera med: pip install awscli")
        sys.exit(1)

    # Check the environment variables.
    if not check_required_env_vars():
        sys.exit(1)

    # Configure the AWS CLI.
    if not configure_aws_cli():
        sys.exit(1)

    print()
    # Upload the summary only after a successful sync: it is published as
    # last-update.txt and states that the upload completed, which would be
    # false after a failed sync. `and` short-circuits to skip it in that case.
    success = upload_json_files(args.json_dir) and upload_summary(args.json_dir)

    print()
    if success:
        print("✓ Alla JSON-filer har laddats upp till Cloudflare R2!")
        print(f"Bucket: {os.getenv('CLOUDFLARE_R2_RAWDATA_BUCKET_NAME')}")
        print(f"Källa: {args.json_dir}")
    else:
        print("✗ Något gick fel under uppladdningen")
        sys.exit(1)
239+
240+
# Run the upload only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)