-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathaws_archive.py
More file actions
83 lines (73 loc) · 2.56 KB
/
aws_archive.py
File metadata and controls
83 lines (73 loc) · 2.56 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
"""
AWS S3 integration — archive every transcript and threat report as immutable records.
Demonstrates production-readiness: every call has a permanent audit trail.
Bucket layout:
    s3://{bucket}/transcripts/{call_id}.txt
    s3://{bucket}/reports/{call_id}.json
"""
import os
import json
from datetime import datetime
def _s3_client():
    """Build a boto3 S3 client configured from environment variables.

    The region is read from ``AWS_REGION`` (falling back to ``us-east-1``)
    and the key pair from ``AWS_ACCESS_KEY_ID`` / ``AWS_SECRET_ACCESS_KEY``;
    variables that are unset are passed through as ``None``.

    Returns ``None`` instead of a client when an ``ImportError`` is raised,
    i.e. when boto3 cannot be imported.
    """
    try:
        # Imported lazily so this module still loads without boto3 installed.
        import boto3

        settings = {
            "region_name": os.getenv("AWS_REGION", "us-east-1"),
            "aws_access_key_id": os.getenv("AWS_ACCESS_KEY_ID"),
            "aws_secret_access_key": os.getenv("AWS_SECRET_ACCESS_KEY"),
        }
        return boto3.client("s3", **settings)
    except ImportError:
        return None
def archive_transcript(call_id: str, transcript: str, classification: dict) -> str | None:
"""
Upload transcript + structured report to S3.
Returns the S3 URI if successful, None if skipped/failed.
"""
bucket = os.getenv("AWS_S3_BUCKET")
if not bucket or bucket == "FILL_IN":
print(f"[{call_id}] WARNING: AWS_S3_BUCKET not set — skipping S3 archive")
return None
s3 = _s3_client()
if s3 is None:
print(f"[{call_id}] WARNING: boto3 not installed — skipping S3 archive")
return None
# Guard against None transcript (e.g. when only recording_url was provided)
if not transcript:
transcript = ""
timestamp = datetime.utcnow().isoformat()
school = str(classification.get("school_name") or "unknown")
level = classification.get("threat_level", 0)
# Upload raw transcript
transcript_key = f"transcripts/{call_id}.txt"
report_key = f"reports/{call_id}.json"
report = {
"call_id": call_id,
"timestamp": timestamp,
"school": school,
"threat_level": level,
"classification": classification,
"transcript": transcript,
}
try:
meta = {"call_id": str(call_id), "school": str(school), "level": str(level)}
s3.put_object(
Bucket=bucket,
Key=transcript_key,
Body=transcript.encode("utf-8"),
ContentType="text/plain",
Metadata=meta,
)
s3.put_object(
Bucket=bucket,
Key=report_key,
Body=json.dumps(report, indent=2).encode("utf-8"),
ContentType="application/json",
Metadata=meta,
)
uri = f"s3://{bucket}/{report_key}"
print(f"[{call_id}] AWS S3: archived → {uri}")
return uri
except Exception as e:
print(f"[{call_id}] WARNING: S3 archive failed: {e}")
return None