diff --git a/.env.example b/.env.example index 20b19199..362b7c23 100644 --- a/.env.example +++ b/.env.example @@ -29,6 +29,11 @@ API_KEY=your_api_key_here #S3_REGION=your-region #S3_BUCKET_NAME=your-bucket-name +# optional +# Purpose: S3_PUBLIC_URL custom domain for returned file URLs (e.g., CDN or Cloudflare R2 custom domain) +# When set, file URLs will use this domain without the bucket name in the path +#S3_PUBLIC_URL=https://subdomain.domain.com + # Google Cloud Storage Env Variables # diff --git a/CLAUDE.md b/CLAUDE.md index 29062002..a5f8ea69 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -169,6 +169,7 @@ S3-Compatible: - `GUNICORN_TIMEOUT` - Worker timeout seconds (default: 30) - `GCP_JOB_NAME` - Cloud Run Job name for offloading - `GCP_JOB_LOCATION` - Cloud Run Job region (default: us-central1) +- `S3_PUBLIC_URL` - Custom domain for S3 file URLs (e.g., "subdomain.domain.com"). When set, URLs exclude bucket name and use this domain instead of S3_ENDPOINT_URL ## Key Patterns diff --git a/README.md b/README.md index e97899b6..d7a5517e 100644 --- a/README.md +++ b/README.md @@ -162,6 +162,16 @@ Each endpoint is supported by robust payload validation and detailed API documen - **Purpose**: The region for the S3-compatible storage service. - **Requirement**: Mandatory if using S3-compatible storage, "None" is acceptible for some s3 providers. +#### `S3_PUBLIC_URL` +- **Purpose**: Custom domain to use as the base URL for all returned file URLs (e.g., CDN domain). +- **Requirement**: Optional. When set, file URLs will use this domain instead of `S3_ENDPOINT_URL`, and the bucket name will be excluded from the URL path. +- **Example**: Set to `subdomain.domain.com` to return URLs like `https://subdomain.domain.com/file.mp3` instead of `https://nyc3.digitaloceanspaces.com/bucket-name/file.mp3` +- **Note**: The value can include or exclude the `https://` protocol prefix. If omitted, `https://` will be automatically prepended. +- **Common Use Cases**: + 1. 
**Cloudflare R2 Public Access**: Cloudflare R2 requires custom domains (without bucket names in the URL path) to serve files publicly without authentication. Set `S3_PUBLIC_URL` to your R2 custom domain to enable public file access. + 2. **CDN Integration**: Use Cloudflare, AWS CloudFront, Fastly, or other CDNs in front of your S3 storage for improved performance, caching, and reduced bandwidth costs. Set `S3_PUBLIC_URL` to your CDN domain. + 3. **Custom Domain Branding**: Use your own branded domain (e.g., `files.yourcompany.com`) instead of exposing the underlying cloud provider's domain in file URLs, providing a more professional appearance and easier domain migration. + --- ### Google Cloud Storage (GCP) Environment Variables diff --git a/config.py b/config.py index 2b4e9826..4a1fad0d 100644 --- a/config.py +++ b/config.py @@ -31,6 +31,9 @@ GCP_SA_CREDENTIALS = os.environ.get('GCP_SA_CREDENTIALS', '') GCP_BUCKET_NAME = os.environ.get('GCP_BUCKET_NAME', '') +# S3 public URL override (optional) - when set, uses this as base URL instead of S3_ENDPOINT_URL +S3_PUBLIC_URL = os.environ.get('S3_PUBLIC_URL', '') + def validate_env_vars(provider): """ Validate the necessary environment variables for the selected storage provider """ diff --git a/docker-compose.md b/docker-compose.md index 0c768648..cf0c47a2 100644 --- a/docker-compose.md +++ b/docker-compose.md @@ -140,6 +140,11 @@ API_KEY=your_api_key_here #S3_REGION=your-region #S3_BUCKET_NAME=your-bucket-name +# optional +# Purpose: S3_PUBLIC_URL custom domain for returned file URLs (e.g., CDN or Cloudflare R2 custom domain) +# When set, file URLs will use this domain without the bucket name in the path +#S3_PUBLIC_URL=https://subdomain.domain.com + # Google Cloud Storage Env Variables # diff --git a/docs/cloud-installation/do.md b/docs/cloud-installation/do.md index 817c4c2c..644fc300 100644 --- a/docs/cloud-installation/do.md +++ b/docs/cloud-installation/do.md @@ -59,6 +59,8 @@ You'll need to create a Space 
(Digital Ocean's object storage) for the toolkit t Add the following environment variables exactly as shown (be careful with underscores vs. dashes and avoid any leading/trailing spaces): +### Required Variables + 1. `API_KEY`: Your API key (e.g., `test123` for testing - change for production) 2. `S3_ENDPOINT_URL`: The URL of your Space (copied from Step 3) 3. `S3_ACCESS_KEY`: The access key from Step 3 @@ -66,6 +68,14 @@ Add the following environment variables exactly as shown (be careful with unders 5. `S3_BUCKET_NAME`: The name of your Space bucket (e.g., `nca-toolkit-bucket`) 6. `S3_REGION`: The region code of your Space (e.g., `NYC3` for New York) +### Optional Variables + +7. `S3_PUBLIC_URL`: Custom domain for file URLs (e.g., `cdn.yourdomain.com`) + - Use this if you have a CDN or custom domain in front of your Space + - When set, returned file URLs will use this domain instead of the Digital Ocean Spaces URL + - The bucket name will be excluded from the URL path + - Example: If set to `files.example.com`, URLs will be `https://files.example.com/file.mp3` instead of `https://nyc3.digitaloceanspaces.com/bucket-name/file.mp3` + ## Step 7: Finalize and Deploy 1. 
For Deployment Region, select a region close to your location (e.g., San Francisco) diff --git a/services/s3_toolkit.py b/services/s3_toolkit.py index 1b0dc970..9e027830 100644 --- a/services/s3_toolkit.py +++ b/services/s3_toolkit.py @@ -42,7 +42,19 @@ def upload_to_s3(file_path, s3_url, access_key, secret_key, bucket_name, region) # URL encode the filename for the URL encoded_filename = quote(os.path.basename(file_path)) - file_url = f"{s3_url}/{bucket_name}/{encoded_filename}" + + # Check if S3_PUBLIC_URL is set for custom domain + public_url = os.getenv('S3_PUBLIC_URL', '').strip() + if public_url: + # Ensure https:// prefix if not present + if not public_url.startswith('http'): + public_url = f"https://{public_url}" + # Use public URL without bucket name + file_url = f"{public_url}/{encoded_filename}" + else: + # Use existing logic with bucket name + file_url = f"{s3_url}/{bucket_name}/{encoded_filename}" + return file_url except Exception as e: logger.error(f"Error uploading file to S3: {e}") diff --git a/services/v1/s3/upload.py b/services/v1/s3/upload.py index 49fc3898..39ef0cb1 100644 --- a/services/v1/s3/upload.py +++ b/services/v1/s3/upload.py @@ -153,7 +153,18 @@ def stream_upload_to_s3(file_url, custom_filename=None, make_public=False, downl if make_public: # URL encode the filename for the URL only encoded_filename = quote(filename) - file_url = f"{endpoint_url}/{bucket_name}/{encoded_filename}" + + # Check if S3_PUBLIC_URL is set for custom domain + public_url = os.getenv('S3_PUBLIC_URL', '').strip() + if public_url: + # Ensure https:// prefix if not present + if not public_url.startswith('http'): + public_url = f"https://{public_url}" + # Use public URL without bucket name + file_url = f"{public_url}/{encoded_filename}" + else: + # Use existing logic with bucket name + file_url = f"{endpoint_url}/{bucket_name}/{encoded_filename}" else: # Generate a pre-signed URL for private files file_url = s3_client.generate_presigned_url(