Skip to content

Commit 95e34ec

Browse files
Add GCP Upload (#217)
1 parent cadbb22 commit 95e34ec

3 files changed

Lines changed: 232 additions & 0 deletions

File tree

docs/gcp/upload.md

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
# GCP Upload API
2+
3+
This endpoint allows you to stream a file from a remote URL directly to Google Cloud Storage without using local disk space.
4+
5+
## Endpoint
6+
7+
`POST /v1/gcp/upload`
8+
9+
## Authentication
10+
11+
This endpoint requires an API key to be provided in the `X-API-Key` header.
12+
13+
## Request Body
14+
15+
The request body should be a JSON object with the following properties:
16+
17+
| Property | Type | Required | Description |
18+
|----------|------|----------|-------------|
19+
| file_url | string | Yes | The URL of the file to upload to GCS |
20+
| filename | string | No | Custom filename to use for the uploaded file. If not provided, the filename is taken from the last path segment of `file_url`; if the URL contains no filename, a random UUID is used |
21+
| public | boolean | No | Whether to make the file publicly accessible. Defaults to `false` |
22+
| download_headers | object | No | Optional headers to include in the download request for authentication |
23+
24+
Example request body:
25+
```json
26+
{
27+
"file_url": "https://example.com/path/to/file.mp4",
28+
"filename": "custom-name.mp4",
29+
"public": true,
30+
"download_headers": {
31+
"Authorization": "Bearer your-token"
32+
}
33+
}
34+
```
35+
36+
## Response
37+
38+
The response will be a JSON object with the following properties:
39+
40+
| Property | Type | Description |
41+
|----------|------|-------------|
42+
| file_url | string | The public URL of the uploaded file |
43+
| filename | string | The filename of the uploaded file |
44+
| bucket | string | The name of the GCS bucket where the file was uploaded |
45+
| public | boolean | Whether the file is publicly accessible |
46+
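| content_type | string | The content type of the uploaded file, taken from the `Content-Type` header of the source response (`application/octet-stream` if the header is missing) |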
47+
48+
Example response:
49+
```json
50+
{
51+
"file_url": "https://storage.googleapis.com/bucket-name/custom-name.mp4",
52+
"filename": "custom-name.mp4",
53+
"bucket": "bucket-name",
54+
"public": true,
55+
"content_type": "video/mp4"
56+
}
57+
```
58+
59+
## Error Handling
60+
61+
If an error occurs, the response will include an error message with an appropriate HTTP status code.
62+
63+
## Technical Details
64+
65+
This endpoint uses the Google Cloud Storage API to stream the file directly from the source URL to GCS without saving it locally. This allows for efficient transfer of large files with minimal memory usage.
66+
67+
The implementation:
68+
1. Streams the file from the source URL
69+
2. Detects the content type from the response headers
70+
3. Uploads the file directly to GCS using the streaming upload API
71+
72+
This approach supports efficient handling of large files and maintains the original content type of the uploaded file.

routes/v1/gcp/upload.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
# Copyright (c) 2025 Stephen G. Pope
2+
#
3+
# This program is free software; you can redistribute it and/or modify
4+
# it under the terms of the GNU General Public License as published by
5+
# the Free Software Foundation; either version 2 of the License, or
6+
# (at your option) any later version.
7+
#
8+
# This program is distributed in the hope that it will be useful,
9+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11+
# GNU General Public License for more details.
12+
#
13+
# You should have received a copy of the GNU General Public License along
14+
# with this program; if not, write to the Free Software Foundation, Inc.,
15+
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
16+
17+
from flask import Blueprint, request, jsonify
18+
from services.authentication import authenticate
19+
from app_utils import validate_payload, queue_task_wrapper
20+
from services.v1.gcp.upload import stream_upload_to_gcs
21+
import os
22+
import json
23+
import logging
24+
25+
logger = logging.getLogger(__name__)
26+
v1_gcp_upload_bp = Blueprint('v1_gcp_upload', __name__)
27+
28+
@v1_gcp_upload_bp.route('/v1/gcp/upload', methods=['POST'])
@authenticate
@validate_payload({
    "type": "object",
    "properties": {
        "filename": {"type": "string"},
        "file_url": {"type": "string", "format": "uri"},
        "public": {"type": "boolean"},
        "download_headers": {"type": "object"}
    },
    "required": ["file_url"],
    "additionalProperties": False
})
@queue_task_wrapper(bypass_queue=False)
def gcp_upload_endpoint(job_id, data):
    """Handle POST /v1/gcp/upload: copy a remote file into GCS.

    Args:
        job_id: Identifier of the job, used to tag log messages.
        data: Validated request payload. ``file_url`` is required;
            ``filename``, ``public`` and ``download_headers`` are optional.

    Returns:
        A ``(payload, endpoint, status)`` tuple: the upload result and 200 on
        success, or the error message and 500 if anything raised.
    """
    endpoint = "/v1/gcp/upload"
    try:
        # Every payload lookup stays inside the try so that any failure,
        # including a malformed payload, is reported as a 500 result.
        source_url = data.get('file_url')
        target_name = data.get('filename')  # None -> name is derived from the URL
        public_flag = data.get('public', False)  # private unless requested
        auth_headers = data.get('download_headers')  # forwarded to the download request

        logger.info(f"Job {job_id}: Starting GCS streaming upload from {source_url}")
        upload_result = stream_upload_to_gcs(source_url, target_name, public_flag, auth_headers)
        logger.info(f"Job {job_id}: Successfully uploaded to GCS")
    except Exception as err:
        message = str(err)
        logger.error(f"Job {job_id}: Error uploading to GCS - {message}")
        return message, endpoint, 500

    return upload_result, endpoint, 200

services/v1/gcp/upload.py

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
# Copyright (c) 2025 Stephen G. Pope
2+
#
3+
# This program is free software; you can redistribute it and/or modify
4+
# it under the terms of the GNU General Public License as published by
5+
# the Free Software Foundation; either version 2 of the License, or
6+
# (at your option) any later version.
7+
#
8+
# This program is distributed in the hope that it will be useful,
9+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11+
# GNU General Public License for more details.
12+
#
13+
# You should have received a copy of the GNU General Public License along
14+
# with this program; if not, write to the Free Software Foundation, Inc.,
15+
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
16+
17+
import os
18+
import logging
19+
import requests
20+
import json
21+
from google.cloud import storage
22+
from google.oauth2 import service_account
23+
from urllib.parse import urlparse, unquote
24+
import uuid
25+
26+
logger = logging.getLogger(__name__)
27+
28+
def get_gcs_client():
    """Build a Google Cloud Storage client from ``GCP_SA_CREDENTIALS``.

    The environment variable must contain the service-account key as a JSON
    string. The credentials are created with the storage full-control scope.

    Returns:
        A ``storage.Client`` using the parsed service-account credentials.

    Raises:
        ValueError: If the variable is unset or empty, if it is not valid
            JSON, or if creating the credentials or client fails.
    """
    raw_key = os.environ.get('GCP_SA_CREDENTIALS')
    if not raw_key:
        raise ValueError("GCP_SA_CREDENTIALS environment variable is not set")

    storage_scopes = ['https://www.googleapis.com/auth/devstorage.full_control']
    try:
        # Parse first so malformed JSON is reported as such before any
        # credential construction is attempted.
        key_info = json.loads(raw_key)
        sa_credentials = service_account.Credentials.from_service_account_info(
            key_info,
            scopes=storage_scopes,
        )
        return storage.Client(credentials=sa_credentials)
    except json.JSONDecodeError:
        raise ValueError("GCP_SA_CREDENTIALS is not valid JSON")
    except Exception as err:
        raise ValueError(f"Failed to create GCS client: {str(err)}")
46+
47+
def get_filename_from_url(url):
    """Return the filename found in the last path segment of *url*.

    The URL path is percent-decoded before its final segment is taken; the
    query string and fragment are not part of the path and are ignored.
    When the path has no final segment (for example it is empty or ends
    with "/"), a random UUID4 string is returned instead.
    """
    decoded_path = unquote(urlparse(url).path)
    basename = os.path.basename(decoded_path)
    # An empty basename means no name could be derived; fall back to a UUID.
    return basename or str(uuid.uuid4())
57+
58+
def stream_upload_to_gcs(file_url, custom_filename=None, make_public=False,
                         download_headers=None, timeout=(10, 300)):
    """Download *file_url* as a stream and upload it to the configured GCS bucket.

    The raw HTTP response stream is handed to ``blob.upload_from_file``, so
    the file is not written to local disk by this function.

    Args:
        file_url: URL of the file to fetch.
        custom_filename: Object name to use in the bucket. When falsy, the
            name is derived from the URL via ``get_filename_from_url``.
        make_public: Accepted for API compatibility; see the review note in
            the body, it is currently not applied.
        download_headers: Optional dict of headers sent with the download
            request (e.g. for authentication).
        timeout: ``(connect, read)`` timeout in seconds passed to
            ``requests.get`` so a stalled source cannot block forever.

    Returns:
        dict with keys ``file_url`` (the blob's public URL), ``filename``,
        ``bucket``, ``public`` and ``content_type``.

    Raises:
        ValueError: If ``GCP_BUCKET_NAME`` is not set (or client creation
            fails in ``get_gcs_client``).
        requests.HTTPError: If the source responds with an error status.
        Exception: Any other download/upload error is logged and re-raised.
    """
    try:
        # Get GCS configuration
        bucket_name = os.environ.get('GCP_BUCKET_NAME')
        if not bucket_name:
            raise ValueError("GCP_BUCKET_NAME environment variable is not set")

        client = get_gcs_client()
        bucket = client.bucket(bucket_name)

        # Use the custom name if provided, otherwise extract it from the URL
        filename = custom_filename or get_filename_from_url(file_url)
        blob = bucket.blob(filename)

        # The context manager guarantees the streamed connection is released
        # even when raise_for_status() or the upload raises.
        with requests.get(
            file_url,
            stream=True,
            headers=download_headers,
            timeout=timeout,
        ) as response:
            response.raise_for_status()

            # Content type comes from the source response headers
            content_type = response.headers.get('content-type', 'application/octet-stream')

            # NOTE(review): response.raw yields the body as sent on the wire;
            # if the source applies a Content-Encoding (e.g. gzip) the stored
            # object will hold the encoded bytes - confirm this is acceptable.
            blob.upload_from_file(
                response.raw,
                content_type=content_type
            )

        # NOTE(review): make_public is never applied to the blob and 'public'
        # is always reported as True - confirm this is intended.
        return {
            'file_url': blob.public_url,
            'filename': filename,
            'bucket': bucket_name,
            'public': True,  # Always return public URL like gcp_toolkit
            'content_type': content_type
        }

    except Exception as e:
        logger.error(f"Error streaming file to GCS: {e}")
        raise

0 commit comments

Comments
 (0)