Skip to content

Commit 8f1ba45

Browse files
committed
Support file upload to gcs bucket
1 parent e641f11 commit 8f1ba45

7 files changed

Lines changed: 122 additions & 31 deletions

File tree

docs/docs/reference/environment-variables.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,9 @@ For more details on the options below, refer to the [server deployment](../guide
112112
- `DSTACK_DEFAULT_SERVICE_CLIENT_MAX_BODY_SIZE`{ #DSTACK_DEFAULT_SERVICE_CLIENT_MAX_BODY_SIZE } – Request body size limit for services running with a gateway, in bytes. Defaults to 64 MiB.
113113
- `DSTACK_FORBID_SERVICES_WITHOUT_GATEWAY`{ #DSTACK_FORBID_SERVICES_WITHOUT_GATEWAY } – Forbids registering new services without a gateway if set to any value.
114114
- `DSTACK_SERVER_CODE_UPLOAD_LIMIT`{ #DSTACK_SERVER_CODE_UPLOAD_LIMIT } - The repo size limit when uploading diffs or local repos, in bytes. Set to 0 to disable size limits. Defaults to 2 MiB.
115+
- `DSTACK_SERVER_S3_BUCKET`{ #DSTACK_SERVER_S3_BUCKET } - The S3 bucket that repo diffs will be uploaded to if set. If unset, diffs are uploaded to the database. Cannot be set together with `DSTACK_SERVER_GCS_BUCKET`.
116+
- `DSTACK_SERVER_S3_BUCKET_REGION`{ #DSTACK_SERVER_S3_BUCKET_REGION } - The region of the bucket specified by `DSTACK_SERVER_S3_BUCKET`.
117+
- `DSTACK_SERVER_GCS_BUCKET`{ #DSTACK_SERVER_GCS_BUCKET } - The GCS bucket that repo diffs will be uploaded to if set. If unset, diffs are uploaded to the database. Cannot be set together with `DSTACK_SERVER_S3_BUCKET`.
115118

116119
??? info "Internal environment variables"
117120
The following environment variables are intended for development purposes:

src/dstack/_internal/server/app.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ async def lifespan(app: FastAPI):
128128
yes=UPDATE_DEFAULT_PROJECT,
129129
no=DO_NOT_UPDATE_DEFAULT_PROJECT,
130130
)
131-
if settings.SERVER_BUCKET is not None:
131+
if settings.SERVER_S3_BUCKET is not None or settings.SERVER_GCS_BUCKET is not None:
132132
init_default_storage()
133133
scheduler = start_background_tasks()
134134
dstack_version = DSTACK_VERSION if DSTACK_VERSION else "(no version)"
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
from typing import Optional
2+
3+
from dstack._internal.server import settings
4+
from dstack._internal.server.services.storage.base import BaseStorage
5+
from dstack._internal.server.services.storage.gcs import GCS_AVAILABLE, GCSStorage
6+
from dstack._internal.server.services.storage.s3 import BOTO_AVAILABLE, S3Storage
7+
8+
_default_storage = None
9+
10+
11+
def init_default_storage():
12+
global _default_storage
13+
if settings.SERVER_S3_BUCKET is None and settings.SERVER_GCS_BUCKET is None:
14+
raise ValueError(
15+
"Either settings.SERVER_S3_BUCKET or settings.SERVER_GCS_BUCKET must be set"
16+
)
17+
if settings.SERVER_S3_BUCKET and settings.SERVER_GCS_BUCKET:
18+
raise ValueError(
19+
"Only one of settings.SERVER_S3_BUCKET or settings.SERVER_GCS_BUCKET can be set"
20+
)
21+
22+
if settings.SERVER_S3_BUCKET:
23+
if not BOTO_AVAILABLE:
24+
raise ValueError("AWS dependencies are not installed")
25+
_default_storage = S3Storage(
26+
bucket=settings.SERVER_S3_BUCKET,
27+
region=settings.SERVER_S3_BUCKET_REGION,
28+
)
29+
elif settings.SERVER_GCS_BUCKET:
30+
if not GCS_AVAILABLE:
31+
raise ValueError("GCS dependencies are not installed")
32+
_default_storage = GCSStorage(
33+
bucket=settings.SERVER_GCS_BUCKET,
34+
)
35+
36+
37+
def get_default_storage() -> Optional[BaseStorage]:
38+
return _default_storage
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
from abc import ABC
2+
from typing import Optional
3+
4+
5+
class BaseStorage(ABC):
6+
def upload_code(
7+
self,
8+
project_id: str,
9+
repo_id: str,
10+
code_hash: str,
11+
blob: bytes,
12+
):
13+
pass
14+
15+
def get_code(
16+
self,
17+
project_id: str,
18+
repo_id: str,
19+
code_hash: str,
20+
) -> Optional[bytes]:
21+
pass
22+
23+
@staticmethod
24+
def _get_code_key(project_id: str, repo_id: str, code_hash: str) -> str:
25+
return f"data/projects/{project_id}/codes/{repo_id}/{code_hash}"
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
from typing import Optional
2+
3+
from dstack._internal.server.services.storage.base import BaseStorage
4+
5+
GCS_AVAILABLE = True
6+
try:
7+
from google.cloud import storage
8+
from google.cloud.exceptions import NotFound
9+
except ImportError:
10+
GCS_AVAILABLE = False
11+
12+
13+
class GCSStorage(BaseStorage):
14+
def __init__(
15+
self,
16+
bucket: str,
17+
):
18+
self._client = storage.Client()
19+
self._bucket = self._client.bucket(bucket)
20+
21+
def upload_code(
22+
self,
23+
project_id: str,
24+
repo_id: str,
25+
code_hash: str,
26+
blob: bytes,
27+
):
28+
blob_name = self._get_code_key(project_id, repo_id, code_hash)
29+
blob_obj = self._bucket.blob(blob_name)
30+
blob_obj.upload_from_string(blob)
31+
32+
def get_code(
33+
self,
34+
project_id: str,
35+
repo_id: str,
36+
code_hash: str,
37+
) -> Optional[bytes]:
38+
try:
39+
blob_name = self._get_code_key(project_id, repo_id, code_hash)
40+
blob = self._bucket.blob(blob_name)
41+
except NotFound:
42+
return None
43+
44+
return blob.download_as_bytes()
Lines changed: 4 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from typing import Optional
22

3-
from dstack._internal.server import settings
3+
from dstack._internal.server.services.storage.base import BaseStorage
44

55
BOTO_AVAILABLE = True
66
try:
@@ -10,7 +10,7 @@
1010
BOTO_AVAILABLE = False
1111

1212

13-
class S3Storage:
13+
class S3Storage(BaseStorage):
1414
def __init__(
1515
self,
1616
bucket: str,
@@ -29,7 +29,7 @@ def upload_code(
2929
):
3030
self._client.put_object(
3131
Bucket=self.bucket,
32-
Key=_get_code_key(project_id, repo_id, code_hash),
32+
Key=self._get_code_key(project_id, repo_id, code_hash),
3333
Body=blob,
3434
)
3535

@@ -42,33 +42,10 @@ def get_code(
4242
try:
4343
response = self._client.get_object(
4444
Bucket=self.bucket,
45-
Key=_get_code_key(project_id, repo_id, code_hash),
45+
Key=self._get_code_key(project_id, repo_id, code_hash),
4646
)
4747
except botocore.exceptions.ClientError as e:
4848
if e.response["Error"]["Code"] == "NoSuchKey":
4949
return None
5050
raise e
5151
return response["Body"].read()
52-
53-
54-
def _get_code_key(project_id: str, repo_id: str, code_hash: str) -> str:
55-
return f"data/projects/{project_id}/codes/{repo_id}/{code_hash}"
56-
57-
58-
_default_storage = None
59-
60-
61-
def init_default_storage():
62-
global _default_storage
63-
if settings.SERVER_BUCKET is None:
64-
raise ValueError("settings.SERVER_BUCKET not set")
65-
if not BOTO_AVAILABLE:
66-
raise ValueError("AWS dependencies are not installed")
67-
_default_storage = S3Storage(
68-
bucket=settings.SERVER_BUCKET,
69-
region=settings.SERVER_BUCKET_REGION,
70-
)
71-
72-
73-
def get_default_storage() -> Optional[S3Storage]:
74-
return _default_storage

src/dstack/_internal/server/settings.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,13 @@
3737
SERVER_CONFIG_DISABLED = os.getenv("DSTACK_SERVER_CONFIG_DISABLED") is not None
3838
SERVER_CONFIG_ENABLED = not SERVER_CONFIG_DISABLED
3939

40-
# TODO: add s3/aws prefix
41-
SERVER_BUCKET = os.getenv("DSTACK_SERVER_BUCKET")
42-
SERVER_BUCKET_REGION = os.getenv("DSTACK_SERVER_BUCKET_REGION")
40+
# TODO: remove deprecated DSTACK_SERVER_BUCKET and DSTACK_SERVER_BUCKET_REGION env var usage
41+
SERVER_S3_BUCKET = os.getenv("DSTACK_SERVER_S3_BUCKET", os.getenv("DSTACK_SERVER_BUCKET"))
42+
SERVER_S3_BUCKET_REGION = os.getenv(
43+
"DSTACK_SERVER_S3_BUCKET_REGION", os.getenv("DSTACK_SERVER_BUCKET_REGION")
44+
)
45+
46+
SERVER_GCS_BUCKET = os.getenv("DSTACK_SERVER_GCS_BUCKET")
4347

4448
SERVER_CLOUDWATCH_LOG_GROUP = os.getenv("DSTACK_SERVER_CLOUDWATCH_LOG_GROUP")
4549
SERVER_CLOUDWATCH_LOG_REGION = os.getenv("DSTACK_SERVER_CLOUDWATCH_LOG_REGION")

0 commit comments

Comments
 (0)