Skip to content

Commit ea89929

Browse files
olevski, sgaist, Panaetius, leafty
committed
feat: add data connectors (#478)
Co-authored-by: Samuel Gaist <samuel.gaist@idiap.ch>
Co-authored-by: Ralf Grubenmann <ralf.grubenmann@sdsc.ethz.ch>
Co-authored-by: Flora Thiebaut <flora.thiebaut@sdsc.ethz.ch>
1 parent 3157a0f commit ea89929

30 files changed

Lines changed: 813 additions & 153 deletions

File tree

Makefile

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
.PHONY: schemas tests test_setup main_tests schemathesis_tests collect_coverage style_checks pre_commit_checks run download_avro check_avro avro_models update_avro k3d_cluster install_amaltheas all
22

3-
AMALTHEA_JS_VERSION ?= 0.12.2
4-
AMALTHEA_SESSIONS_VERSION ?= 0.0.10-new-operator-chart
3+
AMALTHEA_JS_VERSION ?= 0.13.0
4+
AMALTHEA_SESSIONS_VERSION ?= 0.13.0
55
codegen_params = --input-file-type openapi --output-model-type pydantic_v2.BaseModel --use-double-quotes --target-python-version 3.12 --collapse-root-models --field-constraints --strict-nullable --set-default-enum-member --openapi-scopes schemas paths parameters --set-default-enum-member --use-one-literal-as-default --use-default
66

77
define test_apispec_up_to_date
@@ -166,5 +166,5 @@ install_amaltheas: ## Installs both version of amalthea in the. NOTE: It uses t
166166

167167
# TODO: Add the version variables from the top of the file here when the charts are fully published
168168
amalthea_schema: ## Regenerates the pydantic classes from the CRDs
169-
curl https://raw.githubusercontent.com/SwissDataScienceCenter/amalthea/feat-add-cloud-storage/config/crd/bases/amalthea.dev_amaltheasessions.yaml | yq '.spec.versions[0].schema.openAPIV3Schema' | poetry run datamodel-codegen --input-file-type jsonschema --output-model-type pydantic_v2.BaseModel --output components/renku_data_services/notebooks/cr_amalthea_session.py --use-double-quotes --target-python-version 3.12 --collapse-root-models --field-constraints --strict-nullable --base-class renku_data_services.notebooks.cr_base.BaseCRD --allow-extra-fields --use-default-kwarg
169+
curl https://raw.githubusercontent.com/SwissDataScienceCenter/amalthea/main/config/crd/bases/amalthea.dev_amaltheasessions.yaml | yq '.spec.versions[0].schema.openAPIV3Schema' | poetry run datamodel-codegen --input-file-type jsonschema --output-model-type pydantic_v2.BaseModel --output components/renku_data_services/notebooks/cr_amalthea_session.py --use-double-quotes --target-python-version 3.12 --collapse-root-models --field-constraints --strict-nullable --base-class renku_data_services.notebooks.cr_base.BaseCRD --allow-extra-fields --use-default-kwarg
170170
curl https://raw.githubusercontent.com/SwissDataScienceCenter/amalthea/main/controller/crds/jupyter_server.yaml | yq '.spec.versions[0].schema.openAPIV3Schema' | poetry run datamodel-codegen --input-file-type jsonschema --output-model-type pydantic_v2.BaseModel --output components/renku_data_services/notebooks/cr_jupyter_server.py --use-double-quotes --target-python-version 3.12 --collapse-root-models --field-constraints --strict-nullable --base-class renku_data_services.notebooks.cr_base.BaseCRD --allow-extra-fields --use-default-kwarg

bases/renku_data_services/data_api/app.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,9 @@ def register_all_handlers(app: Sanic, config: Config) -> Sanic:
149149
session_repo=config.session_repo,
150150
storage_repo=config.storage_repo,
151151
rp_repo=config.rp_repo,
152+
data_connector_repo=config.data_connector_repo,
153+
data_connector_project_link_repo=config.data_connector_to_project_link_repo,
154+
data_connector_secret_repo=config.data_connector_secret_repo,
152155
internal_gitlab_authenticator=config.gitlab_authenticator,
153156
)
154157
platform_config = PlatformConfigBP(

components/renku_data_services/app_config/config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -459,6 +459,7 @@ def data_connector_secret_repo(self) -> DataConnectorSecretRepository:
459459
data_connector_repo=self.data_connector_repo,
460460
user_repo=self.kc_user_repo,
461461
secret_service_public_key=self.secrets_service_public_key,
462+
authz=self.authz,
462463
)
463464
return self._data_connector_secret_repo
464465

components/renku_data_services/authn/keycloak.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ async def authenticate(
9898
user = base_models.AuthenticatedAPIUser(
9999
is_admin=is_admin,
100100
id=id,
101-
access_token=access_token,
101+
access_token=token,
102102
full_name=parsed.get("name"),
103103
first_name=parsed.get("given_name"),
104104
last_name=parsed.get("family_name"),

components/renku_data_services/base_api/auth.py

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -71,30 +71,6 @@ async def decorated_function(request: Request, *args: _P.args, **kwargs: _P.kwar
7171
return decorator
7272

7373

74-
def validate_path_project_id(
75-
f: Callable[Concatenate[Request, _P], Coroutine[Any, Any, _T]],
76-
) -> Callable[Concatenate[Request, _P], Coroutine[Any, Any, _T]]:
77-
"""Decorator for a Sanic handler that validates the project_id path parameter."""
78-
_path_project_id_regex = re.compile(r"^[A-Za-z0-9]{26}$")
79-
80-
@wraps(f)
81-
async def decorated_function(request: Request, *args: _P.args, **kwargs: _P.kwargs) -> _T:
82-
project_id = cast(str | None, kwargs.get("project_id"))
83-
if not project_id:
84-
raise errors.ProgrammingError(
85-
message="Could not find 'project_id' in the keyword arguments for the handler in order to validate it."
86-
)
87-
if not _path_project_id_regex.match(project_id):
88-
raise errors.ValidationError(
89-
message=f"The 'project_id' path parameter {project_id} does not match the required "
90-
f"regex {_path_project_id_regex}"
91-
)
92-
93-
return await f(request, *args, **kwargs)
94-
95-
return decorated_function
96-
97-
9874
def validate_path_user_id(
9975
f: Callable[Concatenate[Request, _P], Coroutine[Any, Any, _T]],
10076
) -> Callable[Concatenate[Request, _P], Coroutine[Any, Any, _T]]:

components/renku_data_services/crc/db.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
from functools import wraps
1313
from typing import Any, Concatenate, Optional, ParamSpec, TypeVar, cast
1414

15-
from sqlalchemy import NullPool, delete, select
15+
from sqlalchemy import NullPool, delete, false, select, true
1616
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
1717
from sqlalchemy.orm import selectinload
1818
from sqlalchemy.sql import Select, and_, not_, or_

components/renku_data_services/data_connectors/api.spec.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -619,10 +619,10 @@ components:
619619
exclusive:
620620
type: boolean
621621
description: if true, only values from 'examples' can be used
622-
datatype:
622+
type:
623623
type: string
624624
description: data type of option value. RClone has more options but they map to the ones listed here.
625-
enum: ["int", "bool", "string", "Time"]
625+
enum: ["int", "bool", "string", "Time", "Duration", "MultiEncoder", "SizeSuffix", "SpaceSepList", "CommaSepList", "Tristate"]
626626
Ulid:
627627
description: ULID identifier
628628
type: string

components/renku_data_services/data_connectors/apispec.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# generated by datamodel-codegen:
22
# filename: api.spec.yaml
3-
# timestamp: 2024-10-22T07:46:54+00:00
3+
# timestamp: 2024-10-28T20:03:14+00:00
44

55
from __future__ import annotations
66

@@ -23,11 +23,17 @@ class Example(BaseAPISpec):
2323
)
2424

2525

26-
class Datatype(Enum):
26+
class Type(Enum):
2727
int = "int"
2828
bool = "bool"
2929
string = "string"
3030
Time = "Time"
31+
Duration = "Duration"
32+
MultiEncoder = "MultiEncoder"
33+
SizeSuffix = "SizeSuffix"
34+
SpaceSepList = "SpaceSepList"
35+
CommaSepList = "CommaSepList"
36+
Tristate = "Tristate"
3137

3238

3339
class RCloneOption(BaseAPISpec):
@@ -65,7 +71,7 @@ class RCloneOption(BaseAPISpec):
6571
exclusive: Optional[bool] = Field(
6672
None, description="if true, only values from 'examples' can be used"
6773
)
68-
datatype: Optional[Datatype] = Field(
74+
type: Optional[Type] = Field(
6975
None,
7076
description="data type of option value. RClone has more options but they map to the ones listed here.",
7177
)

components/renku_data_services/data_connectors/db.py

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
"""Adapters for data connectors database classes."""
22

3-
from collections.abc import Callable
3+
from collections.abc import AsyncIterator, Callable
44
from typing import TypeVar
55

66
from cryptography.hazmat.primitives.asymmetric import rsa
@@ -477,11 +477,44 @@ def __init__(
477477
data_connector_repo: DataConnectorRepository,
478478
user_repo: UserRepo,
479479
secret_service_public_key: rsa.RSAPublicKey,
480+
authz: Authz,
480481
) -> None:
481482
self.session_maker = session_maker
482483
self.data_connector_repo = data_connector_repo
483484
self.user_repo = user_repo
484485
self.secret_service_public_key = secret_service_public_key
486+
self.authz = authz
487+
488+
async def get_data_connectors_with_secrets(
489+
self,
490+
user: base_models.APIUser,
491+
project_id: ULID,
492+
) -> AsyncIterator[models.DataConnectorWithSecrets]:
493+
"""Get all data connectors and their secrets for a project."""
494+
if user.id is None:
495+
raise errors.UnauthorizedError(message="You do not have the required permissions for this operation.")
496+
497+
can_read_project = await self.authz.has_permission(user, ResourceType.project, project_id, Scope.READ)
498+
if not can_read_project:
499+
raise errors.MissingResourceError(
500+
message=f"The project ID with {project_id} does not exist or you dont have permission to access it"
501+
)
502+
503+
data_connector_ids = await self.authz.resources_with_permission(
504+
user, user.id, ResourceType.data_connector, Scope.READ
505+
)
506+
507+
async with self.session_maker() as session:
508+
stmt = select(schemas.DataConnectorORM).where(
509+
schemas.DataConnectorORM.project_links.any(
510+
schemas.DataConnectorToProjectLinkORM.project_id == project_id
511+
),
512+
schemas.DataConnectorORM.id.in_(data_connector_ids),
513+
)
514+
results = await session.stream_scalars(stmt)
515+
async for dc in results:
516+
secrets = await self.get_data_connector_secrets(user, dc.id)
517+
yield models.DataConnectorWithSecrets(dc.dump(), secrets)
485518

486519
async def get_data_connector_secrets(
487520
self,

components/renku_data_services/data_connectors/models.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,3 +150,11 @@ class DataConnectorPermissions:
150150
write: bool
151151
delete: bool
152152
change_membership: bool
153+
154+
155+
@dataclass
156+
class DataConnectorWithSecrets:
157+
"""A data connector with its secrets."""
158+
159+
data_connector: DataConnector
160+
secrets: list[DataConnectorSecret] = field(default_factory=list)

0 commit comments

Comments
 (0)