Skip to content

Commit 7d2e99e

Browse files
committed
feat: run notebooks in data service (#375)
Co-authored-by: Samuel Gaist <samuel.gaist@idiap.ch> squashme: resolve package version conflicts
1 parent 0aec28d commit 7d2e99e

65 files changed

Lines changed: 6602 additions & 1324 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.devcontainer/devcontainer.json

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,10 @@
2020
"ghcr.io/devcontainers/features/kubectl-helm-minikube:1": {
2121
"minikube": "none"
2222
},
23-
"ghcr.io/eitsupi/devcontainer-features/jq-likes:2": {},
23+
"ghcr.io/eitsupi/devcontainer-features/jq-likes:2": {
24+
"jqVersion": "latest",
25+
"yqVersion": "latest"
26+
},
2427
"ghcr.io/dhoeric/features/k9s:1": {},
2528
"ghcr.io/EliiseS/devcontainer-features/bash-profile:1": {
2629
"command": "alias k=kubectl"

.devcontainer/docker-compose.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ services:
2323
ZED_TOKEN: renku
2424
ZED_INSECURE: "true"
2525
POETRY_CACHE_DIR: "/poetry_cache"
26+
NB_SERVER_OPTIONS__DEFAULTS_PATH: /workspace/server_defaults.json
27+
NB_SERVER_OPTIONS__UI_CHOICES_PATH: /workspace/server_options.json
2628
network_mode: service:db
2729
depends_on:
2830
- db
@@ -43,6 +45,7 @@ services:
4345
- "8080:8080"
4446
- "5678:5678"
4547
- "50051:50051"
48+
- "8888:80"
4649

4750
swagger:
4851
image: swaggerapi/swagger-ui

.github/workflows/acceptance-tests.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ jobs:
2525
renku-graph: ${{ steps.deploy-comment.outputs.renku-graph}}
2626
renku-notebooks: ${{ steps.deploy-comment.outputs.renku-notebooks}}
2727
renku-ui: ${{ steps.deploy-comment.outputs.renku-ui}}
28+
amalthea-sessions: ${{ steps.deploy-comment.outputs.amalthea-sessions}}
29+
amalthea: ${{ steps.deploy-comment.outputs.amalthea}}
2830
test-enabled: ${{ steps.deploy-comment.outputs.test-enabled}}
2931
test-cypress-enabled: ${{ steps.deploy-comment.outputs.test-cypress-enabled}}
3032
persist: ${{ steps.deploy-comment.outputs.persist}}
@@ -84,6 +86,8 @@ jobs:
8486
renku_graph: "${{ needs.check-deploy.outputs.renku-graph }}"
8587
renku_notebooks: "${{ needs.check-deploy.outputs.renku-notebooks }}"
8688
renku_data_services: "@${{ github.head_ref }}"
89+
amalthea: "${{ needs.check-deploy.outputs.amalthea }}"
90+
amalthea_sessions: "${{ needs.check-deploy.outputs.amalthea-sessions }}"
8791
extra_values: "${{ needs.check-deploy.outputs.extra-values }}"
8892

8993
selenium-acceptance-tests:

.github/workflows/save_cache.yml

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
name: Create cache from commits on main
2+
3+
on:
4+
push:
5+
branches:
6+
- main
7+
- chore-add-kind
8+
workflow_dispatch:
9+
10+
11+
jobs:
12+
save-poetry-cache:
13+
runs-on: ubuntu-latest
14+
env:
15+
CACHE_KEY: main-branch-poetry-cache-ubuntu
16+
CACHE_PATH: .devcontainer/.poetry_cache
17+
DEVCONTAINER_IMAGE_CACHE: ghcr.io/swissdatasciencecenter/renku-data-services/devcontainer
18+
19+
steps:
20+
- uses: actions/checkout@v3
21+
with:
22+
fetch-depth: 0
23+
- name: Login to Docker Hub
24+
uses: docker/login-action@v2
25+
with:
26+
registry: ghcr.io
27+
username: ${{ github.actor }}
28+
password: ${{ secrets.GITHUB_TOKEN }}
29+
- name: Install python deps
30+
uses: devcontainers/ci@v0.3
31+
with:
32+
runCmd: poetry install --with dev
33+
push: always
34+
skipContainerUserIdUpdate: false
35+
imageName: ${{ env.DEVCONTAINER_IMAGE_CACHE }}
36+
cacheFrom: ${{ env.DEVCONTAINER_IMAGE_CACHE }}
37+
- uses: actions/cache/save@v3
38+
name: Create cache
39+
with:
40+
path: ${{ env.CACHE_PATH }}
41+
key: ${{ env.CACHE_KEY }}

.github/workflows/test_publish.yml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,11 @@ jobs:
7070
- uses: actions/checkout@v4
7171
with:
7272
fetch-depth: 0
73+
- uses: actions/cache/restore@v3
74+
name: Restore cache
75+
with:
76+
path: ${{ env.CACHE_PATH }}
77+
key: ${{ env.CACHE_KEY }}
7378
- name: Set Git config
7479
shell: bash
7580
run: |
@@ -111,6 +116,11 @@ jobs:
111116
- uses: actions/checkout@v4
112117
with:
113118
fetch-depth: 0
119+
- uses: actions/cache/restore@v3
120+
name: Restore cache
121+
with:
122+
path: ${{ env.CACHE_PATH }}
123+
key: ${{ env.CACHE_KEY }}
114124
- name: Set Git config
115125
shell: bash
116126
run: |
@@ -155,6 +165,11 @@ jobs:
155165
- uses: actions/checkout@v4
156166
with:
157167
fetch-depth: 0
168+
- uses: actions/cache/restore@v3
169+
name: Restore cache
170+
with:
171+
path: ${{ env.CACHE_PATH }}
172+
key: ${{ env.CACHE_KEY }}
158173
- name: Set Git config
159174
shell: bash
160175
run: |

Makefile

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
.PHONY: schemas tests test_setup main_tests schemathesis_tests collect_coverage style_checks pre_commit_checks run download_avro check_avro avro_models update_avro kind_cluster install_amaltheas all
22

3-
AMALTHEA_JS_VERSION ?= 0.11.0
4-
AMALTHEA_SESSIONS_VERSION ?= 0.0.1-new-operator-chart
5-
codegen_params = --input-file-type openapi --output-model-type pydantic_v2.BaseModel --use-double-quotes --target-python-version 3.12 --collapse-root-models --field-constraints --strict-nullable --openapi-scopes schemas paths parameters --set-default-enum-member --use-one-literal-as-default --use-default
3+
AMALTHEA_JS_VERSION ?= 0.12.2
4+
AMALTHEA_SESSIONS_VERSION ?= 0.0.9-new-operator-chart
5+
codegen_params = --input-file-type openapi --output-model-type pydantic_v2.BaseModel --use-double-quotes --target-python-version 3.12 --collapse-root-models --field-constraints --strict-nullable --set-default-enum-member --openapi-scopes schemas paths parameters --set-default-enum-member --use-one-literal-as-default --use-default
66

77
define test_apispec_up_to_date
88
$(eval $@_NAME=$(1))
@@ -161,7 +161,13 @@ kind_cluster: ## Creates a kind cluster for testing
161161
sleep 15
162162
kubectl wait --namespace ingress-nginx --for=condition=ready pod --selector=app.kubernetes.io/component=controller --timeout=90s
163163

164-
install_amaltheas: ## Installs both version of amalthea in the currently active k8s context.
164+
install_amaltheas: ## Installs both versions of amalthea. NOTE: It uses the currently active k8s context.
165165
helm repo add renku https://swissdatasciencecenter.github.io/helm-charts
166-
helm install amalthea-js renku/amalthea --version $(AMALTHEA_JS_VERSION)
167-
helm install amalthea-sessions renku/amalthea-sessions --version $(AMALTHEA_SESSIONS_VERSION)
166+
helm repo update
167+
helm upgrade --install amalthea-js renku/amalthea --version $(AMALTHEA_JS_VERSION)
168+
helm upgrade --install amalthea-sessions amalthea-sessions-0.0.9-new-operator-chart.tgz --version $(AMALTHEA_SESSIONS_VERSION)
169+
170+
# TODO: Add the version variables from the top of the file here when the charts are fully published
171+
amalthea_schema: ## Updates the generated pydantic classes from the CRDs
172+
curl https://raw.githubusercontent.com/SwissDataScienceCenter/amalthea/feat-add-cloud-storage/config/crd/bases/amalthea.dev_amaltheasessions.yaml | yq '.spec.versions[0].schema.openAPIV3Schema' | poetry run datamodel-codegen --input-file-type jsonschema --output-model-type pydantic_v2.BaseModel --output components/renku_data_services/notebooks/cr_amalthea_session.py --use-double-quotes --target-python-version 3.12 --collapse-root-models --field-constraints --strict-nullable --base-class renku_data_services.notebooks.cr_base.BaseCRD --allow-extra-fields --use-default-kwarg
173+
curl https://raw.githubusercontent.com/SwissDataScienceCenter/amalthea/main/controller/crds/jupyter_server.yaml | yq '.spec.versions[0].schema.openAPIV3Schema' | poetry run datamodel-codegen --input-file-type jsonschema --output-model-type pydantic_v2.BaseModel --output components/renku_data_services/notebooks/cr_jupyter_server.py --use-double-quotes --target-python-version 3.12 --collapse-root-models --field-constraints --strict-nullable --base-class renku_data_services.notebooks.cr_base.BaseCRD --allow-extra-fields --use-default-kwarg

bases/renku_data_services/data_api/app.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from renku_data_services.data_connectors.blueprints import DataConnectorsBP
1919
from renku_data_services.message_queue.blueprints import MessageQueueBP
2020
from renku_data_services.namespace.blueprints import GroupsBP
21+
from renku_data_services.notebooks.blueprints import NotebooksBP, NotebooksNewBP
2122
from renku_data_services.platform.blueprints import PlatformConfigBP
2223
from renku_data_services.project.blueprints import ProjectsBP
2324
from renku_data_services.repositories.blueprints import RepositoriesBP
@@ -130,6 +131,24 @@ def register_all_handlers(app: Sanic, config: Config) -> Sanic:
130131
authenticator=config.authenticator,
131132
internal_gitlab_authenticator=config.gitlab_authenticator,
132133
)
134+
notebooks = NotebooksBP(
135+
name="notebooks_old",
136+
url_prefix=url_prefix,
137+
authenticator=config.authenticator,
138+
nb_config=config.nb_config,
139+
internal_gitlab_authenticator=config.gitlab_authenticator,
140+
git_repo=config.git_repositories_repo,
141+
)
142+
notebooks_new = NotebooksNewBP(
143+
name="notebooks",
144+
url_prefix=url_prefix,
145+
authenticator=config.authenticator,
146+
nb_config=config.nb_config,
147+
project_repo=config.project_repo,
148+
session_repo=config.session_repo,
149+
rp_repo=config.rp_repo,
150+
internal_gitlab_authenticator=config.gitlab_authenticator,
151+
)
133152
platform_config = PlatformConfigBP(
134153
name="platform_config",
135154
url_prefix=url_prefix,
@@ -175,6 +194,8 @@ def register_all_handlers(app: Sanic, config: Config) -> Sanic:
175194
oauth2_clients.blueprint(),
176195
oauth2_connections.blueprint(),
177196
repositories.blueprint(),
197+
notebooks.blueprint(),
198+
notebooks_new.blueprint(),
178199
platform_config.blueprint(),
179200
message_queue.blueprint(),
180201
data_connectors.blueprint(),

components/renku_data_services/app_config/config.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@
5959
from renku_data_services.message_queue.interface import IMessageQueue
6060
from renku_data_services.message_queue.redis_queue import RedisQueue
6161
from renku_data_services.namespace.db import GroupRepository
62+
from renku_data_services.notebooks.config import _NotebooksConfig
6263
from renku_data_services.platform.db import PlatformRepository
6364
from renku_data_services.project.db import ProjectMemberRepository, ProjectRepository
6465
from renku_data_services.repositories.db import GitRepositoriesRepository
@@ -151,6 +152,7 @@ class Config:
151152
kc_api: IKeycloakAPI
152153
message_queue: IMessageQueue
153154
gitlab_url: str | None
155+
nb_config: _NotebooksConfig
154156

155157
secrets_service_public_key: rsa.RSAPublicKey
156158
"""The public key of the secrets service, used to encrypt user secrets that only it can decrypt."""
@@ -228,6 +230,10 @@ def load_apispec() -> dict[str, Any]:
228230
with open(spec_file) as f:
229231
repositories = safe_load(f)
230232

233+
spec_file = Path(renku_data_services.notebooks.__file__).resolve().parent / "api.spec.yaml"
234+
with open(spec_file) as f:
235+
repositories = safe_load(f)
236+
231237
spec_file = Path(renku_data_services.platform.__file__).resolve().parent / "api.spec.yaml"
232238
with open(spec_file) as f:
233239
platform = safe_load(f)
@@ -466,8 +472,8 @@ def from_env(cls, prefix: str = "") -> "Config":
466472
gitlab_client: base_models.GitlabAPIProtocol
467473
user_preferences_config: UserPreferencesConfig
468474
version = os.environ.get(f"{prefix}VERSION", "0.0.1")
469-
server_options_file = os.environ.get("SERVER_OPTIONS")
470-
server_defaults_file = os.environ.get("SERVER_DEFAULTS")
475+
server_options_file = os.environ.get("NB_SERVER_OPTIONS__UI_CHOICES_PATH")
476+
server_defaults_file = os.environ.get("NB_SERVER_OPTIONS__DEFAULTS_PATH")
471477
k8s_namespace = os.environ.get("K8S_NAMESPACE", "default")
472478
max_pinned_projects = int(os.environ.get(f"{prefix}MAX_PINNED_PROJECTS", "10"))
473479
user_preferences_config = UserPreferencesConfig(max_pinned_projects=max_pinned_projects)
@@ -549,6 +555,7 @@ def from_env(cls, prefix: str = "") -> "Config":
549555
sentry = SentryConfig.from_env(prefix)
550556
trusted_proxies = TrustedProxiesConfig.from_env(prefix)
551557
message_queue = RedisQueue(redis)
558+
nb_config = _NotebooksConfig.from_env(db)
552559

553560
return cls(
554561
version=version,
@@ -569,4 +576,5 @@ def from_env(cls, prefix: str = "") -> "Config":
569576
encryption_key=encryption_key,
570577
secrets_service_public_key=secrets_service_public_key,
571578
gitlab_url=gitlab_url,
579+
nb_config=nb_config,
572580
)

components/renku_data_services/authn/dummy.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from typing import Optional
88

99
from sanic import Request
10+
from ulid import ULID
1011

1112
import renku_data_services.base_models as base_models
1213

@@ -39,10 +40,22 @@ class DummyAuthenticator:
3940
"""
4041

4142
token_field = "Authorization" # nosec: B105
43+
anon_id_header_key: str = "Renku-Auth-Anon-Id"
44+
anon_id_cookie_name: str = "Renku-Auth-Anon-Id"
4245

43-
@staticmethod
44-
async def authenticate(access_token: str, request: Request) -> base_models.APIUser:
46+
async def authenticate(self, access_token: str, request: Request) -> base_models.APIUser:
4547
"""Indicates whether the user has successfully logged in."""
48+
access_token = request.headers.get(self.token_field) or ""
49+
if not access_token or len(access_token) == 0:
50+
# No access token was provided, so fall back to an anonymous user ID
51+
anon_id = request.headers.get(self.anon_id_header_key)
52+
if anon_id is None:
53+
anon_id = request.cookies.get(self.anon_id_cookie_name)
54+
if anon_id is None:
55+
anon_id = f"anon-{str(ULID())}"
56+
return base_models.AnonymousAPIUser(id=str(anon_id))
57+
58+
access_token = access_token.removeprefix("Bearer ").removeprefix("bearer ")
4659
user_props = {}
4760
with contextlib.suppress(Exception):
4861
user_props = json.loads(access_token)
@@ -64,4 +77,5 @@ async def authenticate(access_token: str, request: Request) -> base_models.APIUs
6477
last_name=user_props.get("last_name", "Doe") if is_set else None,
6578
email=user_props.get("email", "john.doe@gmail.com") if is_set else None,
6679
full_name=user_props.get("full_name", "John Doe") if is_set else None,
80+
refresh_token=request.headers.get("Renku-Auth-Refresh-Token"),
6781
)

components/renku_data_services/authn/gitlab.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,13 @@
22

33
import contextlib
44
import urllib.parse as parse
5+
from contextlib import suppress
56
from dataclasses import dataclass
7+
from datetime import datetime
68

79
import gitlab
810
from sanic import Request
11+
from sanic.compat import Header
912

1013
import renku_data_services.base_models as base_models
1114
from renku_data_services import errors
@@ -23,6 +26,7 @@ class GitlabAuthenticator:
2326
gitlab_url: str
2427

2528
token_field: str = "Gitlab-Access-Token"
29+
expires_at_field: str = "Gitlab-Access-Token-Expires-At"
2630

2731
def __post_init__(self) -> None:
2832
"""Properly set gitlab url."""
@@ -36,10 +40,10 @@ async def authenticate(self, access_token: str, request: Request) -> base_models
3640
if self.token_field != "Authorization": # nosec: B105
3741
access_token = str(request.headers.get(self.token_field))
3842

39-
result = await self._get_gitlab_api_user(access_token)
43+
result = await self._get_gitlab_api_user(access_token, request.headers)
4044
return result
4145

42-
async def _get_gitlab_api_user(self, access_token: str) -> base_models.APIUser:
46+
async def _get_gitlab_api_user(self, access_token: str, headers: Header) -> base_models.APIUser:
4347
"""Get and validate a Gitlab API User."""
4448
client = gitlab.Gitlab(self.gitlab_url, oauth_token=access_token)
4549
try:
@@ -69,12 +73,18 @@ async def _get_gitlab_api_user(self, access_token: str) -> base_models.APIUser:
6973
if len(name_parts) >= 1:
7074
last_name = " ".join(name_parts)
7175

76+
expires_at: datetime | None = None
77+
expires_at_raw: str | None = headers.get(self.expires_at_field)
78+
if expires_at_raw is not None and len(expires_at_raw) > 0:
79+
with suppress(ValueError):
80+
expires_at = datetime.fromtimestamp(float(expires_at_raw))
81+
7282
return base_models.APIUser(
73-
is_admin=False,
7483
id=str(user_id),
7584
access_token=access_token,
7685
first_name=first_name,
7786
last_name=last_name,
7887
email=email,
7988
full_name=full_name,
89+
access_token_expires_at=expires_at,
8090
)

0 commit comments

Comments
 (0)