Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 71 additions & 2 deletions dask-gateway-server/dask_gateway_server/auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import aiohttp
from aiohttp import web
from traitlets import Instance, Integer, Unicode, default
from traitlets import Bool, Instance, Integer, Unicode, default
from traitlets.config import LoggingConfigurable

from .models import User
Expand Down Expand Up @@ -315,6 +315,49 @@ def _default_jupyterhub_api_url(self):
raise ValueError("JUPYTERHUB_API_URL must be set")
return out

jupyterhub_service_name = Unicode(
# should this be "dask-gateway"?
# that would enable service scope enforcement by default
"",
help="""
The name of dask-gateway as a jupyterhub service.

By default this is determined from the ``JUPYTERHUB_SERVICE_NAME``
environment variable.
""",
config=True,
)

@default("jupyterhub_service_name")
def _default_jupyterhub_service_name(self):
    """Return the service name from ``JUPYTERHUB_SERVICE_NAME``.

    Falls back to the empty string when the environment variable is unset.
    """
    service_name = os.getenv("JUPYTERHUB_SERVICE_NAME", "")
    return service_name

# Whether a token must carry a JupyterHub service-access scope to be accepted.
# No static default is given here; the value is computed by the
# ``@default("use_service_access_scopes")`` method.
use_service_access_scopes = Bool(
    help="""
    Require tokens to have `access:services!service={jupyterhub_service_name}` permissions
    in order to access the gateway.

    Allows JupyterHub RBAC to control access to dask-gateway.

    Disabled by default for backward-compatibility, but strongly encouraged.
    Enabled by default if `jupyterhub_service_name` is set.
    """,
    config=True,
)

@default("use_service_access_scopes")
def _default_use_service_access_scopes(self):
    """Compute the default for ``use_service_access_scopes``.

    Returns
    -------
    bool
        ``True`` when ``jupyterhub_service_name`` is non-empty, so scope
        checks are enforced; ``False`` otherwise, after logging a warning
        that any JupyterHub token will be accepted.
    """
    if not self.jupyterhub_service_name:
        # Without a service name there is no scope to check against, so fall
        # back to the permissive behavior and tell the operator how to fix it.
        # The message names the server-side configurable class
        # (JupyterHubAuthenticator), which is what goes in the gateway config.
        self.log.warning(
            "jupyterhub_service_name not set, "
            "any jupyterhub token may be used to create clusters. "
            "Set JupyterHubAuthenticator.jupyterhub_service_name "
            "to use jupyterhub scopes to control access to dask-gateway."
        )
        return False
    return True

tls_key = Unicode(
"",
help="""
Expand Down Expand Up @@ -386,9 +429,35 @@ async def authenticate(self, request):

if resp.status < 400:
data = await resp.json()
# avoid collisions between user names and service names
# 'kind' may be 'user' or 'service'
username = data["name"]
if data["kind"] != "user" or ":" in username:
# avoid collision without changing the name for users
# but disambiguate if usernames might look like
# `service:name` (unlikely but not prohibited)
username = f"{data['kind']}:{username}"
Comment on lines +432 to +439
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this code have `or not` rather than `or` in the `if` statement? I didn't understand this code block.

The dask-gateway-server user's username will be changed if it starts with `user:` or `service:`, which may or may not ever be allowed — I'm not sure. That contradicts the comment about not changing the username. Perhaps the comment should add: "except in the edge case where the name starts with `user:` or `service:`".

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've updated the comment and simplified the condition to clarify.

This is to avoid collisions while also avoiding changes for the ~99% of cases where it's just users accessing the service. JupyterHub doesn't put limits on usernames, so e.g. a user can technically have the name `service:name` (unlikely, because most identity providers don't allow this). This ensures that a user is never misrepresented as `service:name` in this situation; they would be `user:service:name` instead.

gateway doesn't really use this for anything, so it's not that important. But this at least guarantees there is no collision. You can parse the result with:

if ":" not in username:
    kind = "user"
    name = username
else:
    kind, _, name = username.partition(":")

and it will always be correct, even for names containing :.


scopes = data.get("scopes", [])
if self.use_service_access_scopes:
# check scopes for access permissions
access_scopes = {
"access:services",
f"access:services!service={self.jupyterhub_service_name}",
}
have_scopes = set(scopes)
if not access_scopes.intersection(have_scopes):
self.log.debug(
"Token for %r does not have access to service %r; has scopes: %s",
username,
self.jupyterhub_service_name,
scopes,
)
raise unauthorized("jupyterhub")

# the "groups" attribute doesn't exist in the case of a service
return User(
data["name"],
username,
groups=data.get("groups", []),
admin=data.get("admin", False),
)
Expand Down
59 changes: 57 additions & 2 deletions docs/source/authentication.rst
Original file line number Diff line number Diff line change
Expand Up @@ -97,11 +97,26 @@ Then add the following lines to your ``dask_gateway_config.py`` file:
c.JupyterHubAuthenticator.api_token = "<API TOKEN>"
c.JupyterHubAuthenticator.api_url = "<API URL>"

# enables jupyterhub scope-based access
# also set via $JUPYTERHUB_SERVICE_NAME
c.JupyterHubAuthenticator.jupyterhub_service_name = "dask-gateway"

Where:

- ``<API TOKEN>`` is the token generated above
- ``<API URL>`` is JupyterHub's API url. This is usually of the form
``https://<JUPYTERHUB-HOST>:<JUPYTERHUB-PORT>/hub/api``.
- ``dask-gateway`` is the name of the service registered with JupyterHub (see below)

.. warning::

If you do not set ``jupyterhub_service_name``, then any JupyterHub token,
regardless of user or token permissions,
can be used to access Dask-Gateway.
This is insecure, but the default for backward compatibility with earlier Dask-Gateway behavior.

When set, only users and tokens with the ``access:services!service=dask-gateway`` scope
will have access to Dask-Gateway.

You'll also need to register the API token with JupyterHub. This can be done by
adding the following to the corresponding ``jupyterhub_config.py`` file:
Expand All @@ -112,10 +127,50 @@ adding the following to the corresponding ``jupyterhub_config.py`` file:
{"name": "dask-gateway", "api_token": "<API TOKEN>"}
]

again, replacing ``<API TOKEN>`` with the output from above.
Finally, you'll want to grant some or all jupyterhub users access to the ``dask-gateway`` service.
You can do this by granting all users access by overriding the default ``user`` role:

.. code-block:: python

c.JupyterHub.load_roles = [
{
# defining the 'user' role
# sets the base permissions for all jupyterhub users
"name": "user",
"scopes": [
"self",
"access:services!service=dask-gateway",
],
},
]

or select users, via username and/or group membership:

.. code-block:: python

c.JupyterHub.load_roles = [
{
"name": "dask-users",
"scopes": [
"access:services!service=dask-gateway",
],
"groups": ["dask-users"],
"users": ["patience"],
},
]

Finally, if you want the token used in singleuser server environments
(e.g. for the dask labextension), add this access scope to ``c.Spawner.server_token_scopes``:

.. code-block:: python

c.Spawner.server_token_scopes = [
"access:services!service=dask-gateway",
]

With this configuration, JupyterHub will be used to authenticate requests
between users and the ``dask-gateway-server``.
between users and the ``dask-gateway-server``,
and JupyterHub admins can control which JupyterHub users have access to Dask-Gateway.

For more information see the :ref:`jupyterhub-auth-config` docs.

Expand Down
5 changes: 4 additions & 1 deletion resources/helm/dask-gateway/templates/gateway/configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,10 @@ data:
"please specify `gateway.auth.jupyterhub.apiUrl` in "
"your config file"
)
c.DaskGateway.JupyterHubAuthenticator.jupyterhub_api_url = api_url
c.JupyterHubAuthenticator.jupyterhub_api_url = api_url
service_name = get_property("gateway.auth.jupyterhub.jupyterhubServiceName")
if service_name is not None:
c.JupyterHubAuthenticator.jupyterhub_service_name = service_name
elif auth_type == "custom":
auth_cls = get_property("gateway.auth.custom.class")
c.DaskGateway.authenticator_class = auth_cls
Expand Down
8 changes: 8 additions & 0 deletions resources/helm/dask-gateway/values.schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,14 @@ properties:
description: |
JupyterHub's api url. Inferred from JupyterHub's service name if
running in the same namespace.
jupyterhubServiceName:
type: [string, "null"]
description: |
The JupyterHub service name
(usually "dask-gateway").
This should always be set.
If not set (default),
any JupyterHub token will be able to access the gateway.
custom:
type: object
additionalProperties: false
Expand Down
5 changes: 5 additions & 0 deletions resources/helm/dask-gateway/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,11 @@ gateway:
# in the same namespace.
apiUrl:

# JupyterHub service name.
# if not set, JupyterHub permissions are ignored
# and all tokens can access the gateway
jupyterhubServiceName:

custom:
# The full authenticator class name.
class:
Expand Down
88 changes: 78 additions & 10 deletions tests/test_auth.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import logging
import os
import subprocess
import uuid
Expand All @@ -22,6 +23,8 @@
import jupyterhub.tests.mocking as hub_mocking
except ImportError:
hub_mocking = None
else:
from tornado.log import access_log, app_log, gen_log


KEYTAB_PATH = "/home/dask/dask.keytab"
Expand Down Expand Up @@ -91,9 +94,6 @@ async def __aenter__(self):
await self.hub.start()

# alembic turns off all logs, reenable them for the tests
import logging

from tornado.log import access_log, app_log, gen_log

logs = [app_log, access_log, gen_log, logging.getLogger("DaskGateway")]
for log in logs:
Expand All @@ -111,18 +111,20 @@ async def __aexit__(self, *args):
type(self.hub).clear_instance()


def configure_dask_gateway(jhub_api_token, jhub_bind_url):
def configure_dask_gateway(jhub_api_token, jhub_bind_url, service_name=""):
config = Config()
config.DaskGateway.authenticator_class = (
"dask_gateway_server.auth.JupyterHubAuthenticator"
)
config.JupyterHubAuthenticator.jupyterhub_api_token = jhub_api_token
config.JupyterHubAuthenticator.jupyterhub_api_url = jhub_bind_url + "api"
if service_name:
config.JupyterHubAuthenticator.jupyterhub_service_name = service_name
return config


@pytest.mark.skipif(not hub_mocking, reason="JupyterHub not installed")
async def test_jupyterhub_auth_user(monkeypatch):
async def test_jupyterhub_auth_legacy(monkeypatch):
from jupyterhub.tests.utils import add_user

jhub_api_token = uuid.uuid4().hex
Expand All @@ -138,7 +140,7 @@ class MockHub(hub_mocking.MockHub):
def init_logging(self):
pass

hub = MockHub(config=hub_config)
hub = MockHub(log=app_log, config=hub_config)

# Configure gateway
config = configure_dask_gateway(jhub_api_token, jhub_bind_url)
Expand All @@ -163,37 +165,103 @@ def init_logging(self):
await gateway.list_clusters()


@pytest.mark.skipif(not hub_mocking, reason="JupyterHub not installed")
async def test_jupyterhub_auth_user(monkeypatch):
    """Check scope-based access for JupyterHub *users*.

    The hub grants ``access:services!service=dask-gateway`` to ``alice``
    only, via the ``dask-users`` role. A token issued for ``bob`` is
    expected to be rejected by the gateway, while a token for ``alice``
    is expected to be accepted.
    """
    from jupyterhub.tests.utils import add_user

    service_token = uuid.uuid4().hex
    bind_url = "http://127.0.0.1:%i/@/space%%20word/" % random_port()

    # Role that gives only alice access to the dask-gateway service
    dask_users_role = {
        "name": "dask-users",
        "scopes": ["access:services!service=dask-gateway"],
        "users": ["alice"],
    }

    hub_config = Config()
    hub_config.JupyterHub.services = [
        {"name": "dask-gateway", "api_token": service_token}
    ]
    hub_config.JupyterHub.bind_url = bind_url
    hub_config.JupyterHub.load_roles = [dask_users_role]

    class MockHub(hub_mocking.MockHub):
        def init_logging(self):
            # Intentionally a no-op override
            pass

    hub = MockHub(log=app_log, config=hub_config)

    # Gateway configured with a service name
    gateway_config = configure_dask_gateway(
        service_token, bind_url, service_name="dask-gateway"
    )

    async with temp_gateway(config=gateway_config) as gateway_proc, temp_hub(hub):
        # alice is listed in the role above, so her token carries the scope
        alice = add_user(hub.db, name="alice")
        scoped_token = alice.new_api_token()
        hub.db.commit()

        # bob is a real hub user, but is not listed in the role
        bob = add_user(hub.db, name="bob")
        unscoped_token = bob.new_api_token()
        hub.db.commit()

        # Start out authenticating as bob
        auth = JupyterHubAuth(api_token=unscoped_token)

        async with gateway_proc.gateway_client(auth=auth) as client:
            # bob's token must be refused
            with pytest.raises(Exception):
                await client.list_clusters()

            # Switching to alice's token must succeed
            auth.api_token = scoped_token
            await client.list_clusters()


@pytest.mark.skipif(not hub_mocking, reason="JupyterHub not installed")
async def test_jupyterhub_auth_service(monkeypatch):
jhub_api_token = uuid.uuid4().hex
jhub_service_token = uuid.uuid4().hex
other_service_token = uuid.uuid4().hex
jhub_bind_url = "http://127.0.0.1:%i/@/space%%20word/" % random_port()

hub_config = Config()
hub_config.JupyterHub.services = [
{"name": "dask-gateway", "api_token": jhub_api_token},
{"name": "any-service", "api_token": jhub_service_token},
{"name": "other-service", "api_token": other_service_token},
]
hub_config.JupyterHub.bind_url = jhub_bind_url
hub_config.JupyterHub.load_roles = [
{
"name": "dask-users",
"scopes": ["access:services!service=dask-gateway"],
"services": ["any-service"],
}
]

class MockHub(hub_mocking.MockHub):
def init_logging(self):
pass

hub = MockHub(config=hub_config)
hub = MockHub(log=app_log, config=hub_config)

# Configure gateway
config = configure_dask_gateway(jhub_api_token, jhub_bind_url)
config = configure_dask_gateway(
jhub_api_token, jhub_bind_url, service_name="dask-gateway"
)

async with temp_gateway(config=config) as g:
async with temp_hub(hub):
# Configure auth with incorrect api token
auth = JupyterHubAuth(api_token=uuid.uuid4().hex)
auth = JupyterHubAuth(api_token=other_service_token)
async with g.gateway_client(auth=auth) as gateway:
# Auth fails with bad token
with pytest.raises(Exception):
await gateway.list_clusters()

# Auth works with service token
auth.api_token = jhub_api_token
auth.api_token = jhub_service_token
await gateway.list_clusters()
Loading