Skip to content

Commit af63baf

Browse files
authored
fix: remove unsafe STFP options for rclone (#964)
Closes #963. Also, some minor cleanup for the rclone options.
1 parent 2b854ed commit af63baf

4 files changed

Lines changed: 90 additions & 136 deletions

File tree

components/renku_data_services/storage/rclone.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
from renku_data_services import errors
1313
from renku_data_services.app_config import logging
14-
from renku_data_services.storage.rclone_patches import BANNED_STORAGE, apply_patches
14+
from renku_data_services.storage.rclone_patches import BANNED_SFTP_OPTIONS, BANNED_STORAGE, apply_patches
1515

1616
logger = logging.getLogger(__name__)
1717

@@ -345,6 +345,14 @@ def get_option_for_provider(self, name: str, provider: str | None) -> RCloneOpti
345345

346346
return None
347347

348+
def check_unsafe_option(self, name: str) -> None:
349+
"""Check that the option is safe."""
350+
if self.prefix != "sftp":
351+
return None
352+
if name in BANNED_SFTP_OPTIONS:
353+
raise errors.ValidationError(message=f"The {name} option is not allowed.")
354+
return None
355+
348356
def validate_config(
349357
self, configuration: Union["RCloneConfig", dict[str, Any]], keep_sensitive: bool = False
350358
) -> None:
@@ -369,6 +377,8 @@ def validate_config(
369377
raise errors.ValidationError(message=f"The following fields are required but missing:\n{missing_str}")
370378

371379
for key in keys:
380+
self.check_unsafe_option(key)
381+
372382
value = configuration[key]
373383

374384
option: RCloneOption | None = self.get_option_for_provider(key, provider)

components/renku_data_services/storage/rclone_patches.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
}
2020

2121
OAUTH_PROVIDERS: Final[set[str]] = {
22-
"acd",
2322
"box",
2423
"drive",
2524
"dropbox",
@@ -31,13 +30,20 @@
3130
"onedrive",
3231
"pcloud",
3332
"pikpak",
34-
"premiumzeme",
33+
"premiumizeme",
3534
"putio",
3635
"sharefile",
3736
"yandex",
3837
"zoho",
3938
}
4039

40+
BANNED_SFTP_OPTIONS: Final[set[str]] = {
41+
"key_file", # path to a local file
42+
"pubkey_file", # path to a local file
43+
"known_hosts_file", # path to a local file
44+
"ssh", # arbitrary command to be executed
45+
}
46+
4147

4248
def find_storage(spec: list[dict[str, Any]], prefix: str) -> dict[str, Any]:
4349
"""Find and return the storage schema from the spec.
@@ -241,6 +247,16 @@ def __patch_switchdrive_storage(spec: list[dict[str, Any]]) -> None:
241247
)
242248

243249

250+
def __patch_schema_remove_banned_sftp_options(spec: list[dict[str, Any]]) -> None:
251+
"""Remove unsafe SFTP options."""
252+
sftp = find_storage(spec, "sftp")
253+
options = []
254+
for option in sftp["Options"]:
255+
if option["Name"] not in BANNED_SFTP_OPTIONS:
256+
options.append(option)
257+
sftp["Options"] = options
258+
259+
244260
def apply_patches(spec: list[dict[str, Any]]) -> None:
245261
"""Apply patches to RClone schema."""
246262
patches = [
@@ -251,6 +267,7 @@ def apply_patches(spec: list[dict[str, Any]]) -> None:
251267
__patch_schema_remove_oauth_propeties,
252268
__patch_polybox_storage,
253269
__patch_switchdrive_storage,
270+
__patch_schema_remove_banned_sftp_options,
254271
]
255272

256273
for patch in patches:

test/bases/renku_data_services/data_api/__snapshots__/test_storage.ambr

Lines changed: 0 additions & 132 deletions
Original file line numberDiff line numberDiff line change
@@ -9029,38 +9029,6 @@
90299029
'description': 'premiumize.me',
90309030
'name': 'premiumizeme',
90319031
'options': list([
9032-
dict({
9033-
'advanced': False,
9034-
'default': '',
9035-
'default_str': '',
9036-
'exclusive': False,
9037-
'help': '''
9038-
OAuth Client Id.
9039-
9040-
Leave blank normally.
9041-
''',
9042-
'ispassword': False,
9043-
'name': 'client_id',
9044-
'required': False,
9045-
'sensitive': True,
9046-
'type': 'string',
9047-
}),
9048-
dict({
9049-
'advanced': False,
9050-
'default': '',
9051-
'default_str': '',
9052-
'exclusive': False,
9053-
'help': '''
9054-
OAuth Client Secret.
9055-
9056-
Leave blank normally.
9057-
''',
9058-
'ispassword': False,
9059-
'name': 'client_secret',
9060-
'required': False,
9061-
'sensitive': True,
9062-
'type': 'string',
9063-
}),
90649032
dict({
90659033
'advanced': True,
90669034
'default': '',
@@ -14699,24 +14667,6 @@
1469914667
'sensitive': True,
1470014668
'type': 'string',
1470114669
}),
14702-
dict({
14703-
'advanced': False,
14704-
'default': '',
14705-
'default_str': '',
14706-
'exclusive': False,
14707-
'help': '''
14708-
Path to PEM-encoded private key file.
14709-
14710-
Leave blank or set key-use-agent to use ssh-agent.
14711-
14712-
Leading `~` will be expanded in the file name as will environment variables such as `${RCLONE_CONFIG_DIR}`.
14713-
''',
14714-
'ispassword': False,
14715-
'name': 'key_file',
14716-
'required': False,
14717-
'sensitive': False,
14718-
'type': 'string',
14719-
}),
1472014670
dict({
1472114671
'advanced': False,
1472214672
'default': '',
@@ -14750,48 +14700,6 @@
1475014700
'sensitive': False,
1475114701
'type': 'string',
1475214702
}),
14753-
dict({
14754-
'advanced': False,
14755-
'default': '',
14756-
'default_str': '',
14757-
'exclusive': False,
14758-
'help': '''
14759-
Optional path to public key file.
14760-
14761-
Set this if you have a signed certificate you want to use for authentication.
14762-
14763-
Leading `~` will be expanded in the file name as will environment variables such as `${RCLONE_CONFIG_DIR}`.
14764-
''',
14765-
'ispassword': False,
14766-
'name': 'pubkey_file',
14767-
'required': False,
14768-
'sensitive': False,
14769-
'type': 'string',
14770-
}),
14771-
dict({
14772-
'advanced': True,
14773-
'default': '',
14774-
'default_str': '',
14775-
'examples': list([
14776-
dict({
14777-
'help': "Use OpenSSH's known_hosts file.",
14778-
'value': '~/.ssh/known_hosts',
14779-
}),
14780-
]),
14781-
'exclusive': False,
14782-
'help': '''
14783-
Optional path to known_hosts file.
14784-
14785-
Set this value to enable server host key validation.
14786-
14787-
Leading `~` will be expanded in the file name as will environment variables such as `${RCLONE_CONFIG_DIR}`.
14788-
''',
14789-
'ispassword': False,
14790-
'name': 'known_hosts_file',
14791-
'required': False,
14792-
'sensitive': False,
14793-
'type': 'string',
14794-
}),
1479514703
dict({
1479614704
'advanced': False,
1479714705
'default': False,
@@ -15344,46 +15252,6 @@
1534415252
'sensitive': False,
1534515253
'type': 'SpaceSepList',
1534615254
}),
15347-
dict({
15348-
'advanced': False,
15349-
'default': list([
15350-
]),
15351-
'default_str': '',
15352-
'exclusive': False,
15353-
'help': '''
15354-
Path and arguments to external ssh binary.
15355-
15356-
Normally rclone will use its internal ssh library to connect to the
15357-
SFTP server. However it does not implement all possible ssh options so
15358-
it may be desirable to use an external ssh binary.
15359-
15360-
Rclone ignores all the internal config if you use this option and
15361-
expects you to configure the ssh binary with the user/host/port and
15362-
any other options you need.
15363-
15364-
**Important** The ssh command must log in without asking for a
15365-
password so needs to be configured with keys or certificates.
15366-
15367-
Rclone will run the command supplied either with the additional
15368-
arguments "-s sftp" to access the SFTP subsystem or with commands such
15369-
as "md5sum /path/to/file" appended to read checksums.
15370-
15371-
Any arguments with spaces in should be surrounded by "double quotes".
15372-
15373-
An example setting might be:
15374-
15375-
ssh -o ServerAliveInterval=20 user@example.com
15376-
15377-
Note that when using an external ssh binary rclone makes a new ssh
15378-
connection for every hash it calculates.
15379-
15380-
''',
15381-
'ispassword': False,
15382-
'name': 'ssh',
15383-
'required': False,
15384-
'sensitive': False,
15385-
'type': 'SpaceSepList',
15386-
}),
1538715255
dict({
1538815256
'advanced': True,
1538915257
'default': '',

test/bases/renku_data_services/data_api/test_storage.py

Lines changed: 60 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
from renku_data_services.data_api.dependencies import DependencyManager
1313
from renku_data_services.migrations.core import run_migrations_for_app
1414
from renku_data_services.storage.rclone import RCloneValidator
15-
from renku_data_services.storage.rclone_patches import BANNED_STORAGE, OAUTH_PROVIDERS
15+
from renku_data_services.storage.rclone_patches import BANNED_SFTP_OPTIONS, BANNED_STORAGE, OAUTH_PROVIDERS
1616
from test.utils import SanicReusableASGITestClient
1717

1818
_valid_storage: dict[str, Any] = {
@@ -249,6 +249,21 @@ async def storage_test_client(
249249
422,
250250
"",
251251
),
252+
(
253+
{
254+
"project_id": "123456",
255+
"name": "mystorage",
256+
"configuration": {
257+
"type": "sftp",
258+
"host": "myhost",
259+
"ssh": "ssh", # passing in banned option
260+
},
261+
"source_path": "bucket/myfolder",
262+
"target_path": "my/target",
263+
},
264+
422,
265+
"",
266+
),
252267
],
253268
)
254269
@pytest.mark.asyncio
@@ -508,6 +523,39 @@ async def test_storage_patch_unauthorized(storage_test_client, valid_storage_pay
508523
assert res.status_code == 403, res.text
509524

510525

526+
@pytest.mark.asyncio
527+
async def test_storage_patch_banned_option(storage_test_client, valid_storage_payload) -> None:
528+
storage_test_client, _ = storage_test_client
529+
# NOTE: The keycloak dummy client used to authorize the storage patch requests only has info
530+
# on a user with name Admin Doe, using a different user will fail with a 401 error.
531+
access_token = json.dumps({"is_admin": False, "id": "some-id", "full_name": "Admin Doe"})
532+
payload = dict(valid_storage_payload)
533+
payload["configuration"] = {
534+
"type": "sftp",
535+
"host": "myhost",
536+
}
537+
_, res = await storage_test_client.post(
538+
"/api/data/storage",
539+
headers={"Authorization": f"bearer {access_token}"},
540+
data=json.dumps(payload),
541+
)
542+
assert res.status_code == 201
543+
assert res.json["storage"]["storage_type"] == "sftp"
544+
storage_id = res.json["storage"]["storage_id"]
545+
546+
_, res = await storage_test_client.patch(
547+
f"/api/data/storage/{storage_id}",
548+
headers={"Authorization": f"bearer {access_token}"},
549+
data=json.dumps(
550+
{
551+
"configuration": {"key_file": "my_key"},
552+
}
553+
),
554+
)
555+
assert res.status_code == 422
556+
assert "key_file option is not allowed" in res.text
557+
558+
511559
@pytest.mark.asyncio
512560
async def test_storage_obscure(storage_test_client) -> None:
513561
storage_test_client, _ = storage_test_client
@@ -632,9 +680,20 @@ async def test_storage_schema_patches(storage_test_client, snapshot) -> None:
632680
oauth_providers = [s for s in schema if s["prefix"] in OAUTH_PROVIDERS]
633681
assert all(o["name"] != "client_id" and o["name"] != "client_secret" for p in oauth_providers for o in p["options"])
634682

683+
# check the OAUTH_PROVIDERS list
684+
not_exists = set(p for p in OAUTH_PROVIDERS if p not in set(s["prefix"] for s in schema))
685+
assert not_exists == set()
686+
635687
# check custom webdav storage is added
636688
assert any(s["prefix"] == "polybox" for s in schema)
637689
assert any(s["prefix"] == "switchDrive" for s in schema)
690+
691+
# check that unsafe SFTP options are removed
692+
sftp = next((e for e in schema if e["prefix"] == "sftp"), None)
693+
assert sftp
694+
assert all(o["name"] not in BANNED_SFTP_OPTIONS for o in sftp["options"])
695+
696+
# snapshot the schema
638697
assert schema == snapshot
639698

640699

0 commit comments

Comments
 (0)