Skip to content
2 changes: 1 addition & 1 deletion datashuttle/tui/shared/validate_content.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ def on_button_pressed(self, event: Button.Pressed) -> None:
)
if not success:
self.parent_class.mainwindow.show_modal_error_dialog(
cast(str, output) # noqa
cast("str", output)
Comment thread
JoeZiminski marked this conversation as resolved.
)
else:
self.write_results_to_richlog(output)
Expand Down
48 changes: 44 additions & 4 deletions datashuttle/utils/rclone.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
import os
import platform
import subprocess
import tempfile
from pathlib import Path
from subprocess import CompletedProcess
from typing import Dict, List, Literal
Expand Down Expand Up @@ -30,6 +33,45 @@ def call_rclone(command: str, pipe_std: bool = False) -> CompletedProcess:
return output


def call_rclone_through_script(command: str) -> CompletedProcess:
"""
Call rclone through a script, to avoid limits on command-line calls
(in particular on Windows). Used for transfers due to generation of
large call strings.
"""
system = platform.system()

command = "rclone " + command

if system == "Windows":
suffix = ".bat"
else:
suffix = ".sh"
command = "#!/bin/bash\n" + command

with tempfile.NamedTemporaryFile(
mode="w", suffix=suffix, delete=False
) as tmp_script:
tmp_script.write(command)
tmp_script_path = tmp_script.name

try:
if system != "Windows":
os.chmod(tmp_script_path, 0o700)
Comment on lines +59 to +60

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks good and I think will work in most cases but there might be some really unique edge cases wherein the chmod might fail. I'm not sure how or when would this happen as logically the program created the script file and should be able to chmod on it but it might fail in some unique system configuration.

I think instead of completely replacing the call_rclone function with call_rclone_through_script, we can have a function like call_rclone_safe that can check if the platform is windows and then execute call_rclone_through_script. Since the call_rclone works fine on unix/linux, I think we can still keep using it unless in future we need to call_rclone_through_script on these systems as well.

@JoeZiminski JoeZiminski May 27, 2025

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hey @cs7-shrey thanks for this, this is a good point. It doesn't make sense to fail just because permissions can't be set here. I checked a little deeper further and see on macOS the max character limit is 256kb for the command line, which is about 65k characters (assuming ASCII encoding). For a very rough 'quite long' path scenario of say 120 chars this would be 541 paths, which is feasible to have for a larger project. As such it would be good to use this for macOS too (and we might as well apply to Linux too for consistency).

To have the best of both worlds, what do you think of moving the chmod command outside of the current try/except and doing:

if system != "Windows":
    try:
        os.chmod(tmp_script_path, 0o700)
    except PermissionError:
        return call_rclone(command, ....)

So basically, fall back to call_rclone if permissions can't be set (we would need to change the variable names for the overwritten command). This would still fail if the user had a very long command and did not have permission to write, but this should be a very rare case.

There is probably too much going on in this function, trying to handle windows and macOS/Linux together, but I'm not sure if its worth the indirection to break it apart yet.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks great, handling permission error and calling_clone directly. On the other hand, I realise that we can omit this altogether so as to know about the behaviour of the chmod command here when it breaks anything. It won't be a good idea to over engineer things at this point so I think we could keep it as it is and tend to it when the need be.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great I agree on balance that makes sense, let's leave it and it people run into the error there might be another more elegant way to deal with it, or it will highlight some use case we have not considered.


output = subprocess.run(
[tmp_script_path],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
shell=False,
)

finally:
os.remove(tmp_script_path)

return output


# -----------------------------------------------------------------------------
# Setup
# -----------------------------------------------------------------------------
Expand Down Expand Up @@ -189,19 +231,17 @@ def transfer_data(
extra_arguments = handle_rclone_arguments(rclone_options, include_list)

if upload_or_download == "upload":
output = call_rclone(
output = call_rclone_through_script(
f"{rclone_args('copy')} "
f'"{local_filepath}" "{cfg.get_rclone_config_name()}:'
f'{central_filepath}" {extra_arguments}',
pipe_std=True,
)

elif upload_or_download == "download":
output = call_rclone(
output = call_rclone_through_script(
f"{rclone_args('copy')} "
f'"{cfg.get_rclone_config_name()}:'
f'{central_filepath}" "{local_filepath}" {extra_arguments}',
pipe_std=True,
)

return output
Expand Down
10 changes: 4 additions & 6 deletions tests/tests_integration/test_datatypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,20 +63,18 @@ def test_transfer_datatypes(

# Unfortunately on Windows we are encountering 'The command line is too long'
# and so cannot test against all datatypes here.
some_narrow_datatypes = canonical_configs.quick_get_narrow_datatypes()[
:10
Comment thread
JoeZiminski marked this conversation as resolved.
]
narrow_datatypes = canonical_configs.quick_get_narrow_datatypes()

datatypes_used = self.get_narrow_only_datatypes_used(used=False)
for key in some_narrow_datatypes:
for key in narrow_datatypes:
datatypes_used[key] = True

test_utils.make_and_check_local_project_folders(
project,
"rawdata",
subs,
sessions,
some_narrow_datatypes,
narrow_datatypes,
datatypes_used,
)

Expand All @@ -87,7 +85,7 @@ def test_transfer_datatypes(
project, upload_or_download, "custom", "rawdata"
)

transfer_function("rawdata", "all", "all", some_narrow_datatypes)
transfer_function("rawdata", "all", "all", narrow_datatypes)

test_utils.check_folder_tree_is_correct(
os.path.join(base_path_to_check, "rawdata"),
Expand Down
Loading