1717 from datashuttle .configs .config_class import Configs
1818 from datashuttle .utils .custom_types import TopLevelFolder
1919
20- import glob
20+ import fnmatch
2121from pathlib import Path
2222
2323from datashuttle .configs import canonical_folders , canonical_tags
24- from datashuttle .utils import rclone , ssh , utils , validation
24+ from datashuttle .utils import rclone , utils , validation
2525from datashuttle .utils .custom_exceptions import NeuroBlueprintError
2626
2727# -----------------------------------------------------------------------------
@@ -599,56 +599,62 @@ def search_for_folders(
599599 Discovered folders (`all_folder_names`) and files (`all_filenames`).
600600
601601 """
602- if local_or_central == "central" and cfg ["connection_method" ] in [
603- "ssh" ,
604- "gdrive" ,
605- "aws" ,
606- ]:
607- if cfg ["connection_method" ] == "ssh" :
608- all_folder_names , all_filenames = (
609- ssh .search_ssh_central_for_folders (
610- search_path ,
611- search_prefix ,
612- cfg ,
613- verbose ,
614- return_full_path ,
615- )
616- )
617-
618- else :
619- all_folder_names , all_filenames = search_gdrive_or_aws_for_folders (
620- search_path , search_prefix , cfg , return_full_path
621- )
622-
602+ if (
603+ local_or_central == "local"
604+ or cfg ["connection_method" ] == "local_filesystem"
605+ ) and not search_path .exists ():
606+ if verbose :
607+ utils .log_and_message (f"No file found at { search_path .as_posix ()} " )
608+ return [], []
609+
610+ if local_or_central == "local" :
611+ rclone_config_name = None
623612 else :
624- if not search_path .exists ():
625- if verbose :
626- utils .log_and_message (
627- f"No file found at { search_path .as_posix ()} "
628- )
629- return [], []
630-
631- all_folder_names , all_filenames = search_filesystem_path_for_folders (
632- search_path / search_prefix , return_full_path
613+ rclone_config_name = cfg .get_rclone_config_name (
614+ cfg ["connection_method" ]
633615 )
616+
617+ all_folder_names , all_filenames = search_local_or_remote (
618+ search_path ,
619+ search_prefix ,
620+ rclone_config_name ,
621+ return_full_path ,
622+ )
623+
634624 return all_folder_names , all_filenames
635625
636626
def search_local_or_remote(
    search_path: Path,
    search_prefix: str,
    rclone_config_name: str | None,
    return_full_path: bool = False,
) -> Tuple[List[Any], List[Any]]:
    """Search a local or remote folder using the `rclone lsjson` command.

    `rclone lsjson` lists the files and folders found at `search_path` in a
    json format. The json contains info about each file/folder such as its
    name and whether it is a directory. Entries whose name does not match
    `search_prefix` are discarded.

    Parameters
    ----------
    search_path
        The path to search (relative to the local or remote drive). For example,
        for "local_filesystem" this is the path on the local machine. For "ssh",
        this is the path on the machine that has been connected to.
    search_prefix
        The search string e.g. "sub-*".
    rclone_config_name
        Name of the rclone config for the remote (not set for local).
        `rclone config` can be used in the terminal to see how rclone has
        stored these. In datashuttle, these are managed by `Configs`.
    return_full_path
        If `True`, return `search_path / name` for each match, otherwise
        return only the folder/file name.

    Returns
    -------
    Discovered folders (`all_folder_names`) and files (`all_filenames`).
    Both lists are empty if the rclone call fails or if its output
    cannot be parsed as json.

    """
    all_folder_names: List[Any] = []
    all_filenames: List[Any] = []

    # A local search has no rclone remote, so the path is passed as-is.
    config_prefix = f"{rclone_config_name}:" if rclone_config_name else ""

    output = rclone.call_rclone(
        f'lsjson {config_prefix}"{search_path.as_posix()}"',
        pipe_std=True,
    )

    if output.returncode != 0:
        utils.log_and_message(
            f"Error searching files at {search_path.as_posix()}\n"
            f"{output.stderr.decode('utf-8') if output.stderr else ''}"
        )
        return all_folder_names, all_filenames

    # Treat unparseable output like a failed search (log and return empty)
    # instead of letting the decode error propagate to the caller.
    try:
        files_and_folders = json.loads(output.stdout)
    except (json.JSONDecodeError, UnicodeDecodeError):
        utils.log_and_message(
            f"Error searching files at {search_path.as_posix()}\n"
            "The output of `rclone lsjson` could not be parsed."
        )
        return all_folder_names, all_filenames

    for file_or_folder in files_and_folders:
        name = file_or_folder["Name"]

        # NOTE(review): `fnmatch.fnmatch` case-normalises both arguments with
        # `os.path.normcase`, so matching is case-insensitive on Windows.
        # Confirm this is intended when searching case-sensitive remotes.
        if not fnmatch.fnmatch(name, search_prefix):
            continue

        to_append = search_path / name if return_full_path else name

        if file_or_folder.get("IsDir", False):
            all_folder_names.append(to_append)
        else:
            all_filenames.append(to_append)

    return all_folder_names, all_filenames
0 commit comments