11# dapi/projects.py
22import requests
3+ import pandas as pd
34from tapipy .tapis import Tapis
45from tapipy .errors import BaseTapyException
56from .exceptions import FileOperationError
6- from typing import Dict , List , Optional
7+ from typing import Dict , List , Optional , Union
78
89
910_DS_PROJECTS_API = "https://designsafe-ci.org/api/projects/v2/"
@@ -15,26 +16,44 @@ def _get_auth_headers(t: Tapis) -> Dict[str, str]:
1516 return {"X-Tapis-Token" : token , "Authorization" : f"Bearer { token } " }
1617
1718
18- def list_projects (t : Tapis , limit : int = 100 , offset : int = 0 ) -> List [Dict ]:
19+ def _extract_pi (users : List [Dict ]) -> Optional [Dict ]:
20+ """Extract the PI from a project's users list."""
21+ return next ((u for u in users if u .get ("role" ) == "pi" ), None )
22+
23+
24+ def _pi_display (pi : Optional [Dict ]) -> str :
25+ """Format PI dict as display name."""
26+ if not pi :
27+ return ""
28+ return f"{ pi .get ('fname' , '' )} { pi .get ('lname' , '' )} " .strip ()
29+
30+
31+ def list_projects (
32+ t : Tapis ,
33+ limit : int = 100 ,
34+ offset : int = 0 ,
35+ output : str = "df" ,
36+ ) -> Union [pd .DataFrame , List [Dict ]]:
1937 """List DesignSafe projects the authenticated user has access to.
2038
2139 Args:
2240 t (Tapis): Authenticated Tapis client instance.
2341 limit (int, optional): Maximum number of projects to return. Defaults to 100.
2442 offset (int, optional): Number of projects to skip. Defaults to 0.
43+ output (str, optional): Output format. "df" returns a pandas DataFrame
44+ (default), "list" returns a list of dicts.
2545
2646 Returns:
27- List[Dict]: List of project dictionaries with keys:
28- - uuid (str): Project UUID
29- - projectId (str): Project ID (e.g., "PRJ-1305")
30- - title (str): Project title
31- - pi (dict): Principal investigator info (username, fname, lname)
32- - created (str): Creation timestamp
33- - lastUpdated (str): Last update timestamp
47+ Union[pd.DataFrame, List[Dict]]: Projects in the requested format.
48+ DataFrame columns: projectId, title, pi, type, created, lastUpdated, uuid.
3449
3550 Raises:
3651 FileOperationError: If the API request fails.
52+ ValueError: If output format is invalid.
3753 """
54+ if output not in ("df" , "list" ):
55+ raise ValueError (f"output must be 'df' or 'list', got '{ output } '" )
56+
3857 headers = _get_auth_headers (t )
3958 try :
4059 resp = requests .get (
@@ -52,18 +71,28 @@ def list_projects(t: Tapis, limit: int = 100, offset: int = 0) -> List[Dict]:
5271 for p in data .get ("result" , []):
5372 val = p .get ("value" , {})
5473 users = val .get ("users" , [])
55- pi = next (( u for u in users if u . get ( "role" ) == "pi" ), None )
74+ pi = _extract_pi ( users )
5675 projects .append (
5776 {
58- "uuid" : p .get ("uuid" ),
5977 "projectId" : val .get ("projectId" ),
6078 "title" : val .get ("title" ),
61- "pi" : pi ,
79+ "pi" : _pi_display (pi ),
80+ "type" : val .get ("projectType" ),
6281 "created" : p .get ("created" ),
6382 "lastUpdated" : p .get ("lastUpdated" ),
83+ "uuid" : p .get ("uuid" ),
6484 }
6585 )
66- return projects
86+
87+ if output == "list" :
88+ return projects
89+
90+ df = pd .DataFrame (projects )
91+ if not df .empty :
92+ for col in ("created" , "lastUpdated" ):
93+ if col in df .columns :
94+ df [col ] = pd .to_datetime (df [col ], errors = "coerce" )
95+ return df
6796
6897
6998def get_project (t : Tapis , project_id : str ) -> Dict :
@@ -79,7 +108,7 @@ def get_project(t: Tapis, project_id: str) -> Dict:
79108 - projectId (str): Project ID
80109 - title (str): Project title
81110 - description (str): Project description
82- - pi (dict ): Principal investigator info
111+ - pi (str ): Principal investigator name
83112 - coPis (list): Co-PIs
84113 - teamMembers (list): Team members
85114 - awardNumbers (list): Award/grant numbers
@@ -108,15 +137,15 @@ def get_project(t: Tapis, project_id: str) -> Dict:
108137 bp = data .get ("baseProject" , {})
109138 val = bp .get ("value" , {})
110139 users = val .get ("users" , [])
111- pi = next (( u for u in users if u . get ( "role" ) == "pi" ), None )
140+ pi = _extract_pi ( users )
112141 uuid = bp .get ("uuid" , "" )
113142
114143 return {
115144 "uuid" : uuid ,
116145 "projectId" : val .get ("projectId" ),
117146 "title" : val .get ("title" ),
118147 "description" : val .get ("description" ),
119- "pi" : pi ,
148+ "pi" : _pi_display ( pi ) ,
120149 "coPis" : val .get ("coPis" , []),
121150 "teamMembers" : val .get ("teamMembers" , []),
122151 "awardNumbers" : val .get ("awardNumbers" , []),
@@ -130,24 +159,33 @@ def get_project(t: Tapis, project_id: str) -> Dict:
130159
131160
132161def list_project_files (
133- t : Tapis , project_id : str , path : str = "/" , limit : int = 100
134- ) -> List :
162+ t : Tapis ,
163+ project_id : str ,
164+ path : str = "/" ,
165+ limit : int = 100 ,
166+ output : str = "df" ,
167+ ) -> Union [pd .DataFrame , List ]:
135168 """List files in a DesignSafe project.
136169
137- Resolves the project ID to a Tapis system and lists files at the given path.
138-
139170 Args:
140171 t (Tapis): Authenticated Tapis client instance.
141172 project_id (str): Project ID (e.g., "PRJ-1305").
142173 path (str, optional): Path within the project. Defaults to "/".
143174 limit (int, optional): Maximum number of items to return. Defaults to 100.
175+ output (str, optional): Output format. "df" returns a pandas DataFrame
176+ (default), "raw" returns Tapis file objects.
144177
145178 Returns:
146- List: List of Tapis file objects with name, type, size, etc.
179+ Union[pd.DataFrame, List]: Files in the requested format.
180+ DataFrame columns: name, type, size, lastModified, path.
147181
148182 Raises:
149183 FileOperationError: If the project is not found or file listing fails.
184+ ValueError: If output format is invalid.
150185 """
186+ if output not in ("df" , "raw" ):
187+ raise ValueError (f"output must be 'df' or 'raw', got '{ output } '" )
188+
151189 project = get_project (t , project_id )
152190 system_id = project ["systemId" ]
153191 if not system_id :
@@ -160,12 +198,30 @@ def list_project_files(
160198
161199 try :
162200 results = t .files .listFiles (systemId = system_id , path = path , limit = limit )
163- return results
164201 except BaseTapyException as e :
165202 raise FileOperationError (
166203 f"Failed to list files in project '{ project_id } ' at path '{ path } ': { e } "
167204 ) from e
168205
206+ if output == "raw" :
207+ return results
208+
209+ rows = []
210+ for f in results :
211+ rows .append (
212+ {
213+ "name" : getattr (f , "name" , "" ),
214+ "type" : getattr (f , "type" , "" ),
215+ "size" : getattr (f , "size" , 0 ),
216+ "lastModified" : getattr (f , "lastModified" , "" ),
217+ "path" : getattr (f , "path" , "" ),
218+ }
219+ )
220+ df = pd .DataFrame (rows )
221+ if not df .empty and "lastModified" in df .columns :
222+ df ["lastModified" ] = pd .to_datetime (df ["lastModified" ], errors = "coerce" )
223+ return df
224+
169225
170226def resolve_project_uuid (t : Tapis , project_id : str ) -> str :
171227 """Resolve a DesignSafe project ID (e.g., PRJ-1305) to its Tapis system ID.
0 commit comments