Skip to content

Commit a511591

Browse files
committed
Return DataFrames from ds.projects.list() and ds.projects.files()
- list_projects returns DataFrame by default (output="df"|"list")
- list_project_files returns DataFrame by default (output="df"|"raw")
- get_project returns dict with pi as display name string
- Update tests (142 passing) and example notebook
- Update docs/projects.md
1 parent dc083a8 commit a511591

4 files changed

Lines changed: 1061 additions & 212 deletions

File tree

dapi/client.py

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -266,22 +266,25 @@ class ProjectMethods:
266266
def __init__(self, tapis_client: Tapis):
267267
self._tapis = tapis_client
268268

269-
def list(self, limit: int = 100, offset: int = 0) -> List[Dict]:
269+
def list(self, limit: int = 100, offset: int = 0, output: str = "df"):
270270
"""List DesignSafe projects you have access to.
271271
272272
Args:
273273
limit (int, optional): Maximum projects to return. Defaults to 100.
274274
offset (int, optional): Number of projects to skip. Defaults to 0.
275+
output (str, optional): "df" for DataFrame (default), "list" for
276+
list of dicts.
275277
276278
Returns:
277-
List[Dict]: List of project dicts with uuid, projectId, title, pi, etc.
279+
DataFrame or List[Dict]: Projects with projectId, title, pi, type, etc.
278280
279281
Example:
280-
>>> projects = ds.projects.list()
281-
>>> for p in projects[:3]:
282-
... print(f"{p['projectId']} - {p['title']}")
282+
>>> ds.projects.list() # returns a DataFrame
283+
>>> ds.projects.list(output="list") # returns list of dicts
283284
"""
284-
return projects_module.list_projects(self._tapis, limit=limit, offset=offset)
285+
return projects_module.list_projects(
286+
self._tapis, limit=limit, offset=offset, output=output
287+
)
285288

286289
def get(self, project_id: str) -> Dict:
287290
"""Get detailed metadata for a project.
@@ -300,24 +303,27 @@ def get(self, project_id: str) -> Dict:
300303
"""
301304
return projects_module.get_project(self._tapis, project_id)
302305

303-
def files(self, project_id: str, path: str = "/", limit: int = 100) -> List:
306+
def files(
307+
self, project_id: str, path: str = "/", limit: int = 100, output: str = "df"
308+
):
304309
"""List files in a project.
305310
306311
Args:
307312
project_id (str): Project ID (e.g., "PRJ-1305").
308313
path (str, optional): Path within the project. Defaults to "/".
309314
limit (int, optional): Max items to return. Defaults to 100.
315+
output (str, optional): "df" for DataFrame (default), "raw" for
316+
Tapis file objects.
310317
311318
Returns:
312-
List: List of Tapis file objects.
319+
DataFrame or List: Files with name, type, size, lastModified, path.
313320
314321
Example:
315-
>>> files = ds.projects.files("PRJ-1305", "/Training/")
316-
>>> for f in files[:5]:
317-
... print(f"{f.name} ({f.type})")
322+
>>> ds.projects.files("PRJ-1305", "/Training/")
323+
>>> ds.projects.files("PRJ-1305", output="raw")
318324
"""
319325
return projects_module.list_project_files(
320-
self._tapis, project_id, path=path, limit=limit
326+
self._tapis, project_id, path=path, limit=limit, output=output
321327
)
322328

323329

dapi/projects.py

Lines changed: 78 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
# dapi/projects.py
22
import requests
3+
import pandas as pd
34
from tapipy.tapis import Tapis
45
from tapipy.errors import BaseTapyException
56
from .exceptions import FileOperationError
6-
from typing import Dict, List, Optional
7+
from typing import Dict, List, Optional, Union
78

89

910
_DS_PROJECTS_API = "https://designsafe-ci.org/api/projects/v2/"
@@ -15,26 +16,44 @@ def _get_auth_headers(t: Tapis) -> Dict[str, str]:
1516
return {"X-Tapis-Token": token, "Authorization": f"Bearer {token}"}
1617

1718

18-
def list_projects(t: Tapis, limit: int = 100, offset: int = 0) -> List[Dict]:
19+
def _extract_pi(users: List[Dict]) -> Optional[Dict]:
20+
"""Extract the PI from a project's users list."""
21+
return next((u for u in users if u.get("role") == "pi"), None)
22+
23+
24+
def _pi_display(pi: Optional[Dict]) -> str:
25+
"""Format PI dict as display name."""
26+
if not pi:
27+
return ""
28+
return f"{pi.get('fname', '')} {pi.get('lname', '')}".strip()
29+
30+
31+
def list_projects(
32+
t: Tapis,
33+
limit: int = 100,
34+
offset: int = 0,
35+
output: str = "df",
36+
) -> Union[pd.DataFrame, List[Dict]]:
1937
"""List DesignSafe projects the authenticated user has access to.
2038
2139
Args:
2240
t (Tapis): Authenticated Tapis client instance.
2341
limit (int, optional): Maximum number of projects to return. Defaults to 100.
2442
offset (int, optional): Number of projects to skip. Defaults to 0.
43+
output (str, optional): Output format. "df" returns a pandas DataFrame
44+
(default), "list" returns a list of dicts.
2545
2646
Returns:
27-
List[Dict]: List of project dictionaries with keys:
28-
- uuid (str): Project UUID
29-
- projectId (str): Project ID (e.g., "PRJ-1305")
30-
- title (str): Project title
31-
- pi (dict): Principal investigator info (username, fname, lname)
32-
- created (str): Creation timestamp
33-
- lastUpdated (str): Last update timestamp
47+
Union[pd.DataFrame, List[Dict]]: Projects in the requested format.
48+
DataFrame columns: projectId, title, pi, type, created, lastUpdated, uuid.
3449
3550
Raises:
3651
FileOperationError: If the API request fails.
52+
ValueError: If output format is invalid.
3753
"""
54+
if output not in ("df", "list"):
55+
raise ValueError(f"output must be 'df' or 'list', got '{output}'")
56+
3857
headers = _get_auth_headers(t)
3958
try:
4059
resp = requests.get(
@@ -52,18 +71,28 @@ def list_projects(t: Tapis, limit: int = 100, offset: int = 0) -> List[Dict]:
5271
for p in data.get("result", []):
5372
val = p.get("value", {})
5473
users = val.get("users", [])
55-
pi = next((u for u in users if u.get("role") == "pi"), None)
74+
pi = _extract_pi(users)
5675
projects.append(
5776
{
58-
"uuid": p.get("uuid"),
5977
"projectId": val.get("projectId"),
6078
"title": val.get("title"),
61-
"pi": pi,
79+
"pi": _pi_display(pi),
80+
"type": val.get("projectType"),
6281
"created": p.get("created"),
6382
"lastUpdated": p.get("lastUpdated"),
83+
"uuid": p.get("uuid"),
6484
}
6585
)
66-
return projects
86+
87+
if output == "list":
88+
return projects
89+
90+
df = pd.DataFrame(projects)
91+
if not df.empty:
92+
for col in ("created", "lastUpdated"):
93+
if col in df.columns:
94+
df[col] = pd.to_datetime(df[col], errors="coerce")
95+
return df
6796

6897

6998
def get_project(t: Tapis, project_id: str) -> Dict:
@@ -79,7 +108,7 @@ def get_project(t: Tapis, project_id: str) -> Dict:
79108
- projectId (str): Project ID
80109
- title (str): Project title
81110
- description (str): Project description
82-
- pi (dict): Principal investigator info
111+
- pi (str): Principal investigator name
83112
- coPis (list): Co-PIs
84113
- teamMembers (list): Team members
85114
- awardNumbers (list): Award/grant numbers
@@ -108,15 +137,15 @@ def get_project(t: Tapis, project_id: str) -> Dict:
108137
bp = data.get("baseProject", {})
109138
val = bp.get("value", {})
110139
users = val.get("users", [])
111-
pi = next((u for u in users if u.get("role") == "pi"), None)
140+
pi = _extract_pi(users)
112141
uuid = bp.get("uuid", "")
113142

114143
return {
115144
"uuid": uuid,
116145
"projectId": val.get("projectId"),
117146
"title": val.get("title"),
118147
"description": val.get("description"),
119-
"pi": pi,
148+
"pi": _pi_display(pi),
120149
"coPis": val.get("coPis", []),
121150
"teamMembers": val.get("teamMembers", []),
122151
"awardNumbers": val.get("awardNumbers", []),
@@ -130,24 +159,33 @@ def get_project(t: Tapis, project_id: str) -> Dict:
130159

131160

132161
def list_project_files(
133-
t: Tapis, project_id: str, path: str = "/", limit: int = 100
134-
) -> List:
162+
t: Tapis,
163+
project_id: str,
164+
path: str = "/",
165+
limit: int = 100,
166+
output: str = "df",
167+
) -> Union[pd.DataFrame, List]:
135168
"""List files in a DesignSafe project.
136169
137-
Resolves the project ID to a Tapis system and lists files at the given path.
138-
139170
Args:
140171
t (Tapis): Authenticated Tapis client instance.
141172
project_id (str): Project ID (e.g., "PRJ-1305").
142173
path (str, optional): Path within the project. Defaults to "/".
143174
limit (int, optional): Maximum number of items to return. Defaults to 100.
175+
output (str, optional): Output format. "df" returns a pandas DataFrame
176+
(default), "raw" returns Tapis file objects.
144177
145178
Returns:
146-
List: List of Tapis file objects with name, type, size, etc.
179+
Union[pd.DataFrame, List]: Files in the requested format.
180+
DataFrame columns: name, type, size, lastModified, path.
147181
148182
Raises:
149183
FileOperationError: If the project is not found or file listing fails.
184+
ValueError: If output format is invalid.
150185
"""
186+
if output not in ("df", "raw"):
187+
raise ValueError(f"output must be 'df' or 'raw', got '{output}'")
188+
151189
project = get_project(t, project_id)
152190
system_id = project["systemId"]
153191
if not system_id:
@@ -160,12 +198,30 @@ def list_project_files(
160198

161199
try:
162200
results = t.files.listFiles(systemId=system_id, path=path, limit=limit)
163-
return results
164201
except BaseTapyException as e:
165202
raise FileOperationError(
166203
f"Failed to list files in project '{project_id}' at path '{path}': {e}"
167204
) from e
168205

206+
if output == "raw":
207+
return results
208+
209+
rows = []
210+
for f in results:
211+
rows.append(
212+
{
213+
"name": getattr(f, "name", ""),
214+
"type": getattr(f, "type", ""),
215+
"size": getattr(f, "size", 0),
216+
"lastModified": getattr(f, "lastModified", ""),
217+
"path": getattr(f, "path", ""),
218+
}
219+
)
220+
df = pd.DataFrame(rows)
221+
if not df.empty and "lastModified" in df.columns:
222+
df["lastModified"] = pd.to_datetime(df["lastModified"], errors="coerce")
223+
return df
224+
169225

170226
def resolve_project_uuid(t: Tapis, project_id: str) -> str:
171227
"""Resolve a DesignSafe project ID (e.g., PRJ-1305) to its Tapis system ID.

0 commit comments

Comments
 (0)