Skip to content

Commit 0f2a637

Browse files
committed
Add ds.systems.list(), improve publications search with filters
Systems:
- ds.systems.list(): DataFrame of HPC and storage systems with credential status
- Filter by category: "hpc", "storage", "all"
- Filters out internal, duplicate, and project-specific systems
- Add examples/systems.ipynb

Publications search:
- Add specific filters: pi, keyword, publication_type
- Filters combine with AND logic, all case-insensitive
- Example: ds.publications.search(pi="Rathje", publication_type="simulation")

Update docs and examples for both features.
1 parent db4a215 commit 0f2a637

8 files changed

Lines changed: 936 additions & 67 deletions

File tree

dapi/client.py

Lines changed: 54 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -361,23 +361,47 @@ def list(self, limit: int = 100, offset: int = 0, output: str = "df"):
361361
self._tapis, limit=limit, offset=offset, output=output
362362
)
363363

def search(
    self,
    query: Optional[str] = None,
    *,
    pi: Optional[str] = None,
    keyword: Optional[str] = None,
    publication_type: Optional[str] = None,
    limit: int = 100,
    output: str = "df",
):
    """Search published datasets, narrowing results with optional filters.

    Thin wrapper that forwards every argument to
    ``publications_module.search_publications`` together with the stored
    Tapis client. Filters are case-insensitive and AND-combined.

    Args:
        query (str, optional): General search across title, description,
            keywords, and PI.
        pi (str, optional): Filter by PI name.
        keyword (str, optional): Filter by keyword.
        publication_type (str, optional): Filter by type: "simulation",
            "experimental", "field_recon", "other", "hybrid_simulation".
        limit (int, optional): Max publications to fetch. Defaults to 100.
        output (str, optional): "df" for DataFrame (default), "list" for dicts.

    Returns:
        DataFrame or List[Dict]: Matching publications.

    Example:
        >>> ds.publications.search("liquefaction")
        >>> ds.publications.search(pi="Rathje")
        >>> ds.publications.search(
        ...     keyword="storm surge", publication_type="simulation"
        ... )
    """
    # Group the keyword-only field filters so the delegation call stays short.
    field_filters = {
        "pi": pi,
        "keyword": keyword,
        "publication_type": publication_type,
    }
    return publications_module.search_publications(
        self._tapis, query, limit=limit, output=output, **field_filters
    )
382406

383407
def get(self, project_id: str) -> Dict:
@@ -435,6 +459,30 @@ def __init__(self, tapis_client: Tapis):
435459
"""
436460
self._tapis = tapis_client
437461

def list(self, category: Optional[str] = None, output: str = "df"):
    """Return the Tapis systems available to the current user.

    Delegates to ``systems_module.list_systems`` with the stored Tapis
    client. Unless ``category="all"`` is requested, internal and
    project-specific systems are left out.

    Args:
        category (str, optional): "hpc" for execution systems,
            "storage" for storage systems, "all" for everything,
            None for HPC + storage (default).
        output (str, optional): "df" for DataFrame (default), "list" for dicts.

    Returns:
        DataFrame or List[Dict]: Systems with id, host, category, authn, credentials.

    Example:
        >>> ds.systems.list()           # HPC + storage
        >>> ds.systems.list("hpc")      # HPC only with credential status
        >>> ds.systems.list("storage")  # Storage only
        >>> ds.systems.list("all")      # Everything including internal
    """
    listing_options = {"category": category, "output": output}
    return systems_module.list_systems(self._tapis, **listing_options)
485+
438486
def queues(self, system_id: str, verbose: bool = True) -> List[Any]:
439487
"""List logical queues available on a Tapis execution system.
440488

dapi/publications.py

Lines changed: 59 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -88,31 +88,44 @@ def list_publications(
8888

8989
def search_publications(
9090
t: Tapis,
91-
query: str,
91+
query: Optional[str] = None,
92+
*,
93+
pi: Optional[str] = None,
94+
keyword: Optional[str] = None,
95+
publication_type: Optional[str] = None,
9296
limit: int = 100,
9397
output: str = "df",
9498
) -> Union[pd.DataFrame, List[Dict]]:
95-
"""Search published datasets by keyword, title, or PI name.
99+
"""Search published datasets with optional filters.
96100
97-
Searches across title, description, keywords, and PI name fields.
98-
Filtering is done client-side since the API doesn't support server-side search.
101+
All filters are case-insensitive and combined with AND logic.
102+
At least one filter (query, pi, keyword, or publication_type) must be provided.
99103
100104
Args:
101105
t (Tapis): Authenticated Tapis client instance.
102-
query (str): Search term (case-insensitive).
103-
limit (int, optional): Maximum publications to fetch before filtering. Defaults to 100.
106+
query (str, optional): General search across title, description, keywords, and PI.
107+
pi (str, optional): Filter by PI name (partial match).
108+
keyword (str, optional): Filter by keyword (partial match against keywords list).
109+
publication_type (str, optional): Filter by type: "simulation", "experimental",
110+
"field_recon", "other", "hybrid_simulation".
111+
limit (int, optional): Max publications to fetch before filtering. Defaults to 100.
104112
output (str, optional): "df" for DataFrame (default), "list" for list of dicts.
105113
106114
Returns:
107115
Union[pd.DataFrame, List[Dict]]: Matching publications.
108116
109117
Raises:
110118
FileOperationError: If the API request fails.
111-
ValueError: If output format is invalid.
119+
ValueError: If output format is invalid or no filters provided.
112120
"""
113121
if output not in ("df", "list"):
114122
raise ValueError(f"output must be 'df' or 'list', got '{output}'")
115123

124+
if not any([query, pi, keyword, publication_type]):
125+
raise ValueError(
126+
"At least one filter must be provided: query, pi, keyword, or publication_type."
127+
)
128+
116129
headers = _get_auth_headers(t)
117130
try:
118131
resp = requests.get(
@@ -125,31 +138,48 @@ def search_publications(
125138
except requests.RequestException as e:
126139
raise FileOperationError(f"Failed to search publications: {e}") from e
127140

128-
q = query.lower()
129141
data = resp.json()
130142
matches = []
131143
for p in data.get("result", []):
132-
searchable = " ".join(
133-
[
134-
str(p.get("title", "")),
135-
str(p.get("description", "")),
136-
" ".join(p.get("keywords", [])),
137-
_pi_display(p.get("pi")),
138-
str(p.get("projectId", "")),
139-
]
140-
).lower()
141-
if q in searchable:
142-
pi = p.get("pi")
143-
matches.append(
144-
{
145-
"projectId": p.get("projectId"),
146-
"title": p.get("title"),
147-
"pi": _pi_display(pi),
148-
"type": p.get("type"),
149-
"keywords": p.get("keywords", []),
150-
"created": p.get("created"),
151-
}
152-
)
144+
# Apply filters with AND logic
145+
if query:
146+
searchable = " ".join(
147+
[
148+
str(p.get("title", "")),
149+
str(p.get("description", "")),
150+
" ".join(p.get("keywords", [])),
151+
_pi_display(p.get("pi")),
152+
str(p.get("projectId", "")),
153+
]
154+
).lower()
155+
if query.lower() not in searchable:
156+
continue
157+
158+
if pi:
159+
pi_name = _pi_display(p.get("pi")).lower()
160+
if pi.lower() not in pi_name:
161+
continue
162+
163+
if keyword:
164+
kw_lower = keyword.lower()
165+
kw_list = [k.lower() for k in p.get("keywords", [])]
166+
if not any(kw_lower in k for k in kw_list):
167+
continue
168+
169+
if publication_type:
170+
if (p.get("type") or "").lower() != publication_type.lower():
171+
continue
172+
173+
matches.append(
174+
{
175+
"projectId": p.get("projectId"),
176+
"title": p.get("title"),
177+
"pi": _pi_display(p.get("pi")),
178+
"type": p.get("type"),
179+
"keywords": p.get("keywords", []),
180+
"created": p.get("created"),
181+
}
182+
)
153183

154184
if output == "list":
155185
return matches

dapi/systems.py

Lines changed: 118 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,127 @@
11
# dapi/systems.py
2+
import pandas as pd
23
from tapipy.tapis import Tapis
34
from tapipy.errors import BaseTapyException, UnauthorizedError, NotFoundError
4-
from typing import Dict, List, Any, Optional
5+
from typing import Dict, List, Any, Optional, Union
56
from .exceptions import SystemInfoError, CredentialError
67

78

# Known DesignSafe system categories
_KNOWN_HPC = {"stampede3", "frontera", "ls6", "vista"}
_KNOWN_STORAGE = {
    "designsafe.storage.default",
    "designsafe.storage.community",
    "designsafe.storage.published",
    "nees.public",
}
# NOTE: "project-" is classified as "project" before these prefixes are
# consulted, so that entry never decides the category on its own.
_INTERNAL_PREFIXES = ("project-", "apcd.", "wma-", "ds-stko", "cloud.data", "c4-")
_DUPLICATE_SUFFIXES = (".tms", ".designsafe", "-simcenter")
_STORAGE_PREFIXES = ("designsafe.storage.",)
# Fixed output schema so an empty result still exposes the expected columns.
_SYSTEM_COLUMNS = ("id", "host", "category", "authn", "credentials")


def _classify_system(system_id: str, can_exec: bool) -> str:
    """Map a Tapis system id to "hpc", "storage", "internal", "project" or "other".

    The checks are order-sensitive: explicit allow-lists win, then the
    storage namespace, then project systems, then known internal/duplicate
    naming patterns. Anything left is "hpc" if it can execute jobs,
    otherwise "other".
    """
    if system_id in _KNOWN_HPC:
        return "hpc"
    if system_id in _KNOWN_STORAGE:
        return "storage"
    # Any other id in the designsafe.storage.* namespace is not user-facing.
    if system_id.startswith(_STORAGE_PREFIXES):
        return "internal"
    if system_id.startswith("project-"):
        return "project"
    if system_id.endswith(_DUPLICATE_SUFFIXES):
        return "internal"
    if system_id.startswith(_INTERNAL_PREFIXES):
        return "internal"
    if system_id == "maverick2":
        return "internal"
    return "hpc" if can_exec else "other"


def list_systems(
    t: Tapis,
    category: Optional[str] = None,
    output: str = "df",
) -> Union[pd.DataFrame, List[Dict]]:
    """List Tapis systems the user has access to.

    Filters out internal, duplicate, and project-specific systems by default,
    showing only the systems useful for job submission and data access.

    Args:
        t (Tapis): Authenticated Tapis client instance.
        category (str, optional): Filter by category:
            "hpc" for execution systems (stampede3, frontera, ls6, vista,
            plus any unrecognised system that can execute jobs),
            "storage" for the known storage systems,
            "all" for all systems without filtering.
            If None, shows HPC + storage (excludes internal/project systems).
        output (str, optional): "df" for DataFrame (default), "list" for dicts.

    Returns:
        Union[pd.DataFrame, List[Dict]]: One entry per system with the keys
        ``id``, ``host``, ``category``, ``authn`` and ``credentials``.
        ``credentials`` is the result of ``check_credentials`` for HPC systems
        whose default authentication method is ``TMS_KEYS`` (when the client
        exposes a username), and None otherwise or when the check failed.
        The DataFrame always has these five columns, even when no system
        matches.

    Raises:
        SystemInfoError: If the API request fails.
        ValueError: If output or category is invalid.
    """
    if output not in ("df", "list"):
        raise ValueError(f"output must be 'df' or 'list', got '{output}'")
    if category is not None and category not in ("hpc", "storage", "all"):
        raise ValueError(
            f"category must be 'hpc', 'storage', 'all', or None, got '{category}'"
        )

    try:
        # NOTE: a single page of at most 200 systems is requested; systems
        # beyond that are not listed.
        all_systems = t.systems.getSystems(listType="ALL", limit=200)
    except BaseTapyException as e:
        raise SystemInfoError(f"Failed to list systems: {e}") from e

    username = getattr(t, "username", None)
    rows = []

    for s in all_systems:
        sid = s.id
        authn = getattr(s, "defaultAuthnMethod", "")
        cat = _classify_system(sid, getattr(s, "canExec", False))

        # category == "all" keeps everything; None keeps the user-facing
        # categories; otherwise the category must match exactly.
        if category is None:
            if cat not in ("hpc", "storage"):
                continue
        elif category != "all" and cat != category:
            continue

        # Check TMS credentials for HPC systems. This is a best-effort probe:
        # a failure for one system must not abort the whole listing, so any
        # error is reported as an unknown (None) credential status.
        has_creds = None
        if cat == "hpc" and authn == "TMS_KEYS" and username:
            try:
                has_creds = check_credentials(t, sid, username)
            except Exception:
                has_creds = None

        rows.append(
            {
                "id": sid,
                "host": getattr(s, "host", ""),
                "category": cat,
                "authn": authn,
                "credentials": has_creds,
            }
        )

    if output == "list":
        return rows

    return pd.DataFrame(rows, columns=list(_SYSTEM_COLUMNS))
124+
8125
def list_system_queues(t: Tapis, system_id: str, verbose: bool = True) -> List[Any]:
9126
"""
10127
Retrieves the list of batch logical queues available on a specific Tapis execution system.

docs/examples.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,13 @@ Search and browse published datasets on DesignSafe.
2323

2424
---
2525

26+
### Systems
27+
List HPC and storage systems, check credentials, and view queues.
28+
29+
[![Try on DesignSafe](https://raw.githubusercontent.com/DesignSafe-CI/dapi/main/DesignSafe-Badge.svg)](https://jupyter.designsafe-ci.org/hub/user-redirect/lab/tree/CommunityData/dapi/systems.ipynb)
30+
31+
---
32+
2633
### Application Management
2734
Discover and manage applications on DesignSafe.
2835

docs/publications.md

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,20 +23,26 @@ DataFrame columns: `projectId`, `title`, `pi`, `type`, `keywords`, `created`.
2323

2424
## Search publications
2525

26-
Search across titles, descriptions, keywords, and PI names. Case-insensitive.
26+
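Search with specific filters or general text. All filters are case-insensitive and combined with AND logic, and at least one filter must be provided. `query`, `pi`, and `keyword` match substrings; `publication_type` must match the type exactly.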
2727

2828
```python
29-
# By topic
29+
# General text search (across title, description, keywords, PI)
3030
ds.publications.search("liquefaction")
3131

32-
# By PI
33-
ds.publications.search("Rathje")
32+
# Filter by PI name
33+
ds.publications.search(pi="Rathje")
3434

35-
# By project type keyword
36-
ds.publications.search("storm surge")
35+
# Filter by keyword
36+
ds.publications.search(keyword="storm surge")
37+
38+
# Filter by publication type: simulation, experimental, field_recon, other, hybrid_simulation
39+
ds.publications.search(publication_type="simulation")
40+
41+
# Combine filters (AND logic)
42+
ds.publications.search(keyword="storm surge", publication_type="simulation")
3743

3844
# Increase the search pool: `limit` caps how many publications are fetched before filtering (default 100)
39-
ds.publications.search("earthquake", limit=500)
45+
ds.publications.search(keyword="earthquake", limit=500)
4046
```
4147

4248
## Get publication details

0 commit comments

Comments
 (0)