Skip to content

Commit a62b99a

Browse files
committed
Auto-TMS on DSClient init, configurable jobs list output
- DSClient() now auto-establishes TMS credentials on TACC systems (frontera, stampede3, ls6). Skips gracefully on errors/no allocation.
- ds.jobs.list() supports output="df" (default), "list", or "raw".
- Simplify auth docs: DB credentials use built-in defaults, no .env needed.
- Remove manual TMS step from quickstart (now automatic).
- 59 tests pass (8 new for setup_tms, 6 new for output formats).
1 parent a8a65ca commit a62b99a

8 files changed

Lines changed: 285 additions & 75 deletions

File tree

dapi/client.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,9 @@ def __init__(self, tapis_client: Optional[Tapis] = None, **auth_kwargs):
9292
self.systems = SystemMethods(self.tapis)
9393
self.db = DatabaseAccessor()
9494

95+
# Auto-setup TMS credentials on TACC execution systems
96+
systems_module.setup_tms_credentials(self.tapis)
97+
9598

9699
# --- AppMethods and FileMethods remain the same ---
97100
class AppMethods:
@@ -582,34 +585,41 @@ def list(
582585
app_id: Optional[str] = None,
583586
status: Optional[str] = None,
584587
limit: int = 100,
588+
output: str = "df",
585589
verbose: bool = False,
586590
):
587-
"""List jobs as a pandas DataFrame with optional filtering.
591+
"""List jobs with optional filtering.
588592
589-
Fetches jobs from Tapis ordered by creation date (newest first)
590-
and returns them as a DataFrame. Filters are applied client-side.
593+
Fetches jobs from Tapis ordered by creation date (newest first).
594+
Filters are applied client-side.
591595
592596
Args:
593597
app_id (str, optional): Filter by application ID.
594598
status (str, optional): Filter by job status (e.g., "FINISHED").
595599
Case-insensitive.
596600
limit (int, optional): Maximum jobs to fetch. Defaults to 100.
601+
output (str, optional): Output format. "df" for pandas DataFrame
602+
(default), "list" for list of dicts, "raw" for TapisResult
603+
objects.
597604
verbose (bool, optional): Print job count. Defaults to False.
598605
599606
Returns:
600-
pd.DataFrame: Job metadata with formatted datetime columns.
607+
Depends on output: DataFrame, list of dicts, or list of
608+
TapisResult objects.
601609
602610
Raises:
603611
JobMonitorError: If the Tapis API call fails.
604612
605613
Example:
606614
>>> df = ds.jobs.list(app_id="matlab-r2023a", status="FINISHED")
607-
>>> print(df[["name", "uuid", "status", "created_dt"]])
615+
>>> jobs = ds.jobs.list(output="list")
616+
>>> raw = ds.jobs.list(limit=10, output="raw")
608617
"""
609618
return jobs_module.list_jobs(
610619
self._tapis,
611620
app_id=app_id,
612621
status=status,
613622
limit=limit,
623+
output=output,
614624
verbose=verbose,
615625
)

dapi/jobs.py

Lines changed: 52 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1348,9 +1348,10 @@ def list_jobs(
13481348
app_id: Optional[str] = None,
13491349
status: Optional[str] = None,
13501350
limit: int = 100,
1351+
output: str = "df",
13511352
verbose: bool = False,
1352-
) -> pd.DataFrame:
1353-
"""Fetch Tapis jobs and return them as a pandas DataFrame.
1353+
):
1354+
"""Fetch Tapis jobs with optional filtering.
13541355
13551356
Retrieves jobs from Tapis ordered by creation date (newest first)
13561357
and optionally filters by app ID and/or status. Filters are applied
@@ -1362,22 +1363,29 @@ def list_jobs(
13621363
status: Filter by job status (e.g., "FINISHED", "FAILED").
13631364
Case-insensitive.
13641365
limit: Maximum number of jobs to fetch from Tapis. Defaults to 100.
1366+
output: Output format. "df" returns a pandas DataFrame (default),
1367+
"list" returns a list of dicts, "raw" returns the raw
1368+
TapisResult objects.
13651369
verbose: If True, prints the number of jobs found.
13661370
13671371
Returns:
1368-
DataFrame with job metadata and formatted datetime columns.
1369-
Priority columns appear first: name, uuid, status, appId, appVersion,
1370-
created_dt, ended_dt. Additional datetime columns include _dt
1371-
(timezone-aware) and _date (date only) variants for created, ended,
1372-
remoteStarted, and lastUpdated.
1372+
Depends on ``output``:
1373+
- "df": pandas DataFrame with formatted datetime columns.
1374+
- "list": list of dicts with job metadata.
1375+
- "raw": list of TapisResult objects as returned by the API.
13731376
13741377
Raises:
13751378
JobMonitorError: If the Tapis API call fails.
1379+
ValueError: If output format is not recognized.
13761380
13771381
Example:
13781382
>>> df = list_jobs(t, app_id="matlab-r2023a", status="FINISHED")
1379-
>>> print(df[["name", "uuid", "status", "created_dt"]])
1383+
>>> jobs = list_jobs(t, output="list")
1384+
>>> raw = list_jobs(t, limit=10, output="raw")
13801385
"""
1386+
if output not in ("df", "list", "raw"):
1387+
raise ValueError(f"output must be 'df', 'list', or 'raw', got '{output}'")
1388+
13811389
try:
13821390
jobs_list = tapis_client.jobs.getJobList(
13831391
limit=limit,
@@ -1391,17 +1399,48 @@ def list_jobs(
13911399
if not jobs_list:
13921400
if verbose:
13931401
print("Found 0 jobs.")
1402+
if output == "raw":
1403+
return []
1404+
if output == "list":
1405+
return []
13941406
return pd.DataFrame()
13951407

1408+
# For raw output, apply filters manually on TapisResult objects
1409+
if output == "raw":
1410+
results = jobs_list
1411+
if app_id:
1412+
results = [j for j in results if getattr(j, "appId", None) == app_id]
1413+
if status:
1414+
results = [
1415+
j for j in results
1416+
if getattr(j, "status", "").upper() == status.upper()
1417+
]
1418+
if verbose:
1419+
print(f"Found {len(results)} jobs.")
1420+
return results
1421+
13961422
# Convert TapisResult objects to dicts
13971423
jobs_dicts = [job.__dict__ for job in jobs_list]
1398-
df = pd.DataFrame(jobs_dicts)
13991424

14001425
# Apply client-side filters
1401-
if app_id and "appId" in df.columns:
1402-
df = df[df["appId"] == app_id]
1403-
if status and "status" in df.columns:
1404-
df = df[df["status"] == status.upper()]
1426+
if app_id:
1427+
jobs_dicts = [j for j in jobs_dicts if j.get("appId") == app_id]
1428+
if status:
1429+
jobs_dicts = [
1430+
j for j in jobs_dicts if j.get("status", "").upper() == status.upper()
1431+
]
1432+
1433+
if verbose:
1434+
print(f"Found {len(jobs_dicts)} jobs.")
1435+
1436+
if output == "list":
1437+
return jobs_dicts
1438+
1439+
# Build DataFrame
1440+
df = pd.DataFrame(jobs_dicts)
1441+
1442+
if df.empty:
1443+
return df
14051444

14061445
# Add formatted datetime columns
14071446
time_cols = ["created", "ended", "remoteStarted", "lastUpdated"]
@@ -1421,7 +1460,4 @@ def list_jobs(
14211460

14221461
df = df.reset_index(drop=True)
14231462

1424-
if verbose:
1425-
print(f"Found {len(df)} jobs.")
1426-
14271463
return df

dapi/systems.py

Lines changed: 75 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# dapi/systems.py
22
from tapipy.tapis import Tapis
33
from tapipy.errors import BaseTapyException, UnauthorizedError, NotFoundError
4-
from typing import List, Any, Optional
4+
from typing import Dict, List, Any, Optional
55
from .exceptions import SystemInfoError, CredentialError
66

77

@@ -292,3 +292,77 @@ def revoke_credentials(
292292
f"Unexpected error revoking credentials for user '{effective_username}' "
293293
f"on system '{system_id}': {e}"
294294
) from e
295+
296+
297+
# Default TACC execution systems that use TMS_KEYS
298+
TACC_SYSTEMS = ["frontera", "stampede3", "ls6"]
299+
300+
301+
def setup_tms_credentials(
302+
t: Tapis,
303+
systems: Optional[List[str]] = None,
304+
) -> Dict[str, str]:
305+
"""Check and establish TMS credentials on execution systems.
306+
307+
For each system, checks if credentials exist and creates them if missing.
308+
Failures are handled gracefully — a system that can't be reached or where
309+
the user lacks an allocation is skipped with a warning.
310+
311+
Args:
312+
t: Authenticated Tapis client instance.
313+
systems: List of system IDs to set up. Defaults to TACC_SYSTEMS
314+
(frontera, stampede3, ls6).
315+
316+
Returns:
317+
Dict mapping system_id to status: "ready", "created", or "skipped".
318+
"""
319+
if systems is None:
320+
systems = TACC_SYSTEMS
321+
322+
username = getattr(t, "username", None)
323+
if not username:
324+
print("Warning: Could not determine username. Skipping TMS setup.")
325+
return {s: "skipped" for s in systems}
326+
327+
results = {}
328+
329+
for system_id in systems:
330+
try:
331+
# Check if system uses TMS_KEYS
332+
system_details = t.systems.getSystem(systemId=system_id)
333+
authn_method = getattr(system_details, "defaultAuthnMethod", None)
334+
335+
if authn_method != "TMS_KEYS":
336+
results[system_id] = "skipped"
337+
continue
338+
339+
# Check existing credentials
340+
if check_credentials(t, system_id, username):
341+
results[system_id] = "ready"
342+
continue
343+
344+
# Try to create credentials
345+
t.systems.createUserCredential(
346+
systemId=system_id,
347+
userName=username,
348+
createTmsKeys=True,
349+
)
350+
results[system_id] = "created"
351+
352+
except Exception:
353+
results[system_id] = "skipped"
354+
355+
# Print summary
356+
ready = [s for s, v in results.items() if v in ("ready", "created")]
357+
created = [s for s, v in results.items() if v == "created"]
358+
skipped = [s for s, v in results.items() if v == "skipped"]
359+
360+
if ready:
361+
msg = f"TMS credentials ready: {', '.join(ready)}"
362+
if created:
363+
msg += f" (newly created: {', '.join(created)})"
364+
print(msg)
365+
if skipped:
366+
print(f"TMS credentials skipped: {', '.join(skipped)}")
367+
368+
return results

docs/authentication.md

Lines changed: 12 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -327,65 +327,37 @@ print(os.access('.env', os.R_OK))
327327

328328
### Database Connection Issues
329329

330-
For database-specific authentication issues:
330+
Database connections use built-in public read-only credentials by default -- no `.env` setup is required for database access. If you need to override the defaults (e.g., for a private database instance), you can set environment variables:
331331

332-
```python
333-
# Check database environment variables
334-
import os
335-
print("NGL_DB_USER:", os.getenv('NGL_DB_USER'))
336-
print("VP_DB_USER:", os.getenv('VP_DB_USER'))
337-
print("EQ_DB_USER:", os.getenv('EQ_DB_USER'))
338-
```
339-
340-
Required database environment variables:
341332
```bash
342-
# NGL Database
343-
export NGL_DB_USER="dspublic"
344-
export NGL_DB_PASSWORD="your_password"
345-
export NGL_DB_HOST="db_host"
346-
export NGL_DB_PORT="3306"
347-
348-
# VP Database
349-
export VP_DB_USER="dspublic"
350-
export VP_DB_PASSWORD="your_password"
351-
export VP_DB_HOST="db_host"
352-
export VP_DB_PORT="3306"
353-
354-
# Earthquake Recovery Database
355-
export EQ_DB_USER="dspublic"
356-
export EQ_DB_PASSWORD="your_password"
357-
export EQ_DB_HOST="db_host"
358-
export EQ_DB_PORT="3306"
333+
# Optional: override database credentials via .env or environment
334+
NGL_DB_USER=your_user
335+
NGL_DB_PASSWORD=your_password
336+
NGL_DB_HOST=your_host
337+
NGL_DB_PORT=3306
359338
```
360339

361-
## Example: Complete Setup
340+
The same pattern applies for VP (`VP_DB_*`) and Earthquake Recovery (`EQ_DB_*`) databases.
362341

363-
Here's a complete example of setting up authentication:
342+
## Example: Complete Setup
364343

365344
```python
366-
# 1. Create .env file
345+
# 1. Create .env file (only Tapis credentials required)
367346
with open('.env', 'w') as f:
368347
f.write('DESIGNSAFE_USERNAME=your_username\n')
369348
f.write('DESIGNSAFE_PASSWORD=your_password\n')
370-
f.write('NGL_DB_USER=dspublic\n')
371-
f.write('NGL_DB_PASSWORD=your_db_password\n')
372-
f.write('NGL_DB_HOST=db_host\n')
373-
f.write('NGL_DB_PORT=3306\n')
374349

375-
# 2. Initialize client
350+
# 2. Initialize client (auto-sets up TMS credentials)
376351
from dapi import DSClient
377352
ds = DSClient()
378353

379-
# 3. Test authentication
380-
print("Testing TAPIS API access...")
354+
# 3. Test
381355
apps = ds.apps.find("matlab", verbose=False)
382356
print(f"Found {len(apps)} MATLAB apps")
383357

384-
print("Testing database access...")
358+
# Database works out of the box -- no extra credentials needed
385359
df = ds.db.ngl.read_sql("SELECT COUNT(*) FROM SITE")
386360
print(f"NGL database has {df.iloc[0, 0]} sites")
387-
388-
print("All authentication successful!")
389361
```
390362

391363
## Troubleshooting

docs/jobs.md

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ from dapi import DSClient
2222

2323
ds = DSClient()
2424

25-
# List all recent jobs (default: last 100)
25+
# List all recent jobs (default: last 100, returns DataFrame)
2626
df = ds.jobs.list()
2727
print(df[["name", "uuid", "status", "appId", "created_dt"]])
2828

@@ -41,7 +41,22 @@ print(f"Finished jobs: {len(finished)}")
4141
print(finished.groupby("appId").size())
4242
```
4343

44-
The returned DataFrame includes formatted datetime columns (`created_dt`, `ended_dt`, `created_date`, etc.) for easy time-based analysis.
44+
### Output Formats
45+
46+
By default `list()` returns a pandas DataFrame. Use the `output` parameter for other formats:
47+
48+
```python
49+
# DataFrame (default) -- includes formatted datetime columns
50+
df = ds.jobs.list()
51+
52+
# List of dicts -- lightweight, skips DataFrame construction
53+
jobs = ds.jobs.list(output="list")
54+
for job in jobs:
55+
print(f"{job['name']}: {job['status']}")
56+
57+
# Raw TapisResult objects -- for advanced Tapis API usage
58+
raw = ds.jobs.list(output="raw")
59+
```
4560

4661
## Application Discovery
4762

docs/quickstart.md

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -61,20 +61,13 @@ ds = DSClient()
6161
# Output: Authentication successful.
6262
```
6363

64-
### Step 1b: Establish TMS Credentials (One-Time)
64+
`DSClient()` automatically sets up TMS credentials on TACC execution systems (Frontera, Stampede3, LS6). You'll see a summary like:
6565

66-
Before submitting jobs, ensure you have TMS credentials on the execution system:
67-
68-
```python
69-
# One-time setup per system -- safe to call repeatedly
70-
ds.systems.establish_credentials("frontera")
71-
# Output: TMS credentials established for user 'myuser' on system 'frontera'.
72-
73-
# Or if already established:
74-
# Output: Credentials already exist for user 'myuser' on system 'frontera'. No action taken.
66+
```
67+
TMS credentials ready: frontera, stampede3, ls6
7568
```
7669

77-
See the [Authentication Guide](authentication.md#tms-credentials-execution-system-access) for details.
70+
Systems where you don't have an allocation are skipped and reported on a separate `TMS credentials skipped: ...` line. See the [Authentication Guide](authentication.md#tms-credentials-execution-system-access) for manual control.
7871

7972
### Step 2: Explore Available Applications
8073

0 commit comments

Comments
 (0)