@@ -1856,8 +1856,9 @@ def get_logs(
18561856 "--component" ,
18571857 "-c" ,
18581858 help = (
1859- "Pod to read logs from: 'orchestrator' (default) or 'env-server'. "
1860- "Inferred from --env when omitted."
1859+ "Pod to read logs from: 'orchestrator' (default), 'trainer', "
1860+ "'inference', or 'env-server'. trainer/inference apply only "
1861+ "to dedicated full-FT runs. Inferred from --env when omitted."
18611862 ),
18621863 ),
18631864 env : Optional [str ] = typer .Option (
@@ -1875,30 +1876,38 @@ def get_logs(
18751876) -> None :
18761877 """Get logs for a run.
18771878
1878- Defaults to the orchestrator pod. Pass ``--env <name>`` to read an
1879- env-server pod instead — useful when an env-server is crash-looping
1880- (e.g. ``ModuleNotFoundError``) and the orchestrator has stalled at
1881- "Starting orchestrator step 0".
1879+ Defaults to the orchestrator pod. Use ``--component`` to pick one of
1880+ ``trainer`` / ``inference`` / ``env-server`` (dedicated full-FT only).
1881+ Pass ``--env <name>`` to read an env-server pod by name (shorthand for
1882+ ``--component=env-server``).
18821883
18831884 List available pods first with ``prime train components <run_id>``.
18841885
1886+ Per-rank narrowing on multi-replica trainer/inference is not yet
1887+ surfaced here — `--local-ranks-filter=0` in the chart's torchrun
1888+ invocation already dedupes the in-pod rank fan-out, and per-pod
1889+ inspection on multi-node runs requires kubectl + the PVC log files.
1890+
18851891 Examples:
18861892
18871893 prime train logs <run_id>
18881894 prime train logs <run_id> -f
1895+ prime train logs <run_id> -c trainer
1896+ prime train logs <run_id> -c inference
18891897 prime train logs <run_id> --env reverse-text
18901898 prime train logs <run_id> --env reverse-text/1 -f
18911899 """
1900+ valid_components = ("orchestrator" , "trainer" , "inference" , "env-server" )
18921901 if component is None :
18931902 component = "env-server" if env is not None else "orchestrator"
1894- elif component not in ( "orchestrator" , "env-server" ) :
1903+ elif component not in valid_components :
18951904 raise typer .BadParameter (
1896- f"Invalid component '{ component } '. Use 'orchestrator' or 'env-server' ." ,
1905+ f"Invalid component '{ component } '. Use one of: { ', ' . join ( valid_components ) } ." ,
18971906 param_hint = "--component" ,
18981907 )
1899- if component == "orchestrator" and env is not None :
1908+ if env is not None and component != "env-server" :
19001909 raise typer .BadParameter (
1901- "--env applies only to env-server logs. Drop --component=orchestrator or drop --env." ,
1910+ f "--env applies only to env-server logs. Drop --component={ component } or drop --env." ,
19021911 param_hint = "--env" ,
19031912 )
19041913 if component == "env-server" and env is None :
@@ -1912,25 +1921,46 @@ def get_logs(
19121921 api_client = APIClient ()
19131922 rl_client = RLClient (api_client )
19141923
1915- if component == "orchestrator" :
1924+ if component == "env-server" and env is not None and "/" in env :
1925+ # Legacy shared-RFT env-server (`name/index` qualifier) — go
1926+ # through the dedicated env-server endpoint which uses the
1927+ # cluster_id-backed pod lookup path. Dedicated full-FT
1928+ # env-servers use the unified /logs route with
1929+ # component=env-server + env_name (StatefulSets always run
1930+ # one pod per env, so no index disambiguation needed).
1931+ env_name_q , env_index_q = _parse_env_qualifier (env )
1932+
1933+ def fetch (t : int ) -> str :
1934+ return rl_client .get_env_server_logs (
1935+ run_id ,
1936+ env_name = env_name_q ,
1937+ env_index = env_index_q ,
1938+ tail_lines = t ,
1939+ )
1940+
1941+ label = f"env-server { env } "
1942+ elif component == "orchestrator" :
19161943
19171944 def fetch (t : int ) -> str :
19181945 return rl_client .get_logs (run_id , tail_lines = t )
19191946
19201947 label = "orchestrator"
19211948 else :
1922- assert env is not None # narrowed by validation above
1923- env_name , env_index = _parse_env_qualifier (env )
1949+ # trainer / inference / dedicated env-server — unified /logs
1950+ # route. env (no slash) names the dedicated env-server's
1951+ # StatefulSet.
1952+ fetch_component = component
1953+ fetch_env = env if component == "env-server" else None
19241954
19251955 def fetch (t : int ) -> str :
1926- return rl_client .get_env_server_logs (
1956+ return rl_client .get_logs (
19271957 run_id ,
1928- env_name = env_name ,
1929- env_index = env_index ,
19301958 tail_lines = t ,
1959+ component = fetch_component ,
1960+ env_name = fetch_env ,
19311961 )
19321962
1933- label = f"env-server { env } "
1963+ label = f"env-server { env } " if component == "env-server" else component
19341964
19351965 _stream_logs (
19361966 fetch_fn = fetch ,
0 commit comments