@@ -399,6 +399,108 @@ async def fetch_one(wid: str) -> dict | None:
399399 return "\n " .join (lines )
400400
401401
402+ async def _train_composition (train_id : int ) -> tuple [str | None , list [dict ]]:
403+ """(dataset_name, [wagon dicts]) for a train. Shared composition fetch."""
404+ t = await _get ("trains/train.jsp" , {"train_id" : train_id })
405+ ds = t .get ("dataset_name" )
406+ wagons_ts = t .get ("wagons_timestamp" ) or t .get ("dataset_timestamp" )
407+ if not wagons_ts :
408+ return ds , []
409+ wd = await _get ("trains/wagons_derived_data.jsp" ,
410+ {"train_id" : train_id , "wagons_timestamp" : wagons_ts })
411+ wagon_ids = list (wd .keys ()) if isinstance (wd , dict ) else []
412+
413+ async def fetch_one (wid : str ) -> dict | None :
414+ try :
415+ w = await _get ("analysis/wagon/wagon.jsp" ,
416+ {"wagon_id" : int (wid ), "referenceTime" : 0 })
417+ if isinstance (w , dict ) and w .get ("id" ) is not None :
418+ return w
419+ except Exception :
420+ pass
421+ return None
422+
423+ wagons = [w for w in await asyncio .gather (* (fetch_one (w ) for w in wagon_ids )) if w ]
424+ return ds , wagons
425+
426+
427+ def _summarize_sig (sig ) -> str :
428+ """Human-readable 'Nx workflow [analysis_id]' summary of a composition signature."""
429+ if not sig or not sig [1 ]:
430+ return "(no wagons / unresolved)"
431+ c = collections .Counter (f"{ wf } [{ aid } ]" for wf , aid in sig [1 ])
432+ return ", " .join (f"{ n } x { k } " for k , n in sorted (c .items ()))
433+
434+
435+ async def _match_compositions (train_ids : list [int ]):
436+ """Group trains by (dataset, multiset of (workflow, analysis_id)).
437+
438+ Returns (groups, ref_sig, matched_ids, failed_ids) where groups maps each
439+ signature to its train ids, ref_sig is the largest group's signature (None
440+ if nothing resolved), and matched_ids are the trains sharing it. Shared by
441+ validate_train_composition and grid_job_bands so both apply the same guard.
442+ """
443+ async def one (tid : int ):
444+ try :
445+ ds , wagons = await _train_composition (tid )
446+ sig = (ds , tuple (sorted ((w .get ("work_flow_name" ) or "?" ,
447+ w .get ("analysis_id" )) for w in wagons )))
448+ return tid , sig
449+ except Exception :
450+ return tid , None
451+
452+ res = await asyncio .gather (* (one (t ) for t in train_ids ))
453+ groups : dict = collections .defaultdict (list )
454+ failed = []
455+ for tid , sig in res :
456+ (failed .append (tid ) if sig is None else groups [sig ].append (tid ))
457+ if not groups :
458+ return groups , None , [], failed
459+ ref = max (groups , key = lambda s : len (groups [s ]))
460+ return groups , ref , sorted (groups [ref ]), failed
461+
462+
463+ @mcp .tool ()
464+ async def validate_train_composition (train_ids : list [int ]) -> str :
465+ """Check whether a set of trains share the same dataset + wagon composition.
466+
467+ For each train builds a signature = its dataset plus the multiset of
468+ (workflow, analysis_id) over its wagons, then groups the trains. Run this
469+ before comparing trains over time (throughput / CPU trends, distribution
470+ heatmaps) so confounders — a different analysis, an extra or missing wagon,
471+ a different dataset — are dropped rather than silently skewing the result.
472+
473+ Returns the reference composition (the largest matching group), the matched
474+ train list (feed it straight into the comparison), and each outlier with how
475+ it differs.
476+ """
477+ groups , ref , matched , failed = await _match_compositions (train_ids )
478+ if ref is None :
479+ return "Could not resolve composition for: " + ", " .join (map (str , failed ))
480+ ref_ds = ref [0 ]
481+
482+ out = [f"Composition check for { len (train_ids )} trains:\n " ,
483+ f"Reference ({ len (matched )} /{ len (train_ids )} match): dataset={ ref_ds } " ,
484+ f" { _summarize_sig (ref )} " ,
485+ f" matched: { ', ' .join (map (str , matched ))} \n " ]
486+
487+ outliers = sorted ([(s , ts ) for s , ts in groups .items () if s != ref ],
488+ key = lambda x : sorted (x [1 ])[0 ])
489+ if outliers or failed :
490+ out .append ("Outliers (exclude from the comparison):" )
491+ for s , ts in outliers :
492+ tag = f"dataset={ s [0 ]} ; " if s [0 ] != ref_ds else ""
493+ out .append (f" { ', ' .join (map (str , sorted (ts )))} : { tag } { _summarize_sig (s )} " )
494+ for tid in failed :
495+ out .append (f" { tid } : composition could not be resolved" )
496+ out .append ("" )
497+ else :
498+ out .append ("All trains share the same composition. ✓\n " )
499+
500+ out .append (f"matched_train_ids = { matched } " )
501+ return "\n " .join (out )
502+
503+
402504# ---------------------------------------------------------------------------
403505# Analysis / wagon browsing
404506#
0 commit comments