11package cli
22
33// This file implements the `forecast` command, which samples a workflow's recent
4- // GitHub Actions run history and projects forward effective token usage and yield
5- // on a per-week or per-month basis.
4+ // GitHub Actions run history and projects forward effective token usage (including
5+ // Monte Carlo probability distributions) on a per-week or per-month basis.
66//
77// Workflow metadata (trigger types, concurrency, experiments) is read from the
88// workflow's Markdown frontmatter so that projections account for how often the
6060 }
6161)
6262
// ForecastEpisodeSummary holds episode-level aggregates computed from run
// history alone, i.e. without downloading any artifacts. Episodes are rebuilt
// from the fields exposed by the GitHub Actions run list (event type, head SHA,
// branch). Dispatch and workflow_call linkages need aw_info.json and are not
// visible to this lightweight analysis, so the episode count is only an
// estimate.
//
// NOTE(review): the estimate was originally described as a lower bound —
// confirm the direction against the episode engine.
type ForecastEpisodeSummary struct {
	// SampledEpisodes counts the distinct episodes found in the sampled run
	// history. One episode is one logical task execution and may cover several
	// runs when a workflow dispatches sub-workflows.
	SampledEpisodes int `json:"sampled_episodes"`

	// RunsPerEpisode is the mean number of runs in an episode (sampled runs
	// divided by SampledEpisodes). A value above 1 points at an
	// orchestrator-style workflow that dispatches several sub-workflows per task.
	RunsPerEpisode float64 `json:"runs_per_episode"`

	// AvgEffectiveTokensPerEpisode is the mean effective-token count of an episode.
	AvgEffectiveTokensPerEpisode int `json:"avg_effective_tokens_per_episode"`

	// ObservedEpisodesPerPeriod is the number of episodes projected for the
	// forecast period, scaled from the observed episode frequency.
	ObservedEpisodesPerPeriod float64 `json:"observed_episodes_per_period"`
}
83-
8463// ForecastWorkflowResult contains the projected metrics for a single workflow.
8564type ForecastWorkflowResult struct {
8665 // WorkflowID is the short identifier of the workflow (basename without .md).
@@ -97,8 +76,6 @@ type ForecastWorkflowResult struct {
9776
9877 // SuccessRate is the fraction of sampled runs that completed successfully (0–1).
9978 SuccessRate float64 `json:"success_rate"`
100- // Yield is the effective throughput: success rate × observed runs per period.
101- Yield float64 `json:"yield"`
10279
10380 // Average per-run metrics (from completed runs).
10481 AvgEffectiveTokens int `json:"avg_effective_tokens"`
@@ -107,10 +84,6 @@ type ForecastWorkflowResult struct {
10784 // Projected totals for the period.
10885 ProjectedEffectiveTokens int `json:"projected_effective_tokens"`
10986
110- // EpisodeAnalysis contains episode-level metrics derived from the sampled runs.
111- // Nil when no completed runs were available to analyze.
112- EpisodeAnalysis * ForecastEpisodeSummary `json:"episode_analysis,omitempty"`
113-
11487 // MonteCarlo contains the probability distribution of projected effective-token
11588 // counts derived from a Monte Carlo simulation (10 000 trials).
11689 // Nil when no completed runs were available.
@@ -559,9 +532,6 @@ func forecastWorkflow(ctx context.Context, workflowName, startDate string, confi
559532 // scaled to the projection period.
560533 result .ObservedRunsPerPeriod = float64 (n ) / float64 (config .Days ) * float64 (periodDays )
561534
562- // Effective throughput (yield) accounts for the success rate.
563- result .Yield = result .ObservedRunsPerPeriod * result .SuccessRate
564-
565535 // Projected token usage (point estimate using simple means).
566536 result .ProjectedEffectiveTokens = int (math .Round (result .ObservedRunsPerPeriod * float64 (result .AvgEffectiveTokens )))
567537
@@ -573,10 +543,6 @@ func forecastWorkflow(ctx context.Context, workflowName, startDate string, confi
573543 // Populate experiment variant fractions from run history when metadata has variants.
574544 result .ExperimentVariants = computeVariantFractions (result .ExperimentVariants , completed )
575545
576- // Build lightweight episode analysis from the completed runs using the fields
577- // available in the GitHub Actions run list (no artifact download required).
578- result .EpisodeAnalysis = buildForecastEpisodeSummary (completed , config .Days , periodDays )
579-
580546 return result , nil
581547}
582548
@@ -753,75 +719,6 @@ func extractWorkflowIDFromName(name string) string {
753719 return name
754720}
755721
756- // workflowRunToRunData converts a WorkflowRun (sourced from the GitHub Actions API)
757- // to a RunData using the fields available without artifact downloads. Fields that
758- // require aw_info.json (AwContext, Repository, Ref, SHA, Actor, RunAttempt, …) are
759- // left as zero values; the episode engine degrades gracefully when they are absent.
760- func workflowRunToRunData (r WorkflowRun ) RunData {
761- return RunData {
762- RunID : r .DatabaseID ,
763- Number : r .Number ,
764- WorkflowName : r .WorkflowName ,
765- WorkflowPath : r .WorkflowPath ,
766- Status : r .Status ,
767- Conclusion : r .Conclusion ,
768- URL : r .URL ,
769- Event : r .Event ,
770- Branch : r .HeadBranch ,
771- HeadSHA : r .HeadSha ,
772- DisplayTitle : r .DisplayTitle ,
773- CreatedAt : r .CreatedAt ,
774- StartedAt : r .StartedAt ,
775- UpdatedAt : r .UpdatedAt ,
776- TokenUsage : r .TokenUsage ,
777- EffectiveTokens : r .EffectiveTokens ,
778- EstimatedCost : r .EstimatedCost ,
779- }
780- }
781-
782- // buildForecastEpisodeSummary derives episode-level metrics from a slice of
783- // completed WorkflowRun objects using the lightweight episode engine. Returns nil
784- // when no runs are provided.
785- //
786- // Because only GitHub API fields are available (no aw_info.json artifacts), the
787- // episode engine can link runs via workflow_run event SHA/branch matching but
788- // cannot detect dispatch or workflow_call lineage. The resulting episode count is
789- // therefore a lower-bound estimate for orchestrator-style workflows.
790- func buildForecastEpisodeSummary (runs []WorkflowRun , historyDays , periodDays int ) * ForecastEpisodeSummary {
791- if len (runs ) == 0 {
792- return nil
793- }
794-
795- runData := make ([]RunData , 0 , len (runs ))
796- for _ , r := range runs {
797- runData = append (runData , workflowRunToRunData (r ))
798- }
799-
800- // buildEpisodeData returns (episodes, edges); edges are not needed for
801- // the lightweight forecast summary so they are intentionally discarded.
802- episodes , _ := buildEpisodeData (runData , nil )
803- numEpisodes := len (episodes )
804- if numEpisodes == 0 {
805- return nil
806- }
807-
808- var totalEpisodeET int
809- for _ , ep := range episodes {
810- totalEpisodeET += ep .TotalEffectiveTokens
811- }
812-
813- avgETPerEpisode := totalEpisodeET / numEpisodes
814- runsPerEpisode := float64 (len (runs )) / float64 (numEpisodes )
815- observedEpisodesPerPeriod := float64 (numEpisodes ) / float64 (historyDays ) * float64 (periodDays )
816-
817- return & ForecastEpisodeSummary {
818- SampledEpisodes : numEpisodes ,
819- RunsPerEpisode : runsPerEpisode ,
820- AvgEffectiveTokensPerEpisode : avgETPerEpisode ,
821- ObservedEpisodesPerPeriod : observedEpisodesPerPeriod ,
822- }
823- }
824-
825722// loadCachedEffectiveTokens looks up a locally-cached RunSummary for the given
826723// run ID and returns the TotalEffectiveTokens from its TokenUsage summary.
827724// Returns 0 when no cache exists or the cache does not contain token data.
@@ -953,7 +850,6 @@ type forecastTableRow struct {
953850 Workflow string `json:"workflow" console:"header:Workflow"`
954851 Runs int `json:"runs" console:"header:Sampled Runs"`
955852 SuccessRate string `json:"success_rate" console:"header:Success Rate"`
956- Yield string `json:"yield" console:"header:Yield/Period"`
957853 AvgEffectiveTokens string `json:"avg_effective_tokens" console:"header:Avg ET"`
958854 ProjectedTokens string `json:"projected_tokens" console:"header:Proj. ET (P50)"`
959855 ETRange string `json:"et_range" console:"header:80% CI (P10–P90)"`
@@ -992,7 +888,6 @@ func renderForecastTable(output ForecastResult, config ForecastConfig) error {
992888 Workflow : wf .WorkflowID + unreliableMark ,
993889 Runs : wf .SampledRuns ,
994890 SuccessRate : formatForecastPercent (wf .SuccessRate , wf .SampledRuns > 0 ),
995- Yield : fmt .Sprintf ("%.1f" , wf .Yield ),
996891 AvgEffectiveTokens : formatForecastTokens (wf .AvgEffectiveTokens ),
997892 ProjectedTokens : projETStr ,
998893 ETRange : etRangeStr ,
@@ -1004,18 +899,6 @@ func renderForecastTable(output ForecastResult, config ForecastConfig) error {
1004899 fmt .Fprint (os .Stderr , console .RenderStruct (rows ))
1005900 fmt .Fprintln (os .Stderr , "" )
1006901
1007- // Show episode analysis when any workflow has multi-run episodes.
1008- anyMultiRunEpisodes := false
1009- for _ , wf := range output .Workflows {
1010- if wf .EpisodeAnalysis != nil && wf .EpisodeAnalysis .RunsPerEpisode > 1.0 {
1011- anyMultiRunEpisodes = true
1012- break
1013- }
1014- }
1015- if anyMultiRunEpisodes {
1016- printEpisodeBreakdown (output .Workflows )
1017- }
1018-
1019902 // Show experiment variant details when present.
1020903 for _ , wf := range output .Workflows {
1021904 if len (wf .ExperimentVariants ) > 0 {
@@ -1039,36 +922,6 @@ func renderForecastTable(output ForecastResult, config ForecastConfig) error {
1039922 return nil
1040923}
1041924
1042- // printEpisodeBreakdown renders per-episode ET metrics for workflows that have
1043- // multi-run episodes (i.e. orchestrator-style workflows dispatching sub-workflows).
1044- func printEpisodeBreakdown (workflows []ForecastWorkflowResult ) {
1045- type episodeRow struct {
1046- Workflow string `json:"workflow" console:"header:Workflow"`
1047- Episodes int `json:"episodes" console:"header:Episodes"`
1048- RunsPerEpisode string `json:"runs_per_episode" console:"header:Runs/Episode"`
1049- AvgETPerEpisode string `json:"avg_et_per_episode" console:"header:Avg ET/Episode"`
1050- EpisodesPerPeriod string `json:"episodes_per_period" console:"header:Episodes/Period"`
1051- }
1052-
1053- fmt .Fprintln (os .Stderr , console .FormatInfoMessage ("Episode analysis (runs grouped by logical task):" ))
1054- epRows := make ([]episodeRow , 0 , len (workflows ))
1055- for _ , wf := range workflows {
1056- ep := wf .EpisodeAnalysis
1057- if ep == nil {
1058- continue
1059- }
1060- epRows = append (epRows , episodeRow {
1061- Workflow : wf .WorkflowID ,
1062- Episodes : ep .SampledEpisodes ,
1063- RunsPerEpisode : fmt .Sprintf ("%.1f" , ep .RunsPerEpisode ),
1064- AvgETPerEpisode : formatForecastTokens (ep .AvgEffectiveTokensPerEpisode ),
1065- EpisodesPerPeriod : fmt .Sprintf ("%.1f" , ep .ObservedEpisodesPerPeriod ),
1066- })
1067- }
1068- fmt .Fprint (os .Stderr , console .RenderStruct (epRows ))
1069- fmt .Fprintln (os .Stderr , "" )
1070- }
1071-
1072925// printEvalBreakdown renders the backtesting comparison table.
1073926func printEvalBreakdown (workflows []ForecastWorkflowResult ) {
1074927 type evalRow struct {
0 commit comments