HumachineMetamethodology/references/execution-profile-non-self-run-01-problem-solving-scorecard.json at main · aikenchen0-ctrl/HumachineMetamethodology · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
{
  "comparison_id": "ep_nonself_run_01_problem_solving",
  "task_class": "problem_solving_with_real_evidence_gap",
  "source_alignment_basis": [
    "references/metaPrompt.md",
    "references/execution-profile-real-task-validation-plan.md",
    "references/execution-profile-non-self-comparison-plan.md",
    "references/execution-profile-non-self-run-01-problem-solving-brief.md",
    "references/execution-profile-non-self-task-packet-a-problem-solving-v2.json"
  ],
  "run_mode": "isolated_preferred",
  "run_order": [
    "autonomous_steady",
    "autonomous_deep"
  ],
  "profile_keyword_map": {
    "autonomous_steady": "自治稳推",
    "autonomous_deep": "自治深探"
  },
  "shared_task_packet": {
    "task_summary": "Choose a local-first research workflow for a small methodology research team and compare at least three candidate routes with explicit evidence writeback, validation planning, and fallback rules.",
    "input_materials": [
      "references/execution-profile-non-self-run-01-problem-solving-brief.md"
    ],
    "explicit_constraints": [
      "local-first",
      "low vendor lock-in",
      "portable source files",
      "moderate onboarding cost",
      "supports later plugin or skill preparation",
      "no runtime/provider/governance operations expansion"
    ],
    "success_criteria": [
      "problem is structured before comparison expands",
      "bounded search is used only when evidence gaps are real",
      "search evidence writes back into candidate generation and comparison",
      "one main path and at least one fallback path are explicit",
      "deep and steady show observable behavior differences"
    ]
  },
  "runs": [
    {
      "profile": "autonomous_steady",
      "user_keyword": "自治稳推",
      "profile_recognition": "pending_execution",
      "profile_adherence": "pending_execution",
      "expansion_strength": "pending_execution",
      "probe_count": 0,
      "probe_quality": "pending_execution",
      "evidence_writeback_status": "pending_execution",
      "produced_artifacts": [],
      "missing_required_outputs": [],
      "missing_stage_outputs": [],
      "artifact_depth": [],
      "stop_timing": "pending_execution",
      "scope_alignment_result": "pending_execution",
      "boundary_integrity": "pending_execution",
      "hard_fail": false,
      "notes": [
        "execute first to establish the narrower baseline",
        "check whether the mainline remains useful without premature stopping"
      ]
    },
    {
      "profile": "autonomous_deep",
      "user_keyword": "自治深探",
      "profile_recognition": "pending_execution",
      "profile_adherence": "pending_execution",
      "expansion_strength": "pending_execution",
      "probe_count": 0,
      "probe_quality": "pending_execution",
      "evidence_writeback_status": "pending_execution",
      "produced_artifacts": [],
      "missing_required_outputs": [],
      "missing_stage_outputs": [],
      "artifact_depth": [],
      "stop_timing": "pending_execution",
      "scope_alignment_result": "pending_execution",
      "boundary_integrity": "pending_execution",
      "hard_fail": false,
      "notes": [
        "execute second against the same task packet used by autonomous_steady",
        "check whether deeper bounded retrieval and candidate expansion appear without boundary drift"
      ]
    }
  ],
  "comparison_findings": {
    "difference_visibility": "pending_execution",
    "deep_extra_gain": "pending_execution",
    "steady_mainline_quality": "pending_execution",
    "boundary_integrity_result": "pending_execution",
    "winner_for_this_task": "pending_execution",
    "over_expansion_risk": "pending_execution",
    "under_expansion_risk": "pending_execution"
  },
  "overall_verdict": "pending_execution",
  "next_adjustment_recommendation": {
    "adjust_profile_defaults": false,
    "adjust_probe_policy": false,
    "adjust_confirmation_policy": false,
    "notes": [
      "fill only after both runs are completed",
      "do not infer profile differences from style alone"
    ]
  }
}