# Source: TRADER/run_experiments_auto_batch.py at main · Xtra-Computing/TRADER · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
#!/usr/bin/env python3
"""
Script to run dp_with_filter_full.cpp with different parameters and capture output.
"""

import os
import subprocess
import time
from pathlib import Path
from typing import List, Tuple
import logging
import argparse

# Set up logging
# The root logger is configured at import time: INFO level, timestamped lines.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Module-level logger used by every function in this script.
logger = logging.getLogger(__name__)

# Defaults for the command-line options defined in parse_args().
DEFAULT_K_VALUES = [3, 4, 5, 6]  # default for --k-values; main() runs one experiment per k
DEFAULT_NUM_TRIALS = 10  # default for --num-trials
DEFAULT_N = 10  # default for --n
DEFAULT_BLOCKS = ["sample"]  # default for --blocks; each entry is used as a data-file name prefix

def get_graph_file_pairs(processed_data_dir: Path, block_numbers: List[str]) -> List[Tuple[str, str]]:
    """Collect the (static graph, dynamic graph) file pair of every usable block.

    For a block identifier ``b`` the two files are looked up at
    ``<processed_data_dir>/<b>_token_graph.txt`` (static) and
    ``<processed_data_dir>/<b>_dynamic.txt`` (dynamic).  A block is only
    returned when *both* files exist; otherwise a warning is logged for each
    missing file and the block is skipped, so that no experiment is launched
    on an input that is known to be absent.

    Args:
        processed_data_dir: Directory that holds the processed graph files.
        block_numbers: Block identifiers (file-name prefixes) to look up.

    Returns:
        A list of ``(static_file, dynamic_file)`` string paths, sorted
        lexicographically.  Because of the sorting and the skipping, the
        result is NOT index-aligned with ``block_numbers``.
    """
    file_pairs = []

    for block_number in block_numbers:
        static_file = processed_data_dir / f"{block_number}_token_graph.txt"
        dynamic_file = processed_data_dir / f"{block_number}_dynamic.txt"

        # Report every missing input, not just the first one found.
        static_ok = static_file.exists()
        dynamic_ok = dynamic_file.exists()
        if not static_ok:
            logger.warning(f"Static file not found for block {block_number}")
        if not dynamic_ok:
            logger.warning(f"Dynamic file not found for block {block_number}")

        if static_ok and dynamic_ok:
            file_pairs.append((str(static_file), str(dynamic_file)))

    return sorted(file_pairs)

def run_single_experiment(
    executable_path: Path,
    seeds_file: Path,
    static_file: str,
    dynamic_file: str,
    k: int,
    num_trials: int = DEFAULT_NUM_TRIALS,
    n: int = DEFAULT_N,
) -> Tuple[int, str, str]:
    """Run the executable once and capture its output.

    The command line is, in order: the executable, ``static_file``,
    ``dynamic_file``, ``k``, ``num_trials``, ``n``, ``seeds_file`` and two
    literal ``"1"`` arguments.  The child is given a two-hour timeout.

    Args:
        executable_path: Program to launch.
        seeds_file: Seeds file passed as the sixth program argument.
        static_file: Path of the static graph file.
        dynamic_file: Path of the dynamic graph file.
        k: Passed through to the program as its third argument.
        num_trials: Passed through to the program as its fourth argument.
        n: Passed through to the program as its fifth argument.

    Returns:
        ``(return_code, stdout, stderr)``, all text.  On timeout the return
        code is ``-1``, ``stdout`` is whatever partial output was captured
        (possibly empty) and ``stderr`` is ``"Command timed out"``.  If the
        process could not be run at all the result is
        ``(-1, "", <error message>)``.
    """
    cmd = [
        str(executable_path),
        static_file,
        dynamic_file,
        str(k),
        str(num_trials),
        str(n),
        str(seeds_file),
        "1",
        "1"
    ]

    logger.info(f"Running command: {' '.join(cmd)}")

    try:
        # Run the command and capture output
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=3600*2
        )
    except subprocess.TimeoutExpired as e:
        logger.error("Command timed out")
        # TimeoutExpired.stdout is None when nothing was captured and raw
        # bytes otherwise, even though text=True was requested.  Normalise it
        # so callers always get a str they can write to a text file.
        partial_stdout = e.stdout
        if partial_stdout is None:
            partial_stdout = ""
        elif isinstance(partial_stdout, bytes):
            partial_stdout = partial_stdout.decode(errors="replace")
        return -1, partial_stdout, "Command timed out"
    except Exception as e:
        # Deliberately broad: a single failed launch must not abort the batch.
        logger.error(f"Error running command: {e}")
        return -1, "", str(e)

    return result.returncode, result.stdout, result.stderr

def save_result(result_file: str, static_file: str, dynamic_file: str, k: int,
                num_trials: int, n: int, return_code: int, stdout: str, stderr: str):
    """Write an experiment's captured stdout to ``result_file``.

    The file is overwritten and receives ``stdout`` followed by one newline.
    Every other parameter is accepted but not used when writing.
    """
    with open(result_file, 'w') as sink:
        for piece in (stdout, "\n"):
            sink.write(piece)

def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the experiment runner.

    Path-valued options default to locations relative to the directory that
    contains this script; the numeric options and ``--blocks`` default to the
    module-level ``DEFAULT_*`` constants.

    Returns:
        The populated ``argparse.Namespace``.
    """
    here = Path(__file__).resolve().parent
    cli = argparse.ArgumentParser(description="Run auto-batch experiments for dp_with_filter_full.")

    # (flag, default location, help text) for every Path-typed option.
    path_options = (
        (
            "--processed-data-dir",
            here / "dynamic_graph_data" / "processed_graph_data_new",
            "Directory containing processed graph data.",
        ),
        (
            "--results-dir",
            here / "results_auto_batch",
            "Directory to store results.",
        ),
        (
            "--executable-path",
            here / "build" / "dynamic_cycle_detection" / "dp_with_filter_full",
            "Path to the dp_with_filter_full executable.",
        ),
        (
            "--seeds-file",
            here / "seeds.txt",
            "Path to the seeds file.",
        ),
    )
    for flag, fallback, description in path_options:
        cli.add_argument(flag, type=Path, default=fallback, help=description)

    cli.add_argument(
        "--blocks",
        nargs="+",
        default=DEFAULT_BLOCKS,
        help="Block identifiers to run (e.g., sample 18510000).",
    )
    cli.add_argument("--k-values", nargs="+", type=int, default=DEFAULT_K_VALUES)

    # Plain single-integer options.
    for flag, fallback in (("--num-trials", DEFAULT_NUM_TRIALS), ("--n", DEFAULT_N)):
        cli.add_argument(flag, type=int, default=fallback)

    return cli.parse_args()


def _block_label_from_static_file(static_file: str) -> str:
    """Recover the block identifier from a ``<block>_token_graph.txt`` path."""
    suffix = "_token_graph.txt"
    file_name = Path(static_file).name
    if file_name.endswith(suffix):
        return file_name[:-len(suffix)]
    # Unexpected naming: fall back to the bare file name without extension.
    return Path(static_file).stem


def main() -> int:
    """Run every (block, k) experiment and store each run's stdout.

    For each graph file pair found for the requested blocks and each value in
    ``--k-values``, the executable is run once and its stdout is written to
    ``<results-dir>/<block>_<k>.txt``.

    Returns:
        ``1`` when the executable or seeds file is missing or no graph file
        pair was found; ``0`` otherwise, including when individual
        experiments failed (failures are logged).
    """
    args = parse_args()
    # Create results directory
    args.results_dir.mkdir(parents=True, exist_ok=True)

    # Check if executable exists
    if not args.executable_path.exists():
        logger.error(f"Executable not found: {args.executable_path}")
        logger.error("Please build the project first using CMake")
        return 1

    # Check if seeds file exists
    if not args.seeds_file.exists():
        logger.error(f"Seeds file not found: {args.seeds_file}")
        return 1

    # Get all file pairs
    file_pairs = get_graph_file_pairs(args.processed_data_dir, args.blocks)
    if not file_pairs:
        logger.error("No graph file pairs found")
        return 1

    logger.info(f"Found {len(file_pairs)} file pairs")

    # Run experiments
    total_experiments = len(file_pairs) * len(args.k_values)
    current_experiment = 0
    failed_experiments = 0

    for static_file, dynamic_file in file_pairs:
        # file_pairs is sorted and may omit blocks whose files are missing,
        # so it is not index-aligned with args.blocks.  Take the label from
        # the file name to keep logs and result files attached to the data
        # that was actually run.
        block_number = _block_label_from_static_file(static_file)
        logger.info(f"Processing block {block_number}")

        for k in args.k_values:
            current_experiment += 1
            logger.info(f"Experiment {current_experiment}/{total_experiments}: block {block_number}, k={k}")

            # Create result filename
            result_file = args.results_dir / f"{block_number}_{k}.txt"

            # Run experiment
            start_time = time.time()
            return_code, stdout, stderr = run_single_experiment(
                args.executable_path,
                args.seeds_file,
                static_file,
                dynamic_file,
                k,
                args.num_trials,
                args.n,
            )
            end_time = time.time()

            # A timed-out run may hand back no output or raw bytes; make sure
            # a str reaches the text-mode result file.
            if stdout is None:
                stdout = ""
            elif isinstance(stdout, bytes):
                stdout = stdout.decode(errors="replace")

            # Save results
            save_result(
                str(result_file),
                static_file,
                dynamic_file,
                k,
                args.num_trials,
                args.n,
                return_code,
                stdout,
                stderr,
            )

            # Log results
            duration = end_time - start_time
            if return_code == 0:
                logger.info(f"✓ Experiment completed successfully in {duration:.2f}s")
                logger.info(f"  Results saved to: {result_file}")
            else:
                failed_experiments += 1
                logger.error(f"✗ Experiment failed (return code: {return_code}) in {duration:.2f}s")
                # Only stdout goes to the result file; surface stderr in the
                # log so the failure reason is not lost.
                logger.error(f"  Captured stdout saved to: {result_file}")
                if stderr:
                    logger.error(f"  stderr: {stderr.strip()}")

            # Add a small delay between experiments
            time.sleep(1)

    logger.info("=" * 80)
    logger.info("ALL EXPERIMENTS COMPLETED")
    logger.info("=" * 80)
    logger.info(f"Total experiments run: {total_experiments}")
    if failed_experiments:
        logger.warning(f"Failed experiments: {failed_experiments}")
    logger.info(f"Results saved in: {args.results_dir}")

    return 0

if __name__ == "__main__":
    # The bare `exit` builtin is installed by the `site` module for interactive
    # use and is missing under `python -S`; raising SystemExit is equivalent
    # and always available.
    raise SystemExit(main())