Working on fine-tuning script

Breta01 · Breta01 · commit a524c3d5e55f · 2025-06-01T15:48:43.000+08:00
diff --git a/brain/agent.py b/brain/agent.py
@@ -59,7 +59,7 @@ class CustomState(AgentState):
 tools = [
     submit_alpha,
     describe_operators,
-    # search_datafields,
+    search_datafields,
     get_random_datafields,
     get_random_idea,
 ]
diff --git a/brain/agent_config.py b/brain/agent_config.py
@@ -3,8 +3,8 @@
 DEFAULT_CONFIG = {
     "region": "USA",
     "universe": "TOP3000",
-    "neutralization": "SECTOR",
-    "decay": 10,
+    "neutralization": "INDUSTRY",
+    "decay": 5,
     "delay": 1,
 }
 
diff --git a/brain/alpha_class.py b/brain/alpha_class.py
@@ -180,7 +180,12 @@ def from_stats(cls, stats: dict) -> "Alpha":
             "neutralization",
             "pasteurization",
         ]
-        is_stats = stats["is_stats"].iloc[0]
+
+        if "train" in stats:
+            is_stats = stats["train"]
+        else:
+            is_stats = stats["is_stats"].iloc[0]
+
         is_tests_df = stats["is_tests"]
         self_corr = is_tests_df[is_tests_df["name"] == "SELF_CORRELATION"].iloc[0]["value"]
 
diff --git a/brain/fine_tune.py b/brain/fine_tune.py
@@ -1,50 +1,89 @@
 """Methods for fine-tuning parameters of alpha."""
 
-from itertools import product
+import random
 
+from brain.agent import agent
+from brain.agent_config import DEFAULT_CONFIG
 from brain.alpha_class import Alpha
-from brain.api import BrainAPI
-
-
-def generate_alpha_grid(regular: str):
-    """Generate a grid of alpha parameters for fine-tuning."""
-    # Define the grid of parameters to explore
-    param_options = {
-        # "universe": ["TOP3000", "TOP1000", "TOP500", "TOP200", "TOPSP500"],
-        "universe": ["TOP3000", "TOP1000", "TOP500"],
-        "neutralization": ["INDUSTRY", "SECTOR", "MARKET", "NONE", "SUBINDUSTRY"],
-        "decay": [4, 8, 16, 32],
-        "truncation": [0.01, 0.05, 0.1],
-        # "pasteurization": ["ON", "OFF"],
-        "pasteurization": ["ON"],
+from brain.alpha_storage import Storage
+from brain.genetic_algorithm import genetic_algorithm
+from brain.score import get_score
+from brain.tools.ideas import get_random_idea
+from brain.tools.simulation import StopException
+
+MAIN_ALPHA = Alpha(
+    regular="ts_corr(fnd6_newqv1300_lltq, fnd6_newqv1300_aociotherq, 40) * zscore(ts_mean(pcr_vol_120, 40))",
+)
+
+
+def create_alpha_simulation(storage: Storage):
+    """Prompt the agent until `alphas_store` is non-empty and return its last entry."""
+
+    formatted_alphas = {
+        cat: "\n".join(alpha.prompt_format() for alpha in storage.get_top_k(cat, 10))
+        for cat in storage.categories
     }
 
-    # Generate all combinations of parameters
-    combinations = list(product(*param_options.values()))
-    params = [dict(zip(param_options.keys(), values)) for values in combinations]
-    alphas = [Alpha.create_alpha(regular=regular, **p) for p in params]
+    if random.random() < 0.05:
+        prompt = "Create a completely new alpha by random data fields."
+    else:
+        prompt = f"""
+Your task is to fine-tune the parameters of the following alpha:
+{MAIN_ALPHA.prompt_format()}
+Create a new alphas by adding or removing data fields, changing parameters, or modifying the logic.
+You can add/remove data fields, change parameters, add operators like neutralization
+or modify the logic.
+HOWEVER, THE CORE LOGIC OF THE ALPHA SHOULD REMAIN THE SAME.
+
+PASSING
+-------
+{formatted_alphas['passing']}
+
+FAILING
+-------
+{formatted_alphas['failing']}
+
+PENDING
+-------
+{formatted_alphas['pending']}
+
+{get_random_idea() if random.random() < 0.3 else ''}
+"""
 
-    return alphas
+    print(f"Prompt:\n{prompt}")
 
+    alphas_store = []
+    while not alphas_store:
+        try:
+            agent.invoke(
+                {
+                    "messages": [
+                        {
+                            "role": "user",
+                            "content": prompt,
+                        }
+                    ]
+                },
+                config={
+                    "recursion_limit": 100,
+                    "configurable": {
+                        **DEFAULT_CONFIG,
+                        "alphas": alphas_store,
+                    },
+                },
+            )
+        except StopException:
+            continue
 
-def get_fitness(result):
-    """Get the fitness of the alpha from result."""
-    stats = result["is_stats"]
-    if "fitness" in stats:
-        return stats["fitness"][0]
+    print(f"Alphas store: {alphas_store}")
+    return alphas_store[-1]
 
-    return -1
 
+def main():
+    """Main function to run the agent."""
+    storage = Storage(score_func=get_score, max_size=50)
+    genetic_algorithm(storage, create_alpha_simulation)
 
-def fine_tune_alpha(regular: str):
-    alphas = generate_alpha_grid(regular)
-    results = BrainAPI.simulate_alpha_list(alphas)
-    sorted_results = sorted(results, key=get_fitness, reverse=True)
 
-    print("Best alpha parameters:")
-    for i, result in enumerate(sorted_results):
-        print(f"Rank {i + 1}:")
-        print(f"Alpha: {result['alpha']}")
-        print(f"Fitness: {get_fitness(result)}")
-        print(f"Parameters: {result['simulate_data']}")
-        print()
+if __name__ == "__main__":
+    main()
diff --git a/brain/genetic_algorithm.py b/brain/genetic_algorithm.py
@@ -0,0 +1,101 @@
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from typing import Callable
+
+from requests import Response
+
+from brain.alpha_class import Alpha
+from brain.alpha_storage import Storage
+from brain.api import DEFAULT_CONFIG as API_DEFAULT_CONFIG
+from brain.api import BrainAPI
+from brain.database import Database
+
+MAX_WORKERS = 3
+
+
+def genetic_algorithm(storage: Storage, create_alpha: Callable[[Storage], tuple[Response, Alpha]]):
+    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as pool:
+        live_jobs = {}
+
+        # Make initial alpha simulations
+        for _ in range(MAX_WORKERS):
+            response, alpha = create_alpha(storage)
+            storage.add_alpha(alpha, "pending")
+            live_jobs[pool.submit(_monitor_alpha, response, alpha)] = alpha
+
+        # Wait for jobs to complete and start new ones
+        while live_jobs:
+            for job in as_completed(live_jobs):
+                # Update storage with the results
+                alpha = live_jobs.pop(job)
+                stats = job.result()
+                print(f"Stats: {stats}")
+                alpha = _update_alphas_storage(storage, stats, alpha.alpha_id)
+
+                # Start a new alpha simulation
+                if alpha is not None and alpha.alpha_id is not None and alpha.fitness < -0.5:
+                    split = alpha.regular.split(";")
+                    regular = f'{";".join(split[:-1])}{";" if len(split) > 1 else ""}-({split[-1]})'
+
+                    new_alpha = alpha.replace(regular=regular)
+                    response = BrainAPI.start_simulation(
+                        new_alpha.get_simulation_data(test_period="P1Y0M0D")
+                    )
+                else:
+                    response, new_alpha = create_alpha(storage)
+
+                storage.add_alpha(new_alpha, "pending")
+                live_jobs[pool.submit(_monitor_alpha, response, new_alpha)] = new_alpha
+
+
+def _monitor_alpha(response, alpha):
+    """Monitor the alpha simulation."""
+    try:
+        simulation_result = BrainAPI.simulation_progress(response)
+        if not simulation_result["completed"]:
+            return {
+                "alpha_id": None,
+                "simulate_data": alpha.get_simulation_data(),
+                "error": simulation_result["error"],
+            }
+
+        BrainAPI.set_alpha_properties(simulation_result["result"]["id"])
+        return BrainAPI.get_specified_alpha_stats(
+            simulation_result["result"]["id"], alpha.get_simulation_data(), **API_DEFAULT_CONFIG
+        )
+    except Exception as e:
+        print(f"Error during obtaining results: {e}")
+        if isinstance(e, (ConnectionError)):
+            BrainAPI._new_session()
+
+        return {
+            "alpha_id": None,
+            "simulate_data": alpha.get_simulation_data(),
+            "error": str(e),
+        }
+
+
+def _update_alphas_storage(
+    storage: Storage,
+    stats: dict,
+    old_id: str,
+):
+    """Update the alpha storage with the new stats."""
+    storage.remove_pending_alpha(old_id)
+
+    if stats["alpha_id"] is None:
+        return
+
+    alpha = Alpha.from_stats(stats)
+    try:
+        Database().insert_alpha(alpha)
+    except Exception as e:
+        print(f"Error during database insertion: {e}")
+        pass
+
+    if alpha.short_count + alpha.long_count > 0:
+        if (stats["is_tests"]["result"] != "FAIL").all():
+            storage.add_alpha(alpha, "passing")
+        else:
+            storage.add_alpha(alpha, "failing")
+
+    return alpha
diff --git a/brain/search_algorithm.py b/brain/search_algorithm.py
diff --git a/brain/tools/simulation.py b/brain/tools/simulation.py

Original file line number	Diff line number	Diff line change
`@@ -59,7 +59,7 @@ class CustomState(AgentState):`
`59`	`59`	`tools = [`
`60`	`60`	`submit_alpha,`
`61`	`61`	`describe_operators,`
`62`		`- # search_datafields,`
	`62`	`+ search_datafields,`
`63`	`63`	`get_random_datafields,`
`64`	`64`	`get_random_idea,`
`65`	`65`	`]`