@@ -85,7 +85,9 @@ def _initialize_environment_and_messages(self) -> List[dict]:
8585 params = self .env_params ,
8686 )
8787 state_message : dict = init_response ["state" ]
88- _ , init_messages = self ._get_init_messages (state_message )
88+ query , init_messages = self ._get_init_messages (state_message )
89+ # Update main_query with actual query from environment
90+ self .workflow_task .task .main_query = query
8991 except Exception as e :
9092 logger .bind (exception = True ).exception (
9193 f"encounter exception in env_worker.create_instance~ error={ e .args } "
@@ -175,12 +177,18 @@ def step(self, action: dict) -> Tuple[str, float, bool, dict]:
175177 )
176178 obs = ""
177179 assert isinstance (env_output , dict )
178- if ("content" not in env_output ["state" ]) and ("error" in env_output ["state" ]):
179- obs = f"[Error from environment: { env_output ['error' ]} ]"
180- elif env_output ["state" ]["content" ] == "" :
181- obs = "Warning: the environment does not provide any feedback, please provide valid inpu and try again."
180+ # === Support list-type state passthrough ===
181+ # 1. If state is a list (new standard format), pass through directly
182+ if isinstance (env_output ["state" ], list ):
183+ obs = env_output ["state" ]
184+ # 2. If state is a dict (old format or error)
182185 else :
183- obs = env_output ["state" ]["content" ]
186+ if ("content" not in env_output ["state" ]) and ("error" in env_output ["state" ]):
187+ obs = f"[Error from environment: { env_output ['error' ]} ]"
188+ elif env_output ["state" ].get ("content" , "" ) == "" :
189+ obs = "Warning: the environment does not provide any feedback, please provide valid inpu and try again."
190+ else :
191+ obs = env_output ["state" ]["content" ]
184192 reward = 0
185193 info = {}
186194 terminate = env_output ["is_terminated" ]
0 commit comments