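Updates: add pybullet_coffee debug VLM predicates, restrict grammar-search candidates to VLM predicates, strip whitespace from VLM atom strings, reword the atom-labelling prompt, and retry OpenAI completions containing "sorry".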

ashayathalye · ashayathalye · commit b1eac4c29a0c · 2025-04-22T05:31:44.000-07:00
diff --git a/predicators/approaches/grammar_search_invention_approach.py b/predicators/approaches/grammar_search_invention_approach.py
@@ -486,6 +486,11 @@ def enumerate(self) -> Iterator[Tuple[Predicate, float]]:
         "Whole0",
         "Cut0",
     ],
+    "pybullet_coffee": [
+        "JugInMachine",
+        "Holding",
+        "HandEmpty",
+    ]
 }
 _DEBUG_VLM_PREDICATES = defaultdict(list, _DEBUG_VLM_PREDICATES)
 
@@ -1054,15 +1059,18 @@ def learn_from_offline_dataset(self, dataset: Dataset) -> None:
                 self._parse_atom_dataset_from_annotated_dataset(dataset)
             atom_dataset = utils.merge_ground_atom_datasets(
                 atom_dataset_from_grammar, atom_dataset_from_vlm)
+            
             # If grammar_search_invent_geo_predicates_only is False, then we
             # want to invent both VLM and geo predicates
-            if not CFG.grammar_search_invent_geo_predicates_only:
-                candidates = candidates_from_grammar | candidates_from_vlm
-            # Otherwise, we only want to invent geo predicates, and directly
-            # select all the VLM predicates.
-            else:
-                candidates = candidates_from_grammar
-                self._initial_predicates |= set(candidates_from_vlm.keys())
+            # if not CFG.grammar_search_invent_geo_predicates_only:
+            #     candidates = candidates_from_grammar | candidates_from_vlm
+            # # Otherwise, we only want to invent geo predicates, and directly
+            # # select all the VLM predicates.
+            # else:
+            #     candidates = candidates_from_grammar
+            #     self._initial_predicates |= set(candidates_from_vlm.keys())
+            candidates = candidates_from_vlm
+            
         elif not CFG.offline_data_method in [
                 "demo+labelled_atoms", "saved_vlm_img_demos_folder",
                 "demo_with_vlm_imgs"
diff --git a/predicators/datasets/generate_atom_trajs_with_vlm.py b/predicators/datasets/generate_atom_trajs_with_vlm.py
@@ -78,6 +78,7 @@ def _generate_prompt_for_atom_proposals(
                             for act in traj.actions)
         # NOTE: exact same issue as described in the above note for
         # naive_whole_traj.
+        # import pdb; pdb.set_trace()
         ret_list.append(
             (prompt, [traj.imgs[i][0] for i in range(len(traj.imgs))]))
     else:  # pragma: no cover.
@@ -144,6 +145,8 @@ def _label_single_trajectory_with_vlm_atom_values(indexed_traj: Tuple[
     obj_names = [o.name for o in traj.objects]
     filtered_atoms_list = []
     for a in atoms_list:
+        # Remove whitespace from the atom string.
+        a = a.replace(' ', '')
         # Get the names of the objects in this atom.
         atom_args = a[a.find('(') + 1:a.find(')')]
         atom_objs = atom_args.split(',')
@@ -412,6 +415,7 @@ def _save_img_option_trajs_in_folder(
             for j, img_list in enumerate(img_option_traj.imgs):
                 curr_traj_timestep_folder = Path(curr_traj_folder, str(j))
                 os.makedirs(curr_traj_timestep_folder, exist_ok=False)
+                # import pdb; pdb.set_trace()
                 for k, img in enumerate(img_list):
                     img.save(
                         Path(curr_traj_timestep_folder,
@@ -1086,6 +1090,7 @@ def create_ground_atom_data_from_generated_demos(
                     raise NotImplementedError(
                         f"Cropped images not implemented for {CFG.env}.")
             if CFG.env in ["pybullet_coffee"]:
+                # import pdb; pdb.set_trace()
                 state_imgs.append(state.simulator_state['images'])
             else:
                 state_imgs.append([
@@ -1116,6 +1121,7 @@ def create_ground_atom_data_from_generated_demos(
     if CFG.vlm_predicate_vision_api_generate_ground_atoms:
         generate_func = _generate_ground_atoms_with_vlm_oo_code_gen
     else:
+        # import pdb; pdb.set_trace()
         generate_func = _generate_ground_atoms_with_vlm_pure_visual_preds
     ground_atoms_trajs = generate_func(img_option_trajs, env, train_tasks,
                                        known_predicates, all_task_objs, vlm)
diff --git a/predicators/datasets/vlm_input_data_prompts/atom_labelling/img_option_diffs_label_history.txt b/predicators/datasets/vlm_input_data_prompts/atom_labelling/img_option_diffs_label_history.txt
@@ -1,4 +1,4 @@
-You are a vision system for a robot provided with two images: a before image showing the state before a skill is executed, an after image showing the state after the skill is executed. You are given a list of predicates below, and you are given the values of these predicates in the image before the skill is executed. Your job is to output the values of the following predicates in the image after the skill is executed. Pay careful attention to the visual changes between the two images to figure out which predicates change and which predicates do not change. Note that some or all of the predicates don't necessary have to change. First, output a description of what changes you expect to happen based on the skill that was just run, explicitly noting the skill that was run. Second, output a description of what visual changes you see happen between the before and after images, looking specifically at the objects involved in the skill's arguments, noting what objects these are. From these two descriptions, for each predicate labeled in the previous timestep, note whether you expect its value to change or stay the same. Next, output each predicate value in the after image as a bulleted list (use '*' for the bullets) with each predicate and value on a different line. Ensure there is a period ('.') after the truth value of the predicate. For each predicate value, provide an explanation as to why you labelled this predicate as having this particular value, and note what value this predicate had in the previous timestep, which is given to you in the prompt. Use the format: `* <predicate>: <truth_value>. <explanation>`. When labeling the value of a predicate, if you don't see the objects involved in that predicate, retain its truth value from the previous timestep. 
Also, if your description of changes you expect to happen, and your description of visual changes you saw happen, have nothing to do with the predicate you are trying to label, retain its truth value from the previous timestep. For example, if in the previous timestep I paint an object, and in the current timestamp I sit on it, we don't expect its color to change after sitting on it. 
+You are a vision system for a robot provided with two images: a before image showing the state before a skill is executed, an after image showing the state after the skill is executed. You are given a list of predicates below, and you are given the values of these predicates in the image before the skill is executed. Your job is to output the values of the following predicates in the image after the skill is executed. Pay careful attention to the visual changes between the two images to figure out which predicates change and which predicates do not change. Note that some or all of the predicates don't necessarily have to change. First, output a description of what changes you expect to happen based on the skill that was just run, explicitly noting the skill that was run. Second, output a description of what visual changes you see happen between the before and after images, looking specifically at the objects involved in the skill's arguments, noting what objects these are. From these two descriptions, for each predicate labeled in the previous timestep, note whether you expect its value to change or stay the same. Next, for each predicate given in the list of predicates to label, output each predicate value in the after image as a bulleted list (use '*' for the bullets) with each predicate and value on a different line. Ensure there is a period ('.') after the truth value of the predicate. For each predicate value, provide an explanation as to why you labelled this predicate as having this particular value, and note what value this predicate had in the previous timestep, which is given to you in the prompt. Use the format: `* <predicate>: <truth_value>. <explanation>`. When labeling the value of a predicate, if you don't see the objects involved in that predicate, retain its truth value from the previous timestep. 
Also, if your description of changes you expect to happen, and your description of visual changes you saw happen, have nothing to do with the predicate you are trying to label, retain its truth value from the previous timestep. For example, if in the previous timestep I paint an object, and in the current timestep I sit on it, we don't expect its color to change after sitting on it. 
 
 Your response should have three sections. Here is an outline of what your response should look like:
 [START OULTLINE]
@@ -12,4 +12,4 @@ Your response should have three sections. Here is an outline of what your respon
 [insert your bulleted list of `* <predicate>: <truth value>. <explanation>`]
 [END OUTLINE]
 
-Predicates:
+Predicates to label:
diff --git a/predicators/envs/pybullet_coffee.py b/predicators/envs/pybullet_coffee.py
@@ -48,7 +48,7 @@
     create_single_arm_pybullet_robot
 from predicators.settings import CFG
 from predicators.structs import Action, Array, EnvironmentTask, Object, \
-    Predicate, State, Observation
+    Predicate, State, Observation, Task
 
 class PyBulletCoffeeEnv(PyBulletEnv, CoffeeEnv):
     """PyBullet Coffee domain.
@@ -285,6 +285,22 @@ def predicates(self) -> Set[Predicate]:
     @property
     def agent_goal_predicates(self) -> Set[Predicate]:
         return self.goal_predicates
+    
+    # def get_vlm_debug_atom_strs(self,
+    #                             train_tasks: List[Task]) -> List[List[str]]:
+    #     # Convert the default value from List[List[str]] to List[str] to match
+    #     # the other entries we'll put into the dictionary.
+    #     default = [a[0] for a in super().get_vlm_debug_atom_strs(train_tasks)]
+    #     atom_strs_by_task_type = {
+    #         "more_stacks": ["Cooked(patty1)"],
+    #         "fatter_burger": ["Cooked(patty1)"],
+    #         "combo_burger":
+    #         ["Cooked(patty1)", "Cut(lettuce1)", "Whole(lettuce1)"]
+    #     }
+    #     atom_strs_by_task_type = defaultdict(lambda: default,
+    #                                          atom_strs_by_task_type)
+    #     atom_strs = atom_strs_by_task_type[CFG.burger_no_move_task_type]
+    #     return [[a] for a in atom_strs]
 
     @property
     def oracle_proposed_predicates(self) -> Set[Predicate]:
diff --git a/predicators/pretrained_model_interface.py b/predicators/pretrained_model_interface.py
@@ -387,10 +387,12 @@ def _sample_completions(
                                                 images=imgs,
                                                 detail="auto")
         responses = [
-            self.call_openai_api(messages,
-                                 model=self.model_name,
-                                 max_tokens=self._max_tokens,
-                                 temperature=temperature)
+            self.call_openai_api(messages, model=self.model_name, max_tokens=self._max_tokens, temperature=temperature)
             for _ in range(num_completions)
         ]
+        while any("sorry" in response.lower() for response in responses):
+            responses = [
+                self.call_openai_api(messages, model=self.model_name, max_tokens=self._max_tokens, temperature=temperature)
+                for _ in range(num_completions)
+            ]
         return responses