Learning-and-Intelligent-Systems
diff --git a/scripts/run_supercloud_experiments.sh b/scripts/run_supercloud_experiments.sh
Lines changed: 2 additions & 2 deletions
diff --git a/src/nsrt_learning/nsrt_learning_main.py b/src/nsrt_learning/nsrt_learning_main.py
Lines changed: 21 additions & 229 deletions
@@ -31,9 +31,9 @@ for SEED in $(seq $START_SEED $((NUM_SEEDS+START_SEED-1))); do
     python $FILE --experiment_id tools_invent_allexclude --env tools --approach grammar_search_invention --excluded_predicates all --seed $SEED --num_train_tasks 200
 
     # repeated_nextto
-    # requires extra flag: "--side_predicate_learner prediction_error_hillclimbing"
+    # requires extra flag: "--side_predicate_learner prediction_error_hill_climbing"
     python $FILE --experiment_id repeated_nextto_oracle --env repeated_nextto --approach oracle --seed $SEED --num_train_tasks 0
-    python $FILE --experiment_id repeated_nextto_nsrt_learning --env repeated_nextto --approach nsrt_learning --side_predicate_learner prediction_error_hillclimbing --seed $SEED --num_train_tasks 50
+    python $FILE --experiment_id repeated_nextto_nsrt_learning --env repeated_nextto --approach nsrt_learning --side_predicate_learner prediction_error_hill_climbing --seed $SEED --num_train_tasks 50
 
     # playroom
     python $FILE --experiment_id playroom_oracle --env playroom --approach oracle --seed $SEED --num_train_tasks 0
 
@@ -3,21 +3,20 @@
 from __future__ import annotations
 
 import logging
-from typing import FrozenSet, Iterator, List, Set, Tuple
+from typing import List, Set
 
 from predicators.src import utils
 from predicators.src.nsrt_learning.option_learning import create_option_learner
 from predicators.src.nsrt_learning.sampler_learning import learn_samplers
 from predicators.src.nsrt_learning.segmentation import segment_trajectory
+from predicators.src.nsrt_learning.side_predicate_learning import \
+    PredictionErrorHillClimbingSidePredicateLearner, \
+    PreserveSkeletonsHillClimbingSidePredicateLearner, SidePredicateLearner
 from predicators.src.nsrt_learning.strips_learning import \
     learn_strips_operators
-from predicators.src.planning import task_plan_grounding
-from predicators.src.predicate_search_score_functions import \
-    _PredictionErrorScoreFunction
 from predicators.src.settings import CFG
-from predicators.src.structs import NSRT, GroundAtom, LowLevelTrajectory, \
-    OptionSpec, PartialNSRTAndDatastore, Predicate, Segment, STRIPSOperator, \
-    Task, _GroundNSRT
+from predicators.src.structs import NSRT, LowLevelTrajectory, \
+    PartialNSRTAndDatastore, Predicate, Task
 
 
 def learn_nsrts_from_data(trajectories: List[LowLevelTrajectory],
@@ -79,8 +78,21 @@ def learn_nsrts_from_data(trajectories: List[LowLevelTrajectory],
     if CFG.side_predicate_learner != "no_learning":
         assert CFG.option_learner == "no_learning", \
             "Can't learn options and side predicates together."
-        pnads = _learn_pnad_side_predicates(pnads, trajectories, train_tasks,
-                                            predicates, segmented_trajs)
+        if CFG.side_predicate_learner == "prediction_error_hill_climbing":
+            side_pred_learner: SidePredicateLearner = \
+                PredictionErrorHillClimbingSidePredicateLearner(
+                    pnads, trajectories, train_tasks, predicates,
+                    segmented_trajs)
+        elif CFG.side_predicate_learner == "preserve_skeletons_hill_climbing":
+            side_pred_learner = \
+                PreserveSkeletonsHillClimbingSidePredicateLearner(
+                    pnads, trajectories, train_tasks, predicates,
+                    segmented_trajs)
+        else:
+            raise ValueError(
+                f"side_predicate_learner {CFG.side_predicate_learner} not " +
+                "implemented")
+        pnads = side_pred_learner.sideline()
 
     # STEP 5: Learn options (option_learning.py) and update PNADs.
     _learn_pnad_options(pnads)  # in-place update
@@ -97,130 +109,6 @@ def learn_nsrts_from_data(trajectories: List[LowLevelTrajectory],
     return set(nsrts)
 
 
-def _learn_pnad_side_predicates(
-        pnads: List[PartialNSRTAndDatastore],
-        trajectories: List[LowLevelTrajectory], train_tasks: List[Task],
-        predicates: Set[Predicate],
-        segmented_trajs: List[List[Segment]]) -> List[PartialNSRTAndDatastore]:
-
-    def _check_goal(s: Tuple[PartialNSRTAndDatastore, ...]) -> bool:
-        del s  # unused
-        # There are no goal states for this search; run until exhausted.
-        return False
-
-    def _get_successors(
-        s: Tuple[PartialNSRTAndDatastore, ...],
-    ) -> Iterator[Tuple[None, Tuple[PartialNSRTAndDatastore, ...], float]]:
-        # For each PNAD/operator...
-        for i in range(len(s)):
-            pnad = s[i]
-            _, option_vars = pnad.option_spec
-            # ...consider changing each of its effects to a side predicate.
-            for effect in pnad.op.add_effects:
-                if len(pnad.op.add_effects) > 1:
-                    # We don't want sidelining to result in a no-op
-                    new_pnad = PartialNSRTAndDatastore(
-                        pnad.op.effect_to_side_predicate(
-                            effect, option_vars, "add"), pnad.datastore,
-                        pnad.option_spec)
-                    sprime = list(s)
-                    sprime[i] = new_pnad
-                    yield (None, tuple(sprime), 1.0)
-
-            # ...consider removing it.
-            sprime = list(s)
-            del sprime[i]
-            yield (None, tuple(sprime), 1.0)
-
-    if CFG.side_predicate_learner == "prediction_error_hillclimbing":
-        score_func = _PredictionErrorScoreFunction(predicates, [], {},
-                                                   train_tasks)
-
-        def _evaluate(s: Tuple[PartialNSRTAndDatastore, ...]) -> float:
-            # Score function for search. Lower is better.
-            strips_ops = [pnad.op for pnad in s]
-            option_specs = [pnad.option_spec for pnad in s]
-            score = score_func.evaluate_with_operators(frozenset(),
-                                                       trajectories,
-                                                       segmented_trajs,
-                                                       strips_ops,
-                                                       option_specs)
-            return score
-
-    elif CFG.side_predicate_learner == "preserve_skeletons_hillclimbing":
-
-        def _evaluate(s: Tuple[PartialNSRTAndDatastore, ...]) -> float:
-            # Score function for search. Lower is better.
-            strips_ops = [pnad.op for pnad in s]
-            option_specs = [pnad.option_spec for pnad in s]
-            preserves_harmlessness = check_harmlessness(
-                predicates, train_tasks, trajectories, segmented_trajs,
-                strips_ops, option_specs)
-            # NOTE: Arbitrary large number bigger than the total number of
-            # operators at the start of the search.
-            score = 10 * len(pnads)
-            if preserves_harmlessness:
-                score = 2 * len(strips_ops)
-                for op in strips_ops:
-                    score -= len(op.side_predicates)
-            return score
-
-    else:
-        raise ValueError(
-            f"side_predicate_learner {CFG.side_predicate_learner} not " +
-            "implemented")
-
-    # Run the search, starting from original PNADs.
-    path, _, _ = utils.run_hill_climbing(tuple(pnads), _check_goal,
-                                         _get_successors, _evaluate)
-    # The last state in the search holds the final PNADs.
-    pnads = list(path[-1])
-    # Recompute the datastores in the PNADs. We need to do this
-    # because now that we have side predicates, each transition may be
-    # assigned to *multiple* datastores.
-    _recompute_datastores_from_segments(segmented_trajs, pnads)
-    return pnads
-
-
-def _recompute_datastores_from_segments(
-        segmented_trajs: List[List[Segment]],
-        pnads: List[PartialNSRTAndDatastore]) -> None:
-    for pnad in pnads:
-        pnad.datastore = []  # reset all PNAD datastores
-    for seg_traj in segmented_trajs:
-        objects = set(seg_traj[0].states[0])
-        for segment in seg_traj:
-            assert segment.has_option()
-            segment_option = segment.get_option()
-            segment_param_option = segment_option.parent
-            segment_option_objs = tuple(segment_option.objects)
-            # Get ground operators given these objects and option objs.
-            for pnad in pnads:
-                param_opt, opt_vars = pnad.option_spec
-                if param_opt != segment_param_option:
-                    continue
-                isub = dict(zip(opt_vars, segment_option_objs))
-                # Consider adding this segment to each datastore.
-                for ground_op in utils.all_ground_operators_given_partial(
-                        pnad.op, objects, isub):
-                    # Check if preconditions hold.
-                    if not ground_op.preconditions.issubset(
-                            segment.init_atoms):
-                        continue
-                    # Check if effects match. Note that we're using the side
-                    # predicates semantics here!
-                    atoms = utils.apply_operator(ground_op, segment.init_atoms)
-                    if not atoms.issubset(segment.final_atoms):
-                        continue
-                    # Skip over segments that have multiple possible bindings.
-                    if len(set(ground_op.objects)) != len(ground_op.objects):
-                        continue
-                    # This segment belongs in this datastore, so add it.
-                    sub = dict(zip(pnad.op.parameters, ground_op.objects))
-                    pnad.add_to_datastore((segment, sub),
-                                          check_effect_equality=False)
-
-
 def _learn_pnad_options(pnads: List[PartialNSRTAndDatastore]) -> None:
     logging.info("\nDoing option learning...")
     option_learner = create_option_learner()
@@ -263,99 +151,3 @@ def _learn_pnad_samplers(pnads: List[PartialNSRTAndDatastore],
     # Replace the samplers in the PNADs.
     for pnad, sampler in zip(pnads, samplers):
         pnad.sampler = sampler
-
-
-def check_harmlessness(predicates: Set[Predicate], train_tasks: List[Task],
-                       trajectories: List[LowLevelTrajectory],
-                       segmented_trajs: List[List[Segment]],
-                       strips_ops: List[STRIPSOperator],
-                       option_specs: List[OptionSpec]) -> bool:
-    """Function to check whether a given set of operators and predicates
-    preserves harmlessness over demonstrations on some number of training
-    tasks.
-
-    Preserving harmlessness roughly means that the set of operators and
-    predicates supports the agent's ability to plan to achieve all of
-    the training tasks in the same way as was demonstrated (i.e, the
-    predicates and operators don't render any demonstrated trajectory
-    impossible).
-    """
-
-    assert len(trajectories) == len(segmented_trajs)
-    for ll_traj, seg_traj in zip(trajectories, segmented_trajs):
-        if not ll_traj.is_demo:
-            continue
-        atoms_seq = utils.segment_trajectory_to_atoms_sequence(seg_traj)
-        traj_goal = train_tasks[ll_traj.train_task_idx].goal
-        demo_preserved = check_single_demo_preservation(
-            ll_traj, atoms_seq, traj_goal, predicates, strips_ops,
-            option_specs)
-        if not demo_preserved:
-            return False
-
-    return True
-
-
-def check_single_demo_preservation(ll_traj: LowLevelTrajectory,
-                                   atoms_seq: List[Set[GroundAtom]],
-                                   traj_goal: Set[GroundAtom],
-                                   predicates: Set[Predicate],
-                                   strips_ops: List[STRIPSOperator],
-                                   option_specs: List[OptionSpec]) -> bool:
-    """Function to check whether a given set of operators and predicates
-    preserves a single training trajectory."""
-    init_atoms = utils.abstract(ll_traj.states[0], predicates)
-    objects = set(ll_traj.states[0])
-    ground_nsrts, _ = task_plan_grounding(init_atoms, objects, strips_ops,
-                                          option_specs)
-    heuristic = utils.create_task_planning_heuristic(
-        CFG.sesame_task_planning_heuristic, init_atoms, traj_goal,
-        ground_nsrts, predicates, objects)
-
-    def _check_goal(state: Tuple[FrozenSet[GroundAtom], int]) -> bool:
-        return traj_goal.issubset(state[0])
-
-    def _get_successor_with_correct_option(
-        searchnode_state: Tuple[FrozenSet[GroundAtom], int]
-    ) -> Iterator[Tuple[_GroundNSRT, Tuple[FrozenSet[GroundAtom], int],
-                        float]]:
-        state = searchnode_state[0]
-        idx_into_traj = searchnode_state[1]
-
-        if idx_into_traj > len(ll_traj.actions) - 1:
-            return
-
-        gt_option = ll_traj.actions[idx_into_traj].get_option()
-        expected_next_hl_state = atoms_seq[idx_into_traj + 1]
-
-        for applicable_nsrt in utils.get_applicable_operators(
-                ground_nsrts, state):
-            # NOTE: we check that the ParameterizedOptions are equal before
-            # attempting to ground because otherwise, we might
-            # get a parameter mismatch and trigger an AssertionError
-            # during grounding.
-            if applicable_nsrt.option != gt_option.parent:
-                continue
-            if applicable_nsrt.option_objs != gt_option.objects:
-                continue
-            next_hl_state = utils.apply_operator(applicable_nsrt, set(state))
-            exp_state_matches = next_hl_state.issubset(expected_next_hl_state)
-            if exp_state_matches:
-                # The returned cost is uniform because we don't
-                # actually care about finding the shortest path;
-                # just one that matches!
-                yield (applicable_nsrt, (frozenset(next_hl_state),
-                                         idx_into_traj + 1), 1.0)
-
-    init_atoms_frozen = frozenset(init_atoms)
-    init_searchnode_state = (init_atoms_frozen, 0)
-    # NOTE: each state in the below GBFS is a tuple of
-    # (current_atoms, idx_into_traj). The idx_into_traj is necessary because
-    # we need to check whether the atoms that are true at this particular
-    # index into the trajectory is what we would expect given the demo
-    # trajectory.
-    state_seq, _ = utils.run_gbfs(
-        init_searchnode_state, _check_goal, _get_successor_with_correct_option,
-        lambda searchnode_state: heuristic(searchnode_state[0]))
-
-    return _check_goal(state_seq[-1])