williamedwards · dohunnim · Jun 21, 2022 · Jun 21, 2022 · Jun 21, 2022 · Jul 19, 2022
diff --git a/autompc/controller.py b/autompc/controller.py
@@ -173,6 +173,8 @@ def set_ocp(self, ocp):
         """
         self.ocp = ocp
 
+        if len(self.ocp_transformers) == 1:
+            self.ocp_transformer = self.ocp_transformers[0]
         if self.ocp_transformer:
             self.transformed_ocp = self.ocp_transformer(self.ocp)
         else:
@@ -453,6 +455,12 @@ def build(self, trajs : List[Trajectory] = None) -> None:
                 self.ocp_transformer.train(trajs)
             else:
                 raise ControllerStateError("Specified OCP transformer requires learning from trajectories.")
+
+        if self.ocp_transformer:
+            self.transformed_ocp = self.ocp_transformer(self.ocp)
+        else:
+            self.transformed_ocp = self.ocp
+        self.optimizer.set_ocp(self.transformed_ocp)
 
         self.reset()
 
@@ -481,6 +489,7 @@ def reset_history(self) -> None:
         from influencing current model predictions.
         """
         self.model_state = None
+        self.last_control = None
 
     def reset_optimizer(self) -> None:
         """

diff --git a/autompc/costs/__init__.py b/autompc/costs/__init__.py
@@ -1,3 +1,4 @@
 from .quad_cost import QuadCost
 from .thresh_cost import ThresholdCost, BoxThresholdCost
+from .barrier_cost import LogBarrierCost
 from .cost import Cost
diff --git a/autompc/costs/barrier_cost.py b/autompc/costs/barrier_cost.py
@@ -0,0 +1,194 @@
+# Created by Teodor Tchalakov, (ttcha2@illinois.edu)
+
+import numpy as np
+import numpy.linalg as la
+
+from .cost import Cost
+
+class LogBarrierCost(Cost):
+    """
+    Log-barrier cost approximating box constraints on observations and
+    controls.
+
+    For every bounded variable x with scale b the incremental cost adds
+        - b * ln(x - lower)   when a finite lower limit is given, and
+        - b * ln(upper - x)   when a finite upper limit is given.
+    The barrier is undefined on or outside a limit: there the cost and the
+    Hessian diagonal are reported as +inf and the gradient as -inf (lower
+    limit) or +inf (upper limit). The terminal cost is always zero.
+    """
+
+    def __init__(self, system, boundedStates):
+        """
+        Create barrier cost that approximates an inequality constraint.
+
+        Parameters
+        ----------
+        system : System
+            Robot system object.
+        boundedStates : dict
+            Dictionary of { "observation/control name" : (lower, upper, scale) }
+                observation/control name : str
+                    Observation/control for which limits are specified.
+                lower : float
+                    Lower limit. Use -np.inf for no lower barrier.
+                upper : float
+                    Upper limit. Use np.inf for no upper barrier.
+                scale : float
+                    Non-negative scalar to magnify the cost function.
+
+        Raises
+        ------
+        ValueError
+            If a scale is negative, or a name is neither an observation
+            nor a control of the system.
+        """
+        super().__init__(system)
+        # Kept so that __add__ and __mul__ can rebuild a cost from the
+        # original specification.
+        self.boundedStates = dict(boundedStates)
+        self.obsConfiguration = []
+        self.ctrlsConfiguration = []
+
+        for variable, config in self.boundedStates.items():
+            lower, upper, scale = config
+            # Validate the scale (third entry), not a limit.
+            if scale < 0:
+                raise ValueError(f"{variable}'s log barrier scale must be non-negative, was {scale}")
+            elif variable in system.observations:
+                self.obsConfiguration.append([variable, config])
+            elif variable in system.controls:
+                self.ctrlsConfiguration.append([variable, config])
+            else:
+                raise ValueError(f"Variable {variable} is not in the given system")
+
+        # Configs
+        self._is_quad = False
+        self._is_convex = True
+        self._is_diff = True
+        self._is_twice_diff = True
+        self._has_goal = False
+
+    def incremental(self, obs, control):
+        """Return the summed barrier cost of ``obs`` and ``control``."""
+        return self.eval_obs_cost(obs) + self.eval_ctrl_cost(control)
+
+    def incremental_diff(self, obs, control):
+        """Return (cost, d cost / d obs, d cost / d control)."""
+        return self.incremental(obs, control), self.eval_obs_cost_diff(obs), self.eval_ctrl_cost_diff(control)
+
+    def incremental_hess(self, obs, control):
+        """Return (cost, obs gradient, control gradient, obs Hessian,
+        obs-control Hessian, control Hessian)."""
+        # Each barrier term depends on a single variable, so the mixed
+        # observation/control block is zero.
+        hess_obs_ctrl = np.zeros((self.system.obs_dim, self.system.ctrl_dim))
+        return (self.incremental(obs, control),
+                self.eval_obs_cost_diff(obs),
+                self.eval_ctrl_cost_diff(control),
+                self.eval_obs_cost_hess(obs),
+                hess_obs_ctrl,
+                self.eval_ctrl_cost_hess(control))
+
+    def terminal(self, obs):
+        """Return the terminal cost, which is always zero."""
+        return 0.0
+
+    def terminal_diff(self, obs):
+        """Return (0, zero gradient) shaped for the observation vector."""
+        return 0.0, np.zeros(self.system.obs_dim)
+
+    def terminal_hess(self, obs):
+        """Return (0, zero gradient, zero Hessian) shaped for the observation vector."""
+        obs_dim = self.system.obs_dim
+        return 0.0, np.zeros(obs_dim), np.zeros((obs_dim, obs_dim))
+
+    def __add__(self, rhs):
+        """Merge two barrier costs into one when that is exact; otherwise
+        defer to ``Cost.__add__``."""
+        if isinstance(rhs, LogBarrierCost):
+            same_goal = (self.goal is None and rhs.goal is None) or np.all(self.goal == rhs.goal)
+            # A variable bounded by both operands cannot be represented by
+            # a single dictionary entry, so only merge disjoint specifications.
+            disjoint = self.boundedStates.keys().isdisjoint(rhs.boundedStates)
+            if same_goal and disjoint:
+                return LogBarrierCost(self.system, {**self.boundedStates, **rhs.boundedStates})
+        return Cost.__add__(self, rhs)
+
+    def __mul__(self, rhs):
+        """Return this cost with every barrier scale multiplied by ``rhs``."""
+        if not isinstance(rhs, (float, int)):
+            raise ValueError("* only supports product with numbers")
+        if rhs < 0:
+            # A negative factor is not a valid barrier scale; use the
+            # generic product from the base class instead.
+            return Cost.__mul__(self, rhs)
+        scaled = {name: (lower, upper, scale * rhs)
+                  for name, (lower, upper, scale) in self.boundedStates.items()}
+        return LogBarrierCost(self.system, scaled)
+
+    @staticmethod
+    def _bounded_entries(configuration, names, values):
+        """Yield (index, value, lower, upper, scale) for each configured variable."""
+        for variable, (lower, upper, scale) in configuration:
+            index = names.index(variable)
+            yield index, values[index], lower, upper, scale
+
+    def _barrier_cost(self, configuration, names, values):
+        """Sum the barrier terms; +inf once a finite limit is reached."""
+        total = 0.0
+        for _, x, lower, upper, scale in self._bounded_entries(configuration, names, values):
+            if lower > -np.inf:
+                total += np.inf if x <= lower else -scale * np.log(x - lower)
+            if upper < np.inf:
+                total += np.inf if x >= upper else -scale * np.log(upper - x)
+        return total
+
+    def _barrier_diff(self, configuration, names, dim, values):
+        """Gradient of the barrier terms as a vector of length ``dim``."""
+        jacobian = np.zeros(dim)
+        for index, x, lower, upper, scale in self._bounded_entries(configuration, names, values):
+            if lower > -np.inf:
+                jacobian[index] += -np.inf if x <= lower else -scale / (x - lower)
+            if upper < np.inf:
+                jacobian[index] += np.inf if x >= upper else scale / (upper - x)
+        return jacobian
+
+    def _barrier_hess(self, configuration, names, dim, values):
+        """Diagonal Hessian of the barrier terms as a ``dim`` x ``dim`` matrix."""
+        hessian = np.zeros((dim, dim))
+        for index, x, lower, upper, scale in self._bounded_entries(configuration, names, values):
+            if lower > -np.inf:
+                hessian[index, index] += np.inf if x <= lower else scale / (x - lower) ** 2
+            if upper < np.inf:
+                hessian[index, index] += np.inf if x >= upper else scale / (upper - x) ** 2
+        return hessian
+
+    # Cost function (b = scale):
+    #   - b * ln(x - lower)   lower limit, x > lower
+    #   - b * ln(upper - x)   upper limit, x < upper
+    def eval_obs_cost(self, obs):
+        return self._barrier_cost(self.obsConfiguration, self.system.observations, obs)
+
+    # Jacobian:
+    #   -b / (x - lower)   lower limit
+    #    b / (upper - x)   upper limit
+    def eval_obs_cost_diff(self, obs):
+        return self._barrier_diff(self.obsConfiguration, self.system.observations, self.system.obs_dim, obs)
+
+    # Hessian:
+    #   b / (x - lower)^2   lower limit
+    #   b / (upper - x)^2   upper limit
+    def eval_obs_cost_hess(self, obs):
+        return self._barrier_hess(self.obsConfiguration, self.system.observations, self.system.obs_dim, obs)
+
+    # The control variants share the formulas above, including the infinite
+    # values on or outside a limit.
+    def eval_ctrl_cost(self, ctrl):
+        return self._barrier_cost(self.ctrlsConfiguration, self.system.controls, ctrl)
+
+    def eval_ctrl_cost_diff(self, ctrl):
+        return self._barrier_diff(self.ctrlsConfiguration, self.system.controls, self.system.ctrl_dim, ctrl)
+
+    def eval_ctrl_cost_hess(self, ctrl):
+        return self._barrier_hess(self.ctrlsConfiguration, self.system.controls, self.system.ctrl_dim, ctrl)
diff --git a/autompc/costs/cost.py b/autompc/costs/cost.py
@@ -186,6 +186,12 @@ def goal(self) -> Optional[np.ndarray]:
         return np.copy(self.properties['goal'])
 
     @goal.setter
+    def goal(self, goal):
+        """Property setter for ``goal``: stores a copy (``np.copy``) of
+        ``goal`` in ``self.properties['goal']``. (Note: not all costs
+        actually act to drive the system toward a goal)."""
+        self.properties['goal'] = np.copy(goal)
+
     def set_goal(self,goal):
         """Sets the cost's goal state. (Note: not all costs actually act to
         drive the system toward a goal).
@@ -201,12 +207,12 @@ def __add__(self, other):
     def __mul__(self, rhs):
         if not isinstance(rhs,(int,float)):
             raise ValueError("Can only multiply by a float")
-        return MulCost(self.system, [self, rhs])
+        return MulCost(self.system, self, rhs)
 
     def __rmul__(self, lhs):
         if not isinstance(lhs,(int,float)):
             raise ValueError("Can only multiply by a float")
-        return MulCost(self.system, [self, lhs])
+        return MulCost(self.system, self, lhs)
 
 
 class SumCost(Cost):
@@ -279,6 +285,11 @@ def goal(self):
         return super().goal
 
     @goal.setter
+    def goal(self, goal):
+        Cost.goal.fset(self, goal)  # `super().goal = goal` raises AttributeError
+        for cost in self.costs:  # propagate the goal to every summed cost
+            cost.goal = goal
+
     def set_goal(self,goal):
         super().set_goal(goal)
         for cost in self.costs:
@@ -354,9 +365,15 @@ def goal(self):
         return super().goal
 
     @goal.setter
+    def goal(self, goal):
+        Cost.goal.fset(self, goal)  # `super().goal = goal` raises AttributeError
+        for cost in self.costs:  # NOTE(review): set_goal used self._cost before; confirm self.costs exists
+            cost.goal = goal
+
     def set_goal(self,goal):
         super().set_goal(goal)
-        self._cost.goal = goal
+        for cost in self.costs:
+            cost.goal = goal
 
     def __mul__(self, scale):
         if not isinstance(scale,(float,int)):

diff --git a/autompc/costs/quad_cost.py b/autompc/costs/quad_cost.py
@@ -81,7 +81,8 @@ def incremental_hess(self, obs, control):
             obst = obs
         QQt = (self._Q + self._Q.T)
         RRt = (self._R + self._R)
-        return obst.T @ self._Q @ obst + control.T @ self._R @control, QQt @ obst, RRt @ control, QQt, None, RRt
+        hess_obs_ctrl = np.zeros((self.system.obs_dim, self.system.ctrl_dim))
+        return obst.T @ self._Q @ obst + control.T @ self._R @control, QQt @ obst, RRt @ control, QQt, hess_obs_ctrl, RRt
 
     def terminal(self, obs):
         try:

diff --git a/autompc/costs/thresh_cost.py b/autompc/costs/thresh_cost.py
@@ -10,22 +10,21 @@ def __init__(self, system, goal, threshold, obs_range=None, observations=None):
         """
         Create threshold cost. Returns 1 for every time steps
         where :math:`||x - x_\\textrm{goal}||_\\infty > \\textrm{threshold}`.
-        The check is performed only over the observation dimensions from
-        obs_range[0] to obs_range[1].
-
+
+        The norm is computed only over the observation dimensions from
+        obs_range[0] to obs_range[1], or over the observations named in
+        `observations`.
         Parameters
         ----------
         system : System
             Robot system object
-
         goal : Numpy array
-            Goal position
-
+            Goal position. Can either be of length system.obs_dim or of
+            length equal to the number of selected observations.
         obs_range : (int, int)
             First (inclusive and last (exclusive) index of observations
             for which goal is specified.  If neither this field nor
             observations is set, default is full observation range.
-
         observations : [str]
             List of observation names for which goal is specified.
             Supersedes obs_range when present.
@@ -39,18 +38,15 @@ def __init__(self, system, goal, threshold, obs_range=None, observations=None):
             self._obs_idxs = [system.observations.index(obs) for obs in observations]
         if self._obs_idxs is None:
             self._obs_idxs = list(range(0, system.obs_dim))
-        self.set_goal(goal)
-
-    def set_goal(self, goal):
         if len(goal) < self.system.obs_dim:
-            self._goal = np.zeros(self.system.obs_dim)
-            self._goal[self._obs_idxs] = goal
-        else:
-            self._goal = np.copy(goal)
+            full_goal = np.zeros(self.system.obs_dim)
+            full_goal[self._obs_idxs] = goal
+            goal = full_goal
+        self.set_goal(goal)
 
     def incremental(self, obs, ctrl):
-        if (la.norm(obs[self._obs_idxs] - self._goal[self._obs_idxs], np.inf) 
-                > self._threshold):
+        max_dist_to_goal = la.norm(obs[self._obs_idxs] - self.goal[self._obs_idxs], np.inf)
+        if (max_dist_to_goal > self._threshold or np.isnan(max_dist_to_goal)):
             return 1.0
         else:
             return 0.0
@@ -64,16 +60,13 @@ def __init__(self, system, limits, goal=None):
         """
         Create Box threshold cost. Returns 1 for every time steps
         where observation is outisde of limits.
-
         Paramters
         ---------
         system : System
             System cost is computed for
-
         limits : numpy array of shape (system.obs_dim, 2)
             Upper and lower limits.  Use +np.inf or -np.inf
             to allow certain dimensions unbounded.
-
         goal : numpy array of size system.obs_dim
             Goal state.  Not used directly for computing cost, but
             may be used by downstream cost factories.
@@ -91,4 +84,4 @@ def incremental(self, obs, ctrl):
             return 0.0
 
     def terminal(self, obs):
-        return 0.0
+        return 0.0
diff --git a/autompc/costs/zero_cost.py b/autompc/costs/zero_cost.py
@@ -20,7 +20,7 @@ def incremental_diff(self, obs, ctrl):
         return 0.0,np.zeros(len(obs)),np.zeros(len(ctrl))
 
     def incremental_hess(self, obs, ctrl):
-        return 0.0,np.zeros(len(obs)),np.zeros(len(ctrl)),np.zeros(len(obs),len(obs)),None,np.zeros(len(ctrl),len(ctrl))
+        return 0.0,np.zeros(len(obs)),np.zeros(len(ctrl)),np.zeros((len(obs),len(obs))),np.zeros((len(obs),len(ctrl))),np.zeros((len(ctrl),len(ctrl)))
 
     def terminal(self, obs) -> float:
         return 0.0
@@ -29,5 +29,5 @@ def terminal_diff(self, obs):
         return 0.0,np.zeros(len(obs))
 
     def terminal_hess(self, obs):
-        return 0.0,np.zeros(len(obs)),np.zeros(len(obs),len(obs))
+        return 0.0,np.zeros(len(obs)),np.zeros((len(obs),len(obs)))
 
diff --git a/autompc/ocp/__init__.py b/autompc/ocp/__init__.py
@@ -3,3 +3,4 @@
 from .quad_cost_transformer import QuadCostTransformer
 from .gauss_reg_transformer import GaussRegTransformer
 from .bounds_transformer import KeepBoundsTransformer,DeleteBoundsTransformer
+from .barrier_cost_transformer import LogBarrierCostTransformer