From 7c6b5dc4883e51b436b287eb0e1561d1d69c923e Mon Sep 17 00:00:00 2001
From: An Thai Le <an.thai.le97@gmail.com>
Date: Mon, 16 Oct 2023 11:48:09 +0200
Subject: [PATCH] Finalize

Signed-off-by: An Thai Le <an.thai.le97@gmail.com>
---
 README.md                                   |  61 ++++-
 examples/mpot_occupancy.py                  | 182 ++++++++++++++
 examples/mpot_panda.py                      | 209 +++++++++++++++++
 examples/mpot_planar.py                     | 199 ----------------
 examples/mpot_sdf.py                        | 175 ++++++++++++++
 mpot/costs.py                               | 248 ++++++++++++--------
 mpot/envs/map_generator.py                  | 155 +++++-------
 mpot/envs/obst_map.py                       |  12 +-
 mpot/envs/occupancy.py                      |  48 ++++
 mpot/gp/LICENSE                             |   2 +-
 mpot/gp/field_factor.py                     |  59 +++++
 mpot/gp/gp_factor.py                        |   2 +-
 mpot/gp/{mp_priors_multi.py => gp_prior.py} |   2 +-
 mpot/ot/problem.py                          |   2 +-
 mpot/ot/sinkhorn.py                         |   3 +-
 mpot/ot/sinkhorn_step.py                    |  26 +-
 mpot/planner.py                             |   4 +-
 requirements.txt                            |   2 +
 18 files changed, 966 insertions(+), 425 deletions(-)
 create mode 100644 examples/mpot_occupancy.py
 create mode 100644 examples/mpot_panda.py
 delete mode 100644 examples/mpot_planar.py
 create mode 100644 examples/mpot_sdf.py
 create mode 100644 mpot/envs/occupancy.py
 create mode 100644 mpot/gp/field_factor.py
 rename mpot/gp/{mp_priors_multi.py => gp_prior.py} (99%)

diff --git a/README.md b/README.md
index ca0b1d8..41602d4 100644
--- a/README.md
+++ b/README.md
@@ -1,29 +1,72 @@
-# mpot
-
-
 # Accelerating Motion Planning via Optimal Transport
 
+This repository implements Motion Planning via Optimal Transport `mpot` in PyTorch. 
+The philosophy of `mpot` follows Monte Carlo methods' argument, i.e., more samples could discover more better modes with high enough initialization variances.
+In other words, `mpot` enables better **brute-force** planning with GPU vectorization, for robustness against bad local minima, which is common in optimization-based motion planning.
 
 ![Point-mass with three goals](demos/planar.gif)
 
-
 ## Installation
 
-Simply install this repos by
+Simply install `mpot` by
 
 ```azure
 pip install -e .
 ```
 
+`mpot` algorithm is specifically designed to work with GPU. Please check if you have installed PyTorch with the CUDA option. 
+
 ## Examples
 
-For the demo of paralleled planning in planar environment with 3 goals and 33 plans each (99 plans):
+Please find in `examples/` folder the demo of vectorized planning in planar environments with occupancy map:
 
 ```azure
 python examples/mpot_planar.py
 ```
 
-Please find in `data/planar/` for the result in GIF.
+and with signed-distance-field (SDF):
+
+```azure
+python examples/mpot_sdf.py
+```
+
+We also added a demo with vectorized Panda planning with dense obstacle environments with SDF:
+
+```azure
+python examples/mpot_panda.py
+```
+
+The resulting optimization visualizations are stored at your current directory.
+Please refer to the example scripts for playing around with options and different goal points. Note that for all cases, we normalize the joint space to the joint limits and velocity limits, then perform Sinkhorn Step on the normalized state-space. Changing any hyperparameters may require tuning again.
 
-See `examples/mpot_planar.py` for playing around with options and different goal points. Note that changing any parameters may require tuning again.
-We tested the script on RTX 3080Ti GPU, with planning time 0.2-0.3 seconds.
+**Tuning Tips**: The most sensitive parameters are:
+
+- `polytope`: for small state-dimension that is less than 20, `cube` or `orthoplex` are good choices. For much higer state-dimension, the only choice is `simplex`.
+- `step_radius`: the step size
+- `probe_radius`: the probing radius, which projects towards polytope vertices to compute cost-to-go. Note, `probe_radius` >= `step_radius`.
+- `num_probe`: number of probing points along the probe radius. This is critical for optimizing performance, usually 3-5 is enough.
+- `epsilon`: decay rate of the step/probe size, usually 0.01-0.05.
+- `ent_epsilon`: Sinkhorn entropy regularization, usually 1e-2 to 5e-2 for balancing between sharpness and speed.
+- Various cost term weightings. This depends on your applications.
+
+## Troubleshooting
+
+If you encounter memory problems, try:
+
+```azure
+export 'PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512'
+```
+
+to reduce memory fragmentation.
+
+## Citation
+
+If you found this repository useful, please consider citing these references:
+
+```azure
+@inproceedings{le2023accelerating,
+  title={Accelerating Motion Planning via Optimal Transport},
+  author={Le, An T. and Chalvatzaki, Georgia and Biess, Armin and Peters, Jan},
+  booktitle={Advances in Neural Information Processing Systems (NeurIPS)},
+  year={2023}
+}
diff --git a/examples/mpot_occupancy.py b/examples/mpot_occupancy.py
new file mode 100644
index 0000000..27f4eb7
--- /dev/null
+++ b/examples/mpot_occupancy.py
@@ -0,0 +1,182 @@
+import os
+from pathlib import Path
+import time
+import matplotlib.pyplot as plt
+import torch
+from einops._torch_specific import allow_ops_in_compiled_graph  # requires einops>=0.6.1
+
+from mpot.ot.problem import Epsilon
+from mpot.ot.sinkhorn import Sinkhorn
+from mpot.planner import MPOT
+from mpot.costs import CostGPHolonomic, CostField, CostComposite
+from mpot.envs.occupancy import EnvOccupancy2D
+from mpot.utils.trajectory import interpolate_trajectory
+
+from torch_robotics.robots.robot_point_mass import RobotPointMass
+from torch_robotics.torch_utils.seed import fix_random_seed
+from torch_robotics.torch_utils.torch_timer import TimerCUDA
+from torch_robotics.torch_utils.torch_utils import get_torch_device
+from torch_robotics.tasks.tasks import PlanningTask
+from torch_robotics.visualizers.planning_visualizer import PlanningVisualizer
+
+allow_ops_in_compiled_graph()
+
+
+if __name__ == "__main__":
+    seed = int(time.time())
+    fix_random_seed(seed)
+
+    device = get_torch_device()
+    tensor_args = {'device': device, 'dtype': torch.float32}
+
+    # ---------------------------- Environment, Robot, PlanningTask ---------------------------------
+    q_limits = torch.tensor([[-10, -10], [10, 10]], **tensor_args)
+    env = EnvOccupancy2D(
+        precompute_sdf_obj_fixed=False,
+        tensor_args=tensor_args
+    )
+
+    robot = RobotPointMass(
+        q_limits=q_limits,  # joint limits
+        tensor_args=tensor_args
+    )
+
+    task = PlanningTask(
+        env=env,
+        robot=robot,
+        ws_limits=q_limits,  # workspace limits
+        obstacle_cutoff_margin=0.05,
+        tensor_args=tensor_args
+    )
+
+    # -------------------------------- Params ---------------------------------
+    # NOTE: these parameters are tuned for this environment
+    step_radius = 0.15
+    probe_radius = 0.15  # probe radius >= step radius
+
+    # NOTE: changing polytope may require tuning again
+    polytope = 'cube'  # 'simplex' | 'orthoplex' | 'cube';
+
+    epsilon = 0.01
+    ent_epsilon = Epsilon(1e-2)
+    num_probe = 5  # number of probes points for each polytope vertices
+    num_particles_per_goal = 33  # number of plans per goal
+    pos_limits = [-10, 10]
+    vel_limits = [-10, 10]
+    w_coll = 5e-3  # for tuning the obstacle cost
+    w_smooth = 1e-7  # for tuning the GP cost: error = w_smooth * || Phi x(t) - x(1+1) ||^2
+    sigma_gp = 0.1   # for tuning the GP cost: Q_c = sigma_gp^2 * I
+    sigma_gp_init = 1.6   # for controlling the initial GP variance: Q0_c = sigma_gp_init^2 * I
+    max_inner_iters = 100  # max inner iterations for Sinkhorn-Knopp
+    max_outer_iters = 100  # max outer iterations for MPOT
+    
+    start_state = torch.tensor([-9, -9, 0., 0.], **tensor_args)
+
+    # NOTE: change goal states here (zero vel goals)
+    multi_goal_states = torch.tensor([
+        [0, 9, 0., 0.],
+        [9, 9, 0., 0.],
+        [9, 0, 0., 0.]
+    ], **tensor_args)
+
+    traj_len = 64
+    dt = 0.1
+
+    #--------------------------------- Cost function ---------------------------------
+    
+    cost_coll = CostField(
+        robot, traj_len,
+        field=env.occupancy_map,
+        sigma_coll=1.0,
+        tensor_args=tensor_args
+    )
+    cost_gp = CostGPHolonomic(robot, traj_len, dt, sigma_gp, [0, 1], weight=w_smooth, tensor_args=tensor_args)
+    cost_func_list = [cost_coll, cost_gp]
+    weights_cost_l = [w_coll, w_smooth]
+    cost = CostComposite(
+        robot, traj_len, cost_func_list,
+        weights_cost_l=weights_cost_l,
+        tensor_args=tensor_args
+    )
+
+    #--------------------------------- MPOT Init ---------------------------------
+
+    linear_ot_solver = Sinkhorn(
+        threshold=1e-6,
+        inner_iterations=1,
+        max_iterations=max_inner_iters,
+    )
+    ss_params = dict(
+        epsilon=epsilon,
+        ent_epsilon=ent_epsilon,
+        step_radius=step_radius,
+        probe_radius=probe_radius,
+        num_probe=num_probe,
+        min_iterations=5,
+        max_iterations=max_outer_iters,
+        threshold=2e-3,
+        store_history=True,
+        tensor_args=tensor_args,
+    )
+
+    mpot_params = dict(
+        objective_fn=cost,
+        linear_ot_solver=linear_ot_solver,
+        ss_params=ss_params,
+        dim=2,
+        traj_len=traj_len,
+        num_particles_per_goal=num_particles_per_goal,
+        dt=dt,
+        start_state=start_state,
+        multi_goal_states=multi_goal_states,
+        pos_limits=pos_limits,
+        vel_limits=vel_limits,
+        polytope=polytope,
+        fixed_goal=True,
+        sigma_start_init=0.001,
+        sigma_goal_init=0.001,
+        sigma_gp_init=sigma_gp_init,
+        seed=seed,
+        tensor_args=tensor_args,
+    )
+    planner = MPOT(**mpot_params)
+
+    #--------------------------------- Optimize ---------------------------------
+
+    with TimerCUDA() as t:
+        trajs, optim_state, opt_iters = planner.optimize()
+    int_trajs = interpolate_trajectory(trajs, num_interpolation=3)
+    colls = env.occupancy_map.get_collisions(int_trajs[..., :2]).any(dim=1)
+    sinkhorn_iters = optim_state.linear_convergence[:opt_iters]
+    print(f'Optimization finished at {opt_iters}! Parallelization Quality (GOOD [%]): {(1 - colls.float().mean()) * 100:.2f}')
+    print(f'Time(s) optim: {t.elapsed} sec')
+    print(f'Average Sinkhorn Iterations: {sinkhorn_iters.mean():.2f}, min: {sinkhorn_iters.min():.2f}, max: {sinkhorn_iters.max():.2f}')
+
+    # -------------------------------- Visualize ---------------------------------
+    planner_visualizer = PlanningVisualizer(
+        task=task,
+        planner=planner
+    )
+
+    traj_history = optim_state.X_history[:opt_iters]
+    traj_history = traj_history.view(opt_iters, -1, traj_len, 4)
+    base_file_name = Path(os.path.basename(__file__)).stem
+    pos_trajs_iters = robot.get_position(traj_history)
+
+    planner_visualizer.animate_opt_iters_joint_space_state(
+        trajs=traj_history,
+        pos_start_state=start_state,
+        vel_start_state=torch.zeros_like(start_state),
+        video_filepath=f'{base_file_name}-joint-space-opt-iters.mp4',
+        n_frames=max((2, opt_iters // 5)),
+        anim_time=5
+    )
+
+    planner_visualizer.animate_opt_iters_robots(
+        trajs=pos_trajs_iters, start_state=start_state,
+        video_filepath=f'{base_file_name}-traj-opt-iters.mp4',
+        n_frames=max((2, opt_iters // 5)),
+        anim_time=5
+    )
+
+    plt.show()
diff --git a/examples/mpot_panda.py b/examples/mpot_panda.py
new file mode 100644
index 0000000..c45d538
--- /dev/null
+++ b/examples/mpot_panda.py
@@ -0,0 +1,209 @@
+
+import os
+from pathlib import Path
+import time
+import matplotlib.pyplot as plt
+import torch
+from einops._torch_specific import allow_ops_in_compiled_graph  # requires einops>=0.6.1
+
+from mpot.ot.problem import Epsilon
+from mpot.ot.sinkhorn import Sinkhorn
+from mpot.planner import MPOT
+from mpot.costs import CostGPHolonomic, CostField, CostComposite
+
+from torch_robotics.environments.env_spheres_3d import EnvSpheres3D
+from torch_robotics.robots.robot_panda import RobotPanda
+from torch_robotics.tasks.tasks import PlanningTask
+from torch_robotics.torch_utils.seed import fix_random_seed
+from torch_robotics.torch_utils.torch_timer import TimerCUDA
+from torch_robotics.torch_utils.torch_utils import get_torch_device
+from torch_robotics.visualizers.planning_visualizer import PlanningVisualizer
+
+allow_ops_in_compiled_graph()
+
+
+if __name__ == "__main__":
+    base_file_name = Path(os.path.basename(__file__)).stem
+
+    seed = int(time.time())
+    fix_random_seed(seed)
+
+    device = get_torch_device()
+    tensor_args = {'device': device, 'dtype': torch.float32}
+
+    # ---------------------------- Environment, Robot, PlanningTask ---------------------------------
+    env = EnvSpheres3D(
+        precompute_sdf_obj_fixed=False,
+        sdf_cell_size=0.01,
+        tensor_args=tensor_args
+    )
+
+    robot = RobotPanda(
+        use_self_collision_storm=False,
+        tensor_args=tensor_args
+    )
+
+    task = PlanningTask(
+        env=env,
+        robot=robot,
+        ws_limits=torch.tensor([[-1.5, -1.5, -1.5], [1.5, 1.5, 1.5]], **tensor_args),  # workspace limits
+        obstacle_cutoff_margin=0.03,
+        tensor_args=tensor_args
+    )
+
+    # -------------------------------- Params ---------------------------------
+    for _ in range(100):
+        q_free = task.random_coll_free_q(n_samples=2)
+        start_state = q_free[0]
+        goal_state = q_free[1]
+
+        # check if the EE positions are "enough" far apart
+        start_state_ee_pos = robot.get_EE_position(start_state).squeeze()
+        goal_state_ee_pos = robot.get_EE_position(goal_state).squeeze()
+
+        if torch.linalg.norm(start_state_ee_pos - goal_state_ee_pos) > 0.5:
+            break
+
+    # start_state = torch.tensor([1.0403,  0.0493,  0.0251, -1.2673,  1.6676,  3.3611, -1.5428], **tensor_args)
+    # goal_state = torch.tensor([1.1142,  1.7289, -0.1771, -0.9284,  2.7171,  1.2497,  1.7724], **tensor_args)
+
+    print('Start state: ', start_state)
+    print('Goal state: ', goal_state)
+    start_state = torch.concatenate((start_state, torch.zeros_like(start_state)))
+    goal_state = torch.concatenate((goal_state, torch.zeros_like(goal_state)))
+    multi_goal_states = goal_state.unsqueeze(0)
+
+    # Construct planner
+    duration = 5  # sec
+    traj_len = 64
+    dt = duration / traj_len
+    num_particles_per_goal = 10  # number of plans per goal  # NOTE: if memory is not enough, reduce this number
+
+    # NOTE: these parameters are tuned for this environment
+    step_radius = 0.03
+    probe_radius = 0.08  # probe radius >= step radius
+
+    # NOTE: changing polytope may require tuning again 
+    # NOTE: cube in this case could lead to memory insufficiency, depending how many plans are optimized
+    polytope = 'cube'  # 'simplex' | 'orthoplex' | 'cube';
+
+    epsilon = 0.02
+    ent_epsilon = Epsilon(1e-2)
+    num_probe = 3  # number of probes points for each polytope vertices
+    # panda joint limits
+    q_max = torch.tensor([2.7437, 1.7837, 2.9007, -0.1518, 2.8065, 4.5169, 3.0159], **tensor_args)
+    q_min = torch.tensor([-2.7437, -1.7837, -2.9007, -3.0421, -2.8065, 0.5445, -3.0159], **tensor_args)
+    pos_limits = torch.stack([q_min, q_max], dim=1)
+    vel_limits = [-5, 5]
+    w_coll = 1e-1  # for tuning the obstacle cost
+    w_smooth = 1e-7  # for tuning the GP cost: error = w_smooth * || Phi x(t) - x(1+1) ||^2
+    sigma_gp = 0.01  # for tuning the GP cost: Q_c = sigma_gp^2 * I
+    sigma_gp_init = 0.5   # for controlling the initial GP variance: Q0_c = sigma_gp_init^2 * I
+    max_inner_iters = 100  # max inner iterations for Sinkhorn-Knopp
+    max_outer_iters = 40  # max outer iterations for MPOT
+
+    #--------------------------------- Cost function ---------------------------------
+
+    cost_func_list = []
+    weights_cost_l = []
+    for collision_field in task.get_collision_fields():
+        cost_func_list.append(
+            CostField(
+                robot, traj_len,
+                field=collision_field,
+                sigma_coll=1.0,
+                tensor_args=tensor_args
+            )
+        )
+        weights_cost_l.append(w_coll)
+    cost_gp = CostGPHolonomic(robot, traj_len, dt, sigma_gp, [0, 1], weight=w_smooth, tensor_args=tensor_args)
+    cost_func_list.append(cost_gp)
+    weights_cost_l.append(w_smooth)
+    cost = CostComposite(
+        robot, traj_len, cost_func_list,
+        weights_cost_l=weights_cost_l,
+        tensor_args=tensor_args
+    )
+
+    #--------------------------------- MPOT Init ---------------------------------
+
+    linear_ot_solver = Sinkhorn(
+        threshold=1e-3,
+        inner_iterations=1,
+        max_iterations=max_inner_iters,
+    )
+    ss_params = dict(
+        epsilon=epsilon,
+        ent_epsilon=ent_epsilon,
+        step_radius=step_radius,
+        probe_radius=probe_radius,
+        num_probe=num_probe,
+        min_iterations=5,
+        max_iterations=max_outer_iters,
+        threshold=2e-3,
+        store_history=True,
+        tensor_args=tensor_args,
+    )
+
+    mpot_params = dict(
+        objective_fn=cost,
+        linear_ot_solver=linear_ot_solver,
+        ss_params=ss_params,
+        dim=7,
+        traj_len=traj_len,
+        num_particles_per_goal=num_particles_per_goal,
+        dt=dt,
+        start_state=start_state,
+        multi_goal_states=multi_goal_states,
+        pos_limits=pos_limits,
+        vel_limits=vel_limits,
+        polytope=polytope,
+        fixed_goal=True,
+        sigma_start_init=0.001,
+        sigma_goal_init=0.001,
+        sigma_gp_init=sigma_gp_init,
+        seed=seed,
+        tensor_args=tensor_args,
+    )
+    planner = MPOT(**mpot_params)
+
+    # Optimize
+    with TimerCUDA() as t:
+        trajs, optim_state, opt_iters = planner.optimize()
+    sinkhorn_iters = optim_state.linear_convergence[:opt_iters]
+    print(f'Optimization finished at {opt_iters}! Optimization time: {t.elapsed:.3f} sec')
+    print(f'Average Sinkhorn Iterations: {sinkhorn_iters.mean():.2f}, min: {sinkhorn_iters.min():.2f}, max: {sinkhorn_iters.max():.2f}')
+
+    # -------------------------------- Visualize ---------------------------------
+    planner_visualizer = PlanningVisualizer(
+        task=task,
+        planner=planner
+    )
+
+    traj_history = optim_state.X_history[:opt_iters]
+    traj_history = traj_history.view(opt_iters, -1, traj_len, 14)  # 7 + 7
+    pos_trajs_iters = robot.get_position(traj_history)
+    trajs = trajs.flatten(0, 1)
+    trajs_coll, trajs_free = task.get_trajs_collision_and_free(trajs)
+
+    planner_visualizer.animate_opt_iters_joint_space_state(
+        trajs=traj_history,
+        pos_start_state=start_state, pos_goal_state=goal_state,
+        vel_start_state=torch.zeros_like(start_state), vel_goal_state=torch.zeros_like(goal_state),
+        video_filepath=f'{base_file_name}-joint-space-opt-iters.mp4',
+        n_frames=max((2, opt_iters // 2)),
+        anim_time=5
+    )
+
+    if trajs_free is not None:
+        planner_visualizer.animate_robot_trajectories(
+            trajs=trajs_free, start_state=start_state, goal_state=goal_state,
+            plot_trajs=False,
+            draw_links_spheres=False,
+            video_filepath=f'{base_file_name}-robot-traj.mp4',
+            # n_frames=max((2, pos_trajs_iters[-1].shape[1]//10)),
+            n_frames=trajs_free.shape[-2],
+            anim_time=duration
+        )
+
+    plt.show()
diff --git a/examples/mpot_planar.py b/examples/mpot_planar.py
deleted file mode 100644
index 8c4d8d5..0000000
--- a/examples/mpot_planar.py
+++ /dev/null
@@ -1,199 +0,0 @@
-import torch
-import time
-import random
-import matplotlib.pyplot as plt
-from matplotlib import animation, rc
-rc('animation', html='html5')
-save_path = 'data/planar'
-import numpy as np
-import os
-
-from mpot.envs.map_generator import generate_obstacle_map
-from mpot.ot.problem import Epsilon
-from mpot.ot.sinkhorn import Sinkhorn
-from mpot.planner import MPOT
-from mpot.costs import CostComposite, CostField, CostGPHolonomic
-from mpot.utils.trajectory import interpolate_trajectory
-from torch_robotics.torch_utils.torch_timer import TimerCUDA
-
-
-if __name__ == "__main__":
-    if not torch.cuda.is_available():
-        print('CUDA is not available. Using CPU instead!')
-        device = torch.device('cpu')
-    else:
-        device = torch.device('cuda')
-    tensor_args = {'device': device, 'dtype': torch.float32}
-    seed = int(time.time())
-    x = np.linspace(-10, 10, 200)
-    y = np.linspace(-10, 10, 200)
-    dim = 2  # state_dim = 4 for velocity
-    dt = 0.1
-    traj_len = 64
-    view_optim = True   # plot optimization progress
-
-    #--------------------------------- MPOT Tuning Parameters -------------------------------------
-    # NOTE: these parameters are tuned for the planar environment
-    step_radius = 1.0
-    probe_radius = 1.0  # probe radius >= step radius
-
-    # NOTE: changing polytope may require tuning again
-    polytope = 'cube'  # 'simplex' | 'orthoplex' | 'cube';
-
-    epsilon = 0.2
-    ent_epsilon = Epsilon(1e-2)
-    num_probe = 5  # number of probes points for each polytope vertices
-    num_particles_per_goal = 33  # number of plans per goal
-    pos_limits = [-10, 10]
-    vel_limits = [-10, 10]
-    w_coll = 2.4e-3  # for tuning the obstacle cost
-    w_smooth = 1e-7  # for tuning the GP cost: error = w_smooth * || Phi x(t) - x(1+1) ||^2
-    sigma_gp = 0.1   # for tuning the GP cost: Q_c = sigma_gp^2 * I
-    sigma_gp_init = 1.6   # for controlling the initial GP variance: Q0_c = sigma_gp_init^2 * I
-    max_inner_iters = 100  # max inner iterations for Sinkhorn-Knopp
-    max_outer_iters = 100  # max outer iterations for MPOT
-    #--------------------------------- Task Settings ----------------------------------------------
-
-    start_state = torch.tensor([-9, -9, 0., 0.], **tensor_args)
-
-    # NOTE: change goal states here (zero vel goals)
-    multi_goal_states = torch.tensor([
-        [0, 9, 0., 0.],
-        [9, 9, 0., 0.],
-        [9, 0, 0., 0.]
-    ], **tensor_args)
-
-    ## Obstacle map
-    obst_params = dict(
-        map_dim=[20, 20],
-        obst_list=[],
-        cell_size=0.1,
-        random_gen=True,
-        num_obst=15,
-        rand_xy_limits=[[-7.5, 7.5], [-7.5, 7.5]],
-        tensor_args=tensor_args,
-    )
-    # For obst. generation
-    random.seed(seed)
-    np.random.seed(seed)
-    obst_map = generate_obstacle_map(**obst_params)[0]
-
-    #--------------------------------- Cost function ---------------------------------
-
-    cost_gp = CostGPHolonomic(dim, traj_len, dt, sigma_gp, [0, 1], weight=w_smooth, tensor_args=tensor_args)
-    cost_obst_2D = CostField(dim, [0, traj_len], field=obst_map, weight=w_coll, tensor_args=tensor_args)
-    cost_func_list = [cost_obst_2D, cost_gp]
-    objective_fn = CostComposite(dim, cost_func_list, tensor_args=tensor_args)
-
-    #--------------------------------- MPOT Init ---------------------------------
-    # Sinkhorn-Knopp parameters
-    linear_ot_solver = Sinkhorn(
-        threshold=1e-6,
-        inner_iterations=1,
-        max_iterations=max_inner_iters,
-    )
-    ss_params = dict(
-        epsilon=epsilon,
-        ent_epsilon=ent_epsilon,
-        step_radius=step_radius,
-        probe_radius=probe_radius,
-        num_probe=num_probe,
-        min_iterations=5,
-        max_iterations=max_outer_iters,
-        threshold=8e-5,
-        store_history=True,
-        tensor_args=tensor_args,
-    )
-
-    mpot_params = dict(
-        dim=dim,
-        objective_fn=objective_fn,
-        linear_ot_solver=linear_ot_solver,
-        ss_params=ss_params,
-        traj_len=traj_len,
-        num_particles_per_goal=num_particles_per_goal,
-        dt=dt,
-        start_state=start_state,
-        multi_goal_states=multi_goal_states,
-        pos_limits=pos_limits,
-        vel_limits=vel_limits,
-        polytope=polytope,
-        fixed_goal=True,
-        sigma_start_init=0.001,
-        sigma_goal_init=0.001,
-        sigma_gp_init=sigma_gp_init,
-        seed=seed,
-        tensor_args=tensor_args,
-    )
-    mpot = MPOT(**mpot_params)
-
-    #--------------------------------- Optimize ---------------------------------
-
-    with TimerCUDA() as t:
-        trajs, optim_state, iter = mpot.optimize()
-    int_trajs = interpolate_trajectory(trajs, num_interpolation=3)
-    colls = obst_map.get_collisions(int_trajs[..., :2]).any(dim=1)
-    print(f'Optimization finished at {iter}! Parallelization Quality (GOOD [%]): {(1 - colls.float().mean()) * 100:.2f}')
-    print(f'Time(s) optim: {t.elapsed} sec')
-    if view_optim:
-        os.makedirs(save_path, exist_ok=True)
-        X_hist = optim_state.X_history[:iter]
-        fig, ax = plt.subplots()
-        points = []
-        lines = []
-        for i in range(3 * num_particles_per_goal):
-            point, = ax.plot([], [], 'ro', alpha=0.7, markersize=3)
-            line,  = ax.plot([], [], 'r-', alpha=0.5, linewidth=0.5)
-            points.append(point)
-            lines.append(line)
-        text = ax.text(10, 11, f'Iters {i}', style='italic')
-        ax.set_xlim((-10, 10))
-        ax.set_ylim((-10, 10))
-        ax.axis('off')
-        ax.set_aspect('equal')
-        cs = ax.contourf(x, y, obst_map.map, 20, cmap='Greys', alpha=1)
-        multi_goal_states = multi_goal_states.cpu().numpy()
-        for i in range(multi_goal_states.shape[0]):
-            ax.plot(multi_goal_states[i, 0], multi_goal_states[i, 1], 'go', markersize=5)
-        fig.tight_layout()
-
-        def plot_history(i):
-            X = X_hist[i]
-            X = X.view(multi_goal_states.shape[0] * num_particles_per_goal, -1, 4)
-            # free trajs flag
-            colls = obst_map.get_collisions(X[:, :, :2]).any(dim=1)
-            X = X.cpu().numpy()
-            
-            for j in range(X.shape[0]):
-                if colls[j]:
-                    points[j].set_color('black')
-                    lines[j].set_color('black')
-                else:
-                    points[j].set_color('red')
-                    lines[j].set_color('red')
-                points[j].set_data(X[j, :, 0], X[j, :, 1])
-                lines[j].set_data(X[j, :, 0], X[j, :, 1])
-            text.set_text(f'Iters: {i}')
-            return *points, *lines
-
-        print('Saving animation..., please wait')
-        anim = animation.FuncAnimation(fig, plot_history, frames=len(X_hist), interval=20, blit=True)
-        anim.save('data/planar/planar.gif', writer='imagemagick', fps=30)
-
-    #--------------------------------- Plotting ---------------------------------  
-
-    fig = plt.figure()
-    ax = fig.gca()
-    cs = ax.contourf(x, y, obst_map.map, 20, cmap='Greys', alpha=1.)
-    ax.set_aspect('equal')
-    trajs = trajs.cpu().numpy()
-    vel_trajs = trajs[:, :, :, 2:]
-    for i in range(trajs.shape[0]):
-        for j in range(trajs.shape[1]):
-            ax.plot(trajs[i, j, :, 0], trajs[i, j, :, 1], 'b-', alpha=0.5, linewidth=0.5)
-    # plot goal
-    for i in range(multi_goal_states.shape[0]):
-        ax.plot(multi_goal_states[i, 0], multi_goal_states[i, 1], 'go', markersize=5)
-    ax.axis('off')
-    fig.tight_layout()
-    plt.show()
diff --git a/examples/mpot_sdf.py b/examples/mpot_sdf.py
new file mode 100644
index 0000000..b6931da
--- /dev/null
+++ b/examples/mpot_sdf.py
@@ -0,0 +1,175 @@
+import os
+from pathlib import Path
+import time
+import matplotlib.pyplot as plt
+import torch
+from einops._torch_specific import allow_ops_in_compiled_graph  # requires einops>=0.6.1
+
+from mpot.ot.problem import Epsilon
+from mpot.ot.sinkhorn import Sinkhorn
+from mpot.planner import MPOT
+from mpot.costs import CostGPHolonomic, CostField, CostComposite
+
+from torch_robotics.environments.env_grid_circles_2d import EnvGridCircles2D
+from torch_robotics.robots.robot_point_mass import RobotPointMass
+from torch_robotics.tasks.tasks import PlanningTask
+from torch_robotics.torch_utils.seed import fix_random_seed
+from torch_robotics.torch_utils.torch_timer import TimerCUDA
+from torch_robotics.torch_utils.torch_utils import get_torch_device
+from torch_robotics.visualizers.planning_visualizer import PlanningVisualizer
+
+allow_ops_in_compiled_graph()
+
+
+if __name__ == "__main__":
+    seed = int(time.time())
+    fix_random_seed(seed)
+
+    device = get_torch_device()
+    tensor_args = {'device': device, 'dtype': torch.float32}
+
+    # ---------------------------- Environment, Robot, PlanningTask ---------------------------------
+    env = EnvGridCircles2D(
+        precompute_sdf_obj_fixed=False,
+        sdf_cell_size=0.001,
+        tensor_args=tensor_args
+    )
+
+    robot = RobotPointMass(
+        q_limits=torch.tensor([[-1, -1], [1, 1]], **tensor_args),  # joint limits
+        tensor_args=tensor_args
+    )
+
+    task = PlanningTask(
+        env=env,
+        robot=robot,
+        ws_limits=torch.tensor([[-0.81, -0.81], [0.95, 0.95]], **tensor_args),  # workspace limits
+        obstacle_cutoff_margin=0.005,
+        tensor_args=tensor_args
+    )
+
+    # -------------------------------- Params ---------------------------------
+
+    # NOTE: these parameters are tuned for this environment
+    step_radius = 0.15
+    probe_radius = 0.15  # probe radius >= step radius
+
+    # NOTE: changing polytope may require tuning again
+    polytope = 'cube'  # 'simplex' | 'orthoplex' | 'cube';
+
+    epsilon = 0.01
+    ent_epsilon = Epsilon(1e-2)
+    num_probe = 5  # number of probes points for each polytope vertices
+    num_particles_per_goal = 50  # number of plans per goal  # NOTE: if memory is not enough, reduce this number
+    pos_limits = [-1, 1]
+    vel_limits = [-1, 1]
+    w_coll = 7e-2  # for tuning the obstacle cost
+    w_smooth = 1e-7  # for tuning the GP cost: error = w_smooth * || Phi x(t) - x(1+1) ||^2
+    sigma_gp = 0.03   # for tuning the GP cost: Q_c = sigma_gp^2 * I
+    sigma_gp_init = 0.8   # for controlling the initial GP variance: Q0_c = sigma_gp_init^2 * I
+    max_inner_iters = 100  # max inner iterations for Sinkhorn-Knopp
+    max_outer_iters = 70  # max outer iterations for MPOT
+    start_state = torch.tensor([-0.8, -0.8, 0., 0.], **tensor_args)
+    multi_goal_states = torch.tensor([
+        [0, 0.75, 0., 0.],
+        [0.75, 0.75, 0., 0.],
+        [0.75, 0, 0., 0.]
+    ], **tensor_args)
+
+    traj_len = 64
+    dt = 0.04
+
+    #--------------------------------- Cost function ---------------------------------
+
+    cost_coll = CostField(
+        robot, traj_len,
+        field=task.df_collision_objects,
+        sigma_coll=1.0,
+        tensor_args=tensor_args
+    )
+    cost_gp = CostGPHolonomic(robot, traj_len, dt, sigma_gp, [0, 1], weight=w_smooth, tensor_args=tensor_args)
+    cost_func_list = [cost_coll, cost_gp]
+    weights_cost_l = [w_coll, w_smooth]
+    cost = CostComposite(
+        robot, traj_len, cost_func_list,
+        weights_cost_l=weights_cost_l,
+        tensor_args=tensor_args
+    )
+
+    #--------------------------------- MPOT Init ---------------------------------
+
+    linear_ot_solver = Sinkhorn(
+        threshold=1e-4,
+        inner_iterations=1,
+        max_iterations=max_inner_iters,
+    )
+    ss_params = dict(
+        epsilon=epsilon,
+        ent_epsilon=ent_epsilon,
+        step_radius=step_radius,
+        probe_radius=probe_radius,
+        num_probe=num_probe,
+        min_iterations=5,
+        max_iterations=max_outer_iters,
+        threshold=1e-3,
+        store_history=True,
+        tensor_args=tensor_args,
+    )
+
+    mpot_params = dict(
+        objective_fn=cost,
+        linear_ot_solver=linear_ot_solver,
+        ss_params=ss_params,
+        dim=2,
+        traj_len=traj_len,
+        num_particles_per_goal=num_particles_per_goal,
+        dt=dt,
+        start_state=start_state,
+        multi_goal_states=multi_goal_states,
+        pos_limits=pos_limits,
+        vel_limits=vel_limits,
+        polytope=polytope,
+        fixed_goal=True,
+        sigma_start_init=0.001,
+        sigma_goal_init=0.001,
+        sigma_gp_init=sigma_gp_init,
+        seed=seed,
+        tensor_args=tensor_args,
+    )
+    planner = MPOT(**mpot_params)
+
+    # Optimize
+    with TimerCUDA() as t:
+        trajs, optim_state, opt_iters = planner.optimize()
+    sinkhorn_iters = optim_state.linear_convergence[:opt_iters]
+    print(f'Optimization finished at {opt_iters}! Optimization time: {t.elapsed:.3f} sec')
+    print(f'Average Sinkhorn Iterations: {sinkhorn_iters.mean():.2f}, min: {sinkhorn_iters.min():.2f}, max: {sinkhorn_iters.max():.2f}')
+
+    # -------------------------------- Visualize ---------------------------------
+    planner_visualizer = PlanningVisualizer(
+        task=task,
+        planner=planner
+    )
+
+    traj_history = optim_state.X_history[:opt_iters]
+    traj_history = traj_history.view(opt_iters, -1, traj_len, 4)
+    base_file_name = Path(os.path.basename(__file__)).stem
+    pos_trajs_iters = robot.get_position(traj_history)
+
+    planner_visualizer.animate_opt_iters_joint_space_state(
+        trajs=traj_history,
+        pos_start_state=start_state,
+        vel_start_state=torch.zeros_like(start_state),
+        video_filepath=f'{base_file_name}-joint-space-opt-iters.mp4',
+        n_frames=max((2, opt_iters // 5)),
+        anim_time=5
+    )
+
+    planner_visualizer.animate_opt_iters_robots(
+        trajs=pos_trajs_iters, start_state=start_state,
+        video_filepath=f'{base_file_name}-traj-opt-iters.mp4',
+        n_frames=max((2, opt_iters// 5)),
+        anim_time=5
+    )
+
+    plt.show()
diff --git a/mpot/costs.py b/mpot/costs.py
index 40b66fb..11f0ad4 100644
--- a/mpot/costs.py
+++ b/mpot/costs.py
@@ -1,130 +1,125 @@
-from typing import Any, Optional, Tuple, List, Callable
 from abc import ABC, abstractmethod
+from typing import Any, Optional, Tuple, List, Callable
 import torch
+import einops
+
+from mpot.gp.field_factor import FieldFactor
 
 
 class Cost(ABC):
-    def __init__(self, dim: int, tensor_args=None):
-        if tensor_args is None:
-            tensor_args = {'device': torch.device('cpu'), 'dtype': torch.float32}
-        self.dim = dim
-        self.state_dim = dim * 2
-        self.tensor_args = tensor_args
+    def __init__(self, robot, n_support_points, tensor_args=None, **kwargs):
+        self.robot = robot
+        self.n_dof = robot.q_dim
+        self.dim = 2 * self.n_dof  # position + velocity
+        self.n_support_points = n_support_points
 
-    def __call__(self, *args: Any, **observation: Any) -> Any:
-        return self.eval(*args, **observation)
+        self.tensor_args = tensor_args
 
-    @abstractmethod
-    def eval(self, points: torch.Tensor, observation=None):
+    def set_cost_factors(self):
         pass
 
+    def __call__(self, trajs, **kwargs):
+        return self.eval(trajs, **kwargs)
 
-class CostComposite(Cost):
-
-    def __init__(
-        self,
-        dim: int,
-        cost_list: List[Cost],
-        FK: Callable = None,
-        tensor_args=None
-    ):
-        super().__init__(dim, tensor_args=tensor_args)
-        self.cost_list = cost_list
-        self.FK = FK
-
-    def cost(self, points: torch.Tensor, x_points: torch.Tensor = None, **observation) -> torch.Tensor:
-        traj_dim = observation.get('traj_dim', None)
-        x_points = None
-        if self.FK is not None:  # NOTE: only works with SE(3) FK for now
-            x_points = self.FK(points.view(-1, self.state_dim)[:, :self.dim]).reshape(traj_dim[:-1] + (-1, 4, 4))
-        costs = 0
-        for cost in self.cost_list:
-            costs += cost.cost(points, x_points=x_points, **observation)
-        return costs
-
-    def eval(self, points: torch.Tensor, **observation) -> torch.Tensor:
-        num1, num2, num_probe, dim = points.shape
-        traj_dim = observation.get('traj_dim', None)
-        assert dim == self.state_dim
-        x_points = None
-        if self.FK is not None:  # NOTE: only works with SE(3) FK for now
-            x_points = self.FK(points.view(-1, self.state_dim)[:, :self.dim]).reshape(traj_dim[:-1] + (num2, num_probe, -1, 4, 4))
-        costs = 0
+    @abstractmethod
+    def eval(self, trajs, **kwargs):
+        pass
 
-        for cost in self.cost_list:
-            costs += cost.eval(points, x_points=x_points, **observation)
+    def get_q_pos_vel_and_fk_map(self, trajs, **kwargs):
+        assert trajs.ndim == 3 or trajs.ndim == 4
+        N = 1
+        if trajs.ndim == 4:
+            N, B, H, D = trajs.shape  # n_goals (or steps), batch of trajectories, length, dim
+            trajs = einops.rearrange(trajs, 'N B H D -> (N B) H D')
+        else:
+            B, H, D = trajs.shape
 
-        return costs.mean(-1)  # mean the probe point dim
+        q_pos = self.robot.get_position(trajs)
+        q_vel = self.robot.get_velocity(trajs)
+        H_positions = self.robot.fk_map_collision(q_pos)  # I, taskspaces, x_dim+1, x_dim+1 (homogeneous transformation matrices)
+        return trajs, q_pos, q_vel, H_positions
 
 
 class CostField(Cost):
 
     def __init__(
         self,
-        dim: int,
-        traj_range: Tuple[int, int],
+        robot,
+        n_support_points: int,
         field: Callable = None,
-        weight: float = 1.,
-        tensor_args=None,
+        sigma: float = 1.0,
+        **kwargs
     ):
-        super().__init__(dim, tensor_args=tensor_args)
-        self.traj_range = traj_range
+        super().__init__(robot, n_support_points, **kwargs)
         self.field = field
-        self.weight = weight
-
-    def cost(self, points: torch.Tensor, x_points: torch.Tensor = None, **observation) -> torch.Tensor:
+        self.sigma = sigma
+
+        self.set_cost_factors()
+    
+    def set_cost_factors(self):
+        # ========= Cost factors ===============
+        self.obst_factor = FieldFactor(
+            self.n_dof,
+            self.sigma,
+            [0, None]
+        )
+
+    def cost(self, trajs: torch.Tensor, **observation) -> torch.Tensor:
         traj_dim = observation.get('traj_dim', None)
         if self.field is None:
             return 0
-        trajs = points.view(traj_dim)  # [..., traj_len, dim]
-        states = trajs[..., :self.dim] if x_points is None else x_points
-        field_cost = self.field(states, **observation) * self.weight
+        trajs = trajs.view(traj_dim)  # [..., traj_len, dim]
+        states = trajs[..., :self.n_dof]
+        field_cost = self.field.compute_cost(states, **observation)
         return field_cost.view(traj_dim[:-1]).mean(-1)  # mean the traj_len
 
-    def eval(self, points: torch.Tensor, x_points: torch.Tensor = None, **observation) -> torch.Tensor:
-        traj_dim = observation.get('traj_dim', None)
-        if self.field is None:
-            return 0
-        optim_dim = points.shape
-        cost_dim = traj_dim[:-1] + optim_dim[1:3]  # [..., traj_len] + [nb2, num_probe]
-        costs = torch.zeros(cost_dim, **self.tensor_args)
-        points = points.view(cost_dim + traj_dim[-1:])
-        states = points[..., self.traj_range[0]:self.traj_range[1], :, :, :self.dim] if x_points is None else x_points[..., self.traj_range[0]:self.traj_range[1], :, :, :, :, :]
-        field_cost = self.field.compute_cost(states, **observation).view(cost_dim[:-3] + (self.traj_range[1] - self.traj_range[0], ) + cost_dim[-2:]) * self.weight
-        costs[..., self.traj_range[0]:self.traj_range[1], :, :] = field_cost
+    def eval(self, trajs: torch.Tensor, q_pos=None, q_vel=None, H_positions=None, **observation):
+        optim_dim = observation.get('optim_dim')
+        costs = 0
+        if self.field is not None:
+            # H_pos = link_pos_from_link_tensor(H)  # get translation part from transformation matrices
+            H_pos = H_positions
+            err_obst = self.obst_factor.get_error(
+                trajs,
+                self.field,
+                q_pos=q_pos,
+                q_vel=q_vel,
+                H_pos=H_pos,
+                calc_jacobian=False
+            )
+            w_mat = self.obst_factor.K
+            obst_costs = w_mat * err_obst.mean(-1)
+            costs = obst_costs.reshape(optim_dim[:2])
 
-        return costs.view((-1,) + optim_dim[1:3])
+        return costs
 
 
 class CostGPHolonomic(Cost):
 
     def __init__(
         self,
-        dim: int,
-        traj_len: int,
+        robot,
+        n_support_points: int,
         dt: float,
         sigma: float,
         probe_range: Tuple[int, int],
-        weight: float = 1.,
         Q_c_inv: torch.Tensor = None,
-        tensor_args=None,
+        **kwargs
     ):
-        super().__init__(dim, tensor_args=tensor_args)
+        super().__init__(robot, n_support_points, **kwargs)
         self.dt = dt
-        self.traj_len = traj_len
         self.phi = self.calc_phi()
         self.phi_T = self.phi.T
         if Q_c_inv is None:
-            Q_c_inv = torch.eye(self.dim, **self.tensor_args) / sigma**2
-        self.Q_c_inv = torch.zeros(self.traj_len - 1, self.dim, self.dim, **self.tensor_args) + Q_c_inv
+            Q_c_inv = torch.eye(self.n_dof, **self.tensor_args) / sigma**2
+        self.Q_c_inv = torch.zeros(self.n_support_points - 1, self.n_dof, self.n_dof, **self.tensor_args) + Q_c_inv
         self.Q_inv = self.calc_Q_inv()
         self.single_Q_inv = self.Q_inv[[0]]
         self.probe_range = probe_range
-        self.weight = weight
 
     def calc_phi(self) -> torch.Tensor:
-        I = torch.eye(self.dim, **self.tensor_args)
-        Z = torch.zeros(self.dim, self.dim, **self.tensor_args)
+        I = torch.eye(self.n_dof, **self.tensor_args)
+        Z = torch.zeros(self.n_dof, self.n_dof, **self.tensor_args)
         phi_u = torch.cat((I, self.dt * I), dim=1)
         phi_l = torch.cat((Z, I), dim=1)
         phi = torch.cat((phi_u, phi_l), dim=0)
@@ -140,33 +135,80 @@ def calc_Q_inv(self) -> torch.Tensor:
         Q_inv  = torch.cat((Q_inv_u, Q_inv_l), dim=-2)
         return Q_inv
 
-    def cost(self, points: torch.Tensor, x_points: torch.Tensor = None, **observation) -> torch.Tensor:
+    def cost(self, trajs: torch.Tensor, **observation) -> torch.Tensor:
         traj_dim = observation.get('traj_dim', None)
-        trajs = points.view(traj_dim)  # [..., traj_len, dim]
-        errors = (trajs[..., 1:, :] - trajs[..., :-1, :] @ self.phi_T) * self.weight  # [..., traj_len-1, dim * 2]
-        costs = torch.einsum('...ij,...ijk,...ik->...i', errors, self.single_Q_inv, errors)  # [..., traj_len-1]
-        return costs.mean(dim=-1)  # mean the traj_len
+        trajs = trajs.view(traj_dim)  # [..., n_support_points, dim]
+        errors = (trajs[..., 1:, :] - trajs[..., :-1, :] @ self.phi_T)  # [..., n_support_points-1, dim * 2]
+        costs = torch.einsum('...ij,...ijk,...ik->...i', errors, self.single_Q_inv, errors)  # [..., n_support_points-1]
+        return costs.mean(dim=-1)  # mean the n_support_points
 
-    def eval(self, points: torch.Tensor, x_points: torch.Tensor = None, **observation) -> torch.Tensor:
-        traj_dim = observation.get('traj_dim', None)
-        current_trajs = observation.get('current_trajs', None)
-        current_trajs = current_trajs.view(traj_dim)  # [..., traj_len, dim]
-        current_trajs = current_trajs.unsqueeze(-2).unsqueeze(-2)  # [..., traj_len, 1, 1, dim]
+    def eval(self, trajs: torch.Tensor, **observation) -> torch.Tensor:
+        traj_dim = observation.get('traj_dim')
+        optim_dim = observation.get('optim_dim')
 
-        cost_dim = traj_dim[:-1] + points.shape[1:3]  # [..., traj_len] + [nb2, num_probe]
+        current_trajs = observation.get('current_trajs')
+        current_trajs = current_trajs.view(traj_dim)  # [..., n_support_points, dim]
+        current_trajs = current_trajs.unsqueeze(-2).unsqueeze(-2)  # [..., n_support_points, 1, 1, dim]
+
+        cost_dim = traj_dim[:-1] + optim_dim[1:3]  # [..., n_support_points] + [nb2, num_probe]
         costs = torch.zeros(cost_dim, **self.tensor_args)
-        states = points
+        states = trajs
+
         probe_points = states[..., self.probe_range[0]:self.probe_range[1], :]  # [..., nb2, num_eval, dim * 2]
         len_probe = probe_points.shape[-2]
-        probe_points = probe_points.view(traj_dim[:-1] + (points.shape[1], len_probe, self.state_dim,))  # [..., traj_len] + [nb2, num_eval, dim * 2]
-        right_errors = probe_points[..., 1:self.traj_len, :, :, :] - current_trajs[..., 0:self.traj_len-1, :, :, :] @ self.phi_T # [..., traj_len-1, nb2, num_eval, dim * 2]
-        left_errors = current_trajs[..., 1:self.traj_len, :, :, :] - probe_points[..., 0:self.traj_len-1, :, :, :] @ self.phi_T # [..., traj_len-1, nb2, num_eval, dim * 2]
+        probe_points = probe_points.view(traj_dim[:-1] + (optim_dim[1], len_probe, self.dim,))  # [..., n_support_points] + [nb2, num_eval, dim * 2]
+        right_errors = probe_points[..., 1:self.n_support_points, :, :, :] - current_trajs[..., 0:self.n_support_points-1, :, :, :] @ self.phi_T # [..., n_support_points-1, nb2, num_eval, dim * 2]
+        left_errors = current_trajs[..., 1:self.n_support_points, :, :, :] - probe_points[..., 0:self.n_support_points-1, :, :, :] @ self.phi_T # [..., n_support_points-1, nb2, num_eval, dim * 2]
         # mahalanobis distance
-        left_cost_dist = torch.einsum('...ij,...ijk,...ik->...i', left_errors, self.single_Q_inv, left_errors) * (self.weight / 2)  # [..., traj_len-1, nb2, num_eval]
-        right_cost_dist = torch.einsum('...ij,...ijk,...ik->...i', right_errors, self.single_Q_inv, right_errors) * (self.weight / 2)  # [..., traj_len-1, nb2, num_eval]
-        # cost_dist = scale_cost_matrix(cost_dist)
-        # cost_dist = torch.sqrt(cost_dist)
-        costs[..., 0:self.traj_len-1, :, self.probe_range[0]:self.probe_range[1]] += left_cost_dist
-        costs[..., 1:self.traj_len, :, self.probe_range[0]:self.probe_range[1]] += right_cost_dist
-
-        return costs.view((-1,) + points.shape[1:3])
+        left_cost_dist = torch.einsum('...ij,...ijk,...ik->...i', left_errors, self.single_Q_inv, left_errors)  # [..., n_support_points-1, nb2, num_eval]
+        right_cost_dist = torch.einsum('...ij,...ijk,...ik->...i', right_errors, self.single_Q_inv, right_errors)  # [..., n_support_points-1, nb2, num_eval]
+
+        costs[..., 0:self.n_support_points-1, :, self.probe_range[0]:self.probe_range[1]] += left_cost_dist
+        costs[..., 1:self.n_support_points, :, self.probe_range[0]:self.probe_range[1]] += right_cost_dist
+        costs = costs.view(optim_dim).mean(dim=-1)  # mean the probe
+
+        return costs
+
+
+class CostComposite(Cost):
+
+    def __init__(
+        self,
+        robot,
+        n_support_points,
+        cost_list,
+        weights_cost_l=None,
+        **kwargs
+    ):
+        super().__init__(robot, n_support_points, **kwargs)
+        self.cost_l = cost_list
+        self.weight_cost_l = weights_cost_l if weights_cost_l is not None else [1.0] * len(cost_list)
+
+    def eval(self, trajs, trajs_interpolated=None, return_invidual_costs_and_weights=False, **kwargs):
+        trajs, q_pos, q_vel, H_positions = self.get_q_pos_vel_and_fk_map(trajs)
+
+        if not return_invidual_costs_and_weights:
+            cost_total = 0
+            for cost, weight_cost in zip(self.cost_l, self.weight_cost_l):
+                if trajs_interpolated is not None:
+                    trajs_tmp = trajs_interpolated
+                else:
+                    trajs_tmp = trajs
+                cost_tmp = weight_cost * cost(trajs_tmp, q_pos=q_pos, q_vel=q_vel, H_positions=H_positions, **kwargs)
+                cost_total += cost_tmp
+            return cost_total
+        else:
+            cost_l = []
+            for cost in self.cost_l:
+                if trajs_interpolated is not None:
+                    # Compute only collision costs with interpolated trajectories.
+                    # Other costs are computed with non-interpolated trajectories, e.g. smoothness
+                    trajs_tmp = trajs_interpolated
+                else:
+                    trajs_tmp = trajs
+
+                cost_tmp = cost(trajs_tmp, q_pos=q_pos, q_vel=q_vel, H_positions=H_positions, **kwargs)
+                cost_l.append(cost_tmp)
+
+            if return_invidual_costs_and_weights:
+                return cost_l, self.weight_cost_l
diff --git a/mpot/envs/map_generator.py b/mpot/envs/map_generator.py
index 23df8d6..8223d0c 100644
--- a/mpot/envs/map_generator.py
+++ b/mpot/envs/map_generator.py
@@ -5,121 +5,92 @@
 from mpot.envs.obst_utils import random_rect, random_circle
 import copy
 
+from torch_robotics.environments.primitives import MultiSphereField, ObjectField, MultiBoxField
 
-def generate_obstacle_map(
-        map_dim=(10, 10),
-        obst_list=[],
-        cell_size=1.,
-        random_gen=False,
-        num_obst=0,
-        rand_xy_limits=None,
+
+def random_obstacles(
+        map_dim = (1, 1),
+        cell_size: float = 1.,
+        num_obst: int = 5,
+        rand_xy_limits=[[-1, 1], [-1, 1]],
         rand_rect_shape=[2, 2],
-        rand_circle_radius=1,
+        rand_circle_radius: float = 1,
+        max_attempts: int = 50,
         tensor_args=None,
 ):
-
-    """
-    Args
-    ---
-    map_dim : (int,int)
-        2D tuple containing dimensions of obstacle/occupancy grid.
-        Treat as [x,y] coordinates. Origin is in the center.
-        ** Dimensions must be an even number. **
-    cell_sz : float
-        size of each square map cell
-    obst_list : [(cx_i, cy_i, width, height)]
-        List of obstacle param tuples
-    start_pts : float
-        Array of x-y points for start configuration.
-        Dim: [Num. of points, 2]
-    goal_pts : float
-        Array of x-y points for target configuration.
-        Dim: [Num. of points, 2]
-    seed : int or None
-    random_gen : bool
-        Specify whether to generate random obstacles. Will first generate obstacles provided by obst_list,
-        then add random obstacles until number specified by num_obst.
-    num_obst : int
-        Total number of obstacles
-    rand_limit: [[float, float],[float, float]]
-        List defining x-y sampling bounds [[x_min, x_max], [y_min, y_max]]
-    rand_shape: [float, float]
-        Shape [width, height] of randomly generated obstacles.
-    """
-    ## Make occupancy grid
     obst_map = ObstacleMap(map_dim, cell_size, tensor_args=tensor_args)
-    num_fixed = len(obst_list)
-    for obst in obst_list:
-        obst._add_to_map(obst_map)
+    num_boxes = np.random.randint(0, num_obst)
+    num_circles = num_obst - num_boxes
+    # randomize box obstacles
+    xlim = rand_xy_limits[0]
+    ylim = rand_xy_limits[1]
+    width, height = rand_rect_shape
+
+    boxes = []
+    for _ in range(num_boxes):
+        num_attempts = 0
+        while num_attempts <= max_attempts:
+            obst = random_rect(xlim, ylim, width, height)
 
-    ## Add random obstacles
-    obst_list = copy.deepcopy(obst_list)
-    if random_gen:
-        assert num_fixed <= num_obst, "Total number of obstacles must be greater than or equal to number specified in obst_list"
-        xlim = rand_xy_limits[0]
-        ylim = rand_xy_limits[1]
-        width = rand_rect_shape[0]
-        height = rand_rect_shape[1]
-        radius = rand_circle_radius
-        for _ in range(num_obst - num_fixed):
-            num_attempts = 0
-            max_attempts = 25
-            while num_attempts <= max_attempts:
-                if np.random.choice(2):
-                    obst = random_rect(xlim, ylim, width, height)
-                else:
-                    obst = random_circle(xlim, ylim, radius)
+            # Check validity of new obstacle
+            # Do not overlap obstacles
+            valid = obst._obstacle_collision_check(obst_map)
+            if valid:
+                # Add to Map
+                obst._add_to_map(obst_map)
+                # Add to list
+                boxes.append(obst.to_array())
+                break
 
-                # Check validity of new obstacle
-                # Do not overlap obstacles
-                valid = obst._obstacle_collision_check(obst_map)
+            if num_attempts == max_attempts:
+                print("Obstacle generation: Max. number of attempts reached. ")
+                print(f"Total num. boxes: {len(boxes)}")
+            num_attempts += 1
+    boxes = torch.tensor(np.array(boxes), **tensor_args)
+    cubes = MultiBoxField(boxes[:, :2], boxes[:, 2:], tensor_args=tensor_args)
+    box_field = ObjectField([cubes], 'random-boxes')
 
-                if valid:
-                    # Add to Map
-                    obst._add_to_map(obst_map)
-                    # Add to list
-                    obst_list.append(obst)
-                    break
+    # randomize circle obstacles
+    circles = []
+    for _ in range(num_circles):
+        num_attempts = 0
+        while num_attempts <= max_attempts:
+            obst = random_circle(xlim, ylim, rand_circle_radius)
+            # Check validity of new obstacle
+            # Do not overlap obstacles
+            valid = obst._obstacle_collision_check(obst_map)
 
-                if num_attempts == max_attempts:
-                    print("Obstacle generation: Max. number of attempts reached. ")
-                    print("Total num. obstacles: {}.  Num. random obstacles: {}.\n"
-                          .format( len(obst_list), len(obst_list) - num_fixed))
+            if valid:
+                # Add to Map
+                obst._add_to_map(obst_map)
+                # Add to list
+                circles.append(obst.to_array())
+                break
 
-                num_attempts += 1
+            if num_attempts == max_attempts:
+                print("Obstacle generation: Max. number of attempts reached. ")
+                print(f"Total num. boxes: {len(circles)}")
 
+            num_attempts += 1
+    circles = torch.tensor(np.array(circles), **tensor_args)
+    spheres = MultiSphereField(circles[:, :2], circles[:, 2], tensor_args=tensor_args)
+    sphere_field = ObjectField([spheres], 'random-spheres')
+    obj_list = [box_field, sphere_field]
     obst_map.convert_map()
-    return obst_map, obst_list
+    return obst_map, obj_list
 
 
 if __name__ == "__main__":
-    import sys
-    import numpy
-    numpy.set_printoptions(threshold=sys.maxsize)
-    obst_list = [
-        ObstacleRectangle(0, 0, 2, 3),
-        ObstacleCircle(-5, -5, 1)
-    ]
-
     cell_size = 0.1
     map_dim = [20, 20]
     seed = 2
     tensor_args = {'device': torch.device('cpu'), 'dtype': torch.float32}
-    obst_map, obst_list = generate_obstacle_map(
-        map_dim, obst_list, cell_size,
-        random_gen=True,
-        # random_gen=False,
+    obst_map, obst_list = random_obstacles(
+        map_dim, cell_size,
         num_obst=5,
         rand_xy_limits=[[-5, 5], [-5, 5]],
         rand_rect_shape=[2,2],
         rand_circle_radius=1,
         tensor_args=tensor_args
     )
-
     fig = obst_map.plot()
-
-    traj_y = torch.linspace(-map_dim[1]/2., map_dim[1]/2., 20)
-    traj_x = torch.zeros_like(traj_y)
-    X = torch.cat((traj_x.unsqueeze(1), traj_y.unsqueeze(1)), dim=1)
-    cost = obst_map.get_collisions(X)
-    print(cost)
diff --git a/mpot/envs/obst_map.py b/mpot/envs/obst_map.py
index b1ded12..9c707d5 100644
--- a/mpot/envs/obst_map.py
+++ b/mpot/envs/obst_map.py
@@ -70,6 +70,9 @@ def _add_to_map(self, obst_map):
             c_x + ceil(w/2.) + obst_map.origin_xi,
             ] += 1
         return obst_map
+    
+    def to_array(self):
+        return np.array([self.center_x, self.center_y, self.width, self.height])
 
 
 class ObstacleCircle(Obstacle):
@@ -104,6 +107,9 @@ def _add_to_map(self, obst_map):
                     obst_map.map[i, j] += 1
         return obst_map
 
+    def to_array(self):
+        return np.array([self.center_x, self.center_y, self.radius])
+
 
 class ObstacleMap:
     """
@@ -161,7 +167,7 @@ def get_xy_grid(self, device):
         xy_grid = torch.stack((xv, yv), dim=2)
         return xy_grid.to(device)
 
-    def get_collisions(self, X, **kwargs):
+    def get_collisions(self, X, *args, **kwargs):
         """
         Checks for collision in a batch of trajectories using the generated occupancy grid (i.e. obstacle map), and
         returns sum of collision costs for the entire batch.
@@ -181,8 +187,8 @@ def get_collisions(self, X, **kwargs):
         collision_vals = self.map_torch[X_occ[..., 1], X_occ[..., 0]]
         return collision_vals
 
-    def compute_cost(self, X, **kwargs):
-        return self.get_collisions(X, **kwargs)
+    def compute_cost(self, X, *args, **kwargs):
+        return self.get_collisions(X, *args, **kwargs)
 
     def zero_grad(self):
         pass
diff --git a/mpot/envs/occupancy.py b/mpot/envs/occupancy.py
new file mode 100644
index 0000000..31e0023
--- /dev/null
+++ b/mpot/envs/occupancy.py
@@ -0,0 +1,48 @@
+import torch
+from matplotlib import pyplot as plt
+
+from torch_robotics.environments.env_base import EnvBase
+from torch_robotics.robots import RobotPointMass
+from torch_robotics.torch_utils.torch_utils import DEFAULT_TENSOR_ARGS
+from torch_robotics.visualizers.planning_visualizer import create_fig_and_axes
+from mpot.envs.map_generator import random_obstacles
+
+
+class EnvOccupancy2D(EnvBase):
+
+    def __init__(self, tensor_args=None, **kwargs):
+        if tensor_args is None:
+            tensor_args = DEFAULT_TENSOR_ARGS
+        obst_map, obj_list = random_obstacles(
+            map_dim=[20, 20],
+            cell_size=0.1,
+            num_obst=15,
+            rand_xy_limits=[[-7.5, 7.5], [-7.5, 7.5]],
+            rand_rect_shape=[2, 2],
+            rand_circle_radius=1.,
+            tensor_args=tensor_args
+        )
+
+        super().__init__(
+            name=self.__class__.__name__,
+            limits=torch.tensor([[-10, -10], [10, 10]], **tensor_args),  # environments limits
+            obj_fixed_list=obj_list,
+            tensor_args=tensor_args,
+            **kwargs
+        )
+        self.occupancy_map = obst_map
+
+
+if __name__ == '__main__':
+    env = EnvOccupancy2D(precompute_sdf_obj_fixed=False, tensor_args=DEFAULT_TENSOR_ARGS)
+    fig, ax = create_fig_and_axes(env.dim)
+    env.render(ax)
+    plt.show()
+
+    # Render sdf
+    fig, ax = create_fig_and_axes(env.dim)
+    env.render_sdf(ax, fig)
+
+    # Render gradient of sdf
+    env.render_grad_sdf(ax, fig)
+    plt.show()
diff --git a/mpot/gp/LICENSE b/mpot/gp/LICENSE
index 8d98040..a8902ae 100644
--- a/mpot/gp/LICENSE
+++ b/mpot/gp/LICENSE
@@ -22,7 +22,7 @@ SOFTWARE.
 
 **********************************************************************
 The first version of some files in this gp/ folder were licensed as
-"Original Source License" (see below). Somes enhancements
+"Original Source License" (see below). Somes enhancements and developments
 were done by An Thai Le since obtaining the first version.
 **********************************************************************
 
diff --git a/mpot/gp/field_factor.py b/mpot/gp/field_factor.py
new file mode 100644
index 0000000..6a0b3dc
--- /dev/null
+++ b/mpot/gp/field_factor.py
@@ -0,0 +1,59 @@
+import torch
+
+
+class FieldFactor:
+
+    def __init__(
+            self,
+            n_dof,
+            sigma,
+            traj_range,
+    ):
+        self.sigma = sigma
+        self.n_dof = n_dof
+        self.traj_range = traj_range
+        self.K = 1. / (sigma**2)
+
+    def get_error(
+            self,
+            q_trajs,
+            field,
+            q_pos=None,
+            q_vel=None,
+            H_pos=None,
+            q_trajs_interp=None,
+            q_pos_interp=None,
+            q_vel_interp=None,
+            H_pos_interp=None,
+            calc_jacobian=True,
+            **kwargs
+    ):
+        batch = q_trajs.shape[0]
+
+        if H_pos is not None:
+            states = H_pos[:, self.traj_range[0]:self.traj_range[1]]
+        else:
+            states = q_trajs[:, self.traj_range[0]:self.traj_range[1], :self.n_dof].reshape(-1, self.n_dof)
+        q_pos_new = q_pos[:, self.traj_range[0]:self.traj_range[1], :]
+        length = q_pos_new.shape[-2]
+        error = field.compute_cost(q_pos_new, states, **kwargs).reshape(batch, length)
+
+        if calc_jacobian:
+            # compute jacobian wrt to the error of the interpolated trajectory
+            error_interp = error
+            if H_pos_interp is not None or q_trajs_interp is not None:
+                # interpolated trajectory
+                if H_pos_interp is not None:
+                    states = H_pos_interp[:, self.traj_range[0]:self.traj_range[1]]
+                else:
+                    states = q_trajs_interp[:, self.traj_range[0]:self.traj_range[1], :self.n_dof].reshape(-1, self.n_dof)
+                q_pos_new = q_pos_interp[:, self.traj_range[0]:self.traj_range[1], :]
+                length = q_pos_new.shape[-2]
+                error_interp = field.compute_cost(q_pos_new, states, **kwargs).reshape(batch, length)
+
+            H = -torch.autograd.grad(error_interp.sum(), q_trajs, retain_graph=True)[0][:, self.traj_range[0]:self.traj_range[1], :self.n_dof]
+            error = error.detach()
+            field.zero_grad()
+            return error, H
+        else:
+            return error
diff --git a/mpot/gp/gp_factor.py b/mpot/gp/gp_factor.py
index 3063a33..7e93885 100644
--- a/mpot/gp/gp_factor.py
+++ b/mpot/gp/gp_factor.py
@@ -61,4 +61,4 @@ def get_error(self, x_traj: torch.Tensor, calc_jacobian: bool = True) -> torch.T
             # H2 = self.H2.unsqueeze(0).repeat(batch, 1, 1, 1)
             return error, H1, H2
         else:
-            return error
+            return error
\ No newline at end of file
diff --git a/mpot/gp/mp_priors_multi.py b/mpot/gp/gp_prior.py
similarity index 99%
rename from mpot/gp/mp_priors_multi.py
rename to mpot/gp/gp_prior.py
index b8383a9..1bd0be1 100644
--- a/mpot/gp/mp_priors_multi.py
+++ b/mpot/gp/gp_prior.py
@@ -191,4 +191,4 @@ def sample(self, num_samples: int) -> torch.Tensor:
         ).transpose(1, 0)
 
     def log_prob(self, X: torch.Tensor) -> torch.Tensor:
-        return self.dist.log_prob(X)
+        return self.dist.log_prob(X)
\ No newline at end of file
diff --git a/mpot/ot/problem.py b/mpot/ot/problem.py
index 870528d..5e98c54 100644
--- a/mpot/ot/problem.py
+++ b/mpot/ot/problem.py
@@ -148,7 +148,7 @@ def __init__(
         self.errors = errors
         self.fu = fu
         self.gv = gv
-        self.converged = False
+        self.converged_at = -1
 
     def solution_error(
         self,
diff --git a/mpot/ot/sinkhorn.py b/mpot/ot/sinkhorn.py
index 6401395..7358654 100644
--- a/mpot/ot/sinkhorn.py
+++ b/mpot/ot/sinkhorn.py
@@ -195,7 +195,8 @@ def iterations(
         while self._continue(state, iteration):
             state = self.one_iteration(ot_prob, state, iteration, compute_error=compute_error)
             iteration += self.inner_iterations
-        state.converged = self._converged(state, iteration)
+        if self._converged(state, iteration):
+            state.converged_at = iteration 
         return state
 
 
diff --git a/mpot/ot/sinkhorn_step.py b/mpot/ot/sinkhorn_step.py
index f619dc8..30b1d7c 100644
--- a/mpot/ot/sinkhorn_step.py
+++ b/mpot/ot/sinkhorn_step.py
@@ -25,7 +25,7 @@ class SinkhornStep():
     def __init__(
         self,
         dim: int,
-        objective_fn: Any,
+        objective_fn: Callable,
         linear_ot_solver: Sinkhorn,
         epsilon: Union[Epsilon, float] ,
         ent_epsilon: Union[Epsilon, float] = 0.01,
@@ -85,16 +85,17 @@ def init_state(
         else:
             costs = None
     
-        self.displacement_sqnorms = -torch.ones(num_iters).type_as(X_init)
-        self.linear_convergence = -torch.ones(num_iters).type_as(X_init)
+        displacement_sqnorms = -torch.ones(num_iters).type_as(X_init)
+        linear_convergence = -torch.ones(num_iters).type_as(X_init)
+
         a = torch.ones((num_points,)).type_as(X_init) / num_points  # always uniform weights for now
 
         return SinkhornStepState(
             X_init=X_init,
             costs=costs,
-            linear_convergence=self.linear_convergence,
+            linear_convergence=linear_convergence,
             X_history=X_history,
-            displacement_sqnorms=self.displacement_sqnorms,
+            displacement_sqnorms=displacement_sqnorms,
             a=a,
         )
 
@@ -108,14 +109,14 @@ def step(self, state: SinkhornStepState, iteration: int, **kwargs) -> SinkhornSt
                 scaler(X)
 
         eps = self.epsilon.at(iteration) if isinstance(self.epsilon, Epsilon) else self.epsilon
-        step_radius = self.step_radius * eps
-        probe_radius = self.probe_radius * eps
+        self.step_radius = self.step_radius * (1 - eps)
+        self.probe_radius = self.probe_radius * (1 - eps)
 
         # compute sampled polytope vertices
         X_vertices, X_probe, vertices = get_sampled_polytope_vertices(X,
                                                                       polytope_vertices=self.polytope_vertices,
-                                                                      step_radius=step_radius,
-                                                                      probe_radius=probe_radius,
+                                                                      step_radius=self.step_radius,
+                                                                      probe_radius=self.probe_radius,
                                                                       num_probe=self.num_probe)
         
         # unscale for cost evaluation
@@ -125,8 +126,9 @@ def step(self, state: SinkhornStepState, iteration: int, **kwargs) -> SinkhornSt
                 scaler.inverse(X_probe)
 
         # solve Sinkhorn
-        C = self.objective_fn(X_probe, current_trajs=state.X, **kwargs) 
-        ot_prob = LinearProblem(C, epsilon=self.ent_epsilon, a=state.a, scaling_cost=False)
+        optim_dim = X_probe.shape[:-1]
+        C = self.objective_fn(X_probe, current_trajs=state.X, optim_dim=optim_dim, **kwargs)
+        ot_prob = LinearProblem(C, epsilon=self.ent_epsilon, a=state.a, scaling_cost=True)
         W, res = self.linear_ot_solver(ot_prob)
 
         # barycentric projection
@@ -138,7 +140,7 @@ def step(self, state: SinkhornStepState, iteration: int, **kwargs) -> SinkhornSt
         if self.store_history:
             state.X_history[iteration] = X_new
 
-        state.linear_convergence[iteration] = res.converged
+        state.linear_convergence[iteration] = res.converged_at
         state.displacement_sqnorms[iteration] = torch.square(X_new - state.X).sum()
         state.X = X_new
         return state
diff --git a/mpot/planner.py b/mpot/planner.py
index 90f53c9..17ceb57 100644
--- a/mpot/planner.py
+++ b/mpot/planner.py
@@ -4,7 +4,7 @@
 from mpot.ot.sinkhorn_step import SinkhornStep, SinkhornStepState
 from mpot.ot.sinkhorn import Sinkhorn
 from mpot.utils.misc import MinMaxCenterScaler
-from mpot.gp.mp_priors_multi import BatchGPPrior
+from mpot.gp.gp_prior import BatchGPPrior
 from mpot.gp.gp_factor import GPFactor
 from mpot.gp.unary_factor import UnaryFactor
 
@@ -15,7 +15,7 @@ class MPOT(object):
     def __init__(
         self,
         dim: int,
-        objective_fn: Any,
+        objective_fn: Callable,
         linear_ot_solver: Sinkhorn,
         ss_params: dict,
         traj_len: int = 64,
diff --git a/requirements.txt b/requirements.txt
index 032715e..8ea60ff 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,3 +2,5 @@ scipy
 numpy
 matplotlib
 torch 
+einops
+torch_robotics @ git+https://github.com/anindex/torch_robotics@main