Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.9.0
1.0.0
7 changes: 4 additions & 3 deletions examples/cartpole_example.cu
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,12 @@ int main(int argc, char** argv)
auto sampler = new SAMPLER_T(sampler_params);

// Feedback Controller
auto fb_controller = new DDPFeedback<CartpoleDynamics, num_timesteps>(model, dt);
auto fb_controller = new DDPFeedback<CartpoleDynamics>(model, dt);

int num_rollouts = 2048;
auto CartpoleController =
new VanillaMPPIController<CartpoleDynamics, CartpoleQuadraticCost, DDPFeedback<CartpoleDynamics, num_timesteps>,
num_timesteps, 2048>(model, cost, fb_controller, sampler, dt, max_iter, lambda, alpha);
new VanillaMPPIController<CartpoleDynamics, CartpoleQuadraticCost, DDPFeedback<CartpoleDynamics>>(
model, cost, fb_controller, sampler, dt, max_iter, lambda, alpha, num_timesteps, num_rollouts);
auto controller_params = CartpoleController->getParams();
controller_params.dynamics_rollout_dim_ = dim3(64, 4, 1);
controller_params.cost_rollout_dim_ = dim3(64, 4, 1);
Expand Down
51 changes: 29 additions & 22 deletions examples/double_integrator_CORL2020.cu
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
#include <cnpy.h>
#include <random> // Used to generate random noise for control trajectories

const int NUM_ROLLOUTS = 1024;

bool tubeFailure(float* s)
{
float inner_path_radius2 = 1.675 * 1.675;
Expand All @@ -29,7 +31,7 @@ using SCost = DoubleIntegratorCircleCost;
using RCost = DoubleIntegratorRobustCost;
const int num_timesteps = 50; // Optimization time horizon
const int total_time_horizon = 5000;
using Feedback = DDPFeedback<Dyn, num_timesteps>;
using Feedback = DDPFeedback<Dyn>;
using Sampler = mppi::sampling_distributions::GaussianDistribution<Dyn::DYN_PARAMS_T>;

// Problem setup
Expand Down Expand Up @@ -87,11 +89,11 @@ void runVanilla(const Eigen::Ref<const Eigen::Matrix<float, Dyn::STATE_DIM, tota
auto fb_params = fb_controller.getParams();
fb_params.Q.diagonal() << 500, 500, 100, 100;
fb_controller.setParams(fb_params);
auto controller = VanillaMPPIController<Dyn, SCost, Feedback, num_timesteps, 1024, Sampler>(
&model, &cost, &fb_controller, &sampler, dt, max_iter, lambda, alpha);
auto controller = VanillaMPPIController<Dyn, SCost, Feedback, Sampler>(
&model, &cost, &fb_controller, &sampler, dt, max_iter, lambda, alpha, num_timesteps, NUM_ROLLOUTS);
auto controller_params = controller.getParams();
controller_params.dynamics_rollout_dim_ = dim3(64, 1, 1);
controller_params.cost_rollout_dim_ = dim3(64, 1, 1);
controller_params.cost_rollout_dim_ = dim3(min(64, num_timesteps), 1, 1);
controller.setParams(controller_params);
controller.initFeedback();

Expand Down Expand Up @@ -170,11 +172,11 @@ void runVanillaLarge(const Eigen::Ref<const Eigen::Matrix<float, Dyn::STATE_DIM,
auto fb_params = fb_controller.getParams();
fb_params.Q.diagonal() << 500, 500, 100, 100;
fb_controller.setParams(fb_params);
auto controller = VanillaMPPIController<Dyn, SCost, Feedback, num_timesteps, 1024, Sampler>(
&model, &cost, &fb_controller, &sampler, dt, max_iter, lambda, alpha);
auto controller = VanillaMPPIController<Dyn, SCost, Feedback, Sampler>(
&model, &cost, &fb_controller, &sampler, dt, max_iter, lambda, alpha, num_timesteps, NUM_ROLLOUTS);
auto controller_params = controller.getParams();
controller_params.dynamics_rollout_dim_ = dim3(64, 1, 1);
controller_params.cost_rollout_dim_ = dim3(64, 1, 1);
controller_params.cost_rollout_dim_ = dim3(min(64, num_timesteps), 1, 1);
controller.setParams(controller_params);
controller.initFeedback();

Expand Down Expand Up @@ -258,11 +260,11 @@ void runVanillaLargeRC(const Eigen::Ref<const Eigen::Matrix<float, Dyn::STATE_DI
fb_params.Q.diagonal() << 500, 500, 100, 100;
fb_controller.setParams(fb_params);

auto controller = VanillaMPPIController<Dyn, RCost, Feedback, num_timesteps, 1024, Sampler>(
&model, &cost, &fb_controller, &sampler, dt, max_iter, lambda, alpha);
auto controller = VanillaMPPIController<Dyn, RCost, Feedback, Sampler>(
&model, &cost, &fb_controller, &sampler, dt, max_iter, lambda, alpha, num_timesteps, NUM_ROLLOUTS);
auto controller_params = controller.getParams();
controller_params.dynamics_rollout_dim_ = dim3(64, 1, 1);
controller_params.cost_rollout_dim_ = dim3(64, 1, 1);
controller_params.cost_rollout_dim_ = dim3(min(64, num_timesteps), 1, 1);
controller.setParams(controller_params);
controller.initFeedback();

Expand Down Expand Up @@ -343,13 +345,15 @@ void runTube(const Eigen::Ref<const Eigen::Matrix<float, Dyn::STATE_DIM, total_t
auto fb_params = fb_controller.getParams();
fb_params.Q.diagonal() << 500, 500, 100, 100;
fb_controller.setParams(fb_params);
auto controller = TubeMPPIController<Dyn, SCost, Feedback, num_timesteps, 1024, Sampler>(
&model, &cost, &fb_controller, &sampler, dt, max_iter, lambda, alpha);
std::cout << "Starting controller construction" << std::endl;
auto controller = TubeMPPIController<Dyn, SCost, Feedback, Sampler>(
&model, &cost, &fb_controller, &sampler, dt, max_iter, lambda, alpha, num_timesteps, NUM_ROLLOUTS);
auto controller_params = controller.getParams();
controller_params.dynamics_rollout_dim_ = dim3(64, 1, 1);
controller_params.cost_rollout_dim_ = dim3(64, 1, 1);
controller_params.cost_rollout_dim_ = dim3(min(64, num_timesteps), 1, 1);
controller.setParams(controller_params);
controller.setNominalThreshold(20);
std::cout << "Made it through the controller construction" << std::endl;
// Start the loop
for (int t = 0; t < total_time_horizon; ++t)
{
Expand Down Expand Up @@ -435,11 +439,11 @@ void runTubeRC(const Eigen::Ref<const Eigen::Matrix<float, Dyn::STATE_DIM, total
auto fb_params = fb_controller.getParams();
fb_params.Q.diagonal() << 500, 500, 100, 100;
fb_controller.setParams(fb_params);
auto controller = TubeMPPIController<Dyn, RCost, Feedback, num_timesteps, 1024>(
&model, &cost, &fb_controller, &sampler, dt, max_iter, lambda, alpha);
auto controller = TubeMPPIController<Dyn, RCost, Feedback>(&model, &cost, &fb_controller, &sampler, dt, max_iter,
lambda, alpha, num_timesteps, NUM_ROLLOUTS);
auto controller_params = controller.getParams();
controller_params.dynamics_rollout_dim_ = dim3(64, 1, 1);
controller_params.cost_rollout_dim_ = dim3(64, 1, 1);
controller_params.cost_rollout_dim_ = dim3(min(64, num_timesteps), 1, 1);
controller.setParams(controller_params);
controller.setNominalThreshold(2);
// Start the loop
Expand Down Expand Up @@ -529,11 +533,12 @@ void runRobustSc(const Eigen::Ref<const Eigen::Matrix<float, Dyn::STATE_DIM, tot
fb_controller.setParams(fb_params);
// Value function threshold
float value_function_threshold = 20.0;
auto controller = RobustMPPIController<Dyn, SCost, Feedback, num_timesteps, 1024, Sampler>(
&model, &cost, &fb_controller, &sampler, dt, max_iter, lambda, alpha, value_function_threshold);
auto controller =
RobustMPPIController<Dyn, SCost, Feedback, Sampler>(&model, &cost, &fb_controller, &sampler, dt, max_iter, lambda,
alpha, value_function_threshold, num_timesteps, NUM_ROLLOUTS);
auto controller_params = controller.getParams();
controller_params.dynamics_rollout_dim_ = dim3(64, 1, 1);
controller_params.cost_rollout_dim_ = dim3(64, 1, 1);
controller_params.cost_rollout_dim_ = dim3(min(64, num_timesteps), 1, 1);
controller.setParams(controller_params);

// Start the loop
Expand Down Expand Up @@ -641,11 +646,12 @@ void runRobustRc(const Eigen::Ref<const Eigen::Matrix<float, Dyn::STATE_DIM, tot

// Value function threshold
float value_function_threshold = 20.0;
auto controller = RobustMPPIController<Dyn, RCost, Feedback, num_timesteps, 1024, Sampler>(
&model, &cost, &fb_controller, &sampler, dt, max_iter, lambda, alpha, value_function_threshold);
auto controller =
RobustMPPIController<Dyn, RCost, Feedback, Sampler>(&model, &cost, &fb_controller, &sampler, dt, max_iter, lambda,
alpha, value_function_threshold, num_timesteps, NUM_ROLLOUTS);
auto controller_params = controller.getParams();
controller_params.dynamics_rollout_dim_ = dim3(64, 1, 1);
controller_params.cost_rollout_dim_ = dim3(64, 1, 1);
controller_params.cost_rollout_dim_ = dim3(min(64, num_timesteps), 1, 1);
controller.setParams(controller_params);

// Start the loop
Expand Down Expand Up @@ -722,6 +728,7 @@ void runRobustRc(const Eigen::Ref<const Eigen::Matrix<float, Dyn::STATE_DIM, tot

int main()
{
mppi::util::GLOBAL_LOG_LEVEL = mppi::util::LOG_LEVEL::DEBUG;
// Run the double integrator example on all the controllers with the SAME noise 20 times.

// Create a random number generator
Expand Down
6 changes: 3 additions & 3 deletions examples/double_integrator_example.cu
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ const int NUM_ROLLOUTS = 128;

using DYN = DoubleIntegratorDynamics;
using COST = QuadraticCost<DYN>;
using FB_CONTROLLER = DDPFeedback<DYN, TIMESTEPS>;
using FB_CONTROLLER = DDPFeedback<DYN>;

#ifdef USE_COLORED_NOISE
using SAMPLER = mppi::sampling_distributions::ColoredNoiseDistribution<DYN::DYN_PARAMS_T>;
Expand Down Expand Up @@ -66,8 +66,8 @@ int main()
int max_iter = 1;
int total_time_horizon = 300;

auto controller = VanillaMPPIController<DYN, COST, FB_CONTROLLER, TIMESTEPS, NUM_ROLLOUTS, SAMPLER>(
&model, &cost, &fb_controller, &sampler, dt, max_iter, lambda, alpha);
auto controller = VanillaMPPIController<DYN, COST, FB_CONTROLLER, SAMPLER>(
&model, &cost, &fb_controller, &sampler, dt, max_iter, lambda, alpha, TIMESTEPS, NUM_ROLLOUTS);

auto controller_params = controller.getParams();
controller_params.dynamics_rollout_dim_ = dim3(64, 1, 1);
Expand Down
42 changes: 20 additions & 22 deletions include/mppi/controllers/ColoredMPPI/colored_mppi_controller.cu
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,16 @@
#include <iostream>
#include <mppi/sampling_distributions/colored_noise/colored_noise.cuh>

#define ColoredMPPI_TEMPLATE \
template <class DYN_T, class COST_T, class FB_T, int MAX_TIMESTEPS, int NUM_ROLLOUTS, class SAMPLING_T, \
class PARAMS_T>
#define ColoredMPPI ColoredMPPIController<DYN_T, COST_T, FB_T, MAX_TIMESTEPS, NUM_ROLLOUTS, SAMPLING_T, PARAMS_T>
#define ColoredMPPI_TEMPLATE template <class DYN_T, class COST_T, class FB_T, class SAMPLING_T, class PARAMS_T>
#define ColoredMPPI ColoredMPPIController<DYN_T, COST_T, FB_T, SAMPLING_T, PARAMS_T>

ColoredMPPI_TEMPLATE ColoredMPPI::ColoredMPPIController(DYN_T* model, COST_T* cost, FB_T* fb_controller,
SAMPLING_T* sampler, float dt, int max_iter, float lambda,
float alpha, int num_timesteps,
float alpha, int num_timesteps, int num_rollouts,
const Eigen::Ref<const control_trajectory>& init_control_traj,
cudaStream_t stream)
: PARENT_CLASS(model, cost, fb_controller, sampler, dt, max_iter, lambda, alpha, num_timesteps, init_control_traj,
stream)
: PARENT_CLASS(model, cost, fb_controller, sampler, dt, max_iter, lambda, alpha, num_timesteps, num_rollouts,
init_control_traj, stream)
{
// Allocate CUDA memory for the controller
allocateCUDAMemory();
Expand Down Expand Up @@ -97,7 +95,7 @@ ColoredMPPI_TEMPLATE void ColoredMPPI::chooseAppropriateKernel()
for (int i = 0; i < this->getNumKernelEvaluations() && !too_much_mem_single_kernel; i++)
{
mppi::kernels::launchRolloutKernel<DYN_T, COST_T, SAMPLING_T>(
this->model_, this->cost_, this->sampler_, this->getDt(), this->getNumTimesteps(), NUM_ROLLOUTS,
this->model_, this->cost_, this->sampler_, this->getDt(), this->getNumTimesteps(), this->getNumRollouts(),
this->getLambda(), this->getAlpha(), this->initial_state_d_, this->trajectory_costs_d_,
this->params_.dynamics_rollout_dim_, this->stream_, true);
}
Expand All @@ -106,7 +104,7 @@ ColoredMPPI_TEMPLATE void ColoredMPPI::chooseAppropriateKernel()
for (int i = 0; i < this->getNumKernelEvaluations() && !too_much_mem_split_kernel; i++)
{
mppi::kernels::launchSplitRolloutKernel<DYN_T, COST_T, SAMPLING_T>(
this->model_, this->cost_, this->sampler_, this->getDt(), this->getNumTimesteps(), NUM_ROLLOUTS,
this->model_, this->cost_, this->sampler_, this->getDt(), this->getNumTimesteps(), this->getNumRollouts(),
this->getLambda(), this->getAlpha(), this->initial_state_d_, this->output_d_, this->trajectory_costs_d_,
this->params_.dynamics_rollout_dim_, this->params_.cost_rollout_dim_, this->stream_, true);
}
Expand All @@ -133,8 +131,8 @@ ColoredMPPI_TEMPLATE void ColoredMPPI::chooseAppropriateKernel()
kernel_choice = "single";
}
this->logger_->info("Choosing %s kernel based on split taking %f ms and single taking %f ms after %d iterations\n",
kernel_choice.c_str(), split_kernel_time_ms, single_kernel_time_ms,
this->getNumKernelEvaluations());
kernel_choice.c_str(), split_kernel_time_ms, single_kernel_time_ms,
this->getNumKernelEvaluations());
}

ColoredMPPI_TEMPLATE ColoredMPPI::~ColoredMPPIController()
Expand Down Expand Up @@ -169,24 +167,24 @@ ColoredMPPI_TEMPLATE void ColoredMPPI::computeControl(const Eigen::Ref<const sta
if (this->getKernelChoiceAsEnum() == kernelType::USE_SPLIT_KERNELS)
{
mppi::kernels::launchSplitRolloutKernel<DYN_T, COST_T, SAMPLING_T>(
this->model_, this->cost_, this->sampler_, this->getDt(), this->getNumTimesteps(), NUM_ROLLOUTS,
this->model_, this->cost_, this->sampler_, this->getDt(), this->getNumTimesteps(), this->getNumRollouts(),
this->getLambda(), this->getAlpha(), this->initial_state_d_, this->output_d_, this->trajectory_costs_d_,
this->params_.dynamics_rollout_dim_, this->params_.cost_rollout_dim_, this->stream_, false);
}
else if (this->getKernelChoiceAsEnum() == kernelType::USE_SINGLE_KERNEL)
{
mppi::kernels::launchRolloutKernel<DYN_T, COST_T, SAMPLING_T>(
this->model_, this->cost_, this->sampler_, this->getDt(), this->getNumTimesteps(), NUM_ROLLOUTS,
this->model_, this->cost_, this->sampler_, this->getDt(), this->getNumTimesteps(), this->getNumRollouts(),
this->getLambda(), this->getAlpha(), this->initial_state_d_, this->trajectory_costs_d_,
this->params_.dynamics_rollout_dim_, this->stream_, false);
}

// Copy the costs back to the host
HANDLE_ERROR(cudaMemcpyAsync(this->trajectory_costs_.data(), this->trajectory_costs_d_,
NUM_ROLLOUTS * sizeof(float), cudaMemcpyDeviceToHost, this->stream_));
this->getNumRollouts() * sizeof(float), cudaMemcpyDeviceToHost, this->stream_));
HANDLE_ERROR(cudaStreamSynchronize(this->stream_));

this->setBaseline(mppi::kernels::computeBaselineCost(this->trajectory_costs_.data(), NUM_ROLLOUTS));
this->setBaseline(mppi::kernels::computeBaselineCost(this->trajectory_costs_.data(), this->getNumRollouts()));

if (this->getBaselineCost() > baseline_prev + 1)
{
Expand All @@ -198,24 +196,24 @@ ColoredMPPI_TEMPLATE void ColoredMPPI::computeControl(const Eigen::Ref<const sta
// Launch the norm exponential kernel
if (getGamma() == 0 || getRExp() == 0)
{
mppi::kernels::launchNormExpKernel(NUM_ROLLOUTS, this->getNormExpThreads(), this->trajectory_costs_d_,
mppi::kernels::launchNormExpKernel(this->getNumRollouts(), this->getNormExpThreads(), this->trajectory_costs_d_,
1.0 / this->getLambda(), this->getBaselineCost(), this->stream_, false);
}
else
{
mppi::kernels::launchTsallisKernel(NUM_ROLLOUTS, this->getNormExpThreads(), this->trajectory_costs_d_, getGamma(),
getRExp(), this->getBaselineCost(), this->stream_, false);
mppi::kernels::launchTsallisKernel(this->getNumRollouts(), this->getNormExpThreads(), this->trajectory_costs_d_,
getGamma(), getRExp(), this->getBaselineCost(), this->stream_, false);
}
HANDLE_ERROR(cudaMemcpyAsync(this->trajectory_costs_.data(), this->trajectory_costs_d_,
NUM_ROLLOUTS * sizeof(float), cudaMemcpyDeviceToHost, this->stream_));
this->getNumRollouts() * sizeof(float), cudaMemcpyDeviceToHost, this->stream_));
HANDLE_ERROR(cudaStreamSynchronize(this->stream_));
// Compute the normalizer
this->setNormalizer(mppi::kernels::computeNormalizer(this->trajectory_costs_.data(), NUM_ROLLOUTS));
this->setNormalizer(mppi::kernels::computeNormalizer(this->trajectory_costs_.data(), this->getNumRollouts()));

mppi::kernels::computeFreeEnergy(this->free_energy_statistics_.real_sys.freeEnergyMean,
this->free_energy_statistics_.real_sys.freeEnergyVariance,
this->free_energy_statistics_.real_sys.freeEnergyModifiedVariance,
this->trajectory_costs_.data(), NUM_ROLLOUTS, this->getBaselineCost(),
this->trajectory_costs_.data(), this->getNumRollouts(), this->getBaselineCost(),
this->getLambda());

// Compute the cost weighted average //TODO SUM_STRIDE is BDIM_X, but should it be its own parameter?
Expand All @@ -225,7 +223,7 @@ ColoredMPPI_TEMPLATE void ColoredMPPI::computeControl(const Eigen::Ref<const sta
this->sampler_->setHostOptimalControlSequence(this->control_.data(), 0, true);
}

this->free_energy_statistics_.real_sys.normalizerPercent = this->getNormalizerCost() / NUM_ROLLOUTS;
this->free_energy_statistics_.real_sys.normalizerPercent = this->getNormalizerCost() / this->getNumRollouts();
this->free_energy_statistics_.real_sys.increase =
this->getBaselineCost() - this->free_energy_statistics_.real_sys.previousBaseline;
smoothControlTrajectory();
Expand Down
Loading