diff --git a/VERSION b/VERSION index ac39a106..3eefcb9d 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.9.0 +1.0.0 diff --git a/examples/cartpole_example.cu b/examples/cartpole_example.cu index 4f9f4f63..dfa434d8 100644 --- a/examples/cartpole_example.cu +++ b/examples/cartpole_example.cu @@ -41,11 +41,12 @@ int main(int argc, char** argv) auto sampler = new SAMPLER_T(sampler_params); // Feedback Controller - auto fb_controller = new DDPFeedback(model, dt); + auto fb_controller = new DDPFeedback(model, dt); + int num_rollouts = 2048; auto CartpoleController = - new VanillaMPPIController, - num_timesteps, 2048>(model, cost, fb_controller, sampler, dt, max_iter, lambda, alpha); + new VanillaMPPIController>( + model, cost, fb_controller, sampler, dt, max_iter, lambda, alpha, num_timesteps, num_rollouts); auto controller_params = CartpoleController->getParams(); controller_params.dynamics_rollout_dim_ = dim3(64, 4, 1); controller_params.cost_rollout_dim_ = dim3(64, 4, 1); diff --git a/examples/double_integrator_CORL2020.cu b/examples/double_integrator_CORL2020.cu index c88dc29e..2b51b89d 100644 --- a/examples/double_integrator_CORL2020.cu +++ b/examples/double_integrator_CORL2020.cu @@ -9,6 +9,8 @@ #include #include // Used to generate random noise for control trajectories +const int NUM_ROLLOUTS = 1024; + bool tubeFailure(float* s) { float inner_path_radius2 = 1.675 * 1.675; @@ -29,7 +31,7 @@ using SCost = DoubleIntegratorCircleCost; using RCost = DoubleIntegratorRobustCost; const int num_timesteps = 50; // Optimization time horizon const int total_time_horizon = 5000; -using Feedback = DDPFeedback; +using Feedback = DDPFeedback; using Sampler = mppi::sampling_distributions::GaussianDistribution; // Problem setup @@ -87,11 +89,11 @@ void runVanilla(const Eigen::Ref( - &model, &cost, &fb_controller, &sampler, dt, max_iter, lambda, alpha); + auto controller = VanillaMPPIController( + &model, &cost, &fb_controller, &sampler, dt, max_iter, lambda, alpha, num_timesteps, NUM_ROLLOUTS); auto controller_params = controller.getParams(); controller_params.dynamics_rollout_dim_ = dim3(64, 1, 1); - controller_params.cost_rollout_dim_ = dim3(64, 1, 1); + controller_params.cost_rollout_dim_ = dim3(min(64, num_timesteps), 1, 1); controller.setParams(controller_params); controller.initFeedback(); @@ -170,11 +172,11 @@ void runVanillaLarge(const Eigen::Ref( - &model, &cost, &fb_controller, &sampler, dt, max_iter, lambda, alpha); + auto controller = VanillaMPPIController( + &model, &cost, &fb_controller, &sampler, dt, max_iter, lambda, alpha, num_timesteps, NUM_ROLLOUTS); auto controller_params = controller.getParams(); controller_params.dynamics_rollout_dim_ = dim3(64, 1, 1); - controller_params.cost_rollout_dim_ = dim3(64, 1, 1); + controller_params.cost_rollout_dim_ = dim3(min(64, num_timesteps), 1, 1); controller.setParams(controller_params); controller.initFeedback(); @@ -258,11 +260,11 @@ void runVanillaLargeRC(const Eigen::Ref( - &model, &cost, &fb_controller, &sampler, dt, max_iter, lambda, alpha); + auto controller = VanillaMPPIController( + &model, &cost, &fb_controller, &sampler, dt, max_iter, lambda, alpha, num_timesteps, NUM_ROLLOUTS); auto controller_params = controller.getParams(); controller_params.dynamics_rollout_dim_ = dim3(64, 1, 1); - controller_params.cost_rollout_dim_ = dim3(64, 1, 1); + controller_params.cost_rollout_dim_ = dim3(min(64, num_timesteps), 1, 1); controller.setParams(controller_params); controller.initFeedback(); @@ -343,13 +345,15 @@ void runTube(const Eigen::Ref( - &model, &cost, &fb_controller, &sampler, dt, max_iter, lambda, alpha); + std::cout << "Starting controller construction" << std::endl; + auto controller = TubeMPPIController( + &model, &cost, &fb_controller, &sampler, dt, max_iter, lambda, alpha, num_timesteps, NUM_ROLLOUTS); auto controller_params = controller.getParams(); controller_params.dynamics_rollout_dim_ = dim3(64, 1, 1); - controller_params.cost_rollout_dim_ = dim3(64, 1, 1); + controller_params.cost_rollout_dim_ = dim3(min(64, num_timesteps), 1, 1); controller.setParams(controller_params); controller.setNominalThreshold(20); + std::cout << "Made it through the controller construction" << std::endl; // Start the loop for (int t = 0; t < total_time_horizon; ++t) { @@ -435,11 +439,11 @@ void runTubeRC(const Eigen::Ref( - &model, &cost, &fb_controller, &sampler, dt, max_iter, lambda, alpha); + auto controller = TubeMPPIController(&model, &cost, &fb_controller, &sampler, dt, max_iter, + lambda, alpha, num_timesteps, NUM_ROLLOUTS); auto controller_params = controller.getParams(); controller_params.dynamics_rollout_dim_ = dim3(64, 1, 1); - controller_params.cost_rollout_dim_ = dim3(64, 1, 1); + controller_params.cost_rollout_dim_ = dim3(min(64, num_timesteps), 1, 1); controller.setParams(controller_params); controller.setNominalThreshold(2); // Start the loop @@ -529,11 +533,12 @@ void runRobustSc(const Eigen::Ref( - &model, &cost, &fb_controller, &sampler, dt, max_iter, lambda, alpha, value_function_threshold); + auto controller = + RobustMPPIController(&model, &cost, &fb_controller, &sampler, dt, max_iter, lambda, + alpha, value_function_threshold, num_timesteps, NUM_ROLLOUTS); auto controller_params = controller.getParams(); controller_params.dynamics_rollout_dim_ = dim3(64, 1, 1); - controller_params.cost_rollout_dim_ = dim3(64, 1, 1); + controller_params.cost_rollout_dim_ = dim3(min(64, num_timesteps), 1, 1); controller.setParams(controller_params); // Start the loop @@ -641,11 +646,12 @@ void runRobustRc(const Eigen::Ref( - &model, &cost, &fb_controller, &sampler, dt, max_iter, lambda, alpha, value_function_threshold); + auto controller = + RobustMPPIController(&model, &cost, &fb_controller, &sampler, dt, max_iter, lambda, + alpha, value_function_threshold, num_timesteps, NUM_ROLLOUTS); auto controller_params = controller.getParams(); controller_params.dynamics_rollout_dim_ = dim3(64, 1, 1); - controller_params.cost_rollout_dim_ = dim3(64, 1, 1); + controller_params.cost_rollout_dim_ = dim3(min(64, num_timesteps), 1, 1); controller.setParams(controller_params); // Start the loop @@ -722,6 +728,7 @@ void runRobustRc(const Eigen::Ref; -using FB_CONTROLLER = DDPFeedback; +using FB_CONTROLLER = DDPFeedback; #ifdef USE_COLORED_NOISE using SAMPLER = mppi::sampling_distributions::ColoredNoiseDistribution; @@ -66,8 +66,8 @@ int main() int max_iter = 1; int total_time_horizon = 300; - auto controller = VanillaMPPIController( - &model, &cost, &fb_controller, &sampler, dt, max_iter, lambda, alpha); + auto controller = VanillaMPPIController( + &model, &cost, &fb_controller, &sampler, dt, max_iter, lambda, alpha, TIMESTEPS, NUM_ROLLOUTS); auto controller_params = controller.getParams(); controller_params.dynamics_rollout_dim_ = dim3(64, 1, 1); diff --git a/include/mppi/controllers/ColoredMPPI/colored_mppi_controller.cu b/include/mppi/controllers/ColoredMPPI/colored_mppi_controller.cu index a4d3f6db..e41e3e23 100644 --- a/include/mppi/controllers/ColoredMPPI/colored_mppi_controller.cu +++ b/include/mppi/controllers/ColoredMPPI/colored_mppi_controller.cu @@ -4,18 +4,16 @@ #include #include -#define ColoredMPPI_TEMPLATE \ - template -#define ColoredMPPI ColoredMPPIController +#define ColoredMPPI_TEMPLATE template +#define ColoredMPPI ColoredMPPIController ColoredMPPI_TEMPLATE ColoredMPPI::ColoredMPPIController(DYN_T* model, COST_T* cost, FB_T* fb_controller, SAMPLING_T* sampler, float dt, int max_iter, float lambda, - float alpha, int num_timesteps, + float alpha, int num_timesteps, int num_rollouts, const Eigen::Ref& init_control_traj, cudaStream_t stream) - : PARENT_CLASS(model, cost, fb_controller, sampler, dt, max_iter, lambda, alpha, num_timesteps, init_control_traj, - stream) + : PARENT_CLASS(model, cost, fb_controller, sampler, dt, max_iter, lambda, alpha, num_timesteps, num_rollouts, + init_control_traj, stream) { // Allocate CUDA memory for the controller allocateCUDAMemory(); @@ -97,7 +95,7 @@ ColoredMPPI_TEMPLATE void ColoredMPPI::chooseAppropriateKernel() for (int i = 0; i < this->getNumKernelEvaluations() && !too_much_mem_single_kernel; i++) { mppi::kernels::launchRolloutKernel( - this->model_, this->cost_, this->sampler_, this->getDt(), this->getNumTimesteps(), NUM_ROLLOUTS, + this->model_, this->cost_, this->sampler_, this->getDt(), this->getNumTimesteps(), this->getNumRollouts(), this->getLambda(), this->getAlpha(), this->initial_state_d_, this->trajectory_costs_d_, this->params_.dynamics_rollout_dim_, this->stream_, true); } @@ -106,7 +104,7 @@ ColoredMPPI_TEMPLATE void ColoredMPPI::chooseAppropriateKernel() for (int i = 0; i < this->getNumKernelEvaluations() && !too_much_mem_split_kernel; i++) { mppi::kernels::launchSplitRolloutKernel( - this->model_, this->cost_, this->sampler_, this->getDt(), this->getNumTimesteps(), NUM_ROLLOUTS, + this->model_, this->cost_, this->sampler_, this->getDt(), this->getNumTimesteps(), this->getNumRollouts(), this->getLambda(), this->getAlpha(), this->initial_state_d_, this->output_d_, this->trajectory_costs_d_, this->params_.dynamics_rollout_dim_, this->params_.cost_rollout_dim_, this->stream_, true); } @@ -133,8 +131,8 @@ ColoredMPPI_TEMPLATE void ColoredMPPI::chooseAppropriateKernel() kernel_choice = "single"; } this->logger_->info("Choosing %s kernel based on split taking %f ms and single taking %f ms after %d iterations\n", - kernel_choice.c_str(), split_kernel_time_ms, single_kernel_time_ms, - this->getNumKernelEvaluations()); + kernel_choice.c_str(), split_kernel_time_ms, single_kernel_time_ms, + this->getNumKernelEvaluations()); } ColoredMPPI_TEMPLATE ColoredMPPI::~ColoredMPPIController() @@ -169,24 +167,24 @@ ColoredMPPI_TEMPLATE void ColoredMPPI::computeControl(const Eigen::RefgetKernelChoiceAsEnum() == kernelType::USE_SPLIT_KERNELS) { mppi::kernels::launchSplitRolloutKernel( - this->model_, this->cost_, this->sampler_, this->getDt(), this->getNumTimesteps(), NUM_ROLLOUTS, + this->model_, this->cost_, this->sampler_, this->getDt(), this->getNumTimesteps(), this->getNumRollouts(), this->getLambda(), this->getAlpha(), this->initial_state_d_, this->output_d_, this->trajectory_costs_d_, this->params_.dynamics_rollout_dim_, this->params_.cost_rollout_dim_, this->stream_, false); } else if (this->getKernelChoiceAsEnum() == kernelType::USE_SINGLE_KERNEL) { mppi::kernels::launchRolloutKernel( - this->model_, this->cost_, this->sampler_, this->getDt(), this->getNumTimesteps(), NUM_ROLLOUTS, + this->model_, this->cost_, this->sampler_, this->getDt(), this->getNumTimesteps(), this->getNumRollouts(), this->getLambda(), this->getAlpha(), this->initial_state_d_, this->trajectory_costs_d_, this->params_.dynamics_rollout_dim_, this->stream_, false); } // Copy the costs back to the host HANDLE_ERROR(cudaMemcpyAsync(this->trajectory_costs_.data(), this->trajectory_costs_d_, - NUM_ROLLOUTS * sizeof(float), cudaMemcpyDeviceToHost, this->stream_)); + this->getNumRollouts() * sizeof(float), cudaMemcpyDeviceToHost, this->stream_)); HANDLE_ERROR(cudaStreamSynchronize(this->stream_)); - this->setBaseline(mppi::kernels::computeBaselineCost(this->trajectory_costs_.data(), NUM_ROLLOUTS)); + this->setBaseline(mppi::kernels::computeBaselineCost(this->trajectory_costs_.data(), this->getNumRollouts())); if (this->getBaselineCost() > baseline_prev + 1) { @@ -198,24 +196,24 @@ ColoredMPPI_TEMPLATE void ColoredMPPI::computeControl(const Eigen::RefgetNormExpThreads(), this->trajectory_costs_d_, + mppi::kernels::launchNormExpKernel(this->getNumRollouts(), this->getNormExpThreads(), this->trajectory_costs_d_, 1.0 / this->getLambda(), this->getBaselineCost(), this->stream_, false); } else { - mppi::kernels::launchTsallisKernel(NUM_ROLLOUTS, this->getNormExpThreads(), this->trajectory_costs_d_, getGamma(), - getRExp(), this->getBaselineCost(), this->stream_, false); + mppi::kernels::launchTsallisKernel(this->getNumRollouts(), this->getNormExpThreads(), this->trajectory_costs_d_, + getGamma(), getRExp(), this->getBaselineCost(), this->stream_, false); } HANDLE_ERROR(cudaMemcpyAsync(this->trajectory_costs_.data(), this->trajectory_costs_d_, - NUM_ROLLOUTS * sizeof(float), cudaMemcpyDeviceToHost, this->stream_)); + this->getNumRollouts() * sizeof(float), cudaMemcpyDeviceToHost, this->stream_)); HANDLE_ERROR(cudaStreamSynchronize(this->stream_)); // Compute the normalizer - this->setNormalizer(mppi::kernels::computeNormalizer(this->trajectory_costs_.data(), NUM_ROLLOUTS)); + this->setNormalizer(mppi::kernels::computeNormalizer(this->trajectory_costs_.data(), this->getNumRollouts())); mppi::kernels::computeFreeEnergy(this->free_energy_statistics_.real_sys.freeEnergyMean, this->free_energy_statistics_.real_sys.freeEnergyVariance, this->free_energy_statistics_.real_sys.freeEnergyModifiedVariance, - this->trajectory_costs_.data(), NUM_ROLLOUTS, this->getBaselineCost(), + this->trajectory_costs_.data(), this->getNumRollouts(), this->getBaselineCost(), this->getLambda()); // Compute the cost weighted average //TODO SUM_STRIDE is BDIM_X, but should it be its own parameter? @@ -225,7 +223,7 @@ ColoredMPPI_TEMPLATE void ColoredMPPI::computeControl(const Eigen::Refsampler_->setHostOptimalControlSequence(this->control_.data(), 0, true); } - this->free_energy_statistics_.real_sys.normalizerPercent = this->getNormalizerCost() / NUM_ROLLOUTS; + this->free_energy_statistics_.real_sys.normalizerPercent = this->getNormalizerCost() / this->getNumRollouts(); this->free_energy_statistics_.real_sys.increase = this->getBaselineCost() - this->free_energy_statistics_.real_sys.previousBaseline; smoothControlTrajectory(); diff --git a/include/mppi/controllers/ColoredMPPI/colored_mppi_controller.cuh b/include/mppi/controllers/ColoredMPPI/colored_mppi_controller.cuh index 30ce7a43..1f8d46fb 100644 --- a/include/mppi/controllers/ColoredMPPI/colored_mppi_controller.cuh +++ b/include/mppi/controllers/ColoredMPPI/colored_mppi_controller.cuh @@ -13,39 +13,39 @@ #include -template -struct ColoredMPPIParams : public ControllerParams +template +struct ColoredMPPIParams : public ControllerParams { float r = 2.0; float gamma = 0; Eigen::Matrix state_leash_dist_ = Eigen::Matrix::Zero(); ColoredMPPIParams() = default; - ColoredMPPIParams(const ColoredMPPIParams& other) + ColoredMPPIParams(const ColoredMPPIParams& other) { - typedef ControllerParams BASE; + typedef ControllerParams BASE; const BASE& other_item_ref = other; *(static_cast(this)) = other_item_ref; // this->colored_noise_exponents_ = other.colored_noise_exponents_; } - ColoredMPPIParams(ColoredMPPIParams& other) + ColoredMPPIParams(ColoredMPPIParams& other) { - typedef ControllerParams BASE; + typedef ControllerParams BASE; BASE& other_item_ref = other; *(static_cast(this)) = other_item_ref; // this->colored_noise_exponents_ = other.colored_noise_exponents_; } }; -template , - class PARAMS_T = ColoredMPPIParams> -class ColoredMPPIController : public Controller + class PARAMS_T = ColoredMPPIParams> +class ColoredMPPIController : public Controller { public: EIGEN_MAKE_ALIGNED_OPERATOR_NEW - typedef Controller PARENT_CLASS; + typedef Controller PARENT_CLASS; // need control_array = ... so that we can initialize // Eigen::Matrix with Eigen::Matrix::Zero(); using control_array = typename PARENT_CLASS::control_array; @@ -60,10 +60,11 @@ public: * Public member functions */ // Constructor - ColoredMPPIController(DYN_T* model, COST_T* cost, FB_T* fb_controller, SAMPLING_T* sampler, float dt, int max_iter, - float lambda, float alpha, int num_timesteps = MAX_TIMESTEPS, - const Eigen::Ref& init_control_traj = control_trajectory::Zero(), - cudaStream_t stream = nullptr); + ColoredMPPIController( + DYN_T* model, COST_T* cost, FB_T* fb_controller, SAMPLING_T* sampler, float dt, int max_iter, float lambda, + float alpha, int num_timesteps, int num_rollouts, + const Eigen::Ref& init_control_traj = control_trajectory::Zero(DYN_T::CONTROL_DIM, 1), + cudaStream_t stream = nullptr); ColoredMPPIController(DYN_T* model, COST_T* cost, FB_T* fb_controller, SAMPLING_T* sampler, PARAMS_T& params, cudaStream_t stream = nullptr); diff --git a/include/mppi/controllers/MPPI/mppi_controller.cu b/include/mppi/controllers/MPPI/mppi_controller.cu index f5e47b8a..86387729 100644 --- a/include/mppi/controllers/MPPI/mppi_controller.cu +++ b/include/mppi/controllers/MPPI/mppi_controller.cu @@ -6,18 +6,16 @@ #include #include -#define VANILLA_MPPI_TEMPLATE \ - template +#define VANILLA_MPPI_TEMPLATE template -#define VanillaMPPI VanillaMPPIController +#define VanillaMPPI VanillaMPPIController VANILLA_MPPI_TEMPLATE VanillaMPPI::VanillaMPPIController(DYN_T* model, COST_T* cost, FB_T* fb_controller, SAMPLING_T* sampler, float dt, - int max_iter, float lambda, float alpha, int num_timesteps, + int max_iter, float lambda, float alpha, int num_timesteps, int num_rollouts, const Eigen::Ref& init_control_traj, cudaStream_t stream) - : PARENT_CLASS(model, cost, fb_controller, sampler, dt, max_iter, lambda, alpha, num_timesteps, init_control_traj, - stream) + : PARENT_CLASS(model, cost, fb_controller, sampler, dt, max_iter, lambda, alpha, num_timesteps, num_rollouts, + init_control_traj, stream) { // Allocate CUDA memory for the controller allocateCUDAMemory(); @@ -102,7 +100,7 @@ void VanillaMPPI::chooseAppropriateKernel() for (int i = 0; i < this->getNumKernelEvaluations() && !too_much_mem_single_kernel; i++) { mppi::kernels::launchRolloutKernel( - this->model_, this->cost_, this->sampler_, this->getDt(), this->getNumTimesteps(), NUM_ROLLOUTS, + this->model_, this->cost_, this->sampler_, this->getDt(), this->getNumTimesteps(), this->getNumRollouts(), this->getLambda(), this->getAlpha(), this->initial_state_d_, this->trajectory_costs_d_, this->params_.dynamics_rollout_dim_, this->stream_, true); } @@ -111,7 +109,7 @@ void VanillaMPPI::chooseAppropriateKernel() for (int i = 0; i < this->getNumKernelEvaluations() && !too_much_mem_split_kernel; i++) { mppi::kernels::launchSplitRolloutKernel( - this->model_, this->cost_, this->sampler_, this->getDt(), this->getNumTimesteps(), NUM_ROLLOUTS, + this->model_, this->cost_, this->sampler_, this->getDt(), this->getNumTimesteps(), this->getNumRollouts(), this->getLambda(), this->getAlpha(), this->initial_state_d_, this->output_d_, this->trajectory_costs_d_, this->params_.dynamics_rollout_dim_, this->params_.cost_rollout_dim_, this->stream_, true); } @@ -138,8 +136,8 @@ void VanillaMPPI::chooseAppropriateKernel() kernel_choice = "single"; } this->logger_->info("Choosing %s kernel based on split taking %f ms and single taking %f ms after %d iterations\n", - kernel_choice.c_str(), split_kernel_time_ms, single_kernel_time_ms, - this->getNumKernelEvaluations()); + kernel_choice.c_str(), split_kernel_time_ms, single_kernel_time_ms, + this->getNumKernelEvaluations()); } VANILLA_MPPI_TEMPLATE @@ -171,24 +169,24 @@ void VanillaMPPI::computeControl(const Eigen::Ref& state, int if (this->getKernelChoiceAsEnum() == kernelType::USE_SPLIT_KERNELS) { mppi::kernels::launchSplitRolloutKernel( - this->model_, this->cost_, this->sampler_, this->getDt(), this->getNumTimesteps(), NUM_ROLLOUTS, + this->model_, this->cost_, this->sampler_, this->getDt(), this->getNumTimesteps(), this->getNumRollouts(), this->getLambda(), this->getAlpha(), this->initial_state_d_, this->output_d_, this->trajectory_costs_d_, this->params_.dynamics_rollout_dim_, this->params_.cost_rollout_dim_, this->stream_, false); } else if (this->getKernelChoiceAsEnum() == kernelType::USE_SINGLE_KERNEL) { mppi::kernels::launchRolloutKernel( - this->model_, this->cost_, this->sampler_, this->getDt(), this->getNumTimesteps(), NUM_ROLLOUTS, + this->model_, this->cost_, this->sampler_, this->getDt(), this->getNumTimesteps(), this->getNumRollouts(), this->getLambda(), this->getAlpha(), this->initial_state_d_, this->trajectory_costs_d_, this->params_.dynamics_rollout_dim_, this->stream_, false); } // Copy the costs back to the host HANDLE_ERROR(cudaMemcpyAsync(this->trajectory_costs_.data(), this->trajectory_costs_d_, - NUM_ROLLOUTS * sizeof(float), cudaMemcpyDeviceToHost, this->stream_)); + this->getNumRollouts() * sizeof(float), cudaMemcpyDeviceToHost, this->stream_)); HANDLE_ERROR(cudaStreamSynchronize(this->stream_)); - this->setBaseline(mppi::kernels::computeBaselineCost(this->trajectory_costs_.data(), NUM_ROLLOUTS)); + this->setBaseline(mppi::kernels::computeBaselineCost(this->trajectory_costs_.data(), this->getNumRollouts())); if (this->getBaselineCost() > baseline_prev + 1) { @@ -198,19 +196,19 @@ void VanillaMPPI::computeControl(const Eigen::Ref& state, int baseline_prev = this->getBaselineCost(); // Launch the norm exponential kernel - mppi::kernels::launchNormExpKernel(NUM_ROLLOUTS, this->getNormExpThreads(), this->trajectory_costs_d_, + mppi::kernels::launchNormExpKernel(this->getNumRollouts(), this->getNormExpThreads(), this->trajectory_costs_d_, 1.0 / this->getLambda(), this->getBaselineCost(), this->stream_, false); HANDLE_ERROR(cudaMemcpyAsync(this->trajectory_costs_.data(), this->trajectory_costs_d_, - NUM_ROLLOUTS * sizeof(float), cudaMemcpyDeviceToHost, this->stream_)); + this->getNumRollouts() * sizeof(float), cudaMemcpyDeviceToHost, this->stream_)); HANDLE_ERROR(cudaStreamSynchronize(this->stream_)); // Compute the normalizer - this->setNormalizer(mppi::kernels::computeNormalizer(this->trajectory_costs_.data(), NUM_ROLLOUTS)); + this->setNormalizer(mppi::kernels::computeNormalizer(this->trajectory_costs_.data(), this->getNumRollouts())); mppi::kernels::computeFreeEnergy(this->free_energy_statistics_.real_sys.freeEnergyMean, this->free_energy_statistics_.real_sys.freeEnergyVariance, this->free_energy_statistics_.real_sys.freeEnergyModifiedVariance, - this->trajectory_costs_.data(), NUM_ROLLOUTS, this->getBaselineCost(), + this->trajectory_costs_.data(), this->getNumRollouts(), this->getBaselineCost(), this->getLambda()); this->sampler_->updateDistributionParamsFromDevice(this->trajectory_costs_d_, this->getNormalizerCost(), 0, false); @@ -219,7 +217,7 @@ void VanillaMPPI::computeControl(const Eigen::Ref& state, int this->sampler_->setHostOptimalControlSequence(this->control_.data(), 0, true); } - this->free_energy_statistics_.real_sys.normalizerPercent = this->getNormalizerCost() / NUM_ROLLOUTS; + this->free_energy_statistics_.real_sys.normalizerPercent = this->getNormalizerPercent(); this->free_energy_statistics_.real_sys.increase = this->getBaselineCost() - this->free_energy_statistics_.real_sys.previousBaseline; smoothControlTrajectory(); @@ -243,7 +241,7 @@ void VanillaMPPI::computeControl(const Eigen::Ref& state, int VANILLA_MPPI_TEMPLATE void VanillaMPPI::allocateCUDAMemory() { - PARENT_CLASS::allocateCUDAMemoryHelper(); + PARENT_CLASS::allocateCUDAMemoryHelper(1); } VANILLA_MPPI_TEMPLATE diff --git a/include/mppi/controllers/MPPI/mppi_controller.cuh b/include/mppi/controllers/MPPI/mppi_controller.cuh index f045b918..d4c79556 100644 --- a/include/mppi/controllers/MPPI/mppi_controller.cuh +++ b/include/mppi/controllers/MPPI/mppi_controller.cuh @@ -11,16 +11,16 @@ #include #include -template , - class PARAMS_T = ControllerParams> -class VanillaMPPIController : public Controller + class PARAMS_T = ControllerParams> +class VanillaMPPIController : public Controller { public: EIGEN_MAKE_ALIGNED_OPERATOR_NEW // nAeed control_array = ... so that we can initialize // Eigen::Matrix with Eigen::Matrix::Zero(); - typedef Controller PARENT_CLASS; + typedef Controller PARENT_CLASS; using control_array = typename PARENT_CLASS::control_array; using control_trajectory = typename PARENT_CLASS::control_trajectory; using state_trajectory = typename PARENT_CLASS::state_trajectory; @@ -33,10 +33,11 @@ public: * Public member functions */ // Constructor - VanillaMPPIController(DYN_T* model, COST_T* cost, FB_T* fb_controller, SAMPLING_T* sampler, float dt, int max_iter, - float lambda, float alpha, int num_timesteps = MAX_TIMESTEPS, - const Eigen::Ref& init_control_traj = control_trajectory::Zero(), - cudaStream_t stream = nullptr); + VanillaMPPIController( + DYN_T* model, COST_T* cost, FB_T* fb_controller, SAMPLING_T* sampler, float dt, int max_iter, float lambda, + float alpha, int num_timesteps, int num_rollouts, + const Eigen::Ref& init_control_traj = control_trajectory::Zero(DYN_T::CONTROL_DIM, 1), + cudaStream_t stream = nullptr); VanillaMPPIController(DYN_T* model, COST_T* cost, FB_T* fb_controller, SAMPLING_T* sampler, PARAMS_T& params, cudaStream_t stream = nullptr); diff --git a/include/mppi/controllers/R-MPPI/robust_mppi_controller.cu b/include/mppi/controllers/R-MPPI/robust_mppi_controller.cu index 344c9db2..f0394875 100644 --- a/include/mppi/controllers/R-MPPI/robust_mppi_controller.cu +++ b/include/mppi/controllers/R-MPPI/robust_mppi_controller.cu @@ -2,19 +2,18 @@ #include #include -#define ROBUST_MPPI_TEMPLATE \ - template +#define ROBUST_MPPI_TEMPLATE template -#define RobustMPPI RobustMPPIController +#define RobustMPPI RobustMPPIController ROBUST_MPPI_TEMPLATE RobustMPPI::RobustMPPIController(DYN_T* model, COST_T* cost, FB_T* fb_controller, SAMPLING_T* sampler, float dt, int max_iter, float lambda, float alpha, float value_function_threshold, - int num_timesteps, const Eigen::Ref& init_control_traj, + int num_timesteps, int num_rollouts, + const Eigen::Ref& init_control_traj, int num_candidate_nominal_states, int optimization_stride, cudaStream_t stream) - : PARENT_CLASS(model, cost, fb_controller, sampler, dt, max_iter, lambda, alpha, num_timesteps, init_control_traj, - stream) + : PARENT_CLASS(model, cost, fb_controller, sampler, dt, max_iter, lambda, alpha, num_timesteps, num_rollouts, + init_control_traj, stream) { setValueFunctionThreshold(value_function_threshold); setOptimizationStride(optimization_stride); @@ -23,6 +22,14 @@ RobustMPPI::RobustMPPIController(DYN_T* model, COST_T* cost, FB_T* fb_controller setParams(this->params_); this->sampler_->setNumDistributions(2); + // Properly size nominal variables + setNumTimestepsHelper(this->getNumTimesteps(), false); + setNumRolloutsHelper(this->getNumRollouts(), false); + + // Zero the nominal trajectories + nominal_state_trajectory_.setZero(); + trajectory_costs_nominal_.setZero(); + // Zero the control history this->control_history_ = Eigen::Matrix::Zero(); nominal_control_history_ = Eigen::Matrix::Zero(); @@ -37,7 +44,7 @@ RobustMPPI::RobustMPPIController(DYN_T* model, COST_T* cost, FB_T* fb_controller this->fb_controller_->initTrackingController(); // Initialize the nominal control trajectory - nominal_control_trajectory_ = init_control_traj; + nominal_control_trajectory_ = this->params_.init_control_traj_; this->enable_feedback_ = true; chooseAppropriateKernel(); @@ -52,6 +59,14 @@ RobustMPPI::RobustMPPIController(DYN_T* model, COST_T* cost, FB_T* fb_controller setParams(params); this->sampler_->setNumDistributions(2); + // Properly size nominal variables + setNumTimestepsHelper(this->getNumTimesteps(), false); + setNumRolloutsHelper(this->getNumRollouts(), false); + + // Zero the nominal trajectories + nominal_state_trajectory_.setZero(); + trajectory_costs_nominal_.setZero(); + // Zero the control history this->control_history_ = Eigen::Matrix::Zero(); nominal_control_history_ = Eigen::Matrix::Zero(); @@ -81,7 +96,7 @@ RobustMPPI::~RobustMPPIController() ROBUST_MPPI_TEMPLATE void RobustMPPI::allocateCUDAMemory() { - PARENT_CLASS::allocateCUDAMemoryHelper(1); + PARENT_CLASS::allocateCUDAMemoryHelper(2); } ROBUST_MPPI_TEMPLATE @@ -157,8 +172,9 @@ void RobustMPPI::chooseAppropriateKernel() { mppi::kernels::rmppi::launchRMPPIRolloutKernel( this->model_, this->cost_, this->sampler_, this->fb_controller_->getHostPointer().get(), this->getDt(), - this->getNumTimesteps(), NUM_ROLLOUTS, this->getLambda(), this->getAlpha(), getValueFunctionThreshold(), - this->initial_state_d_, this->trajectory_costs_d_, this->params_.dynamics_rollout_dim_, this->stream_, true); + this->getNumTimesteps(), this->getNumRollouts(), this->getLambda(), this->getAlpha(), + getValueFunctionThreshold(), this->initial_state_d_, this->trajectory_costs_d_, + this->params_.dynamics_rollout_dim_, this->stream_, true); } auto end_rollout_single_kernel_time = std::chrono::steady_clock::now(); @@ -167,9 +183,9 @@ void RobustMPPI::chooseAppropriateKernel() { mppi::kernels::rmppi::launchSplitRMPPIRolloutKernel( this->model_, this->cost_, this->sampler_, this->fb_controller_->getHostPointer().get(), this->getDt(), - this->getNumTimesteps(), NUM_ROLLOUTS, this->getLambda(), this->getAlpha(), getValueFunctionThreshold(), - this->initial_state_d_, this->output_d_, this->trajectory_costs_d_, this->params_.dynamics_rollout_dim_, - this->params_.cost_rollout_dim_, this->stream_, true); + this->getNumTimesteps(), this->getNumRollouts(), this->getLambda(), this->getAlpha(), + getValueFunctionThreshold(), this->initial_state_d_, this->output_d_, this->trajectory_costs_d_, + this->params_.dynamics_rollout_dim_, this->params_.cost_rollout_dim_, this->stream_, true); } auto end_rollout_split_kernel_time = std::chrono::steady_clock::now(); @@ -309,6 +325,26 @@ void RobustMPPI::chooseAppropriateEvalKernel() kernel_choice.c_str(), split_eval_kernel_time_ms, single_eval_kernel_time_ms, this->getNumKernelEvaluations()); } +ROBUST_MPPI_TEMPLATE +void RobustMPPI::setNumTimestepsHelper(const int num_timesteps, const bool update_gpu_mem) +{ + PARENT_CLASS::setNumTimestepsHelper(num_timesteps, update_gpu_mem); + // Set up nominal trajectories + PARENT_CLASS::resizeTimeTrajectory(nominal_control_trajectory_, num_timesteps); + nominal_control_trajectory_ = this->params_.init_control_traj_; + PARENT_CLASS::resizeTimeTrajectory(nominal_state_trajectory_, num_timesteps); + resetCandidateCudaMem(); +} + +ROBUST_MPPI_TEMPLATE +void RobustMPPI::setNumRolloutsHelper(const int num_rollouts, const bool update_gpu_mem) +{ + PARENT_CLASS::setNumRolloutsHelper(num_rollouts, update_gpu_mem); + // Set up nominal trajectories + Eigen::NoChange_t same_col = Eigen::NoChange_t::NoChange; + trajectory_costs_nominal_.conservativeResize(num_rollouts, same_col); +} + ROBUST_MPPI_TEMPLATE void RobustMPPI::setParams(const PARAMS_T& p) { @@ -423,12 +459,12 @@ void RobustMPPI::copyNominalControlToDevice(bool synchronize) ROBUST_MPPI_TEMPLATE void RobustMPPI::updateNumCandidates(int new_num_candidates) { - if ((new_num_candidates * getNumEvalSamplesPerCandidate()) > NUM_ROLLOUTS) + if ((new_num_candidates * getNumEvalSamplesPerCandidate()) > this->getNumRollouts()) { std::cerr << "ERROR: (number of candidates) * (SAMPLES_PER_CANDIDATE) cannot exceed NUM_ROLLOUTS\n"; std::cerr << "number of candidates: " << new_num_candidates - << ", SAMPLES_PER_CANDIDATE: " << getNumEvalSamplesPerCandidate() << ", NUM_ROLLOUTS: " << NUM_ROLLOUTS - << "\n"; + << ", SAMPLES_PER_CANDIDATE: " << getNumEvalSamplesPerCandidate() + << ", NUM_ROLLOUTS: " << this->getNumRollouts() << "\n"; std::terminate(); } @@ -636,7 +672,7 @@ void RobustMPPI::computeControl(const Eigen::Ref& state, int { // Handy dandy pointers to nominal data float* trajectory_costs_nominal_d = this->trajectory_costs_d_; - float* trajectory_costs_real_d = this->trajectory_costs_d_ + NUM_ROLLOUTS; + float* trajectory_costs_real_d = this->trajectory_costs_d_ + this->getNumRollouts(); float* initial_state_nominal_d = this->initial_state_d_; float* initial_state_real_d = this->initial_state_d_ + DYN_T::STATE_DIM; @@ -668,58 +704,59 @@ void RobustMPPI::computeControl(const Eigen::Ref& state, int { mppi::kernels::rmppi::launchSplitRMPPIRolloutKernel( this->model_, this->cost_, this->sampler_, this->fb_controller_->getHostPointer().get(), this->getDt(), - this->getNumTimesteps(), NUM_ROLLOUTS, this->getLambda(), this->getAlpha(), getValueFunctionThreshold(), - this->initial_state_d_, this->output_d_, this->trajectory_costs_d_, this->params_.dynamics_rollout_dim_, - this->params_.cost_rollout_dim_, this->stream_, false); + this->getNumTimesteps(), this->getNumRollouts(), this->getLambda(), this->getAlpha(), + getValueFunctionThreshold(), this->initial_state_d_, this->output_d_, this->trajectory_costs_d_, + this->params_.dynamics_rollout_dim_, this->params_.cost_rollout_dim_, this->stream_, false); } else { mppi::kernels::rmppi::launchRMPPIRolloutKernel( this->model_, this->cost_, this->sampler_, this->fb_controller_->getHostPointer().get(), this->getDt(), - this->getNumTimesteps(), NUM_ROLLOUTS, this->getLambda(), this->getAlpha(), getValueFunctionThreshold(), - this->initial_state_d_, this->trajectory_costs_d_, this->params_.dynamics_rollout_dim_, this->stream_, false); + this->getNumTimesteps(), this->getNumRollouts(), this->getLambda(), this->getAlpha(), + getValueFunctionThreshold(), this->initial_state_d_, this->trajectory_costs_d_, + this->params_.dynamics_rollout_dim_, this->stream_, false); } // Return the costs -> nominal, real costs - HANDLE_ERROR(cudaMemcpyAsync(this->trajectory_costs_.data(), trajectory_costs_real_d, NUM_ROLLOUTS * sizeof(float), - cudaMemcpyDeviceToHost, this->stream_)); + HANDLE_ERROR(cudaMemcpyAsync(this->trajectory_costs_.data(), trajectory_costs_real_d, + this->getNumRollouts() * sizeof(float), cudaMemcpyDeviceToHost, this->stream_)); HANDLE_ERROR(cudaMemcpyAsync(trajectory_costs_nominal_.data(), trajectory_costs_nominal_d, - NUM_ROLLOUTS * sizeof(float), cudaMemcpyDeviceToHost, this->stream_)); + this->getNumRollouts() * sizeof(float), cudaMemcpyDeviceToHost, this->stream_)); HANDLE_ERROR(cudaStreamSynchronize(this->stream_)); // Launch the norm exponential kernels for the nominal costs and the real costs - this->setBaseline(mppi::kernels::computeBaselineCost(trajectory_costs_nominal_.data(), NUM_ROLLOUTS), 0); - this->setBaseline(mppi::kernels::computeBaselineCost(this->trajectory_costs_.data(), NUM_ROLLOUTS), 1); + this->setBaseline(mppi::kernels::computeBaselineCost(trajectory_costs_nominal_.data(), this->getNumRollouts()), 0); + this->setBaseline(mppi::kernels::computeBaselineCost(this->trajectory_costs_.data(), this->getNumRollouts()), 1); // In this case this->gamma = 1 / lambda - mppi::kernels::launchNormExpKernel(NUM_ROLLOUTS, this->getNormExpThreads(), trajectory_costs_nominal_d, + mppi::kernels::launchNormExpKernel(this->getNumRollouts(), this->getNormExpThreads(), trajectory_costs_nominal_d, 1.0 / this->getLambda(), this->getBaselineCost(0), this->stream_, false); - mppi::kernels::launchNormExpKernel(NUM_ROLLOUTS, this->getNormExpThreads(), trajectory_costs_real_d, + mppi::kernels::launchNormExpKernel(this->getNumRollouts(), this->getNormExpThreads(), trajectory_costs_real_d, 1.0 / this->getLambda(), this->getBaselineCost(1), this->stream_, false); - HANDLE_ERROR(cudaMemcpyAsync(this->trajectory_costs_.data(), trajectory_costs_real_d, NUM_ROLLOUTS * sizeof(float), - cudaMemcpyDeviceToHost, this->stream_)); + HANDLE_ERROR(cudaMemcpyAsync(this->trajectory_costs_.data(), trajectory_costs_real_d, + this->getNumRollouts() * sizeof(float), cudaMemcpyDeviceToHost, this->stream_)); HANDLE_ERROR(cudaMemcpyAsync(trajectory_costs_nominal_.data(), trajectory_costs_nominal_d, - NUM_ROLLOUTS * sizeof(float), cudaMemcpyDeviceToHost, this->stream_)); + this->getNumRollouts() * sizeof(float), cudaMemcpyDeviceToHost, this->stream_)); HANDLE_ERROR(cudaStreamSynchronize(this->stream_)); // Launch the weighted reduction kernel for the nominal costs and the real costs - this->setNormalizer(mppi::kernels::computeNormalizer(trajectory_costs_nominal_.data(), NUM_ROLLOUTS), 0); - this->setNormalizer(mppi::kernels::computeNormalizer(this->trajectory_costs_.data(), NUM_ROLLOUTS), 1); + this->setNormalizer(mppi::kernels::computeNormalizer(trajectory_costs_nominal_.data(), this->getNumRollouts()), 0); + this->setNormalizer(mppi::kernels::computeNormalizer(this->trajectory_costs_.data(), this->getNumRollouts()), 1); // Compute real free energy mppi::kernels::computeFreeEnergy(this->free_energy_statistics_.real_sys.freeEnergyMean, this->free_energy_statistics_.real_sys.freeEnergyVariance, this->free_energy_statistics_.real_sys.freeEnergyModifiedVariance, - this->trajectory_costs_.data(), NUM_ROLLOUTS, this->getBaselineCost(1), + this->trajectory_costs_.data(), this->getNumRollouts(), this->getBaselineCost(1), this->getLambda()); // Compute Nominal State free Energy mppi::kernels::computeFreeEnergy(this->free_energy_statistics_.nominal_sys.freeEnergyMean, this->free_energy_statistics_.nominal_sys.freeEnergyVariance, this->free_energy_statistics_.nominal_sys.freeEnergyModifiedVariance, - this->trajectory_costs_nominal_.data(), NUM_ROLLOUTS, this->getBaselineCost(0), - this->getLambda()); + this->trajectory_costs_nominal_.data(), this->getNumRollouts(), + this->getBaselineCost(0), this->getLambda()); // Calculate new optimal trajectories this->sampler_->updateDistributionParamsFromDevice(trajectory_costs_nominal_d, this->getNormalizerCost(0), 0, @@ -737,10 +774,10 @@ void RobustMPPI::computeControl(const Eigen::Ref& state, int // Compute the nominal trajectory because we updated the nominal control! this->computeStateTrajectoryHelper(nominal_state_trajectory_, nominal_state_, nominal_control_trajectory_); - this->free_energy_statistics_.real_sys.normalizerPercent = this->getNormalizerCost(1) / NUM_ROLLOUTS; + this->free_energy_statistics_.real_sys.normalizerPercent = this->getNormalizerCost(1) / this->getNumRollouts(); this->free_energy_statistics_.real_sys.increase = this->getBaselineCost(1) - this->free_energy_statistics_.real_sys.previousBaseline; - this->free_energy_statistics_.nominal_sys.normalizerPercent = this->getNormalizerCost(0) / NUM_ROLLOUTS; + this->free_energy_statistics_.nominal_sys.normalizerPercent = this->getNormalizerCost(0) / this->getNumRollouts(); this->free_energy_statistics_.nominal_sys.increase = this->getBaselineCost(0) - this->free_energy_statistics_.nominal_sys.previousBaseline; diff --git a/include/mppi/controllers/R-MPPI/robust_mppi_controller.cuh b/include/mppi/controllers/R-MPPI/robust_mppi_controller.cuh index 2eefcaea..fbda79eb 100644 --- a/include/mppi/controllers/R-MPPI/robust_mppi_controller.cuh +++ b/include/mppi/controllers/R-MPPI/robust_mppi_controller.cuh @@ -42,8 +42,8 @@ // Needed for list of candidate states #include -template -struct RobustMPPIParams : public ControllerParams +template +struct RobustMPPIParams : public ControllerParams { float value_function_threshold_ = 1000.0; int optimization_stride_ = 1; @@ -52,19 +52,16 @@ struct RobustMPPIParams : public ControllerParams dim3 eval_dyn_kernel_dim_; }; -// template , -// int SAMPLES_PER_CONDITION_MULTIPLIER = 1> -template , - class PARAMS_T = RobustMPPIParams> -class RobustMPPIController : public Controller + class PARAMS_T = RobustMPPIParams> +class RobustMPPIController : public Controller { public: /** * Set up useful types */ - typedef Controller PARENT_CLASS; + typedef Controller PARENT_CLASS; using control_array = typename PARENT_CLASS::control_array; using control_trajectory = typename PARENT_CLASS::control_trajectory; @@ -101,11 +98,11 @@ public: * @param num_timesteps The number of timesteps to look ahead for. * TODO Finish this description */ - RobustMPPIController(DYN_T* model, COST_T* cost, FB_T* fb_controller, SAMPLING_T* sampler, float dt, int max_iter, - float lambda, float alpha, float value_function_threshold, int num_timesteps = MAX_TIMESTEPS, - const Eigen::Ref& init_control_traj = control_trajectory::Zero(), - int num_candidate_nominal_states = 9, int optimization_stride = 1, - cudaStream_t stream = nullptr); + RobustMPPIController( + DYN_T* model, COST_T* cost, FB_T* fb_controller, SAMPLING_T* sampler, float dt, int max_iter, float lambda, + float alpha, float value_function_threshold, int num_timesteps, int num_rollouts, + const Eigen::Ref& init_control_traj = control_trajectory::Zero(DYN_T::CONTROL_DIM, 1), + int num_candidate_nominal_states = 9, int optimization_stride = 1, cudaStream_t stream = nullptr); RobustMPPIController(DYN_T* model, COST_T* cost, FB_T* fb_controller, SAMPLING_T* sampler, PARAMS_T& params, cudaStream_t stream = nullptr); @@ -240,6 +237,10 @@ public: void setParams(const PARAMS_T& p); + void setNumTimestepsHelper(const int num_timesteps, const bool update_gpu_mem = true) override; + + void setNumRolloutsHelper(const int num_timesteps, const bool update_gpu_mem = true) override; + void calculateSampledStateTrajectories() override; void chooseAppropriateKernel() override; @@ -261,9 +262,9 @@ protected: float nominal_free_energy_modified_variance_ = 0; // Storage classes - control_trajectory nominal_control_trajectory_ = control_trajectory::Zero(); - state_trajectory nominal_state_trajectory_ = state_trajectory::Zero(); - sampled_cost_traj trajectory_costs_nominal_ = sampled_cost_traj::Zero(); + control_trajectory nominal_control_trajectory_; + state_trajectory nominal_state_trajectory_; + sampled_cost_traj trajectory_costs_nominal_; // Make the control history size flexible, related to issue #30 Eigen::Matrix nominal_control_history_; // History used for nominal_state IS diff --git a/include/mppi/controllers/Tube-MPPI/tube_mppi_controller.cu b/include/mppi/controllers/Tube-MPPI/tube_mppi_controller.cu index df20db9b..e42b8bb1 100644 --- a/include/mppi/controllers/Tube-MPPI/tube_mppi_controller.cu +++ b/include/mppi/controllers/Tube-MPPI/tube_mppi_controller.cu @@ -1,26 +1,32 @@ #include "tube_mppi_controller.cuh" #include -#define TUBE_MPPI_TEMPLATE \ - template +#define TUBE_MPPI_TEMPLATE template -#define TubeMPPI TubeMPPIController +#define TubeMPPI TubeMPPIController TUBE_MPPI_TEMPLATE TubeMPPI::TubeMPPIController(DYN_T* model, COST_T* cost, FB_T* fb_controller, SAMPLING_T* sampler, float dt, - int max_iter, float lambda, float alpha, int num_timesteps, + int max_iter, float lambda, float alpha, int num_timesteps, int num_rollouts, const Eigen::Ref& init_control_traj, cudaStream_t stream) - : PARENT_CLASS(model, cost, fb_controller, sampler, dt, max_iter, lambda, alpha, num_timesteps, init_control_traj, - stream) + : PARENT_CLASS(model, cost, fb_controller, sampler, dt, max_iter, lambda, alpha, num_timesteps, num_rollouts, + init_control_traj, stream) { - nominal_control_trajectory_ = init_control_traj; - // call rollout kernel with z = 2 since we have a nominal state this->params_.dynamics_rollout_dim_.z = max(2, this->params_.dynamics_rollout_dim_.z); this->params_.cost_rollout_dim_.z = max(2, this->params_.cost_rollout_dim_.z); this->sampler_->setNumDistributions(2); + // Properly size nominal variables + setNumTimestepsHelper(this->getNumTimesteps(), false); + setNumRolloutsHelper(this->getNumRollouts(), false); + + // Zero the nominal trajectories + nominal_state_trajectory_.setZero(); + nominal_state_trajectory_.setZero(); + nominal_control_trajectory_ = this->params_.init_control_traj_; + trajectory_costs_nominal_.setZero(); + // Allocate CUDA memory for the controller allocateCUDAMemory(); @@ -35,13 +41,21 @@ TubeMPPI::TubeMPPIController(DYN_T* model, COST_T* cost, FB_T* fb_controller, SA cudaStream_t stream) : PARENT_CLASS(model, cost, fb_controller, sampler, params, stream) { - nominal_control_trajectory_ = this->params_.init_control_traj_; - // call rollout kernel with z = 2 since we have a nominal state this->params_.dynamics_rollout_dim_.z = max(2, this->params_.dynamics_rollout_dim_.z); this->params_.cost_rollout_dim_.z = max(2, this->params_.cost_rollout_dim_.z); this->sampler_->setNumDistributions(2); + // Properly size nominal variables + setNumTimestepsHelper(this->getNumTimesteps(), false); + setNumRolloutsHelper(this->getNumRollouts(), false); + + // Zero the nominal trajectories + nominal_state_trajectory_.setZero(); + nominal_state_trajectory_.setZero(); + nominal_control_trajectory_ = this->params_.init_control_traj_; + trajectory_costs_nominal_.setZero(); + // Allocate CUDA memory for the controller allocateCUDAMemory(); @@ -114,7 +128,7 @@ void TubeMPPI::chooseAppropriateKernel() for (int i = 0; i < this->getNumKernelEvaluations() && !too_much_mem_single_kernel; i++) { mppi::kernels::launchRolloutKernel( - this->model_, this->cost_, this->sampler_, this->getDt(), this->getNumTimesteps(), NUM_ROLLOUTS, + this->model_, this->cost_, this->sampler_, this->getDt(), this->getNumTimesteps(), this->getNumRollouts(), this->getLambda(), this->getAlpha(), this->initial_state_d_, this->trajectory_costs_d_, this->params_.dynamics_rollout_dim_, this->stream_, true); } @@ -123,7 +137,7 @@ void TubeMPPI::chooseAppropriateKernel() for (int i = 0; i < this->getNumKernelEvaluations() && !too_much_mem_split_kernel; i++) { mppi::kernels::launchSplitRolloutKernel( - this->model_, this->cost_, this->sampler_, this->getDt(), this->getNumTimesteps(), NUM_ROLLOUTS, + this->model_, this->cost_, this->sampler_, this->getDt(), this->getNumTimesteps(), this->getNumRollouts(), this->getLambda(), this->getAlpha(), this->initial_state_d_, this->output_d_, this->trajectory_costs_d_, this->params_.dynamics_rollout_dim_, this->params_.cost_rollout_dim_, this->stream_, true); } @@ -150,8 +164,27 @@ void TubeMPPI::chooseAppropriateKernel() kernel_choice = "single"; } this->logger_->info("Choosing %s kernel based on split taking %f ms and single taking %f ms after %d iterations\n", - kernel_choice.c_str(), split_kernel_time_ms, single_kernel_time_ms, - this->getNumKernelEvaluations()); + kernel_choice.c_str(), split_kernel_time_ms, single_kernel_time_ms, + this->getNumKernelEvaluations()); +} + +TUBE_MPPI_TEMPLATE +void TubeMPPI::setNumTimestepsHelper(const int num_timesteps, const bool update_gpu_mem) +{ + PARENT_CLASS::setNumTimestepsHelper(num_timesteps, update_gpu_mem); + // Set up nominal trajectories + PARENT_CLASS::resizeTimeTrajectory(nominal_control_trajectory_, num_timesteps); + nominal_control_trajectory_ = this->params_.init_control_traj_; + PARENT_CLASS::resizeTimeTrajectory(nominal_state_trajectory_, num_timesteps); +} + +TUBE_MPPI_TEMPLATE +void TubeMPPI::setNumRolloutsHelper(const int num_rollouts, const bool update_gpu_mem) +{ + PARENT_CLASS::setNumRolloutsHelper(num_rollouts, update_gpu_mem); + // Set up nominal trajectories + Eigen::NoChange_t same_col = Eigen::NoChange_t::NoChange; + trajectory_costs_nominal_.conservativeResize(num_rollouts, same_col); } TUBE_MPPI_TEMPLATE @@ -171,7 +204,7 @@ void TubeMPPI::computeControl(const Eigen::Ref& state, int op // std::cout << " Nominal State: "; this->model_->printState(nominal_state_trajectory_.col(0).data()); // Handy reference pointers to the nominal state - float* trajectory_costs_nominal_d = this->trajectory_costs_d_ + NUM_ROLLOUTS; + float* trajectory_costs_nominal_d = this->trajectory_costs_d_ + this->getNumRollouts(); float* initial_state_nominal_d = this->initial_state_d_ + DYN_T::STATE_DIM; for (int opt_iter = 0; opt_iter < this->getNumIters(); opt_iter++) @@ -196,59 +229,61 @@ void TubeMPPI::computeControl(const Eigen::Ref& state, int op if (this->getKernelChoiceAsEnum() == kernelType::USE_SPLIT_KERNELS) { mppi::kernels::launchSplitRolloutKernel( - this->model_, this->cost_, this->sampler_, this->getDt(), this->getNumTimesteps(), NUM_ROLLOUTS, + this->model_, this->cost_, this->sampler_, this->getDt(), this->getNumTimesteps(), this->getNumRollouts(), this->getLambda(), this->getAlpha(), this->initial_state_d_, this->output_d_, this->trajectory_costs_d_, this->params_.dynamics_rollout_dim_, this->params_.cost_rollout_dim_, this->stream_, false); } else { mppi::kernels::launchRolloutKernel( - this->model_, this->cost_, this->sampler_, this->getDt(), this->getNumTimesteps(), NUM_ROLLOUTS, + this->model_, this->cost_, this->sampler_, this->getDt(), this->getNumTimesteps(), this->getNumRollouts(), this->getLambda(), this->getAlpha(), this->initial_state_d_, this->trajectory_costs_d_, this->params_.dynamics_rollout_dim_, this->stream_, false); } // Copy the costs back to the host HANDLE_ERROR(cudaMemcpyAsync(this->trajectory_costs_.data(), this->trajectory_costs_d_, - NUM_ROLLOUTS * sizeof(float), cudaMemcpyDeviceToHost, this->stream_)); + this->getNumRollouts() * sizeof(float), cudaMemcpyDeviceToHost, this->stream_)); HANDLE_ERROR(cudaMemcpyAsync(trajectory_costs_nominal_.data(), trajectory_costs_nominal_d, - NUM_ROLLOUTS * sizeof(float), cudaMemcpyDeviceToHost, this->stream_)); + this->getNumRollouts() * sizeof(float), cudaMemcpyDeviceToHost, this->stream_)); HANDLE_ERROR(cudaStreamSynchronize(this->stream_)); - this->setBaseline(mppi::kernels::computeBaselineCost(this->trajectory_costs_.data(), NUM_ROLLOUTS), 0); - this->setBaseline(mppi::kernels::computeBaselineCost(this->trajectory_costs_nominal_.data(), NUM_ROLLOUTS), 1); + this->setBaseline(mppi::kernels::computeBaselineCost(this->trajectory_costs_.data(), this->getNumRollouts()), 0); + this->setBaseline( + mppi::kernels::computeBaselineCost(this->trajectory_costs_nominal_.data(), this->getNumRollouts()), 1); // Launch the norm exponential kernel for both actual and nominal - mppi::kernels::launchNormExpKernel(NUM_ROLLOUTS, this->getNormExpThreads(), this->trajectory_costs_d_, + mppi::kernels::launchNormExpKernel(this->getNumRollouts(), this->getNormExpThreads(), this->trajectory_costs_d_, 1.0 / this->getLambda(), this->getBaselineCost(0), this->stream_, false); - mppi::kernels::launchNormExpKernel(NUM_ROLLOUTS, this->getNormExpThreads(), trajectory_costs_nominal_d, + mppi::kernels::launchNormExpKernel(this->getNumRollouts(), this->getNormExpThreads(), trajectory_costs_nominal_d, 1.0 / this->getLambda(), this->getBaselineCost(1), this->stream_, false); HANDLE_ERROR(cudaMemcpyAsync(this->trajectory_costs_.data(), this->trajectory_costs_d_, - NUM_ROLLOUTS * sizeof(float), cudaMemcpyDeviceToHost, this->stream_)); + this->getNumRollouts() * sizeof(float), cudaMemcpyDeviceToHost, this->stream_)); HANDLE_ERROR(cudaMemcpyAsync(trajectory_costs_nominal_.data(), trajectory_costs_nominal_d, - NUM_ROLLOUTS * sizeof(float), cudaMemcpyDeviceToHost, this->stream_)); + this->getNumRollouts() * sizeof(float), cudaMemcpyDeviceToHost, this->stream_)); HANDLE_ERROR(cudaStreamSynchronize(this->stream_)); // Compute the normalizer - this->setNormalizer(mppi::kernels::computeNormalizer(this->trajectory_costs_.data(), NUM_ROLLOUTS), 0); - this->setNormalizer(mppi::kernels::computeNormalizer(this->trajectory_costs_nominal_.data(), NUM_ROLLOUTS), 1); + this->setNormalizer(mppi::kernels::computeNormalizer(this->trajectory_costs_.data(), this->getNumRollouts()), 0); + this->setNormalizer( + mppi::kernels::computeNormalizer(this->trajectory_costs_nominal_.data(), this->getNumRollouts()), 1); // Compute real free energy mppi::kernels::computeFreeEnergy(this->free_energy_statistics_.real_sys.freeEnergyMean, this->free_energy_statistics_.real_sys.freeEnergyVariance, this->free_energy_statistics_.real_sys.freeEnergyModifiedVariance, - this->trajectory_costs_.data(), NUM_ROLLOUTS, this->getBaselineCost(0), + this->trajectory_costs_.data(), this->getNumRollouts(), this->getBaselineCost(0), this->getLambda()); // Compute Nominal State free Energy mppi::kernels::computeFreeEnergy(this->free_energy_statistics_.nominal_sys.freeEnergyMean, this->free_energy_statistics_.nominal_sys.freeEnergyVariance, this->free_energy_statistics_.nominal_sys.freeEnergyModifiedVariance, - this->trajectory_costs_nominal_.data(), NUM_ROLLOUTS, this->getBaselineCost(1), - this->getLambda()); + this->trajectory_costs_nominal_.data(), this->getNumRollouts(), + this->getBaselineCost(1), this->getLambda()); // Compute the cost weighted average this->sampler_->updateDistributionParamsFromDevice(this->trajectory_costs_d_, this->getNormalizerCost(0), 0, false); @@ -286,10 +321,10 @@ void TubeMPPI::computeControl(const Eigen::Ref& state, int op smoothControlTrajectory(); computeStateTrajectory(state); // Input is the actual state - this->free_energy_statistics_.real_sys.normalizerPercent = this->getNormalizerCost(0) / NUM_ROLLOUTS; + this->free_energy_statistics_.real_sys.normalizerPercent = this->getNormalizerCost(0) / this->getNumRollouts(); this->free_energy_statistics_.real_sys.increase = this->getBaselineCost(0) - this->free_energy_statistics_.real_sys.previousBaseline; - this->free_energy_statistics_.nominal_sys.normalizerPercent = this->getNormalizerCost(1) / NUM_ROLLOUTS; + this->free_energy_statistics_.nominal_sys.normalizerPercent = this->getNormalizerCost(1) / this->getNumRollouts(); this->free_energy_statistics_.nominal_sys.increase = this->getBaselineCost(1) - this->free_energy_statistics_.nominal_sys.previousBaseline; @@ -308,7 +343,7 @@ void TubeMPPI::copyControlToDevice(bool synchronize) TUBE_MPPI_TEMPLATE void TubeMPPI::allocateCUDAMemory() { - PARENT_CLASS::allocateCUDAMemoryHelper(1); + PARENT_CLASS::allocateCUDAMemoryHelper(2); } TUBE_MPPI_TEMPLATE diff --git a/include/mppi/controllers/Tube-MPPI/tube_mppi_controller.cuh b/include/mppi/controllers/Tube-MPPI/tube_mppi_controller.cuh index 91bf4943..d3a96768 100644 --- a/include/mppi/controllers/Tube-MPPI/tube_mppi_controller.cuh +++ b/include/mppi/controllers/Tube-MPPI/tube_mppi_controller.cuh @@ -14,23 +14,23 @@ #include #include -template -struct TubeMPPIParams : public ControllerParams +template +struct TubeMPPIParams : public ControllerParams { float nominal_threshold_ = 20; // How much worse the actual system has to be compared to the nominal }; -template , - class PARAMS_T = TubeMPPIParams> -class TubeMPPIController : public Controller + class PARAMS_T = TubeMPPIParams> +class TubeMPPIController : public Controller { public: // EIGEN_MAKE_ALIGNED_OPERATOR_NEW unnecessary due to EIGEN_MAX_ALIGN_BYTES=0 /** * Set up useful types */ - typedef Controller PARENT_CLASS; + typedef Controller PARENT_CLASS; using control_array = typename PARENT_CLASS::control_array; using control_trajectory = typename PARENT_CLASS::control_trajectory; using state_trajectory = typename PARENT_CLASS::state_trajectory; @@ -40,10 +40,11 @@ public: using FEEDBACK_PARAMS = typename PARENT_CLASS::TEMPLATED_FEEDBACK_PARAMS; using FEEDBACK_GPU = typename PARENT_CLASS::TEMPLATED_FEEDBACK_GPU; - TubeMPPIController(DYN_T* model, COST_T* cost, FB_T* fb_controller, SAMPLING_T* sampler, float dt, int max_iter, - float lambda, float alpha, int num_timesteps = MAX_TIMESTEPS, - const Eigen::Ref& init_control_traj = control_trajectory::Zero(), - cudaStream_t stream = nullptr); + TubeMPPIController( + DYN_T* model, COST_T* cost, FB_T* fb_controller, SAMPLING_T* sampler, float dt, int max_iter, float lambda, + float alpha, int num_timesteps, int num_rollouts, + const Eigen::Ref& init_control_traj = control_trajectory::Zero(DYN_T::CONTROL_DIM, 1), + cudaStream_t stream = nullptr); TubeMPPIController(DYN_T* model, COST_T* cost, FB_T* fb_controller, SAMPLING_T* sampler, PARAMS_T& params, cudaStream_t stream = nullptr); @@ -104,7 +105,11 @@ public: void calculateSampledStateTrajectories() override; - void chooseAppropriateKernel(); + void chooseAppropriateKernel() override; + + void setNumTimestepsHelper(const int num_timesteps, const bool update_gpu_mem = true) override; + + void setNumRolloutsHelper(const int num_rollouts, const bool update_gpu_mem = true) override; private: // float nominal_threshold_ = 20; // How much worse the actual system has to be compared to the nominal @@ -115,9 +120,9 @@ private: float nominal_free_energy_modified_variance_ = 0; // nominal state CPU side copies - control_trajectory nominal_control_trajectory_ = control_trajectory::Zero(); - state_trajectory nominal_state_trajectory_ = state_trajectory::Zero(); - sampled_cost_traj trajectory_costs_nominal_ = sampled_cost_traj::Zero(); + control_trajectory nominal_control_trajectory_; + state_trajectory nominal_state_trajectory_; + sampled_cost_traj trajectory_costs_nominal_; // Check to see if nominal state has been initialized bool nominalStateInit_ = false; diff --git a/include/mppi/controllers/controller.cu b/include/mppi/controllers/controller.cu index 32cd9bed..6315dd01 100644 --- a/include/mppi/controllers/controller.cu +++ b/include/mppi/controllers/controller.cu @@ -1,9 +1,7 @@ #include -#define CONTROLLER_TEMPLATE \ - template -#define CONTROLLER Controller +#define CONTROLLER_TEMPLATE template +#define CONTROLLER Controller CONTROLLER_TEMPLATE void CONTROLLER::deallocateCUDAMemory() @@ -12,16 +10,18 @@ void CONTROLLER::deallocateCUDAMemory() { HANDLE_ERROR(cudaFree(initial_state_d_)); HANDLE_ERROR(cudaFree(vis_initial_state_d_)); - HANDLE_ERROR(cudaFree(control_d_)); + // HANDLE_ERROR(cudaFree(control_d_)); HANDLE_ERROR(cudaFree(output_d_)); HANDLE_ERROR(cudaFree(trajectory_costs_d_)); HANDLE_ERROR(cudaFree(cost_baseline_and_norm_d_)); + HANDLE_CURAND_ERROR(curandDestroyGenerator(gen_)); CUDA_mem_init_ = false; } if (sampled_states_CUDA_mem_init_) { HANDLE_ERROR(cudaFree(sampled_outputs_d_)); HANDLE_ERROR(cudaFree(sampled_costs_d_)); + HANDLE_ERROR(cudaFree(sampled_crash_status_d_)); sampled_states_CUDA_mem_init_ = false; } } @@ -60,7 +60,7 @@ void CONTROLLER::copySampledControlFromDevice(bool synchronize) return; } - int num_sampled_trajectories = perc_sampled_control_trajectories_ * NUM_ROLLOUTS; + int num_sampled_trajectories = perc_sampled_control_trajectories_ * getNumRollouts(); std::vector samples(num_sampled_trajectories); if (perc_sampled_control_trajectories_ > 0.98) { @@ -71,7 +71,7 @@ void CONTROLLER::copySampledControlFromDevice(bool synchronize) { // Create sample list without replacement // removes the top 2% since top 1% are complete noise - samples = mppi::math::sample_without_replacement(num_sampled_trajectories, NUM_ROLLOUTS * 0.98); + samples = mppi::math::sample_without_replacement(num_sampled_trajectories, getNumRollouts() * 0.98); } // this explicitly adds the optimized control sequence @@ -80,7 +80,7 @@ void CONTROLLER::copySampledControlFromDevice(bool synchronize) this->vis_stream_)); HANDLE_ERROR(cudaMemcpyAsync( // this->sampled_noise_d_, - this->sampler_->getVisControlSample(0, 0, 0), this->control_d_, + this->sampler_->getVisControlSample(0, 0, 0), this->sampler_->getDeviceOptimalControlSequence(0), sizeof(float) * getNumTimesteps() * DYN_T::CONTROL_DIM, cudaMemcpyDeviceToDevice, this->vis_stream_)); for (int i = 1; i < num_sampled_trajectories; i++) @@ -132,7 +132,7 @@ void CONTROLLER::copyTopControlFromDevice(bool synchronize) } // Important note: Highest weighted trajectories are the ones with the lowest cost - int start_top_control_traj_index = perc_sampled_control_trajectories_ * NUM_ROLLOUTS; + int start_top_control_traj_index = perc_sampled_control_trajectories_ * getNumRollouts(); std::vector samples(num_top_control_trajectories_); // Start by filling in the top samples list with the first n in the trajectory for (int i = 0; i < num_top_control_trajectories_; i++) @@ -146,7 +146,7 @@ void CONTROLLER::copyTopControlFromDevice(bool synchronize) std::tie(min_sample_index, min_sample_value) = findMinIndexAndValue(samples); // find top n samples by removing the smallest weights from the list - for (int i = num_top_control_trajectories_; i < NUM_ROLLOUTS; i++) + for (int i = num_top_control_trajectories_; i < getNumRollouts(); i++) { if (trajectory_costs_[i] > min_sample_value) { // Remove the smallest weight in the current list and add the new index @@ -207,76 +207,75 @@ void CONTROLLER::setSeedCUDARandomNumberGen(unsigned seed) } CONTROLLER_TEMPLATE -void CONTROLLER::allocateCUDAMemoryHelper(int nominal_size, bool allocate_double_noise) +void CONTROLLER::allocateCUDAMemoryHelper(const int num_systems) { - if (nominal_size < 0) + if (num_systems <= 0) { - nominal_size = 1; - std::cerr << "nominal size cannot be below 0 when allocateCudaMemoryHelper is called" << std::endl; + this->logger_->error("Number of systems cannot be below 1 when allocateCUDAMemoryHelper is called"); std::exit(-1); } - else + // TODO: Check if control_d_ is even used anymore + if (CUDA_mem_init_) { - // increment by 1 since actual is not included - ++nominal_size; + HANDLE_ERROR(cudaFree(initial_state_d_)); + HANDLE_ERROR(cudaFree(vis_initial_state_d_)); + // HANDLE_ERROR(cudaFree(control_d_)); + HANDLE_ERROR(cudaFree(output_d_)); + HANDLE_ERROR(cudaFree(trajectory_costs_d_)); + HANDLE_ERROR(cudaFree(cost_baseline_and_norm_d_)); } - HANDLE_ERROR(cudaMalloc((void**)&initial_state_d_, sizeof(float) * DYN_T::STATE_DIM * nominal_size)); - HANDLE_ERROR(cudaMalloc((void**)&vis_initial_state_d_, sizeof(float) * DYN_T::STATE_DIM * nominal_size)); - HANDLE_ERROR(cudaMalloc((void**)&control_d_, sizeof(float) * DYN_T::CONTROL_DIM * MAX_TIMESTEPS * nominal_size)); - HANDLE_ERROR( - cudaMalloc((void**)&output_d_, sizeof(float) * DYN_T::OUTPUT_DIM * MAX_TIMESTEPS * NUM_ROLLOUTS * nominal_size)); - HANDLE_ERROR(cudaMalloc((void**)&trajectory_costs_d_, sizeof(float) * NUM_ROLLOUTS * nominal_size)); - // HANDLE_ERROR(cudaMalloc((void**)&control_std_dev_d_, sizeof(float) * DYN_T::CONTROL_DIM)); - // HANDLE_ERROR(cudaMalloc((void**)&control_noise_d_, sizeof(float) * DYN_T::CONTROL_DIM * MAX_TIMESTEPS * - // NUM_ROLLOUTS * - // (allocate_double_noise ? nominal_size : 1))); - HANDLE_ERROR(cudaMalloc((void**)&cost_baseline_and_norm_d_, sizeof(float2) * nominal_size)); - cost_baseline_and_norm_.resize(nominal_size, make_float2(0.0, 0.0)); + HANDLE_ERROR(cudaMalloc((void**)&initial_state_d_, sizeof(float) * DYN_T::STATE_DIM * num_systems)); + HANDLE_ERROR(cudaMalloc((void**)&vis_initial_state_d_, sizeof(float) * DYN_T::STATE_DIM * num_systems)); + // HANDLE_ERROR(cudaMalloc((void**)&control_d_, sizeof(float) * DYN_T::CONTROL_DIM * getNumTimesteps() * + // num_systems)); + HANDLE_ERROR(cudaMalloc((void**)&output_d_, + sizeof(float) * DYN_T::OUTPUT_DIM * getNumTimesteps() * getNumRollouts() * num_systems)); + HANDLE_ERROR(cudaMalloc((void**)&trajectory_costs_d_, sizeof(float) * getNumRollouts() * num_systems)); + HANDLE_ERROR(cudaMalloc((void**)&cost_baseline_and_norm_d_, sizeof(float2) * num_systems)); + cost_baseline_and_norm_.resize(num_systems, make_float2(0.0, 0.0)); CUDA_mem_init_ = true; } CONTROLLER_TEMPLATE void CONTROLLER::resizeSampledControlTrajectories(float perc, int multiplier, int top_num) { - int num_sampled_trajectories = perc * NUM_ROLLOUTS + top_num; + int num_sampled_trajectories = perc * getNumRollouts() + top_num; if (sampled_states_CUDA_mem_init_) { - cudaFree(sampled_outputs_d_); - // cudaFree(sampled_noise_d_); - cudaFree(sampled_costs_d_); - cudaFree(sampled_crash_status_d_); + HANDLE_ERROR(cudaFree(sampled_outputs_d_)); + HANDLE_ERROR(cudaFree(sampled_costs_d_)); + HANDLE_ERROR(cudaFree(sampled_crash_status_d_)); sampled_states_CUDA_mem_init_ = false; } - sampled_trajectories_.resize(num_sampled_trajectories * multiplier, output_trajectory::Zero()); - sampled_costs_.resize(num_sampled_trajectories * multiplier, cost_trajectory::Zero()); - sampled_crash_status_.resize(num_sampled_trajectories * multiplier, crash_status_trajectory::Zero()); + sampled_trajectories_.resize(num_sampled_trajectories * multiplier, + output_trajectory::Zero(DYN_T::OUTPUT_DIM, getNumTimesteps())); + sampled_costs_.resize(num_sampled_trajectories * multiplier, cost_trajectory::Zero(1, getNumTimesteps() + 1)); + sampled_crash_status_.resize(num_sampled_trajectories * multiplier, + crash_status_trajectory::Zero(1, getNumTimesteps())); sampler_->setNumVisRollouts(num_sampled_trajectories); if (num_sampled_trajectories <= 0) { return; } - HANDLE_ERROR(cudaMalloc((void**)&sampled_outputs_d_, - sizeof(float) * DYN_T::OUTPUT_DIM * MAX_TIMESTEPS * num_sampled_trajectories * multiplier)); - // HANDLE_ERROR(cudaMalloc((void**)&sampled_noise_d_, - // sizeof(float) * DYN_T::CONTROL_DIM * MAX_TIMESTEPS * num_sampled_trajectories * - // multiplier)); + HANDLE_ERROR(cudaMalloc((void**)&sampled_outputs_d_, sizeof(float) * DYN_T::OUTPUT_DIM * getNumTimesteps() * + num_sampled_trajectories * multiplier)); // +1 for terminal cost HANDLE_ERROR(cudaMalloc((void**)&sampled_costs_d_, - sizeof(float) * (MAX_TIMESTEPS + 1) * num_sampled_trajectories * multiplier)); + sizeof(float) * (getNumTimesteps() + 1) * num_sampled_trajectories * multiplier)); HANDLE_ERROR(cudaMalloc((void**)&sampled_crash_status_d_, - sizeof(int) * MAX_TIMESTEPS * num_sampled_trajectories * multiplier)); + sizeof(int) * getNumTimesteps() * num_sampled_trajectories * multiplier)); sampled_states_CUDA_mem_init_ = true; } CONTROLLER_TEMPLATE std::vector CONTROLLER::getSampledNoise() { - std::vector vector = std::vector(NUM_ROLLOUTS * getNumTimesteps() * DYN_T::CONTROL_DIM, FLT_MIN); + std::vector vector = std::vector(getNumRollouts() * getNumTimesteps() * DYN_T::CONTROL_DIM, FLT_MIN); HANDLE_ERROR(cudaMemcpyAsync(vector.data(), this->sampler_->getControlSample(0, 0, 0), - sizeof(float) * NUM_ROLLOUTS * getNumTimesteps() * DYN_T::CONTROL_DIM, + sizeof(float) * getNumRollouts() * getNumTimesteps() * DYN_T::CONTROL_DIM, cudaMemcpyDeviceToHost, stream_)); HANDLE_ERROR(cudaStreamSynchronize(stream_)); return vector; diff --git a/include/mppi/controllers/controller.cuh b/include/mppi/controllers/controller.cuh index 140dd83c..89cb6c3c 100644 --- a/include/mppi/controllers/controller.cuh +++ b/include/mppi/controllers/controller.cuh @@ -43,18 +43,19 @@ enum class kernelType : int USE_SPLIT_KERNELS, // separate kernels for dynamics and cost calls }; -template +template struct ControllerParams { + EIGEN_MAKE_ALIGNED_OPERATOR_NEW static const int TEMPLATED_STATE_DIM = S_DIM; static const int TEMPLATED_CONTROL_DIM = C_DIM; - static const int TEMPLATED_MAX_TIMESTEPS = MAX_TIMESTEPS; float dt_; float lambda_ = 1.0; // Value of the temperature in the softmax. float alpha_ = 0.0; // // MAX_TIMESTEPS is defined as an upper bound, if lower that region is just ignored when calculating control // does not reallocate cuda memory - int num_timesteps_ = MAX_TIMESTEPS; + int num_timesteps_ = 1; + int num_rollouts_ = 1; int num_iters_ = 1; // Number of optimization iterations unsigned seed_ = std::chrono::system_clock::now().time_since_epoch().count(); @@ -63,12 +64,12 @@ struct ControllerParams dim3 visualize_dim_ = dim3(32, 1, 1); int norm_exp_kernel_parallelization_ = 64; - Eigen::Matrix init_control_traj_ = Eigen::Matrix::Zero(); Eigen::Matrix slide_control_scale_ = Eigen::Matrix::Zero(); + Eigen::Matrix init_control_traj_ = Eigen::Matrix::Zero(); }; -template > +template > class Controller { public: @@ -86,32 +87,32 @@ public: using TEMPLATED_FEEDBACK_PARAMS = typename FB_T::TEMPLATED_PARAMS; using TEMPLATED_FEEDBACK_GPU = typename FB_T::TEMPLATED_GPU_FEEDBACK; using TEMPLATED_SAMPLING_PARAMS = typename SAMPLING_T::SAMPLING_PARAMS_T; - static const int TEMPLATED_FEEDBACK_TIMESTEPS = FB_T::FB_TIMESTEPS; /** * Aliases */ // Control typedefs using control_array = typename DYN_T::control_array; - typedef Eigen::Matrix control_trajectory; // A control trajectory + typedef Eigen::Matrix control_trajectory; // A control trajectory // State typedefs using state_array = typename DYN_T::state_array; - typedef Eigen::Matrix state_trajectory; // A state trajectory + typedef Eigen::Matrix state_trajectory; // A state trajectory // Output typedefs using output_array = typename DYN_T::output_array; - typedef Eigen::Matrix output_trajectory; // An output trajectory + typedef Eigen::Matrix output_trajectory; // An output trajectory // Cost typedefs - typedef Eigen::Matrix cost_trajectory; // +1 for terminal cost - typedef Eigen::Matrix sampled_cost_traj; - typedef Eigen::Matrix crash_status_trajectory; + typedef Eigen::Matrix cost_trajectory; // +1 for terminal cost + typedef Eigen::Matrix sampled_cost_traj; + typedef Eigen::Matrix crash_status_trajectory; - Controller(DYN_T* model, COST_T* cost, FB_T* fb_controller, SAMPLING_T* sampler, float dt, int max_iter, float lambda, - float alpha, int num_timesteps = MAX_TIMESTEPS, - const Eigen::Ref& init_control_traj = control_trajectory::Zero(), - cudaStream_t stream = nullptr) + Controller( + DYN_T* model, COST_T* cost, FB_T* fb_controller, SAMPLING_T* sampler, float dt, int max_iter, float lambda, + float alpha, int num_timesteps, int num_rollouts, + const Eigen::Ref& init_control_traj = control_trajectory::Zero(DYN_T::CONTROL_DIM, 1), + cudaStream_t stream = nullptr) { // Create the random number generator createAndSeedCUDARandomNumberGen(); @@ -119,7 +120,9 @@ public: cost_ = cost; fb_controller_ = fb_controller; sampler_ = sampler; - sampler_->setNumRollouts(NUM_ROLLOUTS); + auto logger = std::make_shared(); + setLogger(logger); + sampler_->setNumDistributions(1); TEMPLATED_PARAMS params; params.dt_ = dt; @@ -127,11 +130,14 @@ public: params.lambda_ = lambda; params.alpha_ = alpha; params.num_timesteps_ = num_timesteps; + params.num_rollouts_ = num_rollouts; + + // Set initial control in temp params params.init_control_traj_ = init_control_traj; - setNumTimesteps(params.num_timesteps_); setParams(params); + setTrajectoriesToZero(); - control_ = init_control_traj; + control_ = params_.init_control_traj_; control_history_ = Eigen::Matrix::Zero(); // Bind the model and control to the given stream @@ -141,9 +147,6 @@ public: GPUSetup(); - auto logger = std::make_shared(); - setLogger(logger); - /** * When implementing your own version make sure to write your own allocateCUDAMemory and call it from the * constructor along with any other methods to copy memory to the device and back @@ -154,16 +157,18 @@ public: Controller(DYN_T* model, COST_T* cost, FB_T* fb_controller, SAMPLING_T* sampler, PARAMS_T& params, cudaStream_t stream = nullptr) { + // Create the random number generator + createAndSeedCUDARandomNumberGen(); model_ = model; cost_ = cost; fb_controller_ = fb_controller; sampler_ = sampler; - sampler_->setNumRollouts(NUM_ROLLOUTS); + auto logger = std::make_shared(); + setLogger(logger); + sampler_->setNumDistributions(1); - setNumTimesteps(params_.num_timesteps_); - // Create the random number generator - createAndSeedCUDARandomNumberGen(); setParams(params); + setTrajectoriesToZero(); control_ = params_.init_control_traj_; control_history_ = Eigen::Matrix::Zero(); @@ -174,9 +179,6 @@ public: GPUSetup(); - auto logger = std::make_shared(); - setLogger(logger); - /** * When implementing your own version make sure to write your own allocateCUDAMemory and call it from the * constructor along with any other methods to copy memory to the device and back @@ -466,7 +468,7 @@ public: // Indicator for algorithm health, should be between 0.01 and 0.1 anecdotally float getNormalizerPercent() const { - return this->getNormalizerCost() / (float)NUM_ROLLOUTS; + return this->getNormalizerCost() / (float)getNumRollouts(); } /** @@ -585,14 +587,15 @@ public: virtual void slideControlSequenceHelper(int steps, Eigen::Ref u) { + control_array zero_control = model_->getZeroControl(); for (int i = 0; i < getNumTimesteps(); ++i) { int ind = std::min(i + steps, getNumTimesteps() - 1); u.col(i) = u.col(ind); if (i + steps > getNumTimesteps() - 1) { - u.col(i) = (u.col(ind).array() - model_->zero_control_.array()) * params_.slide_control_scale_.array() + - model_->zero_control_.array(); + u.col(i) = + (u.col(ind).array() - zero_control.array()) * params_.slide_control_scale_.array() + zero_control.array(); } } } @@ -660,19 +663,114 @@ public: } } - void setNumTimesteps(int num_timesteps) + virtual void setNumRolloutsHelper(const int num_rollouts, const bool update_gpu_mem = true) { - // TODO fix the tracking controller as well - if ((num_timesteps <= MAX_TIMESTEPS) && (num_timesteps > 0)) + if (num_rollouts <= 0) + { + this->logger_->error( + "Attempted to change number of samples to %d. The number of samples must be greater than 0.\n"); + return; + } + + params_.num_rollouts_ = num_rollouts; + Eigen::NoChange_t same_col = Eigen::NoChange_t::NoChange; + trajectory_costs_.conservativeResize(num_rollouts, same_col); + sampler_->setNumRollouts(num_rollouts); + if (CUDA_mem_init_ && update_gpu_mem) { - params_.num_timesteps_ = num_timesteps; + allocateCUDAMemory(); + resizeSampledControlTrajectories(perc_sampled_control_trajectories_, sample_multiplier_, + num_top_control_trajectories_); } - else + } + + virtual void setNumRollouts(const int num_rollouts) + { + setNumRolloutsHelper(num_rollouts, true); + } + + virtual void setNumTimestepsHelper(const int num_timesteps, const bool update_gpu_mem = true) + { + if (num_timesteps <= 0) + { + this->logger_->error("You must give a number of timesteps greater than 0. Attempted timestep change: %d\n", + num_timesteps); + return; + } + bool require_fill_in = num_timesteps > params_.init_control_traj_.cols(); + int prev_size = params_.init_control_traj_.cols(); + params_.num_timesteps_ = num_timesteps; + resizeTimeTrajectory(control_, num_timesteps); + resizeTimeTrajectory(state_, num_timesteps); + resizeTimeTrajectory(output_, num_timesteps); + resizeTimeTrajectory(propagated_feedback_state_trajectory_, num_timesteps); + resizeTimeTrajectory(params_.init_control_traj_, num_timesteps); + for (std::size_t i = 0; i < sampled_trajectories_.size(); i++) + { + resizeTimeTrajectory(sampled_trajectories_[i], num_timesteps); + resizeTimeTrajectory<1>(sampled_costs_[i], num_timesteps + 1); + resizeTimeTrajectory<1, int>(sampled_crash_status_[i], num_timesteps); + } + if (require_fill_in) { - params_.num_timesteps_ = MAX_TIMESTEPS; - printf("You must give a number of timesteps between [0, %d]\n", MAX_TIMESTEPS); + control_array zero_control = model_->getZeroControl(); + for (int i = prev_size; i < num_timesteps; i++) + { // Fill time horizon with linear interpolation between last control and zero control + params_.init_control_traj_.col(i) = + (params_.init_control_traj_.col(prev_size - 1).array() - zero_control.array()) * + params_.slide_control_scale_.array() + + zero_control.array(); + } + // Reinitialize control_ to init_control_trajectory + control_ = params_.init_control_traj_; } sampler_->setNumTimesteps(params_.num_timesteps_); + fb_controller_->setNumTimesteps(params_.num_timesteps_); + if (CUDA_mem_init_ && update_gpu_mem) + { + allocateCUDAMemory(); + resizeSampledControlTrajectories(perc_sampled_control_trajectories_, sample_multiplier_, + num_top_control_trajectories_); + } + } + + virtual void setNumTimesteps(const int num_timesteps) + { + setNumTimestepsHelper(num_timesteps, true); + } + + template + void resizeTimeTrajectory(Eigen::Matrix& trajectory, int num_timesteps = -1) + { + if (num_timesteps == -1) + { + num_timesteps = getNumTimesteps(); + } + Eigen::NoChange_t same_row = Eigen::NoChange_t::NoChange; + trajectory.conservativeResize(same_row, num_timesteps); + } + + void setTrajectoriesToZero() + { + // control_.conservativeResize(Eigen::NoChange_t, num_timesteps); + // state_.conservativeResize(Eigen::NoChange_t, num_timesteps); + // output_.conservativeResize(Eigen::NoChange_t, num_timesteps); + // propagated_feedback_state_trajectory_.conservativeResize(Eigen::NoChange_t, num_timesteps); + // trajectory_costs_.conservativeResize(Eigen::NoChange_t, num_timesteps); + const int num_timesteps = getNumTimesteps(); + for (int i = 0; i < num_timesteps; i++) + { + control_.col(i) = model_->getZeroControl(); + state_.col(i) = model_->getZeroState(); + output_.col(i) = output_array::Zero(); + propagated_feedback_state_trajectory_.col(i) = model_->getZeroState(); + for (std::size_t j = 0; j < sampled_trajectories_.size(); j++) + { + sampled_trajectories_[j].col(i) = output_array::Zero(); + sampled_costs_[j](0, i) = 0.0f; + sampled_crash_status_[j](0, i) = 0; + } + } } void setBaseline(float baseline, int index = 0) @@ -685,6 +783,11 @@ public: cost_baseline_and_norm_[index].y = normalizer; }; + int getNumRollouts() const + { + return this->params_.num_rollouts_; + } + int getNumTimesteps() const { return this->params_.num_timesteps_; @@ -747,7 +850,7 @@ public: int getNumberSampledTrajectories() const { - return perc_sampled_control_trajectories_ * NUM_ROLLOUTS; + return perc_sampled_control_trajectories_ * getNumRollouts(); } int getNumberTopControlTrajectories() const @@ -835,16 +938,27 @@ public: { bool change_seed = p.seed_ != params_.seed_; bool change_num_timesteps = p.num_timesteps_ != params_.num_timesteps_; + bool change_num_rollouts = p.num_rollouts_ != params_.num_rollouts_; bool change_dt = p.dt_ != params_.dt_; - // bool change_std_dev = p.control_std_dev_ != params_.control_std_dev_; params_ = p; if (change_num_timesteps) { - setNumTimesteps(p.num_timesteps_); + setNumTimestepsHelper(p.num_timesteps_, false); } if (change_seed) { - setSeedCUDARandomNumberGen(params_.seed_); + setSeedCUDARandomNumberGen(p.seed_); + } + if (change_num_rollouts) + { + setNumRolloutsHelper(p.num_rollouts_, false); + } + + if ((change_num_rollouts || change_num_timesteps) && CUDA_mem_init_) + { + allocateCUDAMemory(); + resizeSampledControlTrajectories(perc_sampled_control_trajectories_, sample_multiplier_, + num_top_control_trajectories_); } if (change_dt) { @@ -963,15 +1077,15 @@ protected: // one array of this size is allocated for each state we care about, // so it can be the size*N for N nominal states // [actual, nominal] - float* control_d_; // Array of size DYN_T::CONTROL_DIM*NUM_TIMESTEPS*N + // float* control_d_; // Array of size DYN_T::CONTROL_DIM*NUM_TIMESTEPS*N float* output_d_; // Array of size DYN_T::OUTPUT_DIM*NUM_ROLLOUTS*N float* trajectory_costs_d_; // Array of size NUM_ROLLOUTS*N // float* control_noise_d_; // Array of size DYN_T::CONTROL_DIM*NUM_TIMESTEPS*NUM_ROLLOUTS*N float2* cost_baseline_and_norm_d_; // Array of size number of systems - control_trajectory control_ = control_trajectory::Zero(); - state_trajectory state_ = state_trajectory::Zero(); - output_trajectory output_ = output_trajectory::Zero(); - sampled_cost_traj trajectory_costs_ = sampled_cost_traj::Zero(); + control_trajectory control_; + state_trajectory state_; + output_trajectory output_; + sampled_cost_traj trajectory_costs_; std::vector cost_baseline_and_norm_ = { make_float2(0.0, 0.0) }; bool CUDA_mem_init_ = false; @@ -985,7 +1099,7 @@ protected: std::vector sampled_crash_status_; // Propagated real state trajectory - state_trajectory propagated_feedback_state_trajectory_ = state_trajectory::Zero(); + state_trajectory propagated_feedback_state_trajectory_; // tracking controller variables bool enable_feedback_ = false; @@ -1009,17 +1123,18 @@ protected: void setSeedCUDARandomNumberGen(unsigned seed); /** - * Allocates CUDA memory for actual states and nominal states if needed - * @param nominal_size if only actual this should be 0 + * @brief Allocates CUDA memory for actual states and nominal states if needed + * + * @param num_systems - Number of systems to allocate memory for */ - void allocateCUDAMemoryHelper(int nominal_size = 0, bool allocate_double_noise = true); + void allocateCUDAMemoryHelper(const int num_systems = 1); // TODO all the copy to device functions to streamline process private: // ======== MUST BE OVERWRITTEN ========= - void allocateCUDAMemory() + virtual void allocateCUDAMemory() { - allocateCUDAMemoryHelper(); + allocateCUDAMemoryHelper(this->sampler_->getNumDistributions()); }; /** * TODO all copy to device and back functions implemented for specific controller diff --git a/include/mppi/core/base_plant.hpp b/include/mppi/core/base_plant.hpp index ca141562..a4ad5261 100644 --- a/include/mppi/core/base_plant.hpp +++ b/include/mppi/core/base_plant.hpp @@ -131,12 +131,17 @@ class BasePlant controller_ = controller; hz_ = hz; optimization_stride_ = optimization_stride; - control_traj_ = c_traj::Zero(); - state_traj_ = s_traj::Zero(); + controller_->template resizeTimeTrajectory(control_traj_); + controller_->template resizeTimeTrajectory(state_traj_, controller_->getNumTimesteps()); + controller_->template resizeTimeTrajectory(output_traj_, controller_->getNumTimesteps()); + control_traj_.setZero(); + state_traj_.setZero(); + output_traj_.setZero(); dynamics_params_ = controller->model_->getParams(); cost_params_ = controller_->cost_->getParams(); init_state_ = controller_->model_->getZeroState(); state_ = controller_->model_->getZeroState(); + init_u_ = controller_->model_->getZeroControl(); auto logger = std::make_shared(); setLogger(logger); }; diff --git a/include/mppi/ddp/ddp_model_wrapper.h b/include/mppi/ddp/ddp_model_wrapper.h index 69470faf..5d52cabf 100644 --- a/include/mppi/ddp/ddp_model_wrapper.h +++ b/include/mppi/ddp/ddp_model_wrapper.h @@ -26,11 +26,6 @@ bool getGrad(T* model, typename DDP_structures::DynamicscomputeGrad(x, u, A, B); jac.block(0, 0, T::STATE_DIM, T::STATE_DIM) = A; jac.block(0, T::STATE_DIM, T::STATE_DIM, T::CONTROL_DIM) = B; - // for (int i = 0; i < T::STATE_DIM; i++){ - // for (int j = 0; j < T::STATE_DIM; j++){ - // jac(i,j) = A(i,j); - // } - // } return exists; } @@ -47,13 +42,12 @@ struct ModelWrapperDDP : public DDP_structures::Dynamics::State; - using Control = typename DDP_structures::Dynamics::Control; - using Jacobian = typename DDP_structures::Dynamics::Jacobian; - using StateTrajectory = - typename DDP_structures::Dynamics::StateTrajectory; - using ControlTrajectory = - typename DDP_structures::Dynamics::ControlTrajectory; + using PARENT_CLASS = typename DDP_structures::Dynamics; + using State = typename PARENT_CLASS::State; + using Control = typename PARENT_CLASS::Control; + using Jacobian = typename PARENT_CLASS::Jacobian; + using StateTrajectory = typename PARENT_CLASS::StateTrajectory; + using ControlTrajectory = typename PARENT_CLASS::ControlTrajectory; State state; Control control; @@ -89,7 +83,7 @@ struct ModelWrapperDDP : public DDP_structures::Dynamics::Has>()); if (!analyticGradComputed) { - j_ = DDP_structures::Dynamics::df(x, u); + j_ = PARENT_CLASS::df(x, u); } return j_; } diff --git a/include/mppi/feedback_controllers/DDP/ddp.cu b/include/mppi/feedback_controllers/DDP/ddp.cu index bac73eeb..910bd068 100644 --- a/include/mppi/feedback_controllers/DDP/ddp.cu +++ b/include/mppi/feedback_controllers/DDP/ddp.cu @@ -1,26 +1,228 @@ #include -template -DeviceDDPImpl::DeviceDDPImpl(int num_timesteps, cudaStream_t stream) - : num_timesteps_(num_timesteps) - , GPUFeedbackController>(stream) +/** + * Methods for DDPFeedbackState + */ + +template +DDPFeedbackState::DDPFeedbackState(int num_timesteps, cudaStream_t stream) +{ + setNumTimesteps(num_timesteps); + setCUDAStream(stream); +} + +template +DDPFeedbackState::DDPFeedbackState(const DDPFeedbackState& other) +{ + setCUDAStream(other.stream_); + setNumTimesteps(other.getNumTimesteps()); // this creates the CPU memory fb_gain_traj_ + if (other.fb_gain_traj_d_) + { + allocateCUDAMemory(); + HANDLE_ERROR(cudaMemcpyAsync(fb_gain_traj_d_, other.fb_gain_traj_d_, sizeof(float) * size(), + cudaMemcpyDeviceToDevice, stream_)); + HANDLE_ERROR(cudaStreamSynchronize(stream_)); + } + for (int i = 0; i < size(); i++) + { + fb_gain_traj_[i] = other.fb_gain_traj_[i]; + } +} + +// This method is not a member method but it can access member variables as it is friended +template +inline void swap(DDPFeedbackState& first, DDPFeedbackState& second) +{ + using std::swap; // Declare like this for ADL purposes + swap(first.num_timesteps_, second.num_timesteps_); + swap(first.stream_, second.stream_); + swap(first.fb_gain_traj_, second.fb_gain_traj_); + swap(first.fb_gain_traj_d_, second.fb_gain_traj_d_); +} + +// Let the compiler create a new DDPFeedbackState by explicitly using pass by value +template +DDPFeedbackState& DDPFeedbackState::operator=(DDPFeedbackState other) +{ + swap(*this, other); + return *this; +} + +template +DDPFeedbackState::~DDPFeedbackState() +{ + if (fb_gain_traj_ != nullptr) + { + delete[] fb_gain_traj_; + fb_gain_traj_ = nullptr; + } + deallocateCUDAMemory(); +} + +template +void DDPFeedbackState::setNumTimesteps(const int num_timesteps) +{ + const int prev_timesteps = this->num_timesteps_; + const bool larger_array_needed = num_timesteps > prev_timesteps; + this->num_timesteps_ = num_timesteps; + if (larger_array_needed) + { + allocateCUDAMemory(); + + // float* fb_gain_traj_new = new float[size()](); // initialize to zero + if (fb_gain_traj_ != nullptr) + { + // for (int i = 0; i < prev_timesteps i++) + // { + // fb_gain_traj_new[i] = fb_gain_traj_[i]; + // } + delete[] fb_gain_traj_; + } + fb_gain_traj_ = new float[size()](); // Initialize to zero + // fb_gain_traj_ = fb_gain_traj_new; + } +} + +template +void DDPFeedbackState::setCUDAStream(const cudaStream_t stream) +{ + stream_ = stream; +} + +template +__host__ __device__ int DDPFeedbackState::getNumTimesteps() const { + return num_timesteps_; } -template -__device__ void DeviceDDPImpl::k(const float* __restrict__ x_act, - const float* __restrict__ x_goal, const int t, - float* __restrict__ theta, - float* __restrict__ control_output) +template +__host__ __device__ float* DDPFeedbackState::getFeedbackGainPtr() const { - float* fb_gain_t = &(this->state_.fb_gain_traj_[DYN_T::STATE_DIM * DYN_T::CONTROL_DIM * t]); +#ifdef __CUDA_ARCH__ + return fb_gain_traj_d_; +#else + return fb_gain_traj_; +#endif +} + +template +__host__ __device__ const float* DDPFeedbackState::getConstFeedbackGainPtr() const +{ +#ifdef __CUDA_ARCH__ + return fb_gain_traj_d_; +#else + return fb_gain_traj_; +#endif +} + +template +__host__ __device__ std::size_t DDPFeedbackState::size() const +{ + return DYN_T::CONTROL_DIM * DYN_T::STATE_DIM * getNumTimesteps(); +} + +template +__host__ void DDPFeedbackState::allocateCUDAMemory() +{ + if (fb_gain_traj_d_) + { + deallocateCUDAMemory(); + } + HANDLE_ERROR(cudaMalloc((void**)&fb_gain_traj_d_, sizeof(float) * size())); +} + +template +__host__ void DDPFeedbackState::deallocateCUDAMemory() +{ + if (fb_gain_traj_d_) + { + HANDLE_ERROR(cudaFree(fb_gain_traj_d_)); + fb_gain_traj_d_ = nullptr; + } +} + +template +__host__ void DDPFeedbackState::copyToDevice(bool synchronize) +{ + HANDLE_ERROR( + cudaMemcpyAsync(fb_gain_traj_d_, fb_gain_traj_, sizeof(float) * size(), cudaMemcpyHostToDevice, stream_)); + if (synchronize) + { + HANDLE_ERROR(cudaStreamSynchronize(stream_)); + } +} + +template +bool DDPFeedbackState::isEqual(const DDPFeedbackState& other) const +{ + if (this->getNumTimesteps() != other.getNumTimesteps()) + { + return false; + } + for (int i = 0; i < this->size(); i++) + { + if (this->fb_gain_traj_[i] != other.fb_gain_traj_[i]) + { + return false; + } + } + return true; +} + +/** + * GPU Class for DDP Methods + */ +template +DeviceDDPImpl::DeviceDDPImpl(int num_timesteps, cudaStream_t stream) : PARENT_CLASS(stream) +{ + this->setNumTimesteps(num_timesteps); + this->state_.setCUDAStream(stream); +} + +template +void DeviceDDPImpl::allocateCUDAMemory() +{ + // this->state_.allocateCUDAMemory is not needed as it is done within setNumTimesteps() +} + +template +void DeviceDDPImpl::copyToDevice(bool synchronize) +{ + this->state_.copyToDevice(false); // Copy gains to GPU first + PARENT_CLASS::copyToDevice(synchronize); // Copy num_timesteps and fb_gain_ptrs to GPU +} + +template +void DeviceDDPImpl::deallocateCUDAMemory() +{ + this->state_.deallocateCUDAMemory(); +} + +template +void DeviceDDPImpl::setNumTimesteps(const int num_timesteps) +{ + this->state_.setNumTimesteps(num_timesteps); +} + +template +__host__ __device__ int DeviceDDPImpl::getNumTimesteps() const +{ + return this->state_.getNumTimesteps(); +} + +template +__device__ void DeviceDDPImpl::k(const float* __restrict__ x_act, const float* __restrict__ x_goal, + const int t, float* __restrict__ theta, + float* __restrict__ control_output) +{ + const float* fb_gain_t = &(this->state_.getConstFeedbackGainPtr()[DYN_T::STATE_DIM * DYN_T::CONTROL_DIM * t]); float e = 0; for (int i = 0; i < DYN_T::STATE_DIM; i++) { e = x_act[i] - x_goal[i]; if (DYN_T::CONTROL_DIM % 4 == 0) { // load 4 floats in at a time to save on global memory reads - float4* fb_gain_t4 = reinterpret_cast(&fb_gain_t[i * DYN_T::CONTROL_DIM]); + const float4* fb_gain_t4 = reinterpret_cast(&fb_gain_t[i * DYN_T::CONTROL_DIM]); for (int j = 0; j < DYN_T::CONTROL_DIM / 4; j++) { reinterpret_cast(control_output)[j] = fb_gain_t4[j] * e; @@ -28,7 +230,7 @@ __device__ void DeviceDDPImpl::k(const float* __ } else if (DYN_T::CONTROL_DIM % 2 == 0) { // load 2 floats in at a time to save on global memory reads - float2* fb_gain_t2 = reinterpret_cast(&fb_gain_t[i * DYN_T::CONTROL_DIM]); + const float2* fb_gain_t2 = reinterpret_cast(&fb_gain_t[i * DYN_T::CONTROL_DIM]); for (int j = 0; j < DYN_T::CONTROL_DIM / 2; j++) { reinterpret_cast(control_output)[j] = fb_gain_t2[j] * e; @@ -47,18 +249,18 @@ __device__ void DeviceDDPImpl::k(const float* __ /** * CPU Class for DDP Methods */ -template -DDPFeedback::DDPFeedback(DYN_T* model, float dt, int num_timesteps, cudaStream_t stream) +template +DDPFeedback::DDPFeedback(DYN_T* model, float dt, int num_timesteps, cudaStream_t stream) { model_ = model; - this->dt_ = dt; - this->num_timesteps_ = std::max(num_timesteps, NUM_TIMESTEPS); + this->setDt(dt); + this->setNumTimesteps(num_timesteps); this->gpu_controller_->freeCudaMem(); // Remove allocated CUDA mem from default constructor - this->gpu_controller_ = std::make_shared>(this->num_timesteps_, stream); + this->gpu_controller_ = std::make_shared>(this->num_timesteps_, stream); } -template -void DDPFeedback::initTrackingController() +template +void DDPFeedback::initTrackingController() { util::DefaultLogger logger; bool verbose = false; @@ -78,8 +280,8 @@ void DDPFeedback::initTrackingController() terminal_cost_ = std::make_shared>>(this->params_.Q_f); } -template -void DDPFeedback::setParams(const DDPParams& params) +template +void DDPFeedback::setParams(const DDPParams& params) { this->params_ = params; run_cost_ = @@ -87,10 +289,17 @@ void DDPFeedback::setParams(const DDPParams& params terminal_cost_ = std::make_shared>>(this->params_.Q_f); } -template -void DDPFeedback::computeFeedback(const Eigen::Ref& init_state, - const Eigen::Ref& goal_traj, - const Eigen::Ref& control_traj) +template +void DDPFeedback::setNumTimesteps(const int num_timesteps) +{ + PARENT_CLASS::setNumTimesteps(num_timesteps); + this->gpu_controller_->setNumTimesteps(num_timesteps); +} + +template +void DDPFeedback::computeFeedback(const Eigen::Ref& init_state, + const Eigen::Ref& goal_traj, + const Eigen::Ref& control_traj) { run_cost_->setTargets(goal_traj.data(), control_traj.data(), this->num_timesteps_); @@ -112,7 +321,7 @@ void DDPFeedback::computeFeedback(const Eigen::RefgetFeedbackStatePointer()->fb_gain_traj_[i_index + j] = result_.feedback_gain[i].data()[j]; + this->getFeedbackStatePointer()->getFeedbackGainPtr()[i_index + j] = result_.feedback_gain[i].data()[j]; } } // Actually put new feedback gain trajectory onto the GPU diff --git a/include/mppi/feedback_controllers/DDP/ddp.cuh b/include/mppi/feedback_controllers/DDP/ddp.cuh index 06bb4c3a..45fbe766 100644 --- a/include/mppi/feedback_controllers/DDP/ddp.cuh +++ b/include/mppi/feedback_controllers/DDP/ddp.cuh @@ -25,38 +25,57 @@ struct DDPParams int num_iterations = 1; }; -template +template struct DDPFeedbackState : GPUState { - static const int FEEDBACK_SIZE = DYN_T::CONTROL_DIM * DYN_T::STATE_DIM * N_TIMESTEPS; - static const int NUM_TIMESTEPS = N_TIMESTEPS; - /** * Variables **/ - float fb_gain_traj_[FEEDBACK_SIZE] MPPI_ALIGN(16) = { 0.0 }; // ensure it is aligned to 16 bytes + float* fb_gain_traj_ = nullptr; + float* fb_gain_traj_d_ = nullptr; + int num_timesteps_ = 0; + cudaStream_t stream_ = 0; + DDPFeedbackState(int num_timesteps = 1, cudaStream_t stream = 0); + + DDPFeedbackState(const DDPFeedbackState& other); // deep copy constructor + + ~DDPFeedbackState(); + + DDPFeedbackState& operator=(DDPFeedbackState other); + + template + friend void swap(DDPFeedbackState& first, DDPFeedbackState& second); /** * Methods **/ - bool isEqual(const DDPFeedbackState& other) const - { - for (int i = 0; i < FEEDBACK_SIZE; i++) - { - if (this->fb_gain_traj_[i] != other.fb_gain_traj_[i]) - { - return false; - } - } - return true; - } + + __host__ void allocateCUDAMemory(); + + __host__ void copyToDevice(bool synchronize = true); + + __host__ void deallocateCUDAMemory(); + + __host__ __device__ int getNumTimesteps() const; + + __host__ __device__ float* getFeedbackGainPtr() const; + + __host__ __device__ const float* getConstFeedbackGainPtr() const; + + bool isEqual(const DDPFeedbackState& other) const; + + void setNumTimesteps(const int num_timesteps); + + void setCUDAStream(const cudaStream_t stream); + + __host__ __device__ std::size_t size() const; }; /** * Needed for Test in base_plant_tester.cu **/ -template -bool operator==(const DDPFeedbackState& lhs, const DDPFeedbackState& rhs) +template +inline bool operator==(const DDPFeedbackState& lhs, const DDPFeedbackState& rhs) { return lhs.isEqual(rhs); }; @@ -66,52 +85,57 @@ bool operator==(const DDPFeedbackState& lhs, const DDPFeedba * methods for DDP on the GPU are implemented but it is not used directly since * setting up the GPU_FB_T value would be painful */ -template -class DeviceDDPImpl : public GPUFeedbackController> +template +class DeviceDDPImpl : public GPUFeedbackController> { public: - using PARAMS_T = DDPFeedbackState; + using PARAMS_T = DDPFeedbackState; + using PARENT_CLASS = GPUFeedbackController>; // static const int SHARED_MEM_REQUEST_BLK_BYTES = DYN_T::CONTROL_DIM * DYN_T::STATE_DIM; DeviceDDPImpl(int num_timesteps, cudaStream_t stream = 0); - DeviceDDPImpl(cudaStream_t stream = 0) - : GPUFeedbackController>(stream){}; + DeviceDDPImpl(cudaStream_t stream = 0) : PARENT_CLASS(stream) + { + this->state_.setNumTimesteps(1); + }; + + void allocateCUDAMemory(); + void deallocateCUDAMemory(); + void copyToDevice(bool synchronize = true); - void allocateCUDAMemory(){}; - void deallocateCUDAMemory(){}; + void setNumTimesteps(const int num_timesteps); - __device__ void k(const float* __restrict__ x_act, const float* __restrict__ x_goal, const int t, float* __restrict__ theta, float* __restrict__ control_output); + __host__ __device__ int getNumTimesteps() const; -protected: - // Needed for allocating memory for feedback gains - int num_timesteps_ = 1; + __device__ void k(const float* __restrict__ x_act, const float* __restrict__ x_goal, const int t, + float* __restrict__ theta, float* __restrict__ control_output); }; /** * Alias class for DDP GPU Controller. This sets up the class derivation correctly and is * used inside of the CPU version of DDP */ -template -class DeviceDDP : public DeviceDDPImpl, DYN_T, NUM_TIMESTEPS> +template +class DeviceDDP : public DeviceDDPImpl, DYN_T> { public: DeviceDDP(int num_timesteps, cudaStream_t stream = 0) - : DeviceDDPImpl, DYN_T, NUM_TIMESTEPS>(num_timesteps, stream){}; + : DeviceDDPImpl, DYN_T>(num_timesteps, stream){}; - DeviceDDP(cudaStream_t stream = 0) : DeviceDDPImpl, DYN_T, NUM_TIMESTEPS>(stream){}; + DeviceDDP(cudaStream_t stream = 0) : DeviceDDPImpl, DYN_T>(stream){}; }; /** * CPU Class for DDP. This is what the user should interact with */ -template -class DDPFeedback : public FeedbackController, DDPParams, NUM_TIMESTEPS> +template +class DDPFeedback : public FeedbackController, DDPParams> { public: /** * Aliases **/ typedef util::EigenAlignedVector feedback_gain_trajectory; - typedef FeedbackController, DDPParams, NUM_TIMESTEPS> PARENT_CLASS; + typedef FeedbackController, DDPParams> PARENT_CLASS; using control_array = typename PARENT_CLASS::control_array; using state_array = typename PARENT_CLASS::state_array; @@ -135,7 +159,7 @@ public: control_array control_max_; DYN_T* model_; - DDPFeedback(DYN_T* model, float dt, int num_timesteps = NUM_TIMESTEPS, cudaStream_t stream = 0); + DDPFeedback(DYN_T* model, float dt, int num_timesteps = 1, cudaStream_t stream = 0); void setParams(const DDPParams& params) override; @@ -145,7 +169,7 @@ public: INTERNAL_STATE_T& fb_state) { int index = DYN_T::STATE_DIM * DYN_T::CONTROL_DIM * t; - Eigen::Map fb_gain(&(fb_state.fb_gain_traj_[index])); + Eigen::Map fb_gain(&(fb_state.getConstFeedbackGainPtr()[index])); control_array u_output = fb_gain * (x_act - x_goal); return u_output; } @@ -155,6 +179,8 @@ public: return result_.feedback_gain; } + void setNumTimesteps(const int num_timesteps); + void computeFeedback(const Eigen::Ref& init_state, const Eigen::Ref& goal_traj, const Eigen::Ref& control_traj); diff --git a/include/mppi/feedback_controllers/feedback.cu b/include/mppi/feedback_controllers/feedback.cu index 6bd0a8bf..bf609b09 100644 --- a/include/mppi/feedback_controllers/feedback.cu +++ b/include/mppi/feedback_controllers/feedback.cu @@ -22,9 +22,9 @@ void GPUFeedbackController::freeCudaMem() { CLASS_T* derived = static_cast(this); derived->deallocateCUDAMemory(); - cudaFree(feedback_d_); - GPUMemStatus_ = false; + HANDLE_ERROR(cudaFree(feedback_d_)); feedback_d_ = nullptr; + GPUMemStatus_ = false; } } diff --git a/include/mppi/feedback_controllers/feedback.cuh b/include/mppi/feedback_controllers/feedback.cuh index 70dff08a..2eb34079 100644 --- a/include/mppi/feedback_controllers/feedback.cuh +++ b/include/mppi/feedback_controllers/feedback.cuh @@ -91,7 +91,8 @@ public: /** * ==================== NECESSARY METHODS TO OVERWRITE ===================== */ - __device__ void k(const float* __restrict__ x_act, const float* __restrict__ x_goal, const int t, float* __restrict__ theta, float* __restrict__ control_output) + __device__ void k(const float* __restrict__ x_act, const float* __restrict__ x_goal, const int t, + float* __restrict__ theta, float* __restrict__ control_output) { } /** @@ -108,8 +109,10 @@ public: { } - __device__ void initializeFeedback(const float* __restrict__ x, const float* __restrict__ u, float* __restrict__ theta, const float t, const float dt) - {} + __device__ void initializeFeedback(const float* __restrict__ x, const float* __restrict__ u, + float* __restrict__ theta, const float t, const float dt) + { + } // Abstract method to copy information to GPU // void copyToDevice() {} @@ -132,7 +135,7 @@ protected: * Write the feedback controller to use the GPUFeedback_act as thee GPU_FEEDBACK_T template option * It will then automatically create the right pointer */ -template +template class FeedbackController { public: @@ -142,19 +145,20 @@ public: typedef PARAMS_T TEMPLATED_PARAMS; typedef GPU_FB_T TEMPLATED_GPU_FEEDBACK; typedef typename GPU_FB_T::FEEDBACK_STATE_T TEMPLATED_FEEDBACK_STATE; - static const int FB_TIMESTEPS = NUM_TIMESTEPS; + // static const int FB_TIMESTEPS = NUM_TIMESTEPS; using state_array = typename DYN_T::state_array; using control_array = typename DYN_T::control_array; typedef Eigen::Matrix control_trajectory; // A control trajectory + Eigen::Dynamic> control_trajectory; // A control trajectory typedef Eigen::Matrix state_trajectory; // A state trajectory + Eigen::Dynamic> state_trajectory; // A state trajectory // Constructors and Generators - FeedbackController(float dt = 0.01, int num_timesteps = NUM_TIMESTEPS, cudaStream_t stream = 0) - : dt_(dt), num_timesteps_(num_timesteps) + FeedbackController(float dt = 0.01, int num_timesteps = 1, cudaStream_t stream = 0) { + setDt(dt); + setNumTimesteps(num_timesteps); gpu_controller_ = std::make_shared(stream); auto logger = std::make_shared(); setLogger(logger); @@ -195,8 +199,8 @@ public: * - x_goal: the state we want to be at * - index: the number of timesteps from the initial time we are */ - virtual __host__ control_array k(const Eigen::Ref& x_act, const Eigen::Ref& x_goal, - int t) + virtual __host__ control_array k(const Eigen::Ref& x_act, + const Eigen::Ref& x_goal, int t) { TEMPLATED_FEEDBACK_STATE* gpu_feedback_state = getFeedbackStatePointer(); return k_(x_act, x_goal, t, *gpu_feedback_state); @@ -204,18 +208,19 @@ public: /** * Feeback Control Method to overwrite. */ - virtual __host__ control_array k_(const Eigen::Ref& x_act, const Eigen::Ref& x_goal, - int t, TEMPLATED_FEEDBACK_STATE& fb_state) = 0; + virtual __host__ control_array k_(const Eigen::Ref& x_act, + const Eigen::Ref& x_goal, int t, + TEMPLATED_FEEDBACK_STATE& fb_state) = 0; // might not be a needed method virtual __host__ void computeFeedback(const Eigen::Ref& init_state, - const Eigen::Ref& goal_traj, - const Eigen::Ref& control_traj) = 0; + const Eigen::Ref& goal_traj, + const Eigen::Ref& control_traj) = 0; // TODO Construct a default version of this method that uses the state_ variable automatically virtual __host__ control_array interpolateFeedback_(const Eigen::Ref& state, - const Eigen::Ref& goal_state, double rel_time, - TEMPLATED_FEEDBACK_STATE& fb_state) + const Eigen::Ref& goal_state, double rel_time, + TEMPLATED_FEEDBACK_STATE& fb_state) { int lower_idx = (int)(rel_time / dt_); int upper_idx = lower_idx + 1; @@ -228,7 +233,7 @@ public: } virtual __host__ control_array interpolateFeedback(const Eigen::Ref& state, - const Eigen::Ref& goal_state, double rel_time) + const Eigen::Ref& goal_state, double rel_time) { TEMPLATED_FEEDBACK_STATE* fb_state = getFeedbackStatePointer(); return interpolateFeedback_(state, goal_state, rel_time, *fb_state); @@ -272,15 +277,25 @@ public: this->gpu_controller_->setFeedbackState(gpu_fb_state); } - float getDt() + float getDt() const { return dt_; } - void setDt(float dt) + void setDt(const float dt) { dt_ = dt; } + int getNumTimesteps() const + { + return num_timesteps_; + } + + void setNumTimesteps(const int num_timesteps) + { + num_timesteps_ = num_timesteps; + } + __host__ void setLogger(const mppi::util::MPPILoggerPtr& logger) { logger_ = logger; @@ -306,7 +321,7 @@ public: protected: std::shared_ptr gpu_controller_; float dt_; - int num_timesteps_; + int num_timesteps_ = 0; PARAMS_T params_; mppi::util::MPPILoggerPtr logger_ = nullptr; }; diff --git a/include/mppi/instantiations/autorally_mppi/autorally_mppi.cuh b/include/mppi/instantiations/autorally_mppi/autorally_mppi.cuh index a343f0d0..61f33a2f 100644 --- a/include/mppi/instantiations/autorally_mppi/autorally_mppi.cuh +++ b/include/mppi/instantiations/autorally_mppi/autorally_mppi.cuh @@ -8,13 +8,10 @@ // #ifdef USE_NEURAL_NETWORK_MODEL__ /*Use neural network dynamics model*/ const int MPPI_NUM_ROLLOUTS__ = 1920; -const int BLOCKSIZE_X = 8; -const int BLOCKSIZE_Y = 16; -const int NUM_TIMESTEPS = 150; typedef NeuralNetModel<7, 2, 3> DynamicsModel; typedef ARStandardCost CostFunctionClass; -typedef DDPFeedback FEEDBACK_T; +typedef DDPFeedback FEEDBACK_T; typedef mppi::sampling_distributions::GaussianDistribution Sampler; // #elif USE_BASIS_FUNC_MODEL__ /*Use the basis function model* */ diff --git a/include/mppi/sampling_distributions/gaussian/gaussian.cu b/include/mppi/sampling_distributions/gaussian/gaussian.cu index 98419066..a8da7030 100644 --- a/include/mppi/sampling_distributions/gaussian/gaussian.cu +++ b/include/mppi/sampling_distributions/gaussian/gaussian.cu @@ -477,6 +477,12 @@ __host__ void GAUSSIAN_CLASS::setHostOptimalControlSequence(float* optimal_contr } } +GAUSSIAN_TEMPLATE +__host__ __device__ float* GAUSSIAN_CLASS::getDeviceOptimalControlSequence(const int& distribution_idx) +{ + return &(this->control_means_d_[this->getNumTimesteps() * CONTROL_DIM * distribution_idx]); +} + GAUSSIAN_TEMPLATE __host__ __device__ float GAUSSIAN_CLASS::computeLikelihoodRatioCost(const float* __restrict__ u, float* __restrict__ theta_d, diff --git a/include/mppi/sampling_distributions/gaussian/gaussian.cuh b/include/mppi/sampling_distributions/gaussian/gaussian.cuh index 0546dd46..6772fe9e 100644 --- a/include/mppi/sampling_distributions/gaussian/gaussian.cuh +++ b/include/mppi/sampling_distributions/gaussian/gaussian.cuh @@ -160,6 +160,8 @@ public: PARENT_CLASS::setNumDistributions(num_distributions, synchronize); } + __host__ __device__ float* getDeviceOptimalControlSequence(const int& distribution_idx); + __host__ void updateDistributionParamsFromDevice(const float* trajectory_weights_d, float normalizer, const int& distribution_i, bool synchronize = false) override; diff --git a/include/mppi/sampling_distributions/sampling_distribution.cuh b/include/mppi/sampling_distributions/sampling_distribution.cuh index eb591ff3..bcb56656 100644 --- a/include/mppi/sampling_distributions/sampling_distribution.cuh +++ b/include/mppi/sampling_distributions/sampling_distribution.cuh @@ -402,6 +402,14 @@ public: __host__ void setHostOptimalControlSequence(float* optimal_control_trajectory, const int& distribution_idx, bool synchronize = true); + /** + * @brief Get the Device Optimal Control Sequence + * + * @param distribution_idx which distribution we are wanting the optimal control from (Useful for Tube and R-MPPI) + * @return device-side pointer to the optimal control sequence + */ + __host__ __device__ float* getDeviceOptimalControlSequence(const int& distribution_idx); + /** * @brief takes in the cost of each sample generated and conducts an update of the distribution (For Gaussians, mean * update) diff --git a/scripts/run_autoformatter.sh b/scripts/run_autoformatter.sh index 85b3658e..9a9a5e81 100755 --- a/scripts/run_autoformatter.sh +++ b/scripts/run_autoformatter.sh @@ -1,26 +1,31 @@ #! /usr/bin/env bash + +# if no arguments passed in, use current folder as path if [ "$1" == "" ]; then - dir="./" + paths=("./") else - dir="$1" + paths=( "$@" ) fi -# Format cuda header files -echo "Formatting cuh files..." -find $dir -name "*.cuh" ! -path "*build*" ! -path "*submodules*" | xargs -I % bash -c "clang-format --style=file -i --assume-filename=cu %" +for dir in "${paths[@]}"; do + # Format cuda header files + echo "Formatting cuh files..." + find $dir -name "*.cuh" ! -path "*build*" ! -path "*submodules*" | xargs -I % bash -c "clang-format --style=file -i --assume-filename=cu %" -# Format cuda source files -echo "Formatting cu files..." -find $dir -name "*.cu" ! -path "*build*" ! -path "*submodules*" | xargs -I % bash -c "clang-format --style=file -i %" + # Format cuda source files + echo "Formatting cu files..." + find $dir -name "*.cu" ! -path "*build*" ! -path "*submodules*" | xargs -I % bash -c "clang-format --style=file -i %" -# Format cpp source files -echo "Formatting cpp files..." -find $dir -name "*.cpp" ! -path "*build*" ! -path "*submodules*" | xargs -I % bash -c "clang-format --style=file -i %" + # Format cpp source files + echo "Formatting cpp files..." + find $dir -name "*.cpp" ! -path "*build*" ! -path "*submodules*" | xargs -I % bash -c "clang-format --style=file -i %" -# Format h source files -echo "Formatting h files..." -find $dir -name "*.h" ! -path "*build*" ! -path "*submodules*" | xargs -I % bash -c "clang-format --style=file -i %" + # Format h source files + echo "Formatting h files..." + find $dir -name "*.h" ! -path "*build*" ! -path "*submodules*" | xargs -I % bash -c "clang-format --style=file -i %" -# Format hpp source files -echo "Formatting hpp files..." -find $dir -name "*.hpp" ! -path "*build*" ! -path "*submodules*" | xargs -I % bash -c "clang-format --style=file -i %" + # Format hpp source files + echo "Formatting hpp files..." + find $dir -name "*.hpp" ! -path "*build*" ! -path "*submodules*" | xargs -I % bash -c "clang-format --style=file -i %" + shift # remove argument 1 and shift all arguments down +done diff --git a/src/controllers/autorally/autorally_mppi.cu b/src/controllers/autorally/autorally_mppi.cu index cff30fe5..df289ef2 100644 --- a/src/controllers/autorally/autorally_mppi.cu +++ b/src/controllers/autorally/autorally_mppi.cu @@ -7,7 +7,6 @@ */ // Convenience typedef for the MPPI Controller. -template class VanillaMPPIController; +template class VanillaMPPIController; // template class VanillaMPPIController; diff --git a/src/controllers/cartpole/cartpole_mppi.cu b/src/controllers/cartpole/cartpole_mppi.cu index f4c0df17..71d56fe0 100644 --- a/src/controllers/cartpole/cartpole_mppi.cu +++ b/src/controllers/cartpole/cartpole_mppi.cu @@ -5,14 +5,10 @@ * Will have a dynamics model of cartpole, some cost function, * and a controller of just MPPI, (not tube or R) */ -// Num_timesteps, num_rollouts, blockdim x, blockdim y -const int NUMBER_TIMESTEPS = 100; // template class GPUFeedbackController, CartpoleDynamics>, CartpoleDynamics>; // template class DeviceDDPImpl, CartpoleDynamics>; // template class DeviceDDP; -template class DDPFeedback; - -template class DDPFeedback; +template class DDPFeedback; #define SAMPLER_T mppi::sampling_distributions::GaussianDistribution @@ -27,20 +23,12 @@ template class mppi::sampling_distributions::GaussianDistributionImpl< template class mppi::sampling_distributions::SamplingDistribution< SAMPLER_T, mppi::sampling_distributions::GaussianParams, CartpoleDynamics::DYN_PARAMS_T>; -template class Controller, - SAMPLER_T, NUMBER_TIMESTEPS, 2048>; -template class Controller, - SAMPLER_T, NUMBER_TIMESTEPS, 256>; -template class Controller, SAMPLER_T, 150, - 512>; +template class Controller, + SAMPLER_T>; template class VanillaMPPIController, NUMBER_TIMESTEPS, 2048>; -template class VanillaMPPIController, NUMBER_TIMESTEPS, 256>; -template class VanillaMPPIController, 150, - 512>; + DDPFeedback>; #undef SAMPLER_T // template class TubeMPPIController, NUMBER_TIMESTEPS, 2048>; +// DDPFeedback, 2048>; diff --git a/src/controllers/double_integrator/double_integrator_mppi.cu b/src/controllers/double_integrator/double_integrator_mppi.cu index f29b245f..08f8ed71 100644 --- a/src/controllers/double_integrator/double_integrator_mppi.cu +++ b/src/controllers/double_integrator/double_integrator_mppi.cu @@ -9,13 +9,7 @@ typedef mppi::sampling_distributions::GaussianDistribution Sampler; template class VanillaMPPIController, 100, 512, Sampler>; -template class VanillaMPPIController, 50, 1024, Sampler>; + DDPFeedback, Sampler>; template class TubeMPPIController, 100, 512, Sampler>; -template class TubeMPPIController, 50, 1024, Sampler>; -template class TubeMPPIController, 100, 1024, Sampler>; + DDPFeedback, Sampler>; diff --git a/src/controllers/quadrotor/quadrotor_mppi.cu b/src/controllers/quadrotor/quadrotor_mppi.cu index a2f62aa5..dd000845 100644 --- a/src/controllers/quadrotor/quadrotor_mppi.cu +++ b/src/controllers/quadrotor/quadrotor_mppi.cu @@ -2,7 +2,7 @@ typedef mppi::sampling_distributions::GaussianDistribution Sampler; -template class VanillaMPPIController, - 100, 512, Sampler>; -template class VanillaMPPIController, 100, 512, +template class VanillaMPPIController, + Sampler>; +template class VanillaMPPIController, Sampler>; diff --git a/tests/controllers/controller_generic_tests.cu b/tests/controllers/controller_generic_tests.cu index e9354d1f..9edc0856 100644 --- a/tests/controllers/controller_generic_tests.cu +++ b/tests/controllers/controller_generic_tests.cu @@ -13,25 +13,26 @@ static const int number_rollouts = 1200; static const int NUM_TIMESTEPS = 100; -using FEEDBACK_T = DDPFeedback; +using FEEDBACK_T = DDPFeedback; const dim3 rolloutDim(1, 2, 1); using SAMPLER_T = mppi::sampling_distributions::GaussianDistribution; -class TestController : public Controller +class TestController : public Controller { public: - typedef Controller PARENT_CLASS; + typedef Controller PARENT_CLASS; using PARAMS_T = PARENT_CLASS::TEMPLATED_PARAMS; TestController(MockDynamics* model, MockCost* cost, FEEDBACK_T* fb_controller, SAMPLER_T* sampler, float dt, - int max_iter, float lambda, float alpha, int num_timesteps = 100, - const Eigen::Ref& init_control_traj = control_trajectory::Zero(), + int max_iter, float lambda, float alpha, int num_timesteps, int num_rollouts, + const Eigen::Ref& init_control_traj = + control_trajectory::Zero(MockDynamics::CONTROL_DIM, 1), cudaStream_t stream = nullptr) - : PARENT_CLASS(model, cost, fb_controller, sampler, dt, max_iter, lambda, alpha, num_timesteps, init_control_traj, - stream) + : PARENT_CLASS(model, cost, fb_controller, sampler, dt, max_iter, lambda, alpha, num_timesteps, num_rollouts, + init_control_traj, stream) { // Allocate CUDA memory for the controller - allocateCUDAMemoryHelper(0); + allocateCUDAMemoryHelper(1); } TestController(MockDynamics* model, MockCost* cost, FEEDBACK_T* fb_controller, SAMPLER_T* sampler, PARAMS_T& params, @@ -39,7 +40,7 @@ public: : PARENT_CLASS(model, cost, fb_controller, sampler, params, stream) { // Allocate CUDA memory for the controller - allocateCUDAMemoryHelper(0); + allocateCUDAMemoryHelper(1); } virtual void computeControl(const Eigen::Ref& state, int optimization_stride) override @@ -52,7 +53,7 @@ public: const std::array noise) { int trajectory_size = control_trajectory().size(); - for (int i = 0; i < number_rollouts; i++) + for (int i = 0; i < this->getNumRollouts(); i++) { HANDLE_ERROR(cudaMemcpyAsync(this->sampler_->getControlSample(i, 0, 0), noise[i].data(), sizeof(float) * trajectory_size, cudaMemcpyHostToDevice, stream_)); @@ -97,7 +98,8 @@ protected: EXPECT_CALL(*mockDynamics, GPUSetup()).Times(1); // EXPECT_CALL(mockFeedback, GPUSetup()).Times(1); - controller = new TestController(mockDynamics, mockCost, mockFeedback, sampler, dt, max_iter, lambda, alpha); + controller = new TestController(mockDynamics, mockCost, mockFeedback, sampler, dt, max_iter, lambda, alpha, + NUM_TIMESTEPS, number_rollouts); auto controller_params = controller->getParams(); controller_params.dynamics_rollout_dim_ = rolloutDim; controller_params.cost_rollout_dim_ = rolloutDim; @@ -105,11 +107,14 @@ protected: } void TearDown() override { + EXPECT_CALL(*mockDynamics, freeCudaMem()).Times(1); + EXPECT_CALL(*mockCost, freeCudaMem()).Times(1); delete controller; delete mockDynamics; delete mockCost; delete mockFeedback; delete sampler; + HANDLE_ERROR(cudaStreamDestroy(stream)); } MockDynamics* mockDynamics; @@ -129,7 +134,8 @@ TEST_F(ControllerTests, ConstructorDestructor) { int num_timesteps = 10; - TestController::control_trajectory init_control_trajectory = TestController::control_trajectory::Ones(); + TestController::control_trajectory init_control_trajectory = + TestController::control_trajectory::Ones(MockDynamics::CONTROL_DIM, num_timesteps); // expect double check rebind EXPECT_CALL(*mockCost, bindToStream(stream)).Times(1); @@ -140,8 +146,9 @@ TEST_F(ControllerTests, ConstructorDestructor) EXPECT_CALL(*mockCost, GPUSetup()).Times(1); EXPECT_CALL(*mockDynamics, GPUSetup()).Times(1); // EXPECT_CALL(mockFeedback, GPUSetup()).Times(1); - TestController* controller_test = new TestController(mockDynamics, mockCost, mockFeedback, sampler, dt, max_iter, - lambda, alpha, num_timesteps, init_control_trajectory, stream); + TestController* controller_test = + new TestController(mockDynamics, mockCost, mockFeedback, sampler, dt, max_iter, lambda, alpha, num_timesteps, + number_rollouts, init_control_trajectory, stream); EXPECT_EQ(controller_test->model_, mockDynamics); EXPECT_EQ(controller_test->cost_, mockCost); @@ -150,7 +157,7 @@ TEST_F(ControllerTests, ConstructorDestructor) EXPECT_EQ(controller_test->getLambda(), lambda); EXPECT_EQ(controller_test->getAlpha(), alpha); EXPECT_EQ(controller_test->getNumTimesteps(), num_timesteps); - EXPECT_EQ(controller_test->getControlSeq(), init_control_trajectory); + EXPECT_EQ(controller_test->getNumRollouts(), number_rollouts); EXPECT_EQ(controller_test->getStream(), stream); EXPECT_EQ(controller_test->getFeedbackEnabled(), false); @@ -158,6 +165,68 @@ TEST_F(ControllerTests, ConstructorDestructor) // EXPECT_NE(controller_test->getRandomSeed(), 0); // TODO check for correct defaults + EXPECT_CALL(*mockDynamics, freeCudaMem()).Times(1); + EXPECT_CALL(*mockCost, freeCudaMem()).Times(1); + delete controller_test; +} + +TEST_F(ControllerTests, ArgumentConstructorControlTrajectories) +{ + int num_timesteps = 10; + + TestController::control_trajectory init_control_trajectory = + TestController::control_trajectory::Random(MockDynamics::CONTROL_DIM, 1); + + // expect double check rebind + EXPECT_CALL(*mockCost, bindToStream(stream)).Times(1); + EXPECT_CALL(*mockDynamics, bindToStream(stream)).Times(1); + // // EXPECT_CALL(mockFeedback, bindToStream(stream)).Times(1); + + // // expect GPU setup called again + EXPECT_CALL(*mockCost, GPUSetup()).Times(1); + EXPECT_CALL(*mockDynamics, GPUSetup()).Times(1); + TestController* controller_test = + new TestController(mockDynamics, mockCost, mockFeedback, sampler, dt, max_iter, lambda, alpha, num_timesteps, + number_rollouts, init_control_trajectory, stream); + + auto new_controller_params = controller_test->getParams(); + TestController::control_array model_zero_control = mockDynamics->getZeroControl(); + TestController::control_array expected_end_control = + (init_control_trajectory.col(0).array() - model_zero_control.array()) * + new_controller_params.slide_control_scale_.array() + + model_zero_control.array(); + + // Check control trajectories + for (int t = 0; t < num_timesteps; t++) + { + if (t < init_control_trajectory.cols()) + { + EXPECT_FLOAT_EQ(fabsf((new_controller_params.init_control_traj_.col(t) - init_control_trajectory.col(t)).sum()), + 0.0f) + << "Params init control at time " << t << " did not match initial controls passed into constructor"; + } + else + { + EXPECT_NEAR(fabsf((new_controller_params.init_control_traj_.col(t) - expected_end_control).sum()), 0.0f, 1e-10f) + << "Params init control at time " << t << " did not match initial controls passed into constructor"; + } + } + TestController::control_trajectory new_control_trajectory = controller_test->getControlSeq(); + for (int t = 0; t < num_timesteps; t++) + { + if (t < init_control_trajectory.cols()) + { + EXPECT_NEAR(fabsf((new_control_trajectory.col(t) - init_control_trajectory.col(t)).sum()), 0.0f, 1e-10f) + << "Control vector at time " << t << " did not match initial controls passed into constructor"; + } + else + { + EXPECT_NEAR(fabsf((new_control_trajectory.col(t) - expected_end_control).sum()), 0.0f, 1e-10f) + << "Control vector at time " << t << " did not match initial controls passed into constructor"; + } + } + EXPECT_CALL(*mockDynamics, freeCudaMem()).Times(1); + EXPECT_CALL(*mockCost, freeCudaMem()).Times(1); delete controller_test; } @@ -166,11 +235,13 @@ TEST_F(ControllerTests, ParamBasedConstructor) int num_timesteps = 10; TestController::TEMPLATED_PARAMS controller_params; controller_params.num_timesteps_ = num_timesteps; + controller_params.num_rollouts_ = number_rollouts; controller_params.dt_ = dt; controller_params.num_iters_ = max_iter; controller_params.lambda_ = lambda; controller_params.alpha_ = alpha; - controller_params.init_control_traj_ = TestController::control_trajectory::Ones(); + controller_params.init_control_traj_ = + TestController::control_trajectory::Ones(MockDynamics::CONTROL_DIM, num_timesteps); // expect double check rebind EXPECT_CALL(*mockCost, bindToStream(stream)).Times(1); @@ -191,7 +262,8 @@ TEST_F(ControllerTests, ParamBasedConstructor) EXPECT_EQ(controller_test->getLambda(), lambda); EXPECT_EQ(controller_test->getAlpha(), alpha); EXPECT_EQ(controller_test->getNumTimesteps(), num_timesteps); - EXPECT_EQ(controller_test->getControlSeq(), controller_params.init_control_traj_); + EXPECT_EQ(controller_test->getNumRollouts(), number_rollouts); + // EXPECT_EQ(controller_test->getControlSeq(), controller_params.init_control_traj_); EXPECT_EQ(controller_test->getStream(), stream); EXPECT_EQ(controller_test->getFeedbackEnabled(), false); @@ -199,21 +271,121 @@ TEST_F(ControllerTests, ParamBasedConstructor) // EXPECT_NE(controller_test->getRandomSeed(), 0); // TODO check for correct defaults + EXPECT_CALL(*mockDynamics, freeCudaMem()).Times(1); + EXPECT_CALL(*mockCost, freeCudaMem()).Times(1); + delete controller_test; +} + +TEST_F(ControllerTests, ParamConstructorControlTrajectories) +{ + int num_timesteps = 10; + TestController::TEMPLATED_PARAMS controller_params; + controller_params.num_timesteps_ = num_timesteps; + controller_params.num_rollouts_ = number_rollouts; + controller_params.dt_ = dt; + controller_params.num_iters_ = max_iter; + controller_params.lambda_ = lambda; + controller_params.alpha_ = alpha; + controller_params.init_control_traj_ = TestController::control_trajectory::Random(MockDynamics::CONTROL_DIM, 1); + // fill in empty portions of control trajectories with last control value + controller_params.slide_control_scale_ = TestController::control_array::Ones(); + + // expect double check rebind + EXPECT_CALL(*mockCost, bindToStream(stream)).Times(1); + EXPECT_CALL(*mockDynamics, bindToStream(stream)).Times(1); + + // // expect GPU setup called again + EXPECT_CALL(*mockCost, GPUSetup()).Times(1); + EXPECT_CALL(*mockDynamics, GPUSetup()).Times(1); + TestController* controller_test = + new TestController(mockDynamics, mockCost, mockFeedback, sampler, controller_params, stream); + + auto new_controller_params = controller_test->getParams(); + // Check control trajectories + for (int t = 0; t < num_timesteps; t++) + { + EXPECT_FLOAT_EQ( + fabsf((new_controller_params.init_control_traj_.col(t) - controller_params.init_control_traj_.col(0)).sum()), + 0.0f) + << "Params init control at time " << t << " did not match initial controls passed into constructor"; + } + auto new_control_trajectory = controller_test->getControlSeq(); + for (int t = 0; t < num_timesteps; t++) + { + EXPECT_FLOAT_EQ(fabsf((new_control_trajectory.col(t) - controller_params.init_control_traj_.col(0)).sum()), 0.0f) + << "Control vector at time " << t << " did not match initial controls passed into constructor"; + } + EXPECT_CALL(*mockDynamics, freeCudaMem()).Times(1); + EXPECT_CALL(*mockCost, freeCudaMem()).Times(1); delete controller_test; } TEST_F(ControllerTests, setNumTimesteps) { controller->setNumTimesteps(10); + auto control_traj = controller->getControlSeq(); + auto target_state_traj = controller->getTargetStateSeq(); + auto actual_state_traj = controller->getActualStateSeq(); + auto feedback_state_traj = controller->getFeedbackPropagatedStateSeq(); + auto target_output_traj = controller->getTargetOutputSeq(); + auto actual_output_traj = controller->getActualOutputSeq(); + auto controller_params = controller->getParams(); + auto sampler_params = controller->getSamplingParams(); + EXPECT_EQ(controller->getNumTimesteps(), 10); + EXPECT_EQ(control_traj.cols(), 10); + EXPECT_EQ(target_state_traj.cols(), 10); + EXPECT_EQ(actual_state_traj.cols(), 10); + EXPECT_EQ(feedback_state_traj.cols(), 10); + EXPECT_EQ(target_output_traj.cols(), 10); + EXPECT_EQ(actual_output_traj.cols(), 10); + EXPECT_EQ(controller_params.init_control_traj_.cols(), 10); + EXPECT_EQ(sampler_params.num_timesteps, 10); controller->setNumTimesteps(1000); - EXPECT_EQ(controller->getNumTimesteps(), 100); + control_traj = controller->getControlSeq(); + target_state_traj = controller->getTargetStateSeq(); + actual_state_traj = controller->getActualStateSeq(); + feedback_state_traj = controller->getFeedbackPropagatedStateSeq(); + target_output_traj = controller->getTargetOutputSeq(); + actual_output_traj = controller->getActualOutputSeq(); + controller_params = controller->getParams(); + sampler_params = controller->getSamplingParams(); + + EXPECT_EQ(controller->getNumTimesteps(), 1000); + EXPECT_EQ(control_traj.cols(), 1000); + EXPECT_EQ(target_state_traj.cols(), 1000); + EXPECT_EQ(actual_state_traj.cols(), 1000); + EXPECT_EQ(feedback_state_traj.cols(), 1000); + EXPECT_EQ(target_output_traj.cols(), 1000); + EXPECT_EQ(actual_output_traj.cols(), 1000); + EXPECT_EQ(controller_params.init_control_traj_.cols(), 1000); + EXPECT_EQ(sampler_params.num_timesteps, 1000); +} + +TEST_F(ControllerTests, setNumRollouts) +{ + controller->setNumRollouts(10); + auto sampler_params = controller->getSamplingParams(); + auto cost_trajectory = controller->getSampledCostSeq(); + + EXPECT_EQ(controller->getNumRollouts(), 10); + EXPECT_EQ(cost_trajectory.rows(), 10); + EXPECT_EQ(sampler_params.num_rollouts, 10); + + controller->setNumRollouts(1000); + sampler_params = controller->getSamplingParams(); + cost_trajectory = controller->getSampledCostSeq(); + + EXPECT_EQ(controller->getNumRollouts(), 1000); + EXPECT_EQ(cost_trajectory.rows(), 1000); + EXPECT_EQ(sampler_params.num_rollouts, 1000); } TEST_F(ControllerTests, smoothControlTrajectory) { - TestController::control_trajectory u; + TestController::control_trajectory u = + TestController::control_trajectory::Zero(MockDynamics::CONTROL_DIM, NUM_TIMESTEPS); u.col(0) = TestController::control_array::Ones(); u.col(1) = 2.0 * TestController::control_array::Ones(); Eigen::Matrix control_history = @@ -239,7 +411,8 @@ TEST_F(ControllerTests, smoothControlTrajectory) TEST_F(ControllerTests, slideControlSequenceHelper) { - TestController::control_trajectory u; + TestController::control_trajectory u = + TestController::control_trajectory::Zero(MockDynamics::CONTROL_DIM, NUM_TIMESTEPS); for (int i = 0; i < controller->getNumTimesteps(); i++) { TestController::control_array control = TestController::control_array::Ones(); @@ -289,8 +462,11 @@ TEST_F(ControllerTests, computeStateTrajectoryHelper) EXPECT_CALL(*mockDynamics, step(testing::_, testing::_, testing::_, testing::_, testing::_, testing::_, dt)) .Times(controller->getNumTimesteps() - 1); - TestController::state_trajectory result = TestController::state_trajectory::Ones(); - TestController::control_trajectory u = TestController::control_trajectory::Zero(); + TestController::state_trajectory result = + TestController::state_trajectory::Ones(MockDynamics::STATE_DIM, controller->getNumTimesteps()); + TestController::control_trajectory u = + TestController::control_trajectory::Zero(MockDynamics::CONTROL_DIM, controller->getNumTimesteps()); + EXPECT_CALL(*mockDynamics, enforceConstraints(testing::_, testing::_)).Times(controller->getNumTimesteps() - 1); controller->computeStateTrajectoryHelper(result, x, u); // TODO: Figure out if we can actually check output if output is not part of input @@ -305,7 +481,8 @@ TEST_F(ControllerTests, computeStateTrajectoryHelper) TEST_F(ControllerTests, interpolateControl) { - TestController::control_trajectory traj; + TestController::control_trajectory traj = + TestController::control_trajectory::Zero(MockDynamics::CONTROL_DIM, NUM_TIMESTEPS); for (int i = 0; i < controller->getNumTimesteps(); i++) { traj.col(i) = TestController::control_array::Ones() * i; @@ -322,12 +499,13 @@ TEST_F(ControllerTests, interpolateFeedback) { controller->initFeedback(); auto fb_state = controller->getFeedbackState(); - for (int i = 0; i < fb_state.FEEDBACK_SIZE; i++) + for (int i = 0; i < fb_state.size(); i++) { - fb_state.fb_gain_traj_[i] = i; + fb_state.getFeedbackGainPtr()[i] = i; } - TestController::state_trajectory s_traj = TestController::state_trajectory::Zero(); + TestController::state_trajectory s_traj = + TestController::state_trajectory::Zero(MockDynamics::STATE_DIM, controller->getNumTimesteps()); TestController::state_array state = TestController::state_array::Ones(); for (double i = 0; i < controller->getNumTimesteps() - 1; i += 0.25) @@ -343,7 +521,7 @@ TEST_F(ControllerTests, getCurrentControlTest) { EXPECT_CALL(*mockDynamics, enforceConstraints(testing::_, testing::_)).Times(4 * (controller->getNumTimesteps() - 1)); - TestController::control_trajectory traj; + TestController::control_trajectory traj = controller->getControlSeq(); controller->initFeedback(); auto fb_state = controller->getFeedbackState(); for (int i = 0; i < controller->getNumTimesteps(); i++) @@ -351,12 +529,13 @@ TEST_F(ControllerTests, getCurrentControlTest) for (int j = 0; j < MockDynamics::STATE_DIM * MockDynamics::CONTROL_DIM; j++) { int i_index = i * MockDynamics::STATE_DIM * MockDynamics::CONTROL_DIM; - fb_state.fb_gain_traj_[i_index + j] = i_index + j; + fb_state.getFeedbackGainPtr()[i_index + j] = i_index + j; } traj.col(i) = TestController::control_array::Ones() * i; } - TestController::state_trajectory s_traj = TestController::state_trajectory::Zero(); + TestController::state_trajectory s_traj = + TestController::state_trajectory::Zero(MockDynamics::STATE_DIM, controller->getNumTimesteps()); TestController::state_array state = TestController::state_array::Ones(); for (double i = 0; i < controller->getNumTimesteps() - 1; i += 0.25) @@ -371,7 +550,8 @@ TEST_F(ControllerTests, getCurrentControlTest) TEST_F(ControllerTests, saveControlHistoryHelper_1) { int steps = 1; - TestController::control_trajectory u = TestController::control_trajectory::Random(); + TestController::control_trajectory u = + TestController::control_trajectory::Random(MockDynamics::CONTROL_DIM, NUM_TIMESTEPS); Eigen::Matrix u_history; u_history.setOnes(); @@ -387,7 +567,8 @@ TEST_F(ControllerTests, saveControlHistoryHelper_1) TEST_F(ControllerTests, saveControlHistoryHelper_2) { int steps = 4; - TestController::control_trajectory u = TestController::control_trajectory::Random(); + TestController::control_trajectory u = + TestController::control_trajectory::Random(MockDynamics::CONTROL_DIM, NUM_TIMESTEPS); Eigen::Matrix u_history; u_history.setOnes(); diff --git a/tests/controllers/controller_kernel_testing.cu b/tests/controllers/controller_kernel_testing.cu index 78f48c70..0cc527a2 100644 --- a/tests/controllers/controller_kernel_testing.cu +++ b/tests/controllers/controller_kernel_testing.cu @@ -49,31 +49,43 @@ public: #include -const int MAX_TIMESTEPS = 200; -using DI_FEEDBACK_T = DDPFeedback; +using DI_FEEDBACK_T = DDPFeedback; template -using DI_Vanilla = VanillaMPPIController; +struct DI_Vanilla +{ + typedef VanillaMPPIController CONTROLLER_T; + static const int num_rollouts = NUM_ROLLOUTS; +}; template -using DI_Colored = ColoredMPPIController; +struct DI_Colored +{ + typedef ColoredMPPIController CONTROLLER_T; + static const int num_rollouts = NUM_ROLLOUTS; +}; template -using DI_Tube = - TubeMPPIController; +struct DI_Tube +{ + typedef TubeMPPIController CONTROLLER_T; + static const int num_rollouts = NUM_ROLLOUTS; +}; template -using DI_Robust = RobustMPPIController; +struct DI_Robust +{ + typedef RobustMPPIController CONTROLLER_T; + static const int num_rollouts = NUM_ROLLOUTS; +}; // TODO: Add more dynamics/cost function specializations -template +template class ControllerKernelChoiceTest : public ::testing::Test { public: + using CONTROLLER_T = typename CONTROLLER_STRUCT::CONTROLLER_T; using DYN_T = typename CONTROLLER_T::TEMPLATED_DYNAMICS; using COST_T = typename CONTROLLER_T::TEMPLATED_COSTS; using FB_T = typename CONTROLLER_T::TEMPLATED_FEEDBACK; @@ -114,18 +126,20 @@ public: { params.dt_ = dt; params.num_timesteps_ = num_timesteps; + params.num_rollouts_ = CONTROLLER_STRUCT::num_rollouts; params.dynamics_rollout_dim_ = rollout_dyn; params.cost_rollout_dim_ = rollout_cost; } void TearDown() override { + HANDLE_ERROR(cudaStreamSynchronize(stream)); controller.reset(); + HANDLE_ERROR(cudaStreamDestroy(stream)); delete fb_controller; delete sampler; delete model; delete cost; - HANDLE_ERROR(cudaStreamSynchronize(stream)); } cudaStream_t stream; diff --git a/tests/controllers/rmppi_test.cu b/tests/controllers/rmppi_test.cu index b0e26146..8d3797fc 100644 --- a/tests/controllers/rmppi_test.cu +++ b/tests/controllers/rmppi_test.cu @@ -16,24 +16,26 @@ using DYN = DoubleIntegratorDynamics; using COST = DoubleIntegratorCircleCost; using SAMPLING = mppi::sampling_distributions::GaussianDistribution; using SAMPLER_PARAMS = SAMPLING::SAMPLING_PARAMS_T; -using FB_CONTROLLER = DDPFeedback; -using CONTROLLER_T = RobustMPPIController; +using FB_CONTROLLER = DDPFeedback; +using CONTROLLER_T = RobustMPPIController; class TestRobust : public CONTROLLER_T { public: - TestRobust(DYN* model, COST* cost, DDPFeedback* fb_controller, SAMPLING* sampler, float dt, - int max_iter, float lambda, float alpha, float value_function_threshold, int num_timesteps, - const Eigen::Ref& init_control_traj, cudaStream_t stream) - : RobustMPPIController(model, cost, fb_controller, sampler, dt, max_iter, lambda, alpha, value_function_threshold, - num_timesteps, init_control_traj, 9, 1, stream) - { - } - TestRobust(DYN* model, COST* cost, DDPFeedback* fb_controller, SAMPLING* sampler, - RobustMPPIController::TEMPLATED_PARAMS& params, cudaStream_t stream) - : RobustMPPIController(model, cost, fb_controller, sampler, params, stream) - { - } + using CONTROLLER_T::RobustMPPIController; + // TestRobust(DYN* model, COST* cost, DDPFeedback* fb_controller, SAMPLING* sampler, float dt, + // int max_iter, float lambda, float alpha, float value_function_threshold, int num_timesteps, + // const Eigen::Ref& init_control_traj, cudaStream_t stream) + // : RobustMPPIController(model, cost, fb_controller, sampler, dt, max_iter, lambda, alpha, + // value_function_threshold, + // num_timesteps, init_control_traj, 9, 1, stream) + // { + // } + // TestRobust(DYN* model, COST* cost, DDPFeedback* fb_controller, SAMPLING* sampler, + // RobustMPPIController::TEMPLATED_PARAMS& params, cudaStream_t stream) + // : RobustMPPIController(model, cost, fb_controller, sampler, params, stream) + // { + // } // Test to make sure that its nonzero // Test to make sure that cuda memory is allocated @@ -109,12 +111,25 @@ public: { return this->fb_controller_->getFeedbackGainsEigen(); } + + sampled_cost_traj getCostsOfNominalSamples() const + { + return this->trajectory_costs_nominal_; + } + + control_trajectory getNominalControlSeq() const + { + return this->nominal_control_trajectory_; + } }; // Text fixture for nominal state selection class RMPPINominalStateCandidates : public ::testing::Test { public: + int num_rollouts = 2048; + int num_timesteps = 100; + protected: void SetUp() override { @@ -131,8 +146,10 @@ protected: // controller_params.control_std_dev_ << 0.0001, 0.0001; controller_params.num_iters_ = 3; controller_params.value_function_threshold_ = 1000.0; - controller_params.num_timesteps_ = 100; - controller_params.init_control_traj_.setZero(); + controller_params.num_timesteps_ = num_timesteps; + controller_params.num_rollouts_ = num_rollouts; + controller_params.init_control_traj_ = + CONTROLLER_T::control_trajectory::Random(DYN::CONTROL_DIM, controller_params.num_timesteps_); controller_params.dynamics_rollout_dim_ = dim3(64, 4, 2); controller_params.cost_rollout_dim_ = dim3(64, 4, 2); @@ -168,6 +185,53 @@ protected: float alpha = 0.01; }; +TEST_F(RMPPINominalStateCandidates, UpdateNumberOfNominalTrajectories) +{ + auto original_sampled_cost_traj = test_controller->getCostsOfNominalSamples(); + const int new_num_rollouts = original_sampled_cost_traj.rows() + 1; + test_controller->setNumRollouts(new_num_rollouts); + auto changed_sampled_cost_traj = test_controller->getCostsOfNominalSamples(); + EXPECT_EQ(original_sampled_cost_traj.rows(), num_rollouts); + EXPECT_EQ(changed_sampled_cost_traj.rows(), new_num_rollouts); + EXPECT_NE(changed_sampled_cost_traj.rows(), original_sampled_cost_traj.rows()); +} + +TEST_F(RMPPINominalStateCandidates, UpdateLengthOfNominalTrajectories) +{ + auto orig_nominal_state_trajectory = test_controller->getTargetStateSeq(); + auto orig_real_state_trajectory = test_controller->getActualStateSeq(); + auto orig_real_control_trajectory = test_controller->getControlSeq(); + auto orig_nominal_control_trajectory = test_controller->getNominalControlSeq(); + const int new_num_timesteps = orig_real_state_trajectory.cols() + 1; + test_controller->setNumTimesteps(new_num_timesteps); + auto changed_nominal_state_trajectory = test_controller->getTargetStateSeq(); + auto changed_real_state_trajectory = test_controller->getActualStateSeq(); + auto changed_real_control_trajectory = test_controller->getControlSeq(); + auto changed_nominal_control_trajectory = test_controller->getNominalControlSeq(); + auto new_controller_params = test_controller->getParams(); + EXPECT_EQ(new_controller_params.init_control_traj_.cols(), new_num_timesteps); + for (int t = 0; t < new_num_timesteps; t++) + { + EXPECT_FLOAT_EQ( + fabsf((changed_real_control_trajectory.col(t) - new_controller_params.init_control_traj_.col(t)).sum()), 0.0f) + << "Real control at time " << t << " did not match initial control trajectory"; + EXPECT_FLOAT_EQ( + fabsf((changed_nominal_control_trajectory.col(t) - new_controller_params.init_control_traj_.col(t)).sum()), + 0.0f) + << "Nominal control at time " << t << " did not match initial control trajectory"; + } + + EXPECT_EQ(orig_nominal_state_trajectory.cols(), num_timesteps); + EXPECT_EQ(orig_real_state_trajectory.cols(), num_timesteps); + EXPECT_EQ(orig_nominal_control_trajectory.cols(), num_timesteps); + EXPECT_EQ(orig_real_control_trajectory.cols(), num_timesteps); + EXPECT_EQ(changed_nominal_state_trajectory.cols(), new_num_timesteps); + EXPECT_EQ(changed_real_state_trajectory.cols(), new_num_timesteps); + EXPECT_EQ(changed_nominal_control_trajectory.cols(), new_num_timesteps); + EXPECT_EQ(changed_real_control_trajectory.cols(), new_num_timesteps); + EXPECT_NE(changed_real_state_trajectory.cols(), orig_real_state_trajectory.cols()); +} + TEST_F(RMPPINominalStateCandidates, UpdateNumCandidates_LessThan3) { ::testing::FLAGS_gtest_death_test_style = "threadsafe"; @@ -308,7 +372,7 @@ protected: sampler_params.std_dev[0] = 0.001; sampler_params.std_dev[1] = 0.001; sampler = new SAMPLING(sampler_params); - init_control_traj.setZero(); + init_control_traj = TestRobust::control_trajectory::Zero(DYN::CONTROL_DIM, 100); // Q, Qf, R auto fb_params = fb_controller->getParams(); @@ -318,7 +382,7 @@ protected: fb_controller->setParams(fb_params); test_controller = - new TestRobust(model, cost, fb_controller, sampler, dt, 3, lambda, alpha, 1000.0, 100, init_control_traj, 0); + new TestRobust(model, cost, fb_controller, sampler, dt, 3, lambda, alpha, 1000.0, 100, 2048, init_control_traj); auto controller_params = test_controller->getParams(); controller_params.dynamics_rollout_dim_ = dim3(64, 4, 2); controller_params.cost_rollout_dim_ = dim3(64, 1, 2); @@ -538,7 +602,8 @@ TEST_F(RMPPINominalStateSelection, DDPFeedbackGainInternalStorage) int i_index = i * DYN::STATE_DIM * DYN::CONTROL_DIM; for (size_t j = 0; j < DYN::CONTROL_DIM * DYN::STATE_DIM; j++) { - ASSERT_FLOAT_EQ(fb_parm.fb_gain_traj_[i_index + j], feedback_gain_eigen_aligned[i].data()[j]) << " at i = " << i; + ASSERT_FLOAT_EQ(fb_parm.getConstFeedbackGainPtr()[i_index + j], feedback_gain_eigen_aligned[i].data()[j]) + << " at i = " << i; } } } @@ -563,7 +628,8 @@ TEST(RMPPITest, RobustMPPILargeVariance) using DYNAMICS = DoubleIntegratorDynamics; using COST_T = DoubleIntegratorCircleCost; const int num_timesteps = 50; // Optimization time horizon - using FEEDBACK_T = DDPFeedback; + const int num_rollouts = 1024; + using FEEDBACK_T = DDPFeedback; // Noise enters the system during the "true" state propagation. In this case the noise is nominal DYNAMICS model(100); // Initialize the double integrator dynamics COST_T cost; // Initialize the cost function @@ -617,8 +683,9 @@ TEST(RMPPITest, RobustMPPILargeVariance) // 1024, 64, 8, 1>(&model, &cost2, dt, max_iter, gamma, value_function_threshold, Q, Qf, R, control_var); // Initialize the R MPPI controller - auto controller = RobustMPPIController( - &model, &cost, &fb_controller, &sampler, dt, max_iter, lambda, alpha, value_function_threshold); + auto controller = RobustMPPIController( + &model, &cost, &fb_controller, &sampler, dt, max_iter, lambda, alpha, value_function_threshold, num_timesteps, + num_rollouts); auto controller_params = controller.getParams(); controller_params.dynamics_rollout_dim_ = dim3(64, 4, 2); @@ -716,7 +783,7 @@ TEST(RMPPITest, RobustMPPILargeVarianceRobustCost) using DYNAMICS = DoubleIntegratorDynamics; using COST_T = DoubleIntegratorRobustCost; const int num_timesteps = 50; // Optimization time horizon - using FEEDBACK_T = DDPFeedback; + using FEEDBACK_T = DDPFeedback; float dt = 0.02; // Timestep of dynamics propagation // Noise enters the system during the "true" state propagation. In this case the noise is nominal @@ -777,11 +844,12 @@ TEST(RMPPITest, RobustMPPILargeVarianceRobustCost) // auto controller2 = RobustMPPIController(&model, &cost2, dt, max_iter, gamma, value_function_threshold, Q, Qf, R, control_var); - using CONTROLLER_PARAMS = - typename RobustMPPIController::TEMPLATED_PARAMS; + using CONTROLLER_PARAMS = typename RobustMPPIController::TEMPLATED_PARAMS; CONTROLLER_PARAMS controller_params; controller_params.dt_ = dt; controller_params.num_iters_ = max_iter; + controller_params.num_timesteps_ = num_timesteps; + controller_params.num_rollouts_ = 1024; controller_params.lambda_ = lambda; controller_params.alpha_ = alpha; controller_params.value_function_threshold_ = value_function_threshold; @@ -790,8 +858,8 @@ TEST(RMPPITest, RobustMPPILargeVarianceRobustCost) controller_params.eval_dyn_kernel_dim_ = dim3(64, 4, 1); controller_params.eval_cost_kernel_dim_ = dim3(num_timesteps, 1, 1); // Initialize the R MPPI controller - auto controller = RobustMPPIController( - &model, &cost, &fb_controller, &sampler, controller_params); + auto controller = RobustMPPIController(&model, &cost, &fb_controller, + &sampler, controller_params); controller.setKernelChoice(kernelType::USE_SPLIT_KERNELS); int fail_count = 0; diff --git a/tests/controllers/tube_mppi_test.cu b/tests/controllers/tube_mppi_test.cu index 862e1692..850385a4 100644 --- a/tests/controllers/tube_mppi_test.cu +++ b/tests/controllers/tube_mppi_test.cu @@ -25,84 +25,159 @@ bool tubeFailure(float* s) class DoubleIntegratorTubeMPPI : public ::testing::Test { public: - static const int num_timesteps = 100; - static const int num_rollouts = 512; + const int num_timesteps = 100; + const int num_rollouts = 512; using DYN = DoubleIntegratorDynamics; using COST = DoubleIntegratorCircleCost; - using FB_CONTROLLER = DDPFeedback; + using FB_CONTROLLER = DDPFeedback; using SAMPLING = mppi::sampling_distributions::GaussianDistribution; - using VANILLA_CONTROLLER = VanillaMPPIController; - using TUBE_CONTROLLER = TubeMPPIController; + using TUBE_CONTROLLER = TubeMPPIController; void SetUp() override + { + model = std::make_shared(); + cost = std::make_shared(); + sampler = std::make_shared(); + fb_controller = std::make_shared(model.get(), dt); + auto fb_params = fb_controller->getParams(); + + // DDP cost parameters + Eigen::MatrixXf Q; + Eigen::MatrixXf R; + fb_params.Q = 100 * FB_CONTROLLER::square_state_matrix::Identity(); + fb_params.Q_f = fb_params.Q; + fb_params.R = FB_CONTROLLER::square_control_matrix::Identity(); + fb_controller->setParams(fb_params); + } + + void TearDown() override { } -}; -TEST_F(DoubleIntegratorTubeMPPI, Construction) -{ - // Define the model and cost - DYN model; - COST cost; float dt = 0.01; - auto fb_controller = FB_CONTROLLER(&model, dt); - auto fb_params = fb_controller.getParams(); int max_iter = 10; float lambda = 0.5; float alpha = 0.0; - // DDP cost parameters - Eigen::MatrixXf Q; - Eigen::MatrixXf R; - fb_params.Q = 100 * FB_CONTROLLER::square_state_matrix::Identity(); - fb_params.Q_f = fb_params.Q; - fb_params.R = FB_CONTROLLER::square_control_matrix::Identity(); - fb_controller.setParams(fb_params); - - // Sampling Distribution setup - SAMPLING sampler = SAMPLING(); - - auto vanilla_controller = VANILLA_CONTROLLER(&model, &cost, &fb_controller, &sampler, dt, max_iter, lambda, alpha); + std::shared_ptr model; + std::shared_ptr cost; + std::shared_ptr sampler; + std::shared_ptr fb_controller; +}; - auto controller = TUBE_CONTROLLER(&model, &cost, &fb_controller, &sampler, dt, max_iter, lambda, alpha); +TEST_F(DoubleIntegratorTubeMPPI, Construction) +{ + auto controller = std::make_shared(model.get(), cost.get(), fb_controller.get(), sampler.get(), dt, + max_iter, lambda, alpha, num_timesteps, num_rollouts); // This controller needs the ancillary controller running separately for base plant reasons. + auto nominal_control_trajectory = controller->getControlSeq(); + auto real_control_trajectory = controller->getActualControlSeq(); + auto nominal_state_trajectory = controller->getTargetStateSeq(); + auto real_state_trajectory = controller->getActualStateSeq(); + + EXPECT_EQ(controller->getFeedbackEnabled(), true); + EXPECT_EQ(controller->getDt(), dt); + EXPECT_EQ(controller->getNumIters(), max_iter); + EXPECT_EQ(controller->getLambda(), lambda); + EXPECT_EQ(controller->getAlpha(), alpha); + EXPECT_EQ(controller->getNumTimesteps(), num_timesteps); + EXPECT_EQ(nominal_control_trajectory.cols(), controller->getNumTimesteps()); + EXPECT_EQ(real_control_trajectory.cols(), controller->getNumTimesteps()); + EXPECT_EQ(nominal_state_trajectory.cols(), controller->getNumTimesteps()); + EXPECT_EQ(real_state_trajectory.cols(), controller->getNumTimesteps()); + EXPECT_EQ(controller->getNumRollouts(), num_rollouts); } TEST_F(DoubleIntegratorTubeMPPI, ConstructionUsingParams) { - // Define the model and cost - DYN model; - COST cost; TUBE_CONTROLLER::TEMPLATED_PARAMS controller_params; - controller_params.dt_ = 0.01; - controller_params.num_iters_ = 10; - controller_params.lambda_ = 0.5; - controller_params.alpha_ = 0.0; - // control std dev - // controller_params.control_std_dev_ << 1, 1; - auto fb_controller = FB_CONTROLLER(&model, controller_params.dt_); - auto fb_params = fb_controller.getParams(); - - // DDP cost parameters - Eigen::MatrixXf Q; - Eigen::MatrixXf R; - fb_params.Q = 100 * FB_CONTROLLER::square_state_matrix::Identity(); - fb_params.Q_f = fb_params.Q; - fb_params.R = FB_CONTROLLER::square_control_matrix::Identity(); - fb_controller.setParams(fb_params); - - // Sampling Distribution setup - SAMPLING::SAMPLING_PARAMS_T sampler_params; - for (int i = 0; i < DYN::CONTROL_DIM; i++) + controller_params.dt_ = dt; + controller_params.num_iters_ = max_iter; + controller_params.lambda_ = lambda; + controller_params.alpha_ = alpha; + controller_params.num_timesteps_ = num_timesteps; + controller_params.num_rollouts_ = num_rollouts; + controller_params.init_control_traj_ = TUBE_CONTROLLER::control_trajectory::Random(DYN::CONTROL_DIM, 1); + controller_params.nominal_threshold_ = 31.0f; + + auto controller = + std::make_shared(model.get(), cost.get(), fb_controller.get(), sampler.get(), controller_params); + // This controller needs the ancillary controller running separately for base plant reasons. + auto nominal_control_trajectory = controller->getControlSeq(); + auto real_control_trajectory = controller->getActualControlSeq(); + auto nominal_state_trajectory = controller->getTargetStateSeq(); + auto real_state_trajectory = controller->getActualStateSeq(); + auto new_controller_params = controller->getParams(); + + EXPECT_EQ(controller->getFeedbackEnabled(), true); + EXPECT_EQ(controller->getDt(), dt); + EXPECT_EQ(controller->getNumIters(), max_iter); + EXPECT_EQ(controller->getLambda(), lambda); + EXPECT_EQ(controller->getAlpha(), alpha); + EXPECT_EQ(controller->getNumTimesteps(), num_timesteps); + EXPECT_EQ(nominal_control_trajectory.cols(), controller->getNumTimesteps()); + EXPECT_EQ(real_control_trajectory.cols(), controller->getNumTimesteps()); + EXPECT_EQ(nominal_state_trajectory.cols(), controller->getNumTimesteps()); + EXPECT_EQ(real_state_trajectory.cols(), controller->getNumTimesteps()); + EXPECT_EQ(controller->getNumRollouts(), num_rollouts); + EXPECT_FLOAT_EQ(controller->getNominalThreshold(), 31.0f); + + for (int t = 0; t < controller->getNumTimesteps(); t++) { - sampler_params.std_dev[i] = 1; + EXPECT_FLOAT_EQ(fabsf((nominal_control_trajectory.col(t) - new_controller_params.init_control_traj_.col(t)).sum()), + 0.0f); + EXPECT_FLOAT_EQ(fabsf((real_control_trajectory.col(t) - new_controller_params.init_control_traj_.col(t)).sum()), + 0.0f); } - SAMPLING sampler = SAMPLING(sampler_params); +} - auto vanilla_controller = VANILLA_CONTROLLER(&model, &cost, &fb_controller, &sampler, controller_params); - auto controller = TUBE_CONTROLLER(&model, &cost, &fb_controller, &sampler, controller_params); +TEST_F(DoubleIntegratorTubeMPPI, UpdateNominalThreshold) +{ + auto controller = std::make_shared(model.get(), cost.get(), fb_controller.get(), sampler.get(), dt, + max_iter, lambda, alpha, num_timesteps, num_rollouts); + float orig_nominal_threshold = controller->getNominalThreshold(); + float new_nominal_threshold = orig_nominal_threshold + 3.14f; + controller->setNominalThreshold(new_nominal_threshold); + EXPECT_NE(controller->getNominalThreshold(), orig_nominal_threshold); + EXPECT_FLOAT_EQ(controller->getNominalThreshold(), new_nominal_threshold); +} +TEST_F(DoubleIntegratorTubeMPPI, UpdateLengthOfNominalTrajectories) +{ + TUBE_CONTROLLER::TEMPLATED_PARAMS controller_params; + controller_params.dt_ = dt; + controller_params.num_iters_ = max_iter; + controller_params.lambda_ = lambda; + controller_params.alpha_ = alpha; + controller_params.num_timesteps_ = num_timesteps; + controller_params.num_rollouts_ = num_rollouts; + controller_params.init_control_traj_ = TUBE_CONTROLLER::control_trajectory::Random(DYN::CONTROL_DIM, 1); + controller_params.slide_control_scale_ = TUBE_CONTROLLER::control_array::Ones(); + + auto controller = + std::make_shared(model.get(), cost.get(), fb_controller.get(), sampler.get(), controller_params); + int new_num_timesteps = controller->getNumTimesteps() + 5; + controller->setNumTimesteps(new_num_timesteps); // This controller needs the ancillary controller running separately for base plant reasons. + auto nominal_control_trajectory = controller->getControlSeq(); + auto real_control_trajectory = controller->getActualControlSeq(); + auto nominal_state_trajectory = controller->getTargetStateSeq(); + auto real_state_trajectory = controller->getActualStateSeq(); + auto new_controller_params = controller->getParams(); + + EXPECT_EQ(controller->getNumTimesteps(), new_num_timesteps); + EXPECT_EQ(nominal_control_trajectory.cols(), new_num_timesteps); + EXPECT_EQ(real_control_trajectory.cols(), new_num_timesteps); + EXPECT_EQ(nominal_state_trajectory.cols(), new_num_timesteps); + EXPECT_EQ(real_state_trajectory.cols(), new_num_timesteps); + + for (int t = 0; t < controller->getNumTimesteps(); t++) + { + EXPECT_FLOAT_EQ(fabsf((nominal_control_trajectory.col(t) - new_controller_params.init_control_traj_.col(t)).sum()), + 0.0f); + EXPECT_FLOAT_EQ(fabsf((real_control_trajectory.col(t) - new_controller_params.init_control_traj_.col(t)).sum()), + 0.0f); + } } class DoubleIntegratorTracking : public ::testing::Test @@ -113,10 +188,10 @@ public: const unsigned int total_time_horizon = 500; using DYN = DoubleIntegratorDynamics; using COST = DoubleIntegratorCircleCost; - using FB_CONTROLLER = DDPFeedback; + using FB_CONTROLLER = DDPFeedback; using SAMPLING = mppi::sampling_distributions::GaussianDistribution; - using VANILLA_CONTROLLER = VanillaMPPIController; - using TUBE_CONTROLLER = TubeMPPIController; + using VANILLA_CONTROLLER = VanillaMPPIController; + using TUBE_CONTROLLER = TubeMPPIController; DYN model; COST cost; @@ -152,7 +227,8 @@ TEST_F(DoubleIntegratorTracking, VanillaMPPINominalVariance) std::vector nominal_trajectory_save(num_timesteps * total_time_horizon * DYN::STATE_DIM); // Initialize the vanilla MPPI controller - auto vanilla_controller = VANILLA_CONTROLLER(&model, &cost, &fb_controller, &sampler, dt, max_iter, lambda, alpha); + auto vanilla_controller = VANILLA_CONTROLLER(&model, &cost, &fb_controller, &sampler, dt, max_iter, lambda, alpha, + num_timesteps, num_rollouts); auto controller_params = vanilla_controller.getParams(); controller_params.dynamics_rollout_dim_ = dim3(64, 4, 1); controller_params.cost_rollout_dim_ = dim3(50, 1, 1); @@ -211,7 +287,8 @@ TEST_F(DoubleIntegratorTracking, VanillaMPPILargeVariance) std::vector nominal_trajectory_save(num_timesteps * total_time_horizon * DYN::STATE_DIM); model.setStateVariance(100); // Initialize the vanilla MPPI controller - auto vanilla_controller = VANILLA_CONTROLLER(&model, &cost, &fb_controller, &sampler, dt, max_iter, lambda, alpha); + auto vanilla_controller = VANILLA_CONTROLLER(&model, &cost, &fb_controller, &sampler, dt, max_iter, lambda, alpha, + num_timesteps, num_rollouts); auto controller_params = vanilla_controller.getParams(); controller_params.dynamics_rollout_dim_ = dim3(64, 4, 1); controller_params.cost_rollout_dim_ = dim3(50, 1, 1); @@ -274,7 +351,8 @@ TEST_F(DoubleIntegratorTracking, VanillaMPPILargeVarianceTracking) model.setStateVariance(100); // Initialize the vanilla MPPI controller - auto vanilla_controller = VANILLA_CONTROLLER(&model, &cost, &fb_controller, &sampler, dt, max_iter, lambda, alpha); + auto vanilla_controller = VANILLA_CONTROLLER(&model, &cost, &fb_controller, &sampler, dt, max_iter, lambda, alpha, + num_timesteps, num_rollouts); auto controller_params = vanilla_controller.getParams(); controller_params.dynamics_rollout_dim_ = dim3(64, 4, 1); controller_params.cost_rollout_dim_ = dim3(50, 1, 1); @@ -379,7 +457,8 @@ TEST_F(DoubleIntegratorTracking, TubeMPPILargeVariance) std::vector feedback_trajectory_save(num_timesteps * total_time_horizon * DYN::STATE_DIM); // Initialize the tube MPPI controller - auto controller = TUBE_CONTROLLER(&model, &cost, &fb_controller, &sampler, dt, max_iter, lambda, alpha); + auto controller = TUBE_CONTROLLER(&model, &cost, &fb_controller, &sampler, dt, max_iter, lambda, alpha, num_timesteps, + num_rollouts); auto controller_params = controller.getParams(); controller_params.dynamics_rollout_dim_ = dim3(64, 1, 1); controller_params.cost_rollout_dim_ = dim3(50, 1, 1); diff --git a/tests/controllers/vanilla_mppi_test.cu b/tests/controllers/vanilla_mppi_test.cu index 1289220e..f7f50c96 100644 --- a/tests/controllers/vanilla_mppi_test.cu +++ b/tests/controllers/vanilla_mppi_test.cu @@ -5,13 +5,13 @@ class Cartpole_VanillaMPPI : public ::testing::Test { public: - static const int NUM_TIMESTEPS = 100; + const int NUM_TIMESTEPS = 100; static const int NUM_ROLLOUTS = 2048; using DYN_T = CartpoleDynamics; using COST_T = CartpoleQuadraticCost; - using FB_T = DDPFeedback; + using FB_T = DDPFeedback; using SAMPLING_T = mppi::sampling_distributions::GaussianDistribution; - using CONTROLLER_T = VanillaMPPIController; + using CONTROLLER_T = VanillaMPPIController; using control_trajectory = CONTROLLER_T::control_trajectory; using control_array = CONTROLLER_T::control_array; @@ -26,12 +26,13 @@ public: float gamma = 0.5; float lambda = 0.25; float alpha = 0.01; - control_trajectory init_control = control_trajectory::Constant(0); + control_trajectory init_control; control_array control_std_dev = control_array::Constant(5.0); cudaStream_t stream; void SetUp() override { + init_control = control_trajectory::Zero(DYN_T::CONTROL_DIM, NUM_TIMESTEPS); fb_controller = new FB_T(&model, dt); HANDLE_ERROR(cudaStreamCreate(&stream)); auto sampler_params = SAMPLING_T::SAMPLING_PARAMS_T(); @@ -43,7 +44,7 @@ public: EXPECT_EQ(model.getBlkSharedSizeBytes(), 0); sampler = new SAMPLING_T(sampler_params); controller = new CONTROLLER_T(&model, &cost, fb_controller, sampler, dt, max_iter, lambda, alpha, NUM_TIMESTEPS, - init_control, stream); + NUM_ROLLOUTS, init_control, stream); } void TearDown() override @@ -125,7 +126,7 @@ TEST_F(Cartpole_VanillaMPPI, SwingUpTest) controller->computeControl(current_state, 1); DYN_T::control_array control; - control = controller->getControlSeq().block(0, 0, DYN_T::CONTROL_DIM, 1); + control = controller->getControlSeq().col(0); // Increment the state model.computeStateDeriv(current_state, control, xdot); model.updateState(current_state, xdot, dt); @@ -155,6 +156,7 @@ TEST_F(Cartpole_VanillaMPPI, getSampledStateTrajectoriesTest) controller->calculateSampledStateTrajectories(); const auto outputs = controller->getSampledOutputTrajectories(); EXPECT_EQ(outputs.size(), 0.25 * NUM_ROLLOUTS); + EXPECT_EQ(outputs[0].cols(), NUM_TIMESTEPS); } class Quadrotor_VanillaMPPI : public ::testing::Test @@ -164,9 +166,9 @@ public: static const int NUM_ROLLOUTS = 2048; using DYN_T = QuadrotorDynamics; using COST_T = QuadrotorQuadraticCost; - using FB_T = DDPFeedback; + using FB_T = DDPFeedback; using SAMPLING_T = mppi::sampling_distributions::GaussianDistribution; - using CONTROLLER_T = VanillaMPPIController; + using CONTROLLER_T = VanillaMPPIController; using control_trajectory = CONTROLLER_T::control_trajectory; using control_array = CONTROLLER_T::control_array; @@ -176,7 +178,7 @@ public: float alpha = 0.9; cudaStream_t stream; control_array control_std_dev = control_array::Constant(0.5); - control_trajectory init_control = control_trajectory::Constant(0.0); + control_trajectory init_control = control_trajectory::Zero(DYN_T::CONTROL_DIM, NUM_TIMESTEPS); DYN_T model; COST_T cost; @@ -197,7 +199,7 @@ public: } sampler = new SAMPLING_T(sampler_params); controller = new CONTROLLER_T(&model, &cost, &fb_controller, sampler, dt, max_iter, lambda, alpha, NUM_TIMESTEPS, - init_control, stream); + NUM_ROLLOUTS, init_control, stream); } void TearDown() override @@ -258,7 +260,7 @@ TEST_F(Quadrotor_VanillaMPPI, HoverTest) // float xdot[DYN_T::STATE_DIM]; DYN_T::state_array xdot(4, 1); DYN_T::control_array control; - control = controller->getControlSeq().block(0, 0, DYN_T::CONTROL_DIM, 1); + control = controller->getControlSeq().col(0); int far_away_cnt = 0; Eigen::Vector3f goal_pos = new_params.getDesiredState().block<3, 1>(0, 0); @@ -291,7 +293,7 @@ TEST_F(Quadrotor_VanillaMPPI, HoverTest) controller->computeControl(current_state, 1); loop_end = std::chrono::steady_clock::now(); - control = controller->getControlSeq().block(0, 0, DYN_T::CONTROL_DIM, 1); + control = controller->getControlSeq().col(0); // block(0, 0, DYN_T::CONTROL_DIM, 1); // Increment the state model.computeStateDeriv(current_state, control, xdot); model.updateState(current_state, xdot, dt); diff --git a/tests/feedback_controllers/ddp_test.cu b/tests/feedback_controllers/ddp_test.cu index b4e6b2db..d2938397 100644 --- a/tests/feedback_controllers/ddp_test.cu +++ b/tests/feedback_controllers/ddp_test.cu @@ -158,7 +158,8 @@ TEST(DDPSolver_Test, Cartpole_Tracking) float lambda = 0.25; float alpha = 0.001; const int num_timesteps = 100; - auto fb_controller = DDPFeedback(&model, dt); + const int num_rollouts = 2048; + auto fb_controller = DDPFeedback(&model, dt, num_timesteps); auto sampler = SAMPLER_T(); auto sampler_params = sampler.getParams(); @@ -180,8 +181,8 @@ TEST(DDPSolver_Test, Cartpole_Tracking) fb_controller.initTrackingController(); auto controller = VanillaMPPIController, num_timesteps, 2048, SAMPLER_T>( - &model, &cost, &fb_controller, &sampler, dt, max_iter, lambda, alpha); + DDPFeedback, SAMPLER_T>( + &model, &cost, &fb_controller, &sampler, dt, max_iter, lambda, alpha, num_timesteps, num_rollouts); auto controller_params = controller.getParams(); controller_params.dynamics_rollout_dim_ = dim3(64, 8, 1); @@ -249,7 +250,7 @@ TEST(DDPSolver_Test, Quadrotor_Tracking) using DYN = QuadrotorDynamics; using COST = QuadrotorQuadraticCost; - using CONTROLLER = VanillaMPPIController, num_timesteps, 2048>; + using CONTROLLER = VanillaMPPIController>; std::array control_ranges; for (int i = 0; i < 3; i++) @@ -283,15 +284,15 @@ TEST(DDPSolver_Test, Quadrotor_Tracking) fb_params.R.diagonal() << 550, 550, 550, 1; fb_params.num_iterations = 100; - Eigen::MatrixXf control_traj = CONTROLLER::control_trajectory::Zero(); - CONTROLLER::state_trajectory ddp_state_traj = CONTROLLER::state_trajectory::Zero(); + Eigen::MatrixXf control_traj = CONTROLLER::control_trajectory::Zero(DYN::CONTROL_DIM, num_timesteps); + CONTROLLER::state_trajectory ddp_state_traj = CONTROLLER::state_trajectory::Zero(DYN::STATE_DIM, num_timesteps); for (int i = 0; i < num_timesteps; i++) { ddp_state_traj.col(i) = x_goal; control_traj.col(i) = model.zero_control_; } - auto fb_controller = DDPFeedback(&model, dt); + auto fb_controller = DDPFeedback(&model, dt, num_timesteps); fb_controller.setParams(fb_params); fb_controller.initTrackingController(); diff --git a/tests/feedback_controllers/generic_feedback_controller_test.cu b/tests/feedback_controllers/generic_feedback_controller_test.cu index c408924c..db4b32f5 100644 --- a/tests/feedback_controllers/generic_feedback_controller_test.cu +++ b/tests/feedback_controllers/generic_feedback_controller_test.cu @@ -38,15 +38,7 @@ public: MPPI_internal::Dynamics, DynamicsTesterParams>; using state_array = typename PARENT_CLASS::state_array; using control_array = typename PARENT_CLASS::control_array; - - DynamicsTester(cudaStream_t stream = 0) : PARENT_CLASS(stream) - { - } - - DynamicsTester(std::array control_rngs, cudaStream_t stream = 0) - : PARENT_CLASS(control_rngs, stream) - { - } + using PARENT_CLASS::Dynamics; void computeDynamics(const Eigen::Ref& state, const Eigen::Ref& control, Eigen::Ref state_der) @@ -105,10 +97,10 @@ public: } }; -class TestFeedbackController : public FeedbackController +class TestFeedbackController : public FeedbackController { public: - typedef FeedbackController PARENT_CLASS; + typedef FeedbackController PARENT_CLASS; using INTERNAL_STATE_T = typename PARENT_CLASS::TEMPLATED_FEEDBACK_STATE; TestFeedbackController(float dt = 0.01, int num_timesteps = 10, cudaStream_t stream = 0) diff --git a/tests/include/mppi_test/mock_classes/mock_controller.h b/tests/include/mppi_test/mock_classes/mock_controller.h index ff1029ce..024c2afc 100644 --- a/tests/include/mppi_test/mock_classes/mock_controller.h +++ b/tests/include/mppi_test/mock_classes/mock_controller.h @@ -12,7 +12,7 @@ // ===== mock controller ==== class MockController - : public Controller + : public Controller { public: MOCK_METHOD0(calculateSampledStateTrajectories, void()); diff --git a/tests/include/mppi_test/mock_classes/mock_dynamics.h b/tests/include/mppi_test/mock_classes/mock_dynamics.h index 24d46224..b64a864c 100644 --- a/tests/include/mppi_test/mock_classes/mock_dynamics.h +++ b/tests/include/mppi_test/mock_classes/mock_dynamics.h @@ -40,7 +40,6 @@ class MockDynamics : public MPPI_internal::Dynamics()); MOCK_METHOD0(getControlRangesRaw, void()); MOCK_METHOD0(getParams, mockDynamicsParams()); - // MOCK_METHOD0(freeCudaMem, void()); MOCK_METHOD0(paramsToDevice, void()); MOCK_METHOD3(computeDynamics, void(const Eigen::Ref&, const Eigen::Ref&, Eigen::Ref)); diff --git a/tests/include/mppi_test/mock_classes/mock_feedback.h b/tests/include/mppi_test/mock_classes/mock_feedback.h index 4a12b00e..395803df 100644 --- a/tests/include/mppi_test/mock_classes/mock_feedback.h +++ b/tests/include/mppi_test/mock_classes/mock_feedback.h @@ -23,7 +23,7 @@ class MockGPUFeedback : public GPUFeedbackController +class MockFeedback : public FeedbackController { public: MOCK_METHOD0(initTrackingController, void()); diff --git a/tests/mppi_core/base_plant_tester.cu b/tests/mppi_core/base_plant_tester.cu index 32f471a8..b08849a6 100644 --- a/tests/mppi_core/base_plant_tester.cu +++ b/tests/mppi_core/base_plant_tester.cu @@ -158,6 +158,8 @@ protected: mockController->model_ = &mockDynamics; mockController->sampler_ = &mockSampler; mockController->fb_controller_ = &mockFeedback; + mockController->setNumTimesteps(100); + mockController->setNumRollouts(512); plant = std::make_shared(mockController); } @@ -418,9 +420,9 @@ TEST_F(BasePlantTest, runControlIterationDebugFalseNoFeedbackTest) EXPECT_CALL(*mockController, computeControl(testing::_, testing::_)) .Times(1) .WillRepeatedly(testing::Invoke(wait_function)); - MockController::control_trajectory control_seq = MockController::control_trajectory::Zero(); + MockController::control_trajectory control_seq = MockController::control_trajectory::Zero(MockDynamics::CONTROL_DIM, NUM_TIMESTEPS); EXPECT_CALL(*mockController, getControlSeq()).Times(1).WillRepeatedly(testing::Return(control_seq)); - MockController::state_trajectory state_seq = MockController::state_trajectory::Zero(); + MockController::state_trajectory state_seq = MockController::state_trajectory::Zero(MockDynamics::STATE_DIM, NUM_TIMESTEPS); EXPECT_CALL(*mockController, getTargetStateSeq()).Times(1).WillRepeatedly(testing::Return(state_seq)); EXPECT_CALL(*mockController, computeFeedback(testing::_)).Times(0); @@ -501,9 +503,9 @@ TEST_F(BasePlantTest, runControlIterationDebugFalseFeedbackTest) EXPECT_CALL(*mockController, computeControl(testing::_, testing::_)) .Times(1) .WillRepeatedly(testing::Invoke(wait_function)); - MockController::control_trajectory control_seq = MockController::control_trajectory::Zero(); + MockController::control_trajectory control_seq = MockController::control_trajectory::Zero(MockDynamics::CONTROL_DIM, NUM_TIMESTEPS); EXPECT_CALL(*mockController, getControlSeq()).Times(1).WillRepeatedly(testing::Return(control_seq)); - MockController::state_trajectory state_seq = MockController::state_trajectory::Zero(); + MockController::state_trajectory state_seq = MockController::state_trajectory::Zero(MockDynamics::STATE_DIM, NUM_TIMESTEPS); EXPECT_CALL(*mockController, getTargetStateSeq()).Times(1).WillRepeatedly(testing::Return(state_seq)); EXPECT_CALL(*mockController, computeFeedback(testing::_)).Times(1).WillRepeatedly(testing::Invoke(wait_function)); @@ -579,9 +581,9 @@ TEST_F(BasePlantTest, runControlIterationDebugFalseFeedbackAvgTest) EXPECT_CALL(*mockController, computeControl(testing::_, testing::_)) .Times(1) .WillRepeatedly(testing::Invoke(wait_function)); - MockController::control_trajectory control_seq = MockController::control_trajectory::Zero(); + MockController::control_trajectory control_seq = MockController::control_trajectory::Zero(MockDynamics::CONTROL_DIM, NUM_TIMESTEPS); EXPECT_CALL(*mockController, getControlSeq()).Times(1).WillRepeatedly(testing::Return(control_seq)); - MockController::state_trajectory state_seq = MockController::state_trajectory::Zero(); + MockController::state_trajectory state_seq = MockController::state_trajectory::Zero(MockDynamics::STATE_DIM, NUM_TIMESTEPS); EXPECT_CALL(*mockController, getTargetStateSeq()).Times(1).WillRepeatedly(testing::Return(state_seq)); EXPECT_CALL(*mockController, computeFeedback(testing::_)).Times(1).WillRepeatedly(testing::Invoke(wait_function)); @@ -650,9 +652,9 @@ TEST_F(BasePlantTest, runControlLoopRegular) EXPECT_CALL(*mockController, computeControl(testing::_, testing::_)) .Times(iterations / 2) .WillRepeatedly(testing::Invoke(wait_function)); - MockController::control_trajectory control_seq = MockController::control_trajectory::Zero(); + MockController::control_trajectory control_seq = MockController::control_trajectory::Zero(MockDynamics::CONTROL_DIM, NUM_TIMESTEPS); EXPECT_CALL(*mockController, getControlSeq()).Times(iterations / 2).WillRepeatedly(testing::Return(control_seq)); - MockController::state_trajectory state_seq = MockController::state_trajectory::Zero(); + MockController::state_trajectory state_seq = MockController::state_trajectory::Zero(MockDynamics::STATE_DIM, NUM_TIMESTEPS); EXPECT_CALL(*mockController, getTargetStateSeq()).Times(iterations / 2).WillRepeatedly(testing::Return(state_seq)); EXPECT_CALL(*mockController, computeFeedback(testing::_)) .Times(iterations / 2) @@ -752,9 +754,9 @@ TEST_F(BasePlantTest, runControlLoopSlowed) EXPECT_CALL(*mockController, computeControl(testing::_, testing::_)) .Times(expected_iters) .WillRepeatedly(testing::Invoke(wait_function)); - MockController::control_trajectory control_seq = MockController::control_trajectory::Zero(); + MockController::control_trajectory control_seq = MockController::control_trajectory::Zero(MockDynamics::CONTROL_DIM, NUM_TIMESTEPS); EXPECT_CALL(*mockController, getControlSeq()).Times(expected_iters).WillRepeatedly(testing::Return(control_seq)); - MockController::state_trajectory state_seq = MockController::state_trajectory::Zero(); + MockController::state_trajectory state_seq = MockController::state_trajectory::Zero(MockDynamics::STATE_DIM, NUM_TIMESTEPS); EXPECT_CALL(*mockController, getTargetStateSeq()).Times(expected_iters).WillRepeatedly(testing::Return(state_seq)); EXPECT_CALL(*mockController, computeFeedback(testing::_)) .Times(expected_iters) diff --git a/tests/mppi_core/buffered_plant_tester.cu b/tests/mppi_core/buffered_plant_tester.cu index 17e3db05..9739f853 100644 --- a/tests/mppi_core/buffered_plant_tester.cu +++ b/tests/mppi_core/buffered_plant_tester.cu @@ -40,7 +40,7 @@ public: using buffer_trajectory = typename BufferedPlant::buffer_trajectory; - TestPlant(std::shared_ptr controller, double buffer_time_horizon = 2.0, int hz = 20, + TestPlant(std::shared_ptr controller, double buffer_time_horizon = 2.0, int hz = 20, int opt_stride = 1) : BufferedPlant(controller, hz, opt_stride) { @@ -142,6 +142,7 @@ protected: mockController->model_ = &mockDynamics; mockController->fb_controller_ = &mockFeedback; mockController->sampler_ = &mockSamplingDistribution; + mockController->setNumRollouts(512); EXPECT_CALL(*mockController->cost_, getParams()).Times(1); EXPECT_CALL(*mockController->model_, getParams()).Times(1); diff --git a/tests/mppi_core/rmppi_kernel_tests.cu b/tests/mppi_core/rmppi_kernel_tests.cu index 132b2095..e68dddcf 100644 --- a/tests/mppi_core/rmppi_kernel_tests.cu +++ b/tests/mppi_core/rmppi_kernel_tests.cu @@ -25,8 +25,8 @@ public: using COST_T = DoubleIntegratorCircleCost; // using COST_T = QuadraticCost; using SAMPLER_T = mppi::sampling_distributions::GaussianDistribution; - using FB_T = DDPFeedback; - using control_trajectory = Eigen::Matrix; + using FB_T = DDPFeedback; + using control_trajectory = Eigen::Matrix; using state_array = DYN_T::state_array; using output_array = DYN_T::output_array; using control_array = DYN_T::control_array; @@ -72,6 +72,8 @@ public: fb_controller->bindToStream(stream); curandSetStream(gen, stream); + fb_controller->setNumTimesteps(num_timesteps); + model->GPUSetup(); cost->GPUSetup(); sampler->GPUSetup(); @@ -84,16 +86,17 @@ public: delete cost; delete sampler; delete fb_controller; - if (initial_x_d) - { - HANDLE_ERROR(cudaFree(initial_x_d)); - initial_x_d = nullptr; - } - if (cost_trajectories_d) - { - HANDLE_ERROR(cudaFree(cost_trajectories_d)); - cost_trajectories_d = nullptr; - } + HANDLE_CURAND_ERROR(curandDestroyGenerator(gen)); + // if (initial_x_d) + // { + // HANDLE_ERROR(cudaFree(initial_x_d)); + // initial_x_d = nullptr; + // } + // if (cost_trajectories_d) + // { + // HANDLE_ERROR(cudaFree(cost_trajectories_d)); + // cost_trajectories_d = nullptr; + // } } DYN_T* model; @@ -140,7 +143,7 @@ TEST_F(RMPPIKernels, ValidateCombinedInitEvalKernelAgainstCPU) Eigen::MatrixXf trajectory_costs_cpu = Eigen::MatrixXf::Zero(num_rollouts, 1); Eigen::MatrixXf trajectory_costs_gpu = Eigen::MatrixXf::Zero(num_rollouts, 1); - control_trajectory nominal_trajectory = control_trajectory::Random(); + control_trajectory nominal_trajectory = control_trajectory::Random(DYN_T::CONTROL_DIM, num_timesteps); sampler->copyImportanceSamplerToDevice(nominal_trajectory.data(), 0, false); sampler->generateSamples(1, 0, gen, false); HANDLE_ERROR(cudaMemcpyAsync(initial_x_d, candidates.data(), sizeof(float) * DYN_T::STATE_DIM * num_candidates, @@ -190,6 +193,8 @@ TEST_F(RMPPIKernels, ValidateCombinedInitEvalKernelAgainstCPU) } } HANDLE_ERROR(cudaFree(strides_d)); + HANDLE_ERROR(cudaFree(initial_x_d)); + HANDLE_ERROR(cudaFree(cost_trajectories_d)); } TEST_F(RMPPIKernels, ValidateSplitInitEvalKernelAgainstCPU) @@ -224,7 +229,7 @@ TEST_F(RMPPIKernels, ValidateSplitInitEvalKernelAgainstCPU) Eigen::MatrixXf trajectory_costs_cpu = Eigen::MatrixXf::Zero(num_rollouts, 1); Eigen::MatrixXf trajectory_costs_gpu = Eigen::MatrixXf::Zero(num_rollouts, 1); - control_trajectory nominal_trajectory = control_trajectory::Random(); + control_trajectory nominal_trajectory = control_trajectory::Random(DYN_T::CONTROL_DIM, num_timesteps); sampler->copyImportanceSamplerToDevice(nominal_trajectory.data(), 0, false); sampler->generateSamples(1, 0, gen, false); HANDLE_ERROR(cudaMemcpyAsync(initial_x_d, candidates.data(), sizeof(float) * DYN_T::STATE_DIM * num_candidates, @@ -312,6 +317,8 @@ TEST_F(RMPPIKernels, ValidateSplitInitEvalKernelAgainstCPU) } HANDLE_ERROR(cudaFree(strides_d)); HANDLE_ERROR(cudaFree(output_d)); + HANDLE_ERROR(cudaFree(initial_x_d)); + HANDLE_ERROR(cudaFree(cost_trajectories_d)); } TEST_F(RMPPIKernels, ValidateCombinedRMPPIRolloutKernelAgainstCPU) @@ -330,7 +337,7 @@ TEST_F(RMPPIKernels, ValidateCombinedRMPPIRolloutKernelAgainstCPU) state_array initial_real_state = state_array::Random(); state_array initial_nominal_state = state_array::Random(); - control_trajectory nominal_trajectory = control_trajectory::Random(); + control_trajectory nominal_trajectory = control_trajectory::Random(DYN_T::CONTROL_DIM, num_timesteps); sampler->copyImportanceSamplerToDevice(nominal_trajectory.data(), nominal_idx, false); sampler->copyImportanceSamplerToDevice(nominal_trajectory.data(), real_idx, false); fb_controller->copyToDevice(false); @@ -385,6 +392,8 @@ TEST_F(RMPPIKernels, ValidateCombinedRMPPIRolloutKernelAgainstCPU) } } } + HANDLE_ERROR(cudaFree(initial_x_d)); + HANDLE_ERROR(cudaFree(cost_trajectories_d)); } TEST_F(RMPPIKernels, ValidateSplitRMPPIRolloutKernelAgainstCPU) @@ -405,7 +414,7 @@ TEST_F(RMPPIKernels, ValidateSplitRMPPIRolloutKernelAgainstCPU) state_array initial_real_state = state_array::Random(); state_array initial_nominal_state = state_array::Random(); - control_trajectory nominal_trajectory = control_trajectory::Random(); + control_trajectory nominal_trajectory = control_trajectory::Random(DYN_T::CONTROL_DIM, num_timesteps); sampler->copyImportanceSamplerToDevice(nominal_trajectory.data(), nominal_idx, false); sampler->copyImportanceSamplerToDevice(nominal_trajectory.data(), real_idx, false); fb_controller->copyToDevice(false); @@ -501,6 +510,8 @@ TEST_F(RMPPIKernels, ValidateSplitRMPPIRolloutKernelAgainstCPU) } } HANDLE_ERROR(cudaFree(output_d)); + HANDLE_ERROR(cudaFree(initial_x_d)); + HANDLE_ERROR(cudaFree(cost_trajectories_d)); } TEST_F(RMPPIKernels, ValidateCombinedRMPPIRolloutKernelAgainstMPPIRollout) @@ -521,7 +532,7 @@ TEST_F(RMPPIKernels, ValidateCombinedRMPPIRolloutKernelAgainstMPPIRollout) state_array initial_real_state = state_array::Random(); - control_trajectory nominal_trajectory = control_trajectory::Random(); + control_trajectory nominal_trajectory = control_trajectory::Random(DYN_T::CONTROL_DIM, num_timesteps); sampler->copyImportanceSamplerToDevice(nominal_trajectory.data(), nominal_idx, false); sampler->copyImportanceSamplerToDevice(nominal_trajectory.data(), real_idx, false); fb_controller->copyToDevice(false); @@ -588,4 +599,6 @@ TEST_F(RMPPIKernels, ValidateCombinedRMPPIRolloutKernelAgainstMPPIRollout) } } } + HANDLE_ERROR(cudaFree(initial_x_d)); + HANDLE_ERROR(cudaFree(cost_trajectories_d)); } diff --git a/tests/mppi_core/viz_kernels_test.cu b/tests/mppi_core/viz_kernels_test.cu index 0abbbf26..f5e4af58 100644 --- a/tests/mppi_core/viz_kernels_test.cu +++ b/tests/mppi_core/viz_kernels_test.cu @@ -80,8 +80,8 @@ protected: CartpoleDynamics dynamics = CartpoleDynamics(1, 1, 1); CartpoleQuadraticCost cost; - DDPFeedback fb_controller = - DDPFeedback(&dynamics, dt); + DDPFeedback fb_controller = + DDPFeedback(&dynamics, dt); float dt = 0.02; int num_rollouts = 20;