corl2018.bib



@Proceedings{CoRL2018,
  title =     {Proceedings of Machine Learning Research},
  booktitle = {Proceedings of Machine Learning Research},
  editor =    {Aude Billard and Anca Dragan and Jan Peters and Jun Morimoto},
  publisher = {PMLR},
  series =    {Proceedings of Machine Learning Research},
  volume =    87
}


@InProceedings{mueller18a,
  title = 	 {Driving Policy Transfer via Modularity and Abstraction},
  author = 	 {Mueller, Matthias and Dosovitskiy, Alexey and Ghanem, Bernard and Koltun, Vladlen},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {1--15},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {https://www.youtube.com/watch?v=BrMDJqI6H5U},
  code = {https://github.com/thias15/driving_policy_transfer},
  pdf = 	 {http://proceedings.mlr.press/v87/mueller18a/mueller18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/mueller18a.html},
  abstract = 	 {End-to-end approaches to autonomous driving have high sample complexity and are difficult to scale to realistic urban driving. Simulation can help end-to-end driving systems by providing a cheap, safe, and diverse training environment. Yet training driving policies in simulation brings up the problem of transferring such policies to the real world. We present an approach to transferring driving policies from simulation to reality via modularity and abstraction. Our approach is inspired by classic driving systems and aims to combine the benefits of modular architectures and end-to-end deep learning approaches. The key idea is to encapsulate the driving policy such that it is not directly exposed to raw perceptual input or low-level vehicle dynamics. We evaluate the presented approach in simulated urban environments and in the real world. In particular, we transfer a driving policy trained in simulation to a 1/5-scale robotic truck that is deployed in a variety of conditions, with no finetuning, on two continents.}
}


@InProceedings{ohnbar18a,
  title = 	 {Personalized Dynamics Models for Adaptive Assistive Navigation Systems},
  author = 	 {OhnBar, Eshed and Kitani, Kris and Asakawa, Chieko},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {16--39},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {},
  code  = {},
  pdf = 	 {http://proceedings.mlr.press/v87/ohnbar18a/ohnbar18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/ohnbar18a.html},
  abstract = 	 {Consider an assistive system that guides visually impaired users through speech and haptic feedback to their destination. Existing robotic and ubiquitous navigation technologies (e.g., portable, ground, or wearable systems) often operate in a generic, user-agnostic manner. However, to minimize confusion and navigation errors, our real-world analysis reveals a crucial need to adapt theinstructional guidance across different end-users with diverse mobility skills. To address this practical issue in scalable system design, we propose a novel model based reinforcement learning framework for personalizing the system-user interaction experience. When incrementally adapting the system to new users, we propose to use a weighted experts model for addressing data-efficiency limitations in transfer learning with deep models. A real-world dataset of navigation by blind users is used to show that the proposed approach allows for (1) more accurate long-term human behavior prediction (up to 20 seconds into the future) through improved reasoning over personal mobility characteristics, interaction with surrounding obstacles, and the current navigation goal, and (2) quick adaptation at the onset of learning, when data is limited.}
}


@InProceedings{xie18a,
  title = 	 {Few-Shot Goal Inference for Visuomotor Learning and Planning},
  author = 	 {Xie, Annie and Singh, Avi and Levine, Sergey and Finn, Chelsea},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {40--52},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {https://www.youtube.com/watch?v=kR-Y1GK7MHA&feature=youtu.be},
  code = {https://github.com/anxie/meta_classifier},
  pdf = 	 {http://proceedings.mlr.press/v87/xie18a/xie18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/xie18a.html},
  abstract = 	 {Reinforcement learning and planning methods require an objective or reward function that encodes the desired behavior. Yet, in practice, there is a wide range of scenarios where an objective is difficult to provide programmatically, such as tasks with visual observations involving unknown object positions or deformable objects. In these cases, prior methods use engineered problem-specific solutions, e.g., by instrumenting the environment with additional sensors to measure a proxy for the objective. Such solutions require a significant engineering effort on a per-task basis, and make it impractical for robots to continuously learn complex skills outside of laboratory settings. We aim to find a more general and scalable solution for specifying goals for robot learning in unconstrained environments. To that end, we formulate the few-shot objective learning problem, where the goal is to learn a task objective from only a few example images of successful end states for that task. We propose a simple solution to this problem: meta-learn a classifier that can recognize new goals from a few examples. We show how this approach can be used with both model-free reinforcement learning and visual model-based planning and show results in three domains: rope manipulation from images in simulation, visual navigation in a simulated 3D environment, and object arrangement into user-specified configurations on a real robot.}
}


@InProceedings{das18a,
  title = 	 {Neural Modular Control for Embodied Question Answering},
  author = 	 {Das, Abhishek and Gkioxari, Georgia and Lee, Stefan and Parikh, Devi and Batra, Dhruv},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {53--62},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {},
  code = {https://github.com/facebookresearch/EmbodiedQA},
  pdf = 	 {http://proceedings.mlr.press/v87/das18a/das18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/das18a.html},
  abstract = 	 {We present a modular approach for learning policies for navigation over long planning horizons from language input. Our hierarchical policy operates at multiple timescales, where the higher-level master policy proposes subgoals to be executed by specialized sub-policies. Our choice of subgoals is compositional and semantic, i.e. they can be sequentially combined in arbitrary orderings, and assume human-interpretable descriptions (e.g. ‘exit room’, ‘find kitchen’, ‘find refrigerator’, etc.). We use imitation learning to warm-start policies at each level of the hierarchy, dramatically increasing sample efficiency, followed by reinforcement learning. Independent reinforcement learning at each level of hierarchy enables sub-policies to adapt to consequences of their actions and recover from errors. Subsequent joint hierarchical training enables the master policy to adapt to the sub-policies. On the challenging EQA [1] benchmark in House3D [2], requiring navigating diverse realistic indoor environments, our approach outperforms prior work by a significant margin, both in terms of navigation and question answering.}
}


@InProceedings{yang18a,
  title = 	 {Visual Curiosity: Learning to Ask Questions to Learn Visual Recognition},
  author = 	 {Yang, Jianwei and Lu, Jiasen and Lee, Stefan and Batra, Dhruv and Parikh, Devi},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {63--80},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {},
  code = {https://github.com/jwyang/visual_curiosity},
  pdf = 	 {http://proceedings.mlr.press/v87/yang18a/yang18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/yang18a.html},
  abstract = 	 {In an open-world setting, it is inevitable that an intelligent agent (e.g., a robot) will encounter visual objects, attributes or relationships it does not recognize. In this work, we develop an agent empowered with visual curiosity, i.e. the ability to ask questions to an Oracle (e.g., human) about the contents in images (e.g., ‘What is the object on the left side of the red cube?’) and build visual recognition model based on the answers received (e.g., ‘Cylinder’). In order to do this, the agent must (1) understand what it recognizes and what it does not, (2) formulate a valid, unambiguous and informative ‘language’ query (a question) to ask the Oracle, (3) derive the parameters of visual classifiers from the Oracle response and (4) leverage the updated visual classifiers to ask more clarified questions. Specifically, we propose a novel framework and formulate the learning of visual curiosity as a reinforcement learning problem. In this framework, all components of our agent – visual recognition module (to see), question generation policy (to ask), answer digestion module (to understand) and graph memory module (to memorize) – are learned entirely end-to-end to maximize the reward derived from the scene graph obtained by the agent as a consequence of the dialog with the Oracle. Importantly, the question generation policy is disentangled from the visual recognition system and specifics of the ‘environment’ (scenes). Consequently, we demonstrate a sort of ‘double’ generalization – our question generation policy generalizes to new environments and a new pair of eyes, i.e., new visual system. Specifically, an agent trained on one set of environments (scenes) and with one particular visual recognition system is able to ask intelligent questions about new scenes when paired with a new visual recognition system. Trained on a synthetic dataset, our results show that our agent learns new visual concepts significantly faster than several heuristic baselines – even when tested on synthetic environments with novel objects, as well as in a realistic environment.}
}


@InProceedings{yu18a,
  title = 	 {Guided Feature Transformation (GFT): A Neural Language Grounding Module for Embodied Agents},
  author = 	 {Yu, Haonan and Lian, Xiaochen and Zhang, Haichao and Xu, Wei},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {81--98},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {https://www.youtube.com/watch?v=bOBb1uhuJxg},
  code = {https://github.com/idlrl/flare/blob/master/tutorial/examples/xworld3d_navigation.py},
  pdf = 	 {http://proceedings.mlr.press/v87/yu18a/yu18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/yu18a.html},
  abstract = 	 {Recently there has been a rising interest in training agents, embodied in virtual environments, to perform language-directed tasks by deep reinforcement learning. In this paper, we propose a simple but effective neural language grounding module for embodied agents that can be trained end to end from scratch taking raw pixels, unstructured linguistic commands, and sparse rewards as the inputs. We model the language grounding process as a language-guided transformation of visual features, where latent sentence embeddings are used as the transformation matrices. In several language-directed navigation tasks that feature challenging partial observability and require simple reasoning, our module significantly outperforms the state of the art. We also release XWORLD3D, an easy-to-customize 3D environment that can be modified to evaluate a variety of embodied agents.}
}


@InProceedings{jang18a,
  title = 	 {Grasp2Vec: Learning Object Representations from Self-Supervised Grasping},
  author = 	 {Jang, Eric and Devin, Coline and Vanhoucke, Vincent and Levine, Sergey},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {99--112},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {},
  code  = {},
  pdf = 	 {http://proceedings.mlr.press/v87/jang18a/jang18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/jang18a.html},
  abstract = 	 {Well structured visual representations can make robot learning faster and can improve generalization. In this paper, we study how we can acquire effective object-centric representations for robotic manipulation tasks without human labeling by using autonomous robot interaction with the environment. Such representation learning methods can benefit from continuous refinement of the representation as the robot collects more experience, allowing them to scale effectively without human intervention. Our representation learning approach is based on object persistence: when a robot removes an object from a scene, the representation of that scene should change according to the features of the object that was removed. We formulate an arithmetic relationship between feature vectors from this observation, and use it to learn a representation of scenes and objects that can then be used to identify object instances, localize them in the scene, and perform goal-directed grasping tasks where the robot must retrieve commanded objects from a bin. The same grasping procedure can also be used to automatically collect training data for our method, by recording images of scenes, grasping and removing an object, and recording the outcome. Our experiments demonstrate that this self-supervised approach for tasked grasping substantially outperforms direct reinforcement learning from images and prior representation learning methods. }
}


@InProceedings{zhao18a,
  title = 	 {Energy-Based Hindsight Experience Prioritization},
  author = 	 {Zhao, Rui and Tresp, Volker},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {113--122},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {https://youtu.be/jtsF2tTeUGQ},
  code = {https://github.com/ruizhaogit/EnergyBasedPrioritization},
  pdf = 	 {http://proceedings.mlr.press/v87/zhao18a/zhao18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/zhao18a.html},
  abstract = 	 {In Hindsight Experience Replay (HER), a reinforcement learning agent is trained by treating whatever it has achieved as virtual goals. However, in pre- vious work, the experience was replayed at random, without considering which episode might be the most valuable for learning. In this paper, we develop an energy-based framework for prioritizing hindsight experience in robotic manipulation tasks. Our approach is inspired by the work-energy principle in physics. We define a trajectory energy function as the sum of the transition energy of the target object over the trajectory. We hypothesize that replaying episodes that have high trajectory energy is more effective for reinforcement learning in robotics. To verify our hypothesis, we designed a framework for hindsight experience prioritization based on the trajectory energy of goal states. The trajectory energy function takes the potential, kinetic, and rotational energy into consideration. We evaluate our Energy-Based Prioritization (EBP) approach on four challenging robotic manipulation tasks in simulation. Our empirical results show that our proposed method surpasses state-of-the-art approaches in terms of both performance and sample-efficiency on all four tasks, without increas- ing computational time. A video showing experimental results is available at https://youtu.be/jtsF2tTeUGQ. }
}


@InProceedings{losey18a,
  title = 	 {Including Uncertainty when Learning from Human Corrections},
  author = 	 {Losey, Dylan P. and O'Malley, Marcia K.},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {123--132},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {},
  code  = {},
  pdf = 	 {http://proceedings.mlr.press/v87/losey18a/losey18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/losey18a.html},
  abstract = 	 {It is difficult for humans to efficiently teach robots how to correctly perform a task. One intuitive solution is for the robot to iteratively learn the human’s preferences from corrections, where the human improves the robot’s current behavior at each iteration. When learning from corrections, we argue that while the robot should estimate the most likely human preferences, it should also know what it does not know, and integrate this uncertainty as it makes decisions. We advance the state-of-the-art by introducing a Kalman filter for learning from corrections: this approach obtains the uncertainty of the estimated human preferences. Next, we demonstrate how the estimate uncertainty can be leveraged for active learning and risk-sensitive deployment. Our results indicate that obtaining and leveraging uncertainty leads to faster learning from human corrections. }
}


@InProceedings{kaufmann18a,
  title = 	 {Deep Drone Racing: Learning Agile Flight in Dynamic Environments},
  author = 	 {Kaufmann, Elia and Loquercio, Antonio and Ranftl, Rene and Dosovitskiy, Alexey and Koltun, Vladlen and Scaramuzza, Davide},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {133--145},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {https://www.youtube.com/watch?v=8RILnqPxo1s},
  code  = {},
  pdf = 	 {http://proceedings.mlr.press/v87/kaufmann18a/kaufmann18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/kaufmann18a.html},
  abstract = 	 {Autonomous agile flight brings up fundamental challenges in robotics, such as coping with unreliable state estimation, reacting optimally to dynamically changing environments, and coupling perception and action in real time under severe resource constraints. In this paper, we consider these challenges in the context of autonomous, vision-based drone racing in dynamic environments. Our approach combines a convolutional neural network (CNN) with a state-of-the-art path-planning and control system. The CNN directly maps raw images into a robust representation in the form of a waypoint and desired speed. This information is then used by the planner to generate a short, minimum-jerk trajectory segment and corresponding motor commands to reach the desired goal. We demonstrate our method in autonomous agile flight scenarios, in which a vision-based quadrotor traverses drone-racing tracks with possibly moving gates. Our method does not require any explicit map of the environment and runs fully onboard. We extensively test the precision and robustness of the approach in simulation and in the physical world. We also evaluate our method against state-of-the-art navigation approaches and professional human drone pilots. }
}


@InProceedings{yang18b,
  title = 	 {HDNET: Exploiting HD Maps for 3D Object Detection},
  author = 	 {Yang, Bin and Liang, Ming and Urtasun, Raquel},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {146--155},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {},
  code  = {},
  pdf = 	 {http://proceedings.mlr.press/v87/yang18b/yang18b.pdf},
  url = 	 {http://proceedings.mlr.press/v87/yang18b.html},
  abstract = 	 {In this paper we show that High-Definition (HD) maps provide strong priors that can boost the performance and robustness of modern 3D object detectors. Towards this goal, we design a single stage detector that extracts geometric and semantic features from the HD maps. As maps might not be available everywhere, we also propose a map prediction module that estimates the map on the fly from raw LiDAR data. We conduct extensive experiments on KITTI [1] as well as a large-scale 3D detection benchmark containing 1 million frames, and show that the proposed map-aware detector consistently outperforms the state-of-the-art in both mapped and un-mapped scenarios. Importantly the whole framework runs at 20 frames per second. }
}


@InProceedings{amiranashvili18a,
  title = 	 {Motion Perception in Reinforcement Learning with Dynamic Objects},
  author = 	 {Amiranashvili, Artemij and Dosovitskiy, Alexey and Koltun, Vladlen and Brox, Thomas},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {156--168},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {https://youtu.be/YALmehmmu3Q},
  code = {https://github.com/lmb-freiburg/flow_rl},
  pdf = 	 {http://proceedings.mlr.press/v87/amiranashvili18a/amiranashvili18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/amiranashvili18a.html},
  abstract = 	 {In dynamic environments, learned controllers are supposed to take motion into account when selecting the action to be taken. However, in existing reinforcement learning works motion is rarely treated explicitly; it is rather assumed that the controller learns the necessary motion representation from temporal stacks of frames implicitly. In this paper, we show that for continuous control tasks learning an explicit representation of motion clearly improves the quality of the learned controller in dynamic scenarios. We demonstrate this on common benchmark tasks (Walker, Swimmer, Hopper), on target reaching and ball catching tasks with simulated robotic arms, and on a dynamic single ball juggling task. Moreover, we find that when equipped with an appropriate network architecture, the agent can, on some tasks, learn motion features also with pure reinforcement learning, without additional supervision. }
}


@InProceedings{karkus18a,
  title = 	 {Particle Filter Networks with Application to Visual Localization},
  author = 	 {Karkus, Peter and Hsu, David and Lee, Wee Sun},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {169--178},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {},
  code  = {https://github.com/AdaCompNUS/pfnet},
  pdf = 	 {http://proceedings.mlr.press/v87/karkus18a/karkus18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/karkus18a.html},
  abstract = 	 {Particle filtering is a powerful approach to sequential state estimation and finds application in many domains, including robot localization, object tracking, etc. To apply particle filtering in practice, a critical challenge is to construct probabilistic system models, especially for systems with complex dynamics or rich sensory inputs such as camera images. This paper introduces the Particle Filter Network (PFnet), which encodes both a system model and a particle filter algorithm in a single neural network. The PF-net is fully differentiable and trained end-to-end from data. Instead of learning a generic system model, it learns a model optimized for the particle filter algorithm. We apply the PF-net to a visual localization task, in which a robot must localize in a rich 3-D world, using only a schematic 2-D floor map. In simulation experiments, PF-net consistently outperforms alternative learning architectures, as well as a traditional model-based method, under a variety of sensor inputs. Further, PF-net generalizes well to new, unseen environments. }
}


@InProceedings{martin18a,
  title = 	 {Sparse Gaussian Process Temporal Difference Learning for Marine Robot Navigation},
  author = 	 {Martin, John and Wang, Jinkun and Englot, Brendan},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {179--189},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {},
  code  = {},
  pdf = 	 {http://proceedings.mlr.press/v87/martin18a/martin18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/martin18a.html},
  abstract = 	 {We present a method for Temporal Difference (TD) learning that addresses several challenges faced by robots learning to navigate in a marine environment. For improved data efficiency, our method reduces TD updates to Gaussian Process regression. To make predictions amenable to online settings, we introduce a sparse approximation with improved quality over current rejection-based methods. We derive the predictive value function posterior and use the moments to obtain a new algorithm for model-free policy evaluation, SPGP-SARSA. With simple changes, we show SPGP-SARSA can be reduced to a model-based equivalent, SPGP-TD. We perform comprehensive simulation studies and also conduct physical learning trials with an underwater robot. Our results show SPGP-SARSA can outperform the state-of-the-art sparse method, replicate the prediction quality of its exact counterpart, and be applied to solve underwater navigation tasks. }
}


@InProceedings{guizilini18a,
  title = 	 {Fast 3D Modeling with Approximated Convolutional Kernels},
  author = 	 {Guizilini, Vitor and Ramos, Fabio},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {190--199},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {},
  code  = {},
  pdf = 	 {http://proceedings.mlr.press/v87/guizilini18a/guizilini18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/guizilini18a.html},
  abstract = 	 {This paper introduces a novel regression methodology for 3D reconstruction, with applications in robotics tasks such as terrain modeling and implicit surface calculation. The proposed methodology is based on projections into a high-dimensional space, that is able to fit arbitrarily complex data as a continuous function using a series of kernel evaluations within a linear regression model. We avoid direct kernel calculation by employing a novel sparse random Fourier feature vector, that approximates any shift-invariant kernel as a series of dot products relative to a set of inducing points placed throughout the input space. The varying properties of these inducing points produce non-stationarity in the resulting model, and can be jointly learned alongside linear regression weights. Furthermore, we show how convolution with arbitrary kernels can be performed directly in this high-dimensional continuous space, by training a neural network to learn the Fourier transform of the convolutional output based on information from the input kernels. Experimental results in terrain modeling and implicit surface calculation show that the proposed framework is able to outperform similar techniques in terms of computational speed without sacrificing accuracy, while enabling efficient convolution with arbitrary kernels for tasks such as global localization and template matching within these applications. }
}


@InProceedings{guizilini18b,
  title = 	 {Unpaired Learning of Dense Visual Depth Estimators for Urban Environments},
  author = 	 {Guizilini, Vitor and Ramos, Fabio},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {200--212},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {},
  code  = {},
  pdf = 	 {http://proceedings.mlr.press/v87/guizilini18b/guizilini18b.pdf},
  url = 	 {http://proceedings.mlr.press/v87/guizilini18b.html},
  abstract = 	 {This paper addresses the classical problem of learning-based monocular depth estimation in urban environments, in which a model is trained to directly map a single input image to its corresponding depth values. All currently available techniques treat monocular depth estimation as a regression problem, and thus require some sort of data pairing, either explicitly as input-output ground-truth pairs, using information from range sensors (i.e. laser), or as binocular stereo footage. We introduce a novel methodology that completely eliminates the need for data pairing, only requiring two unrelated datasets containing samples of input images and output depth values. A cycle-consistent generative adversarial network is used to learn a mapping between these two domains, based on a custom adversarial loss function specifically designed to improve performance on the task of monocular depth estimation, including local depth smoothness and boundary equilibrium. A wide range of experiments were conducted using a variety of well-known indoor and outdoor datasets, with depth estimates obtained from laser sensors, RGBD cameras and SLAM pointclouds. In all of them, the proposed CycleDepth framework reaches competitive results even under a more restricted training scenario. }
}


@InProceedings{stein18a,
  title = 	 {Learning over Subgoals for Efficient Navigation of Structured, Unknown Environments},
  author = 	 {Stein, Gregory J. and Bradley, Christopher and Roy, Nicholas},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {213--222},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {https://youtu.be/hmJ-LON5mQc},
  code = {},
  pdf = 	 {http://proceedings.mlr.press/v87/stein18a/stein18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/stein18a.html},
  abstract = 	 {
 We propose a novel technique for efficiently navigating unknown environments over long horizons by learning to predict properties of unknown space. We generate a dynamic action set defined by the current map, factor the Bellman Equation in terms of these actions, and estimate terms, such as the probability that navigating beyond a particular subgoal will lead to a dead-end, that are otherwise difficult to compute. Simulated agents navigating with our Learned Subgoal Planner in real-world floor plans demonstrate a 21% expected decrease in cost-to-go compared to standard optimistic planning techniques that rely on Dijkstra’s algorithm, and real-world agents show promising navigation performance as well. }
}


@InProceedings{subramani18a,
  title = 	 {Inferring geometric constraints in human demonstrations},
  author = 	 {Subramani, Guru and Zinn, Michael and Gleicher, Michael},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {223--236},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {https://youtu.be/-rra2QA51Is},
  code = {https://github.com/uwgraphics/CoRL2018Constraint},
  pdf = 	 {http://proceedings.mlr.press/v87/subramani18a/subramani18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/subramani18a.html},
  abstract = 	 {This paper presents an approach for inferring geometric constraints in human demonstrations. In our method, geometric constraint models are built to create representations of kinematic constraints such as fixed point, axial rotation, prismatic motion, planar motion and others across multiple degrees of freedom. Our method infers geometric constraints using both kinematic and force/torque information. The approach first fits all the constraint models using kinematic information and evaluates them individually using position, force and moment criteria. Our approach does not require information about the constraint type or contact geometry; it can determine both simultaneously. We present experimental evaluations using instrumented tongs that show how constraints can be robustly inferred in recordings of human demonstrations. }
}


@InProceedings{sauer18a,
  title = 	 {Conditional Affordance Learning for Driving in Urban Environments},
  author = 	 {Sauer, Axel and Savinov, Nikolay and Geiger, Andreas},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {237--252},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {https://www.youtube.com/watch?v=UtUbpigMgr0},
  code = {https://github.com/xl-sr/CAL},
  pdf = 	 {http://proceedings.mlr.press/v87/sauer18a/sauer18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/sauer18a.html},
  abstract = 	 {Most existing approaches to autonomous driving fall into one of two categories: modular pipelines, that build an extensive model of the environment, and imitation learning approaches, that map images directly to control outputs. A recently proposed third paradigm, direct perception, aims to combine the advantages of both by using a neural network to learn appropriate low-dimensional intermediate representations. However, existing direct perception approaches are restricted to simple highway situations, lacking the ability to navigate intersections, stop at traffic lights or respect speed limits. In this work, we propose a direct perception approach which maps video input to intermediate representations suitable for autonomous navigation in complex urban environments given high-level directional inputs. Compared to state-of-the-art reinforcement and conditional imitation learning approaches, we achieve an improvement of up to 68 % in goal-directed navigation on the challenging CARLA simulation benchmark. In addition, our approach is the first to handle traffic lights and speed signs by using image-level labels only, as well as smooth car-following, resulting in a significant reduction of traffic accidents in simulation. }
}


@InProceedings{wenzel18a,
  title = 	 {Modular Vehicle Control for Transferring Semantic Information Between Weather Conditions Using GANs},
  author = 	 {Wenzel, Patrick and Khan, Qadeer and Cremers, Daniel and Leal-Taixe, Laura},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {253--269},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {https://www.youtube.com/playlist?list=PLbT2smuiIncsR_s9YA6KFpsa8gMwus5u7},
  code = {https://github.com/pmwenzel/carla-domain-adaptation},
  pdf = 	 {http://proceedings.mlr.press/v87/wenzel18a/wenzel18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/wenzel18a.html},
  abstract = 	 {Even though end-to-end supervised learning has shown promising results for sensorimotor control of self-driving cars, its performance is greatly affected by the weather conditions under which it was trained, showing poor generalization to unseen conditions. In this paper, we show how knowledge can be transferred using semantic maps to new weather conditions without the need to obtain new ground truth data. To this end, we propose to divide the task of vehicle control into two independent modules: a control module which is only trained on one weather condition for which labeled steering data is available, and a perception module which is used as an interface between new weather conditions and the fixed control module. To generate the semantic data needed to train the perception module, we propose to use a generative adversarial network (GAN)-based model to retrieve the semantic information for the new conditions in an unsupervised manner. We introduce a master-servant architecture, where the master model (semantic labels available) trains the servant model (semantic labels not available). We show that our proposed method trained with ground truth data for a single weather condition is capable of achieving similar results on the task of steering angle prediction as an end-to-end model trained with ground truth data of 15 different weather conditions. }
}


@InProceedings{liang18a,
  title = 	 {GPU-Accelerated Robotic Simulation for Distributed Reinforcement Learning},
  author = 	 {Liang, Jacky and Makoviychuk, Viktor and Handa, Ankur and Chentanez, Nuttapong and Macklin, Miles and Fox, Dieter},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {270--282},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {},
  code  = {},
  pdf = 	 {http://proceedings.mlr.press/v87/liang18a/liang18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/liang18a.html},
  abstract = 	 {Most Deep Reinforcement Learning (Deep RL) algorithms require a prohibitively large number of training samples for learning complex tasks. Many recent works on speeding up Deep RL have focused on distributed training and simulation. While distributed training is often done on the GPU, simulation is not. In this work, we propose using GPU-accelerated RL simulations as an alternative to CPU ones. Using NVIDIA Flex, a GPU-based physics engine, we show promising speed-ups of learning various continuous-control, locomotion tasks. With one GPU and CPU core, we are able to train the Humanoid running task in less than 20 minutes, using 10-1000x fewer CPU cores than previous works. We also demonstrate the scalability of our simulator to multi-GPU settings to train more challenging locomotion tasks. }
}


@InProceedings{ushani18a,
  title = 	 {Feature Learning for Scene Flow Estimation from LIDAR},
  author = 	 {Ushani, Arash K. and Eustice, Ryan M.},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {283--292},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {},
  code = {https://github.com/aushani/flsf},
  pdf = 	 {http://proceedings.mlr.press/v87/ushani18a/ushani18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/ushani18a.html},
  abstract = 	 {To perform tasks in dynamic environments, many mobile robots must estimate the motion in the surrounding world. Recently, techniques have been developed to estimate scene flow directly from LIDAR scans, relying on hand-designed features. In this work, we build an encoding network to learn features from an occupancy grid. The network is trained so that these features are discriminative in finding matching or non-matching locations between successive timesteps. This learned feature space is then leveraged to estimate scene flow. We evaluate our method on the KITTI dataset and demonstrate performance that improves upon the accuracy of the current state-of-the-art. We provide an implementation of our method at https://github.com/aushani/flsf. }
}


@InProceedings{majumdar18a,
  title = 	 {PAC-Bayes Control: Synthesizing Controllers that Provably Generalize to Novel Environments},
  author = 	 {Majumdar, Anirudha and Goldstein, Maxwell},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {293--305},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {https://www.youtube.com/watch?v=zu_O-lW5X_8},
  code = {https://github.com/irom-lab/PAC-Bayes-Control},
  pdf = 	 {http://proceedings.mlr.press/v87/majumdar18a/majumdar18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/majumdar18a.html},
  abstract = 	 {Our goal is to synthesize controllers for robots that provably generalize well to novel environments given a dataset of example environments. The key technical idea behind our approach is to leverage tools from generalization theory in machine learning by exploiting a precise analogy (which we present in the form of a reduction) between robustness of controllers to novel environments and generalization of hypotheses in supervised learning. In particular, we utilize the Probably Approximately Correct (PAC)-Bayes framework, which allows us to obtain upper bounds (that hold with high probability) on the expected cost of (stochastic) controllers across novel environments. We propose control synthesis algorithms that explicitly seek to minimize this upper bound. The corresponding optimization problem can be solved efficiently using convex optimization (Relative Entropy Programming in particular) in the setting where we are optimizing over a finite control policy space. In the more general setting of continuously parameterized controllers, we minimize this upper bound using stochastic gradient descent. We present examples of our approach in the context of obstacle avoidance control with depth measurements. Our simulated examples demonstrate the potential of our approach to provide strong generalization guarantees on controllers for robotic systems with continuous state and action spaces, nonlinear dynamics, and partially observable state via sensor measurements.}
}


@InProceedings{tremblay18a,
  title = 	 {Deep Object Pose Estimation for Semantic Robotic Grasping of Household Objects},
  author = 	 {Tremblay, Jonathan and To, Thang and Sundaralingam, Balakumar and Xiang, Yu and Fox, Dieter and Birchfield, Stan},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {306--316},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {},
  code = {https://github.com/NVlabs/Deep_Object_Pose},
  pdf = 	 {http://proceedings.mlr.press/v87/tremblay18a/tremblay18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/tremblay18a.html},
  abstract = 	 {Using synthetic data for training deep neural networks for robotic manipulation holds the promise of an almost unlimited amount of pre-labeled training data, generated safely out of harm’s way. One of the key challenges of synthetic data, to date, has been to bridge the so-called reality gap, so that networks trained on synthetic data operate correctly when exposed to real-world data. We explore the reality gap in the context of 6-DoF pose estimation of known objects from a single RGB image. We show that for this problem the reality gap can be successfully spanned by a simple combination of domain randomized and photorealistic data. Using synthetic data generated in this manner, we introduce a one-shot deep neural network that is able to perform competitively against a state-of-the-art network trained on a combination of real and synthetic data. To our knowledge, this is the first deep network trained only on synthetic data that is able to achieve state-of-the-art performance on 6-DoF object pose estimation. Our network also generalizes better to novel environments including extreme lighting conditions, for which we show qualitative results. Using this network we demonstrate a real-time system estimating object poses with sufficient accuracy for real-world semantic grasping of known household objects in clutter by a real robot.}
}


@InProceedings{schenck18a,
  title = 	 {SPNets: Differentiable Fluid Dynamics for Deep Neural Networks},
  author = 	 {Schenck, Connor and Fox, Dieter},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {317--335},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {https://youtu.be/hg_tXHLIZ6o},
  code = {https://github.com/cschenck/SmoothParticleNets},
  pdf = 	 {http://proceedings.mlr.press/v87/schenck18a/schenck18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/schenck18a.html},
  abstract = 	 {In this paper we introduce Smooth Particle Networks (SPNets), a framework for integrating fluid dynamics with deep networks. SPNets adds two new layers to the neural network toolbox: ConvSP and ConvSDF, which enable computing physical interactions with unordered particle sets. We use these layers in combination with standard neural network layers to directly implement fluid dynamics inside a deep network, where the parameters of the network are the fluid parameters themselves (e.g., viscosity, cohesion, etc.). Because SPNets are implemented as a neural network, the resulting fluid dynamics are fully differentiable. We then show how this can be successfully used to learn fluid parameters from data, perform liquid control tasks, and learn policies to manipulate liquids. }
}


@InProceedings{bauza18a,
  title = 	 {A Data-Efficient Approach to Precise and Controlled Pushing},
  author = 	 {Bauza, Maria and Hogan, Francois R. and Rodriguez, Alberto},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {336--345},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {https://www.youtube.com/watch?v=Z45O480pij0},
  code = {},
  pdf = 	 {http://proceedings.mlr.press/v87/bauza18a/bauza18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/bauza18a.html},
  abstract = 	 {Decades of research in control theory have shown that simple controllers, when provided with timely feedback, can control complex systems. Pushing is an example of a complex mechanical system that is difficult to model accurately due to unknown system parameters such as coefficients of friction and pressure distributions. In this paper, we explore the data-complexity required for controlling, rather than modeling, such a system. Results show that a model-based control approach, where the dynamical model is learned from data, is capable of performing complex pushing trajectories with a minimal amount of training data (<10 data points). The dynamics of pushing interactions are modeled using a Gaussian process (GP) and are leveraged within a model predictive control approach that linearizes the GP and imposes actuator and task constraints for a planar manipulation task. }
}


@InProceedings{bruce18a,
  title = 	 {Learning Deployable Navigation Policies at Kilometer Scale from a Single Traversal},
  author = 	 {Bruce, Jake and Sunderhauf, Niko and Mirowski, Piotr and Hadsell, Raia and Milford, Michael},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {346--361},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {},
  code  = {},
  pdf = 	 {http://proceedings.mlr.press/v87/bruce18a/bruce18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/bruce18a.html},
  abstract = 	 {Model-free reinforcement learning has recently been shown to be effective at learning navigation policies from complex image input. However, these algorithms tend to require large amounts of interaction with the environment, which can be prohibitively costly to obtain on robots in the real world. We present an approach for efficiently learning goal-directed navigation policies on a mobile robot, from only a single coverage traversal of recorded data. The navigation agent learns an effective policy over a diverse action space in a large heterogeneous environment consisting of more than 2km of travel, through buildings and outdoor regions that collectively exhibit large variations in visual appearance, self-similarity, and connectivity. We compare pretrained visual encoders that enable precomputation of visual embeddings to achieve a throughput of tens of thousands of transitions per second at training time on a commodity desktop computer, allowing agents to learn from millions of trajectories of experience in a matter of hours. We propose multiple forms of computationally efficient stochastic augmentation to enable the learned policy to generalise beyond these precomputed embeddings, and demonstrate successful deployment of the learned policy on the real robot without fine tuning, despite environmental appearance differences at test time. The dataset and code required to reproduce these results and apply the technique to other datasets and robots is made publicly available at rl-navigation.github.io/deployable. }
}


@InProceedings{brown18a,
  title = 	 {Risk-Aware Active Inverse Reinforcement Learning},
  author = 	 {Brown, Daniel S. and Cui, Yuchen and Niekum, Scott},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {362--372},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {},
  code = {https://github.com/Pearl-UTexas/ActiveVaR},
  pdf = 	 {http://proceedings.mlr.press/v87/brown18a/brown18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/brown18a.html},
  abstract = 	 {Active learning from demonstration allows a robot to query a human for specific types of input to achieve efficient learning. Existing work has explored a variety of active query strategies; however, to our knowledge, none of these strategies directly minimize the performance risk of the policy the robot is learning. Utilizing recent advances in performance bounds for inverse reinforcement learning, we propose a risk-aware active inverse reinforcement learning algorithm that focuses active queries on areas of the state space with the potential for large generalization error. We show that risk-aware active learning outperforms standard active IRL approaches on gridworld, simulated driving, and table setting tasks, while also providing a performance-based stopping criterion that allows a robot to know when it has received enough demonstrations to safely perform a task. }
}


@InProceedings{florence18a,
  title = 	 {Dense Object Nets: Learning Dense Visual Object Descriptors By and For Robotic Manipulation},
  author = 	 {Florence, Peter R. and Manuelli, Lucas and Tedrake, Russ},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {373--385},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {https://youtu.be/L5UW1VapKNE},
  code = {https://github.com/RobotLocomotion/pytorch-dense-correspondence},
  pdf = 	 {http://proceedings.mlr.press/v87/florence18a/florence18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/florence18a.html},
  abstract = 	 {What is the right object representation for manipulation? We would like robots to visually perceive scenes and learn an understanding of the objects in them that (i) is task-agnostic and can be used as a building block for a variety of manipulation tasks, (ii) is generally applicable to both rigid and non-rigid objects, (iii) takes advantage of the strong priors provided by 3D vision, and (iv) is entirely learned from self-supervision. This is hard to achieve with previous methods: much recent work in grasping does not extend to grasping specific objects or other tasks, whereas task-specific learning may require many trials to generalize well across object configurations or other tasks. In this paper we present Dense Object Nets, which build on recent developments in self-supervised dense descriptor learning, as a consistent object representation for visual understanding and manipulation. We demonstrate they can be trained quickly (approximately 20 minutes) for a wide variety of previously unseen and potentially non-rigid objects. We additionally present novel contributions to enable multi-object descriptor learning, and show that by modifying our training procedure, we can either acquire descriptors which generalize across classes of objects, or descriptors that are distinct for each object instance. Finally, we demonstrate the novel application of learned dense descriptors to robotic manipulation. We demonstrate grasping of specific points on an object across potentially deformed object configurations, and demonstrate using class general descriptors to transfer specific grasps across objects in a class. }
}


@InProceedings{morere18a,
  title = 	 {Bayesian RL for Goal-Only Rewards},
  author = 	 {Morere, Philippe and Ramos, Fabio},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {386--398},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {},
  code = {https://github.com/PhilippeMorere/EMU-Q},
  pdf = 	 {http://proceedings.mlr.press/v87/morere18a/morere18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/morere18a.html},
  abstract = 	 {We address the challenging problem of reinforcement learning under goal-only rewards [1], where rewards are only non-zero when the goal is achieved. This reward definition alleviates the need for cumbersome reward engineering, making the reward formulation trivial. Classic exploration heuristics such as Boltzmann or epsilon-greedy exploration are highly inefficient in domains with goal-only rewards. We solve this problem by leveraging value function posterior variance information to direct exploration where uncertainty is higher. The proposed algorithm (EMU-Q) achieves data-efficient exploration, and balances exploration and exploitation explicitly at a policy level granting users more control over the learning process. We introduce general features approximating kernels, allowing to greatly reduce the algorithm complexity from O(N^3) in the number of transitions to O(M^2) in the number of features. We demonstrate EMU-Q is competitive with other exploration techniques on a variety of continuous control tasks and on a robotic manipulator. }
}


@InProceedings{vinitsky18a,
  title = 	 {Benchmarks for reinforcement learning in mixed-autonomy traffic},
  author = 	 {Vinitsky, Eugene and Kreidieh, Aboudy and Flem, Luc Le and Kheterpal, Nishant and Jang, Kathy and Wu, Cathy and Wu, Fangyu and Liaw, Richard and Liang, Eric and Bayen, Alexandre M.},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {399--409},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {},
  code  = {},
  pdf = 	 {http://proceedings.mlr.press/v87/vinitsky18a/vinitsky18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/vinitsky18a.html},
  abstract = 	 {We release new benchmarks in the use of deep reinforcement learning (RL) to create controllers for mixed-autonomy traffic, where connected and autonomous vehicles (CAVs) interact with human drivers and infrastructure. Benchmarks, such as Mujoco or the Arcade Learning Environment, have spurred new research by enabling researchers to effectively compare their results so that they can focus on algorithmic improvements and control techniques rather than system design. To promote similar advances in traffic control via RL, we propose four benchmarks, based on three new traffic scenarios, illustrating distinct reinforcement learning problems with applications to mixed-autonomy traffic. We provide an introduction to each control problem, an overview of their MDP structures, and preliminary performance results from commonly used RL algorithms. For the purpose of reproducibility, the benchmarks, reference implementations, and tutorials are available at https://github.com/flow-project/flow.}
}


@InProceedings{wang18a,
  title = 	 {Intervention Aided Reinforcement Learning for Safe and Practical Policy Optimization in Navigation},
  author = 	 {Wang, Fan and Zhou, Bo and Chen, Ke and Fan, Tingxiang and Zhang, Xi and Li, Jiangyong and Tian, Hao and Pan, Jia},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {410--421},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {https://youtu.be/jdMntfs9dYQ},
  code = {https://github.com/PaddlePaddle/PARL/tree/develop/parl/examples/IARL/},
  pdf = 	 {http://proceedings.mlr.press/v87/wang18a/wang18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/wang18a.html},
  abstract = 	 {Combining deep neural networks with reinforcement learning has shown great potential in the next-generation intelligent control. However, there are challenges in terms of safety and cost in practical applications. In this pa- per, we propose the Intervention Aided Reinforcement Learning (IARL) framework, which utilizes human intervened robot-environment interaction to improve the policy. We used the Unmanned Aerial Vehicle (UAV) as the test platform. We built neural networks as our policy to map sensor readings to control signals on the UAV. Our experiment scenarios cover both simulation and reality. We show that our approach substantially reduces the human intervention and improves the performance in autonomous navigation1, at the same time it ensures safety and keeps training cost acceptable. }
}


@InProceedings{cheng18a,
  title = 	 {Reinforcement Learning of Active Vision for Manipulating Objects under Occlusions},
  author = 	 {Cheng, Ricson and Agarwal, Arpit and Fragkiadaki, Katerina},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {422--431},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {},
  code = {https://github.com/ricsonc/ActiveVisionManipulation},
  pdf = 	 {http://proceedings.mlr.press/v87/cheng18a/cheng18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/cheng18a.html},
  abstract = 	 {We consider artificial agents that learn to jointly control their gripper and camera in order to reinforcement learn manipulation policies in the presence of occlusions from distractor objects. Distractors often occlude the object of interest and cause it to disappear from the field of view. We propose hand/eye controllers that learn to move the camera to keep the object within the field of view and visible, in coordination to manipulating it to achieve the desired goal, e.g., pushing it to a target location. We incorporate structural biases of object-centric attention within our actor-critic architectures, which our experiments suggest to be a key for good performance. Our results further highlight the importance of curriculum with regards to environment difficulty. The resulting active vision / manipulation policies outperform static camera setups for a variety of cluttered environments. }
}


@InProceedings{gehring18a,
  title = 	 {Adaptable replanning with compressed linear action models for learning from demonstrations},
  author = 	 {Gehring, Clement and Kaelbling, Leslie Pack and Lozano-Perez, Tomas},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {432--442},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {},
  code = {https://github.mit.edu/gehring/CLAM-SPOPT},
  pdf = 	 {http://proceedings.mlr.press/v87/gehring18a/gehring18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/gehring18a.html},
  abstract = 	 {We propose an adaptable and efficient model-based reinforcement learning approach well suited for continuous domains with sparse samples, a setting often encountered when learning from demonstrations. The flexibility of our method originates from the approximate transition models, estimated from data, and the online replanning approach proposed. Together, these components allow for immediate adaptation to a new task, given in the form of a reward function. The efficiency of our method comes from two approximations. First, rather than representing a complete distribution over the results of taking an action, which is difficult in continuous state spaces, it learns a linear model of the expected transition for each action. Second, it uses a novel strategy for compressing these linear action models, which significantly reduces space and time for learning models, and supports efficient online generation of open-loop plans. The effectiveness of these methods is demonstrated in a simulated driving domain with a 20-dimensional continuous input space. }
}


@InProceedings{senanayake18a,
  title = 	 {Automorphing Kernels for Nonstationarity in Mapping Unstructured Environments},
  author = 	 {Senanayake, Ransalu and Tompkins, Anthony and Ramos, Fabio},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {443--455},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube ={https://youtu.be/IirUlJS49Yw},
  code = {https://github.com/MushroomHunting/autormorphing-kernels},
  pdf = 	 {http://proceedings.mlr.press/v87/senanayake18a/senanayake18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/senanayake18a.html},
  abstract = 	 {In order to deploy robots in previously unseen and unstructured environments, the robots should have the capacity to learn on their own and adapt to the changes in the environments. For instance, in mobile robotics, a robot should be able to learn a map of the environment from data itself without the intervention of a human to tune the parameters of the model. To this end, leveraging the latest developments in automatic machine learning (AutoML), probabilistic programming, and statistical sampling, under the Hilbert mapping framework which can represent the occupancy of the environment as a continuous function of locations, we formulate a Bayesian framework to learn all parameters of the map. Crucially, this way, the robot is capable of learning the optimal shapes and placement of the kernels in Hilbert maps by merely embedding high-level human knowledge of the problem by means of prior probability distributions. Since the proposed framework employs stochastic variational inference, the model learns tens of thousands of parameters within minutes in both big data and data-scarce regimes. Experiments conducted on simulated and real-world datasets in static and dynamic environments indicate the proposed method significantly outperforms existing stationary occupancy mapping techniques, verifying the importance of learning the interdependent position-shape relationship of kernels alongside other model parameters. }
}


@InProceedings{sarlin18a,
  title = 	 {Leveraging Deep Visual Descriptors for Hierarchical Efficient Localization},
  author = 	 {Sarlin, Paul-Edouard and Debraine, Frederic and Dymczyk, Marcin and Siegwart, Roland},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {456--465},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {https://www.youtube.com/watch?v=8cg697oLUtg},
  code = {https://github.com/ethz-asl/hierarchical_loc},
  pdf = 	 {http://proceedings.mlr.press/v87/sarlin18a/sarlin18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/sarlin18a.html},
  abstract = 	 {Many robotics applications require precise pose estimates despite operating in large and changing environments. This can be addressed by visual localization, using a pre-computed 3D model of the surroundings. The pose estimation then amounts to finding correspondences between 2D keypoints in a query image and 3D points in the model using local descriptors. However, computational power is often limited on robotic platforms, making this task challenging in large-scale environments. Binary feature descriptors significantly speed up this 2D-3D matching, and have become popular in the robotics community, but also strongly impair the robustness to perceptual aliasing and changes in viewpoint, illumination and scene structure. In this work, we propose to leverage recent advances in deep learning to perform an efficient hierarchical localization. We first localize at the map level using learned image-wide global descriptors, and subsequently estimate a precise pose from 2D-3D matches computed in the candidate places only. This restricts the local search and thus allows to efficiently exploit powerful non-binary descriptors usually dismissed on resource-constrained devices. Our approach results in state-of-the-art localization performance while running in real-time on a popular mobile platform, enabling new prospects for robotics research.}
}


@InProceedings{richards18a,
  title = 	 {The Lyapunov Neural Network: Adaptive Stability Certification for Safe Learning of Dynamical Systems},
  author = 	 {Richards, Spencer M. and Berkenkamp, Felix and Krause, Andreas},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {466--476},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {},
  code = {https://github.com/befelix/safe_learning},
  pdf = 	 {http://proceedings.mlr.press/v87/richards18a/richards18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/richards18a.html},
  abstract = 	 {Learning algorithms have shown considerable prowess in simulation by allowing robots to adapt to uncertain environments and improve their performance. However, such algorithms are rarely used in practice on safety-critical systems, since the learned policy typically does not yield any safety guarantees. That is, the required exploration may cause physical harm to the robot or its environment. In this paper, we present a method to learn accurate safety certificates for nonlinear, closed-loop dynamical systems. Specifically, we construct a neural network Lyapunov function and a training algorithm that adapts it to the shape of the largest safe region in the state space. The algorithm relies only on knowledge of inputs and outputs of the dynamics, rather than on any specific model structure. We demonstrate our method by learning the safe region of attraction for a simulated inverted pendulum. Furthermore, we discuss how our method can be used in safe learning algorithms together with statistical models of dynamical systems. }
}


@InProceedings{gualtieri18a,
  title = 	 {Learning 6-DoF Grasping and Pick-Place Using Attention Focus},
  author = 	 {Gualtieri, Marcus and Platt, Robert},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {477--486},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {https://youtu.be/syS5gYXONPY},
  code  = {https://github.com/mgualti/DeepRLManip},
  pdf = 	 {http://proceedings.mlr.press/v87/gualtieri18a/gualtieri18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/gualtieri18a.html},
  abstract = 	 {We address a class of manipulation problems where the robot perceives the scene with a depth sensor and can move its end effector in a space with six degrees of freedom—3D position and orientation. Our approach is to formulate the problem as a Markov decision process (MDP) with abstract yet generally applicable state and action representations. Finding a good solution to the MDP requires adding constraints on the allowed actions. We develop a specific set of constraints called hierarchical SE(3) sampling (HSE3S) which causes the robot to learn a sequence of gazes to focus attention on the task-relevant parts of the scene. We demonstrate the effectiveness of our approach on three challenging pick-place tasks (with novel objects in clutter and nontrivial places) both in simulation and on a real robot, even though all training is done in simulation. }
}


@InProceedings{laversanne-finot18a,
  title = 	 {Curiosity Driven Exploration of Learned Disentangled Goal Spaces},
  author = 	 {Laversanne-Finot, Adrien and Pere, Alexandre and Oudeyer, Pierre-Yves},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {487--504},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {},
  code = {https://github.com/flowersteam/Curiosity_Driven_Goal_Exploration},
  pdf = 	 {http://proceedings.mlr.press/v87/laversanne-finot18a/laversanne-finot18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/laversanne-finot18a.html},
  abstract = 	 {Intrinsically motivated goal exploration processes enable agents to explore efficiently complex environments with high-dimensional continuous actions. They have been applied successfully to real world robots to discover repertoires of policies producing a wide diversity of effects. Often these algorithms relied on engineered goal spaces but it was recently shown that one can use deep representation learning algorithms to learn an adequate goal space in simple environments. In this paper we show that using a disentangled goal space (i.e. a representation where each latent variable is sensitive to a single degree of freedom) leads to better exploration performances than an entangled one. We further show that when the representation is disentangled, one can leverage it by sampling goals that maximize learning progress in a modular manner. Finally, we show that the measure of learning progress, used to drive curiosity-driven exploration, can be used simultaneously to discover abstract independently controllable features of the environment. }
}


@InProceedings{blukis18a,
  title = 	 {Mapping Navigation Instructions to Continuous Control Actions with Position-Visitation Prediction},
  author = 	 {Blukis, Valts and Misra, Dipendra and Knepper, Ross A. and Artzi, Yoav},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {505--518},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {https://youtu.be/hbeU64UX3CM},
  code = {https://github.com/clic-lab/drif},
  pdf = 	 {http://proceedings.mlr.press/v87/blukis18a/blukis18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/blukis18a.html},
  abstract = 	 {We propose an approach for mapping natural language instructions and raw observations to continuous control of a quadcopter drone. Our model predicts interpretable position-visitation distributions indicating where the agent should go during execution and where it should stop, and uses the predicted distributions to select the actions to execute. This two-step model decomposition allows for simple and efficient training using a combination of supervised learning and imitation learning. We evaluate our approach with a realistic drone simulator, and demonstrate absolute task-completion accuracy improvements of 16.85% over two state-of-the-art instruction-following methods. }
}


@InProceedings{biyik18a,
  title = 	 {Batch Active Preference-Based Learning of Reward Functions},
  author = 	 {Biyik, Erdem and Sadigh, Dorsa},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {519--528},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {https://www.youtube.com/watch?v=cQ7vvUg9rU4},
  youtube2 = {https://www.youtube.com/watch?v=MaswyWRep5g},
  code = {https://github.com/Stanford-ILIAD/batch-active-preference-based-learning},
  pdf = 	 {http://proceedings.mlr.press/v87/biyik18a/biyik18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/biyik18a.html},
  abstract = 	 {Data generation and labeling are usually an expensive part of learning for robotics. While active learning methods are commonly used to tackle the former problem, preference-based learning is a concept that attempts to solve the latter by querying users with preference questions. In this paper, we will develop a new algorithm, batch active preference-based learning, that enables efficient learning of reward functions using as few data samples as possible while still having short query generation times. We introduce several approximations to the batch active learning problem, and provide theoretical guarantees for the convergence of our algorithms. Finally, we present our experimental results for a variety of robotics tasks in simulation. Our results suggest that our batch active learning algorithm requires only a few queries that are computed in a short amount of time. We then showcase our algorithm in a study to learn human users’ preferences. }
}


@InProceedings{clarke18a,
  title = 	 {Learning Audio Feedback for Estimating Amount and Flow of Granular Material},
  author = 	 {Clarke, Samuel and Rhodes, Travers and Atkeson, Christopher G. and Kroemer, Oliver},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {529--550},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {https://www.youtube.com/watch?v=rNrjvdOb2TA},
  code = {https://github.com/iamlab-cmu/granularAudioCoRL2018},
  pdf = 	 {http://proceedings.mlr.press/v87/clarke18a/clarke18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/clarke18a.html},
  abstract = 	 {Granular materials produce audio-frequency mechanical vibrations in air and structures when manipulated. These vibrations correlate with both the nature of the events and the intrinsic properties of the materials producing them. We therefore propose learning to use audio-frequency vibrations from contact events to estimate the flow and amount of granular materials during scooping and pouring tasks. We evaluated multiple deep and shallow learning frameworks on a dataset of 13,750 shaking and pouring samples across five different granular materials. Our results indicate that audio is an informative sensor modality for accurately estimating flow and amounts, with a mean RMSE of 2.8g across the five materials for pouring. We also demonstrate how the learned networks can be used to pour a desired amount of material. }
}


@InProceedings{long18a,
  title = 	 {HybridNet: Integrating Model-based and Data-driven Learning to Predict Evolution of Dynamical Systems},
  author = 	 {Long, Yun and She, Xueyuan and Mukhopadhyay, Saibal},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {551--560},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube ={},
  code = {https://github.gatech.edu/ylong32/CORL-2018-HybridNet},
  pdf = 	 {http://proceedings.mlr.press/v87/long18a/long18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/long18a.html},
  abstract = 	 {The robotic systems continuously interact with complex dynamical systems in the physical world. Reliable predictions of spatiotemporal evolution of these dynamical systems, with limited knowledge of system dynamics, are crucial for autonomous operation. In this paper, we present HybridNet, a framework that integrates data-driven deep learning and model-driven computation to reliably predict spatiotemporal evolution of a dynamical systems even with in-exact knowledge of their parameters. A data-driven deep neural network (DNN) with Convolutional LSTM (ConvLSTM) as the backbone is employed to predict the time-varying evolution of the external forces/perturbations. On the other hand, the model-driven computation is performed using Cellular Neural Network (CeNN), a neuro-inspired algorithm to model dynamical systems defined by coupled partial differential equations (PDEs). CeNN converts the intricate numerical computation into a series of convolution operations, enabling a trainable PDE solver. With a feedback control loop, HybridNet can learn the physical parameters governing the system’s dynamics in real-time, and accordingly adapt the computation models to enhance prediction accuracy for time-evolving dynamical systems. The experimental results on two dynamical systems, namely, heat convection-diffusion system, and fluid dynamical system, demonstrate that the HybridNet produces higher accuracy than the state-of-the-art deep learning based approach. }
}


@InProceedings{mahmood18a,
  title = 	 {Benchmarking Reinforcement Learning Algorithms on Real-World Robots},
  author = 	 {Mahmood, A. Rupam and Korenkevych, Dmytro and Vasan, Gautham and Ma, William and Bergstra, James},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {561--591},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube  ={https://www.youtube.com/watch?v=ovDfhvjpQd8},
  code = {https://github.com/kindredresearch/SenseAct},
  pdf = 	 {http://proceedings.mlr.press/v87/mahmood18a/mahmood18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/mahmood18a.html},
  abstract = 	 {
 Through many recent successes in simulation, model-free reinforcement learning has emerged as a promising approach to solving continuous control robotic tasks. The research community is now able to reproduce, analyze and build quickly on these results due to open source implementations of learning algorithms and simulated benchmark tasks. To carry forward these successes to real-world applications, it is crucial to withhold utilizing the unique advantages of simulations that do not transfer to the real world and experiment directly with physical robots. However, reinforcement learning research with physical robots faces substantial resistance due to the lack of benchmark tasks and supporting source code. In this work, we introduce several reinforcement learning tasks with multiple commercially available robots that present varying levels of learning difficulty, setup, and repeatability. On these tasks, we test the learning performance of off-the-shelf implementations of four reinforcement learning algorithms and analyze sensitivity to their hyper-parameters to determine their readiness for applications in various real-world tasks. Our results show that with a careful setup of the task interface and computations, some of these implementations can be readily applicable to physical robots. We find that state-of-the-art learning algorithms are highly sensitive to their hyper-parameters and their relative ordering does not transfer across tasks, indicating the necessity of re-tuning them for each task for best performance. On the other hand, the best hyper-parameter configuration from one task may often result in effective learning on held-out tasks even with different robots, providing a reasonable default. We make the benchmark tasks publicly available to enhance reproducibility in real-world reinforcement learning.}
}


@InProceedings{shankar18a,
  title = 	 {Learning Neural Parsers with Deterministic Differentiable Imitation Learning},
  author = 	 {Shankar, Tanmay and Rhinehart, Nicholas and Muelling, Katharina and Kitani, Kris M.},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {592--604},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {},
  code = {https://github.com/tanmayshankar/ParsingbyImitation},
  pdf = 	 {http://proceedings.mlr.press/v87/shankar18a/shankar18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/shankar18a.html},
  abstract = 	 {
 We explore the problem of learning to decompose spatial tasks into segments, as exemplified by the problem of a painting robot covering a large object. Inspired by the ability of classical decision tree algorithms to construct structured parti- tions of their input spaces, we formulate the problem of decomposing objects into segments as a parsing approach. We make the insight that the derivation of a parse-tree that decomposes the object into segments closely resembles a decision tree constructed by ID3, which can be done when the ground-truth available. We learn to imitate an expert parsing oracle, such that our neural parser can generalize to parse natural images without ground truth. We introduce a novel deterministic policy gradient update, DRAG (i.e., DeteRministically AGgrevate) in the form of a deterministic actor-critic variant of AggreVaTeD [1], to train our neural parser. From another perspective, our approach is a variant of the Deterministic Policy Gradient [2, 3] suitable for the imitation learning setting. The deterministic policy representation offered by training our neural parser with DRAG allows it to outperform state of the art imitation and reinforcement learning approaches. }
}


@InProceedings{barsan18a,
  title = 	 {Learning to Localize Using a LiDAR Intensity Map},
  author = 	 {Barsan, Ioan Andrei and Wang, Shenlong and Pokrovsky, Andrei and Urtasun, Raquel},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {605--616},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {},
  code  = {},
  pdf = 	 {http://proceedings.mlr.press/v87/barsan18a/barsan18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/barsan18a.html},
  abstract = 	 {In this paper we propose a real-time, calibration-agnostic and effective localization system for self-driving cars. Our method learns to embed the online LiDAR sweeps and intensity map into a joint deep embedding space. Localization is then conducted through an efficient convolutional matching between the embeddings. Our full system can operate in real-time at 15Hz while achieving centimeter level accuracy across different LiDAR sensors and environments. Our experiments illustrate the performance of the proposed approach over a large-scale dataset consisting of over 4000km of driving. }
}


@InProceedings{clavera18a,
  title = 	 {Model-Based Reinforcement Learning via Meta-Policy Optimization},
  author = 	 {Clavera, Ignasi and Rothfuss, Jonas and Schulman, John and Fujita, Yasuhiro and Asfour, Tamim and Abbeel, Pieter},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {617--629},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {},
  code  = {},
  pdf = 	 {http://proceedings.mlr.press/v87/clavera18a/clavera18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/clavera18a.html},
  abstract = 	 {Model-based reinforcement learning approaches carry the promise of being data efficient. However, due to challenges in learning dynamics models that sufficiently match the real-world dynamics, they struggle to achieve the same asymptotic performance as model-free methods. We propose Model-Based Meta-Policy-Optimization (MB-MPO), an approach that foregoes the strong reliance on accurate learned dynamics models. Using an ensemble of learned dynamic models, MB-MPO meta-learns a policy that can quickly adapt to any model in the ensemble with one policy gradient step. This steers the meta-policy towards internalizing consistent dynamics predictions among the ensemble while shifting the burden of behaving optimally w.r.t. the model discrepancies towards the adaptation step. Our experiments show that MB-MPO is more robust to model imperfections than previous model-based approaches. Finally, we demonstrate that our approach is able to match the asymptotic performance of model-free methods while requiring significantly less experience. }
}


@InProceedings{maeda18a,
  title = 	 {Reinforcement Learning of Phase Oscillators for Fast Adaptation to Moving Targets},
  author = 	 {Maeda, Guilherme and Koc, Okan and Morimoto, Jun},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {630--640},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {https://youtu.be/Iy4zwseEAKs},
  code = {},
  pdf = 	 {http://proceedings.mlr.press/v87/maeda18a/maeda18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/maeda18a.html},
  abstract = 	 {Online movement generation in tasks involving real humanoid robots interacting with fast-moving targets is extremely difficult. This paper approaches this problem via imitation and reinforcement learning using phase variables. Imitation learning is used to acquire primitive trajectories of the demonstrator interacting with the target. The temporal progress of the robot is represented as a function of the target’s phase. Using a phase oscillator formulation, reinforcement learning optimizes a temporal policy such that the robot can quickly react to large/unexpected changes in the target movement. The phase representation decouples the temporal and spatial problems allowing the use of fast online solutions. The methodology is applicable in both cyclic and single-stroke movements. We applied the proposed method on a real bi-manual humanoid upper body with 14 degrees-of-freedom where the robot had to repeatedly push a ball hanging in front of it. In simulation, we show a human-robot interaction scenario where the robot changed its role from giver to receiver as a function of the interaction reward. }
}


@InProceedings{antonova18a,
  title = 	 {Global Search with Bernoulli Alternation Kernel for Task-oriented Grasping Informed by Simulation},
  author = 	 {Antonova, Rika and Kokic, Mia and Stork, Johannes A. and Kragic, Danica},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {641--650},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {https://youtu.be/MlCabBaYw7E},
  code = {},
  pdf = 	 {http://proceedings.mlr.press/v87/antonova18a/antonova18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/antonova18a.html},
  abstract = 	 {We develop an approach that benefits from large simulated datasets and takes full advantage of the limited online data that is most relevant. We propose a variant of Bayesian optimization that alternates between using informed and uninformed kernels. With this Bernoulli Alternation Kernel we ensure that discrepancies between simulation and reality do not hinder adapting robot control policies online. The proposed approach is applied to a challenging real-world problem of task-oriented grasping with novel objects. Our further contribution is a neural network architecture and training pipeline that use experience from grasping objects in simulation to learn grasp stability scores. We learn task scores from a labeled dataset with a convolutional network, which is used to construct an informed kernel for our variant of Bayesian optimization. Experiments on an ABB Yumi robot with real sensor data demonstrate success of our approach, despite the challenge of fulfilling task requirements and high uncertainty over physical properties of objects. }
}


@InProceedings{kalashnikov18a,
  title = 	 {Scalable Deep Reinforcement Learning for Vision-Based Robotic Manipulation},
  author = 	 {Kalashnikov, Dmitry and Irpan, Alex and Pastor, Peter and Ibarz, Julian and Herzog, Alexander and Jang, Eric and Quillen, Deirdre and Holly, Ethan and Kalakrishnan, Mrinal and Vanhoucke, Vincent and Levine, Sergey},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {651--673},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {},
  code  = {},
  pdf = 	 {http://proceedings.mlr.press/v87/kalashnikov18a/kalashnikov18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/kalashnikov18a.html},
  abstract = 	 {In this paper, we study the problem of learning vision-based dynamic manipulation skills using a scalable reinforcement learning approach. We study this problem in the context of grasping, a longstanding challenge in robotic manipulation. In contrast to static learning behaviors that choose a grasp point and then execute the desired grasp, our method enables closed-loop vision-based control, whereby the robot continuously updates its grasp strategy based on the most recent observations to optimize long-horizon grasp success. To that end, we introduce QT-Opt, a scalable self-supervised vision-based reinforcement learning framework that can leverage over 580k real-world grasp attempts to train a deep neural network Q-function with over 1.2M parameters to perform closed-loop, real-world grasping that generalizes to 96% grasp success on unseen objects. Aside from attaining a very high success rate, our method exhibits behaviors that are quite distinct from more standard grasping systems: using only RGB vision-based perception from an over-the-shoulder camera, our method automatically learns regrasping strategies, probes objects to find the most effective grasps, learns to reposition objects and perform other non-prehensile pre-grasp manipulations, and responds dynamically to disturbances and perturbations. }
}


@InProceedings{romoff18a,
  title = 	 {Reward Estimation for Variance Reduction in Deep Reinforcement Learning},
  author = 	 {Romoff, Joshua and Henderson, Peter and Piche, Alexandre and Francois-Lavet, Vincent and Pineau, Joelle},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {674--699},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {},
  code = {http://github.com/facebookresearch/reward-estimator-corl},
  pdf = 	 {http://proceedings.mlr.press/v87/romoff18a/romoff18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/romoff18a.html},
  abstract = 	 {Reinforcement Learning (RL) agents require the specification of a reward signal for learning behaviours. However, introduction of corrupt or stochastic rewards can yield high variance in learning. Such corruption may be a direct result of goal misspecification, randomness in the reward signal, or correlation of the reward with external factors that are not known to the agent. Corruption or stochasticity of the reward signal can be especially problematic in robotics, where goal specification can be particularly difficult for complex tasks. While many variance reduction techniques have been studied to improve the robustness of the RL process, handling such stochastic or corrupted reward structures remains difficult. As an alternative for handling this scenario in model-free RL methods, we suggest using an estimator for both rewards and value functions. We demonstrate that this improves performance under corrupted stochastic rewards in both the tabular and non-linear function approximation settings for a variety of noise types and environments. The use of reward estimation is a robust and easy-to-implement improvement for handling corrupted reward signals in model-free RL. }
}


@InProceedings{muratore18a,
  title = 	 {Domain Randomization for Simulation-Based Policy Optimization with Transferability Assessment},
  author = 	 {Muratore, Fabio and Treede, Felix and Gienger, Michael and Peters, Jan},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {700--713},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {https://www.youtube.com/watch?v=ORi9sjhs_tw},
  youtube2 = {https://www.youtube.com/watch?v=RQ7zq_bcv_k},
  code = {},
  pdf = 	 {http://proceedings.mlr.press/v87/muratore18a/muratore18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/muratore18a.html},
  abstract = 	 {Exploration-based reinforcement learning on real robot systems is generally time-intensive and can lead to catastrophic robot failures. Therefore, simulation-based policy search appears to be an appealing alternative. Unfor- tunately, running policy search on a slightly faulty simulator can easily lead to the maximization of the ‘Simulation Optimization Bias’ (SOB), where the policy exploits modeling errors of the simulator such that the resulting behavior can potentially damage the robot. For this reason, much work in robot reinforcement learning has focused on model-free methods that learn on real-world systems. The resulting lack of safe simulation-based policy learning techniques imposes severe limitations on the application of robot reinforcement learning. In this paper, we explore how physics simulations can be utilized for a robust policy optimization by perturbing the simulator’s parameters and training from model ensembles. We propose a new algorithm called Simulation-based Policy Optimization with Transferability Assessment (SPOTA) that uses a biased estimator of the SOB to formulate a stopping criterion for training. We show that the new simulation-based policy search algorithm is able to learn a control policy exclusively from a randomized simulator that can be applied directly to a different system without using any data from the latter. }
}


@InProceedings{nyga18a,
  title = 	 {Grounding Robot Plans from Natural Language Instructions with Incomplete World Knowledge},
  author = 	 {Nyga, Daniel and Roy, Subhro and Paul, Rohan and Park, Daehyung and Pomarlan, Mihai and Beetz, Michael and Roy, Nicholas},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {714--723},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {https://youtu.be/uWv-l7XMoB8},
  code = {},
  pdf = 	 {http://proceedings.mlr.press/v87/nyga18a/nyga18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/nyga18a.html},
  abstract = 	 {Our goal is to enable robots to interpret and execute high-level tasks conveyed using natural language instructions. For example, consider tasking a household robot to, “prepare my breakfast”, “clear the boxes on the table” or “make me a fruit milkshake”. Interpreting such underspecified instructions requires environmental context and background knowledge about how to accomplish complex tasks. Further, the robot’s workspace knowledge may be incomplete: the environment may only be partially-observed or background knowledge may be missing causing a failure in plan synthesis. We introduce a probabilistic model that utilizes background knowledge to infer latent or missing plan constituents based on semantic co-associations learned from noisy textual corpora of task descriptions. The ability to infer missing plan constituents enables information-seeking actions such as visual exploration or dialogue with the human to acquire new knowledge to fill incomplete plans. Results indicate robust plan inference from under-specified instructions in partially-known worlds. }
}


@InProceedings{chitnis18a,
  title = 	 {Learning What Information to Give in Partially Observed Domains},
  author = 	 {Chitnis, Rohan and Kaelbling, Leslie Pack and Lozano-Perez, Tomas},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {724--733},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {https://www.youtube.com/watch?v=slJB_fJsOdM},
  code = {},
  pdf = 	 {http://proceedings.mlr.press/v87/chitnis18a/chitnis18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/chitnis18a.html},
  abstract = 	 {In many robotic applications, an autonomous agent must act within and explore a partially observed environment that is unobserved by its human team-mate. We consider such a setting in which the agent can, while acting, transmit declarative information to the human that helps them understand aspects of this unseen environment. In this work, we address the algorithmic question of how the agent should plan out what actions to take and what information to transmit. Naturally, one would expect the human to have preferences, which we model information-theoretically by scoring transmitted information based on the change it induces in weighted entropy of the human’s belief state. We formulate this setting as a belief MDP and give a tractable algorithm for solving it approximately. Then, we give an algorithm that allows the agent to learn the human’s preferences online, through exploration. We validate our approach experimentally in simulated discrete and continuous partially observed search-and-recover domains. Visit http://tinyurl.com/chitnis-corl-18 for a supplementary video.}
}


@InProceedings{matas18a,
  title = 	 {Sim-to-Real Reinforcement Learning for Deformable Object Manipulation},
  author = 	 {Matas, Jan and James, Stephen and Davison, Andrew J.},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {734--743},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {https://www.youtube.com/watch?v=Dr0RvX1F-YQ},
  code = {},
  pdf = 	 {http://proceedings.mlr.press/v87/matas18a/matas18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/matas18a.html},
  abstract = 	 {We have seen much recent progress in rigid object manipulation, but interaction with deformable objects has notably lagged behind. Due to the large configuration space of deformable objects, solutions using traditional modelling approaches require significant engineering work. Perhaps then, bypassing the need for explicit modelling and instead learning the control in an end-to-end manner serves as a better approach? Despite the growing interest in the use of end-to-end robot learning approaches, only a small amount of work has focused on their applicability to deformable object manipulation. Moreover, due to the large amount of data needed to learn these end-to-end solutions, an emerging trend is to learn control policies in simulation and then transfer them over to the real world. To date, no work has explored whether it is possible to learn and transfer deformable object policies. We believe that if sim-to-real methods are to be employed further, then it should be possible to learn to interact with a wide variety of objects, and not only rigid objects. In this work, we use a combination of state-of-the-art deep reinforcement learning algorithms to solve the problem of manipulating deformable objects (specifically cloth). We evaluate our approach on three tasks—folding a towel up to a mark, folding a face towel diagonally, and draping a piece of cloth over a hanger. Our agents are fully trained in simulation with domain randomisation, and then successfully deployed in the real world without having seen any real deformable objects.}
}


@InProceedings{kumar18a,
  title = 	 {Expanding Motor Skills using Relay Networks},
  author = 	 {Kumar, Visak CV and Ha, Sehoon and Liu, C.Karen},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {744--756},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {https://youtu.be/TxUlcGnzuXE},
  code = {https://github.com/VisakK/RelayNetworks.git},
  pdf = 	 {http://proceedings.mlr.press/v87/kumar18a/kumar18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/kumar18a.html},
  abstract = 	 {While recent advances in deep reinforcement learning have achieved impressive results in learning motor skills, many policies are only capable within a limited set of initial states. We propose an algorithm that sequentially decomposes a complex robotic task into simpler subtasks and trains a local policy for each subtask such that the robot can expand its existing skill set gradually. Our key idea is to build a directed graph of local control policies represented by neural networks, which we refer to as relay neural networks. Starting from the first policy that attempts to achieve the task from a small set of initial states, the algorithm iteratively discovers the next subtask with increasingly more difficult initial states until the last subtask matches the initial state distribution of the original task. The policy of each subtask aims to drive the robot to a state where the policy of its preceding subtask is able to handle. By taking advantage of many existing actor-critic style policy search algorithms, we utilize the optimized value function to define “good states” for the next policy to relay to. }
}


@InProceedings{jain18a,
  title = 	 {Efficient Hierarchical Robot Motion Planning Under Uncertainty and Hybrid Dynamics},
  author = 	 {Jain, Ajinkya and Niekum, Scott},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {757--766},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {https://www.youtube.com/watch?v=Y9BoNvI0K5c},
  code = {https://github.com/Pearl-UTexas/POMDP-HD},
  pdf = 	 {http://proceedings.mlr.press/v87/jain18a/jain18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/jain18a.html},
  abstract = 	 {Noisy observations coupled with nonlinear dynamics pose one of the biggestchallengesinrobotmotionplanning. Bydecomposingnonlineardynamics into a discrete set of local dynamics models, hybrid dynamics provide a natural way to model nonlinear dynamics, especially in systems with sudden discontinuities in dynamics due to factors such as contacts. We propose a hierarchical POMDP planner that develops cost-optimized motion plans for hybrid dynamics models. The hierarchical planner first develops a high-level motion plan to sequence the local dynamics models to be visited and then converts it into a detailed continuous state plan. This hierarchical planning approach results in a decomposition of the POMDP planning problem into smaller sub-parts that can be solved with significantly lower computational costs. The ability to sequence the visitation of local dynamics models also provides a powerful way to leverage the hybrid dynamics to reduce state uncertainty. We evaluate the proposed planner on a navigation task in the simulated domain and on an assembly task with a robotic manipulator, showing that our approach can solve tasks having high observation noise and nonlinear dynamics effectively with significantly lower computational costs compared to direct planning approaches. }
}


@InProceedings{fan18a,
  title = 	 {SURREAL: Open-Source Reinforcement Learning Framework and Robot Manipulation Benchmark},
  author = 	 {Fan, Linxi and Zhu, Yuke and Zhu, Jiren and Liu, Zihua and Zeng, Orien and Gupta, Anchit and Creus-Costa, Joan and Savarese, Silvio and Fei-Fei, Li},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {767--782},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {https://www.youtube.com/watch?v=efeS9AczbTk},
  code  = {},
  pdf = 	 {http://proceedings.mlr.press/v87/fan18a/fan18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/fan18a.html},
  abstract = 	 {Reproducibility has been a significant challenge in deep reinforcement learning and robotics research. Open-source frameworks and standardized benchmarks can serve an integral role in rigorous evaluation and reproducible research.  We introduce SURREAL, an open-source scalable framework that supports state-of-the-art distributed reinforcement learning algorithms. We design a principled distributed learning formulation that accommodates both on-policy and off-policy learning. We demonstrate that SURREAL algorithms outperform existing open-source implementations in both agent performance and learning efficiency. We also introduce SURREAL Robotics Suite, an accessible set of benchmarking tasks in physical simulation for reproducible robot manipulation research. We provide extensive evaluations of SURREAL algorithms and establish strong baseline results.}
}


@InProceedings{james18a,
  title = 	 {Task-Embedded Control Networks for Few-Shot Imitation Learning},
  author = 	 {James, Stephen and Bloesch, Michael and Davison, Andrew J.},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {783--795},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {https://youtu.be/nQF4tRGnkiI},
  code = {},
  pdf = 	 {http://proceedings.mlr.press/v87/james18a/james18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/james18a.html},
  abstract = 	 {Much like humans, robots should have the ability to leverage knowledge from previously learned tasks in order to learn new tasks quickly in new and unfamiliar environments. Despite this, most robot learning approaches have focused on learning a single task, from scratch, with a limited notion of generalisation, and no way of leveraging the knowledge to learn other tasks more efficiently. One possible solution is meta-learning, but many of the related approaches are limited in their ability to scale to a large number of tasks and to learn further tasks without forgetting previously learned ones. With this in mind, we introduce Task-Embedded Control Networks, which employ ideas from metric learning in order to create a task embedding that can be used by a robot to learn new tasks from one or more demonstrations. In the area of visually-guided manipulation, we present simulation results in which we surpass the performance of a state-of-the-art method when using only visual information from each demonstration. Additionally, we demonstrate that our approach can also be used in conjunction with domain randomisation to train our few-shot learning ability in simulation and then deploy in the real world without any additional training. Once deployed, the robot can learn new tasks from a single real-world demonstration. }
}


@InProceedings{bobu18a,
  title = 	 {Learning under Misspecified Objective Spaces},
  author = 	 {Bobu, Andreea and Bajcsy, Andrea and Fisac, Jaime F. and Dragan, Anca D.},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {796--805},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {https://youtu.be/stnFye8HdcU},
  code = {https://github.com/andreea7b/beta_adaptive_pHRI},
  pdf = 	 {http://proceedings.mlr.press/v87/bobu18a/bobu18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/bobu18a.html},
  abstract = 	 {Learning robot objective functions from human input has become increasingly important, but state-of-the-art techniques assume that the human’s desired objective lies within the robot’s hypothesis space. When this is not true, even methods that keep track of uncertainty over the objective fail because they reason about which hypothesis might be correct, and not whether any of the hypotheses are correct. We focus specifically on learning from physical human corrections during the robot’s task execution, where not having a rich enough hypothesis space leads to the robot updating its objective in ways that the person did not actually intend. We observe that such corrections appear irrelevant to the robot, because they are not the best way of achieving any of the candidate objectives. Instead of naively trusting and learning from every human interaction, we propose robots learn conservatively by reasoning in real time about how relevant the human’s correction is for the robot’s hypothesis space. We test our inference method in an experiment with human interaction data, and demonstrate that this alleviates unintended learning in an in-person user study with a robot manipulator. }
}


@InProceedings{kahn18a,
  title = 	 {Composable Action-Conditioned Predictors: Flexible Off-Policy Learning for Robot Navigation},
  author = 	 {Kahn, Gregory and Villaflor, Adam and Abbeel, Pieter and Levine, Sergey},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {806--816},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {},
  code = {https://github.com/gkahn13/CAPs},
  pdf = 	 {http://proceedings.mlr.press/v87/kahn18a/kahn18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/kahn18a.html},
  abstract = 	 {A general-purpose intelligent robot must be able to learn autonomously and be able to accomplish multiple tasks in order to be deployed in the real world. However, standard reinforcement learning approaches learn separate task-specific policies and assume the reward function for each task is known a priori. We propose a framework that learns event cues from off-policy data, and can flexibly combine these event cues at test time to accomplish different tasks. These event cue labels are not assumed to be known a priori, but are instead labeled using learned models, such as computer vision detectors, and then “backed up” in time using an action-conditioned predictive model. We show that a simulated robotic car and a real-world RC car can gather data and train fully autonomously without any human-provided labels beyond those needed to train the detectors, and then at test-time be able to accomplish a variety of different tasks. Videos of the experiments and code can be found at github.com/gkahn13/CAPs }
}


@InProceedings{golemo18a,
  title = 	 {Sim-to-Real Transfer with Neural-Augmented Robot Simulation},
  author = 	 {Golemo, Florian and Taiga, Adrien Ali and Courville, Aaron and Oudeyer, Pierre-Yves},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {817--828},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {https://youtu.be/nw9YTbCEYH8},
  code = {https://github.com/aalitaiga/sim-to-real/},
  pdf = 	 {http://proceedings.mlr.press/v87/golemo18a/golemo18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/golemo18a.html},
  abstract = 	 {Despite the recent successes of deep reinforcement learning, teaching complex motor skills to a physical robot remains a hard problem. While learning directly on a real system is usually impractical, doing so in simulation has proven to be fast and safe. Nevertheless, because of the "reality gap," policies trained in simulation often perform poorly when deployed on a real system. In this work, we introduce a method for training a recurrent neural network on the differences between simulated and real robot trajectories and then using this model to augment the simulator. This Neural-Augmented Simulation (NAS) can be used to learn control policies that transfer significantly better to real environments than policies learned on existing simulators. We demonstrate the potential of our approach through a set of experiments on the Mujoco simulator with added backlash and the Poppy Ergo Jr robot. NAS allows us to learn policies that are competitive with ones that would have been learned directly on the real robot.}
}


@InProceedings{shan18a,
  title = 	 {Bayesian Generalized Kernel Inference for Terrain Traversability Mapping},
  author = 	 {Shan, Tixiao and Wang, Jinkun and Englot, Brendan and Doherty, Kevin},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {829--838},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {https://youtu.be/ewrCyDiWi-8},
  code = {},
  pdf = 	 {http://proceedings.mlr.press/v87/shan18a/shan18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/shan18a.html},
  abstract = 	 {We propose a new approach for traversability mapping with sparse lidar scans collected by ground vehicles, which leverages probabilistic inference to build descriptive terrain maps. Enabled by recent developments in sparse kernels, Bayesian generalized kernel inference is applied sequentially to the related problems of terrain elevation and traversability inference. The first inference step allows sparse data to support descriptive terrain modeling, and the second inference step relieves the burden typically associated with traversability computation. We explore the capabilities of the approach over a variety of data and terrain, demonstrating its suitability for online use in real-world applications. }
}


@InProceedings{kaushik18a,
  title = 	 {Multi-objective Model-based Policy Search for Data-efficient Learning with Sparse Rewards},
  author = 	 {Kaushik, Rituraj and Chatzilygeroudis, Konstantinos and Mouret, Jean-Baptiste},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {839--855},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {https://youtu.be/9ZLwUxAAq6M},
  code = {https://github.com/resibots/kaushik_2018_multi-dex},
  pdf = 	 {http://proceedings.mlr.press/v87/kaushik18a/kaushik18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/kaushik18a.html},
  abstract = 	 {The most data-efficient algorithms for reinforcement learning in robotics are model-based policy search algorithms, which alternate between learning a dynamical model of the robot and optimizing a policy to maximize the expected return given the model and its uncertainties. However, the current algorithms lack an effective exploration strategy to deal with sparse or misleading reward scenarios: if they do not experience any state with a positive reward during the initial random exploration, it is very unlikely to solve the problem. Here, we propose a novel model-based policy search algorithm, Multi-DEX, that leverages a learned dynamical model to efficiently explore the task space and solve tasks with sparse rewards in a few episodes. To achieve this, we frame the policy search problem as a multi-objective, model-based policy optimization problem with three objectives: (1) generate maximally novel state trajectories, (2) maximize the cumulative reward and (3) keep the system in state-space regions for which the model is as accurate as possible. We then optimize these objectives using a Pareto-based multi-objective optimization algorithm. The experiments show that Multi-DEX is able to solve sparse reward scenarios (with a simulated robotic arm) in much lower interaction time than VIME, TRPO, GEP-PG, CMA-ES and Black-DROPS. }
}


@InProceedings{alet18a,
  title = 	 {Modular meta-learning},
  author = 	 {Alet, Ferran and Lozano-Perez, Tomas and Kaelbling, Leslie P.},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {856--868},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {https://youtu.be/sdkEP7RfO60},
  code = {https://github.com/FerranAlet/modular-metalearning},
  pdf = 	 {http://proceedings.mlr.press/v87/alet18a/alet18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/alet18a.html},
  abstract = 	 {Many prediction problems, such as those that arise in the context of robotics, have a simplifying underlying structure that, if known, could accelerate learning. In this paper, we present a strategy for learning a set of neural network modules that can be combined in different ways. We train different modular structures on a set of related tasks and generalize to new tasks by composing the learned modules in new ways. By reusing modules to generalize we achieve combinatorial generalization, akin to the ”infinite use of finite means” displayed in language. Finally, we show this improves performance in two robotics-related problems.}
}


@InProceedings{stouraitis18a,
  title = 	 {Dyadic collaborative Manipulation through Hybrid Trajectory Optimization},
  author = 	 {Stouraitis, Theodoros and Chatzinikolaidis, Iordanis and Gienger, Michael and Vijayakumar, Sethu},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {869--878},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  video = {https://public.3.basecamp.com/p/uc8DDU9EDDrgmoU11TGLRkKk},
  code = {},
  pdf = 	 {http://proceedings.mlr.press/v87/stouraitis18a/stouraitis18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/stouraitis18a.html},
  abstract = 	 {This work provides a principled formalism to address the joint planning problem in dyadic collaborative manipulation (DcM) scenarios by representing the human’s intentions as task space forces and solving the joint problem holistically via model-based optimization. The proposed method is the first to empower robotic agents with the ability to plan in hybrid spaces—optimizing over discrete contact locations, continuous trajectory and force profiles, for co-manipulation tasks with varied dyadic objective goals. This ability is particularly important in large object manipulation scenarios that typically require change of grasp-holds. The task of finding the contact points, forces and the respective timing of grasp-hold changes are carried out by a joint optimization using non-linear solvers. We demonstrate the efficacy of the optimization method by investigating the effect of robot policy changes (trajectories, timings, grasp-holds) based on changes in collaborative partner policies using physically based dynamic simulations. We also realize, in hardware, effective co-manipulation of a large object by the human and the robot, including eminent grasp changes as well as optimal dyadic interactions to realize the joint task. }
}


@InProceedings{mandlekar18a,
  title = 	 {ROBOTURK: A Crowdsourcing Platform for Robotic Skill Learning through Imitation},
  author = 	 {Mandlekar, Ajay and Zhu, Yuke and Garg, Animesh and Booher, Jonathan and Spero, Max and Tung, Albert and Gao, Julian and Emmons, John and Gupta, Anchit and Orbay, Emre and Savarese, Silvio and Fei-Fei, Li},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {879--893},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {https://www.youtube.com/watch?v=LBH-dTf7xCA&feature=youtu.be},
  code = {},
  pdf = 	 {http://proceedings.mlr.press/v87/mandlekar18a/mandlekar18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/mandlekar18a.html},
  abstract = 	 {Imitation Learning has empowered recent advances in learning robotic manipulation tasks by addressing shortcomings of Reinforcement Learning such as exploration and reward specification. However, research in this area has been limited to modest-sized datasets due to the difficulty of collecting large quantities of task demonstrations through existing mechanisms. This work introduces ROBO-TURK to address this challenge. ROBOTURK is a crowdsourcing platform for high quality 6-DoF trajectory based teleoperation through the use of widely available mobile devices (e.g. iPhone). We evaluate ROBOTURK on three manipulation tasks of varying timescales (15-120s) and observe that our user interface is statistically similar to special purpose hardware such as virtual reality controllers in terms of task completion times. Furthermore, we observe that poor network conditions, such as low bandwidth and high delay links, do not substantially affect the remote users’ ability to perform task demonstrations successfully on ROBOTURK. Lastly, we demonstrate the efficacy of ROBOTURK through the collection of a pilot dataset; using ROBOTURK, we collected 137.5 hours of manipulation data from remote workers, amounting to over 2200 successful task demonstrations in 22 hours of total system usage. We show that the data obtained through ROBOTURK enables policy learning on multi-step manipulation tasks with sparse rewards and that using larger quantities of demonstrations during policy learning provides benefits in terms of both learning consistency and final performance. For additional results, videos, and to download our pilot dataset, visit roboturk.stanford.edu }
}


@InProceedings{zhang18a,
  title = 	 {Integrating kinematics and environment context into deep inverse reinforcement learning for predicting off-road vehicle trajectories},
  author = 	 {Zhang, Yanfu and Wang, Wenshan and Bonatti, Rogerio and Maturana, Daniel and Scherer, Sebastian},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {894--905},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {https://youtu.be/nuJjIdEEDBk},
  code = {https://github.com/yfzhang/vehicle-motion-forecasting},
  pdf = 	 {http://proceedings.mlr.press/v87/zhang18a/zhang18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/zhang18a.html},
  abstract = 	 {Predicting the motion of a mobile agent from a third-person perspective is an important component for many robotics applications, such as autonomous navigation and tracking. With accurate motion prediction of other agents, robots can plan for more intelligent behaviors to achieve specified objectives, instead of acting in a purely reactive way. Previous work addresses motion prediction by either only filtering kinematics, or using hand-designed and learned representations of the environment. Instead of separating kinematic and environmental context, we propose a novel approach to integrate both into an inverse reinforcement learning (IRL) framework for trajectory prediction. Instead of exponentially increasing the state-space complexity with kinematics, we propose a two-stage neural network architecture that considers motion and environment together to recover the reward function. The first-stage network learns feature representations of the environment using low-level LiDAR statistics and the second-stage network combines those learned features with kinematics data. We collected over 30 km of off-road driving data and validated experimentally that our method can effectively extract useful environmental and kinematic features. We generate accurate predictions of the distribution of future trajectories of the vehicle, encoding complex behaviors such as multi-modal distributions at road intersections, and even show different predictions at the same intersection depending on the vehicle’s speed. }
}


@InProceedings{sharma18a,
  title = 	 {Multiple Interactions Made Easy (MIME): Large Scale Demonstrations Data for Imitation},
  author = 	 {Sharma, Pratyusha and Mohan, Lekha and Pinto, Lerrel and Gupta, Abhinav},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {906--915},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {https://youtu.be/M_k5OzQti0U},
  code = {},
  pdf = 	 {http://proceedings.mlr.press/v87/sharma18a/sharma18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/sharma18a.html},
  abstract = 	 {In recent years, we have seen an emergence of data-driven approaches in robotics. However, most existing efforts and datasets are either in simulation or focus on a single task in isolation such as grasping, pushing or poking. In order to make progress and capture the space of manipulation, we would need to collect a large-scale dataset of diverse tasks such as pouring, opening bottles, stacking objects etc. But how does one collect such a dataset? In this paper, we present the largest available robotic-demonstration dataset (MIME) that contains 8260 human-robot demonstrations over 20 different robotic tasks2. These tasks range from the simple task of pushing objects to the difficult task of stacking household objects. Our dataset consists of videos of human demonstrations and kinesthetic trajectories of robot demonstrations. We also propose to use this dataset for the task of mapping 3rd person video features to robot trajectories. Furthermore, we present two different approaches using this dataset and evaluate the predicted robot trajectories against ground-truth trajectories. We hope our dataset inspires research in multiple areas including visual imitation, trajectory prediction and multi-task robotic learning. }
}


@InProceedings{iscen18a,
  title = 	 {Policies Modulating Trajectory Generators},
  author = 	 {Iscen, Atil and Caluwaerts, Ken and Tan, Jie and Zhang, Tingnan and Coumans, Erwin and Sindhwani, Vikas and Vanhoucke, Vincent},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {916--926},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {https://youtu.be/_6SbXK4i0mY},
  code = {},
  pdf = 	 {http://proceedings.mlr.press/v87/iscen18a/iscen18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/iscen18a.html},
  abstract = 	 {We propose an architecture for learning complex controllable behaviors by having simple Policies Modulate Trajectory Generators (PMTG), a powerful combination that can provide both memory and prior knowledge to the controller. The result is a flexible architecture that is applicable to a class of problems with periodic motion for which one has an insight into the class of trajectories that might lead to a desired behavior. We illustrate the basics of our architecture using a synthetic control problem, then go on to learn speed-controlled locomotion for a quadrupedal robot by using Deep Reinforcement Learning and Evolutionary Strategies. We demonstrate that a simple linear policy, when paired with a parametric Trajectory Generator for quadrupedal gaits, can induce walking behaviors with controllable speed from 4-dimensional IMU observations alone, and can be learned in under 1000 rollouts. We also transfer these policies to a real robot and show locomotion with controllable forward velocity. }
}


@InProceedings{figueroa18a,
  title = 	 {A Physically-Consistent Bayesian Non-Parametric Mixture Model for Dynamical System Learning},
  author = 	 {Figueroa, Nadia and Billard, Aude},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {927--946},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {https://youtu.be/HfV4jbJBWTQ},
  code = {https://nbfigueroa.github.io/pc-gmm-ds-learning/},
  pdf = 	 {http://proceedings.mlr.press/v87/figueroa18a/figueroa18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/figueroa18a.html},
  abstract = 	 {We propose a physically-consistent Bayesian non-parametric approach for fitting Gaussian Mixture Models (GMM) to trajectory data. Physical-consistency of the GMM is ensured by imposing a prior on the component assignments biased by a novel similarity metric that leverages locality and directionality. The resulting GMM is then used to learn globally asymptotically stable Dynamical Systems (DS) via a Linear Parameter Varying (LPV) re-formulation. The proposed DS learning scheme accurately encodes challenging nonlinear motions automatically. Finally, a data-efficient incremental learning framework is introduced that encodes a DS from batches of trajectories, while preserving global stability. Our contributions are validated on 2D datasets and a variety of tasks that involve single-target complex motions with a KUKA LWR 4+ robot arm. }
}


@InProceedings{casas18a,
  title = 	 {IntentNet: Learning to Predict Intention from Raw Sensor Data},
  author = 	 {Casas, Sergio and Luo, Wenjie and Urtasun, Raquel},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {947--956},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {},
  code  = {},
  pdf = 	 {http://proceedings.mlr.press/v87/casas18a/casas18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/casas18a.html},
  abstract = 	 {In order to plan a safe maneuver, self-driving vehicles need to understand the intent of other traffic participants. We define intent as a combination of discrete high level behaviors as well as continuous trajectories describing future motion. In this paper we develop a one-stage detector and forecaster that exploits both 3D point clouds produced by a LiDAR sensor as well as dynamic maps of the environment. Our multi-task model achieves better accuracy than the respective separate modules while saving computation, which is critical to reduce reaction time in self-driving applications. }
}


@InProceedings{hristov18a,
  title = 	 {Interpretable Latent Spaces for Learning from Demonstration},
  author = 	 {Hristov, Yordan and Lascarides, Alex and Ramamoorthy, Subramanian},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {957--968},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {},
  code = {https://github.com/yordanh/interp_latent_spaces},
  pdf = 	 {http://proceedings.mlr.press/v87/hristov18a/hristov18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/hristov18a.html},
  abstract = 	 {Effective human-robot interaction, such as in robot learning from human demonstration, requires the learning agent to be able to ground abstract concepts (such as those contained within instructions) in a corresponding high-dimensional sensory input stream from the world. Models such as deep neural networks, with high capacity through their large parameter spaces, can be used to compress the high-dimensional sensory data to lower dimensional representations. These low-dimensional representations facilitate symbol grounding, but may not guarantee that the representation would be human-interpretable. We propose a method which utilises the grouping of user-defined symbols and their corresponding sensory observations in order to align the learnt compressed latent representation with the semantic notions contained in the abstract labels. We demonstrate this through experiments with both simulated and real-world object data, showing that such alignment can be achieved in a process of physical symbol grounding. }
}


@InProceedings{rebecq18a,
  title = 	 {ESIM: an Open Event Camera Simulator},
  author = 	 {Rebecq, Henri and Gehrig, Daniel and Scaramuzza, Davide},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {969--982},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {},
  code  = {},
  pdf = 	 {http://proceedings.mlr.press/v87/rebecq18a/rebecq18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/rebecq18a.html},
  abstract = 	 {Event cameras are revolutionary sensors that work radically differently from standard cameras. Instead of capturing intensity images at a fixed rate, event cameras measure changes of intensity asynchronously, in the form of a stream of events, which encode per-pixel brightness changes. In the last few years, their outstanding properties (asynchronous sensing, no motion blur, high dynamic range) have led to exciting vision applications, with very low-latency and high robustness. However, these sensors are still scarce and expensive to get, slowing down progress of the research community. To address these issues, there is a huge demand for cheap, high-quality synthetic, labeled event for algorithm prototyping, deep learning and algorithm benchmarking. The development of such a simulator, however, is not trivial since event cameras work fundamentally differently from frame-based cameras. We present the first event camera simulator that can generate a large amount of reliable event data. The key component of our simulator is a theoretically sound, adaptive rendering scheme that only samples frames when necessary, through a tight coupling between the rendering engine and the event simulator. We release an open source implementation of our simulator.}
}


@InProceedings{ebert18a,
  title = 	 {Robustness via Retrying: Closed-Loop Robotic Manipulation with Self-Supervised Learning},
  author = 	 {Ebert, Frederik and Dasari, Sudeep and Lee, Alex X. and Levine, Sergey and Finn, Chelsea},
  booktitle = 	 {Proceedings of The 2nd Conference on Robot Learning},
  pages = 	 {983--993},
  year = 	 {2018},
  editor = 	 {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
  volume = 	 {87},
  series = 	 {Proceedings of Machine Learning Research},
  address = 	 {},
  month = 	 {29--31 Oct},
  publisher = 	 {PMLR},
  youtube = {https://youtu.be/gWJJsDX34Cs},
  code = {https://github.com/febert/robustness_via_retrying},
  pdf = 	 {http://proceedings.mlr.press/v87/ebert18a/ebert18a.pdf},
  url = 	 {http://proceedings.mlr.press/v87/ebert18a.html},
  abstract = 	 {Prediction is an appealing objective for self-supervised learning of behavioral skills, particularly for autonomous robots. However, effectively utilizing predictive models for control, especially with raw image inputs, poses a number of major challenges. How should the predictions be used? What happens when they are inaccurate? In this paper, we tackle these questions by proposing a method for learning robotic skills from raw image observations, using only autonomously collected experience. We show that even an imperfect model can complete complex tasks if it can continuously retry, but this requires the model to not lose track of the objective (e.g., the object of interest). To enable a robot to continuously retry a task, we devise a self-supervised algorithm for learning image registration, which can keep track of objects of interest for the duration of the trial. We demonstrate that this idea can be combined with a video-prediction based controller to enable complex behaviors to be learned from scratch using only raw visual inputs, including grasping, repositioning objects, and non-prehensile manipulation. Our real-world experiments demonstrate that a model trained with 160 robot hours of autonomously collected, unlabeled data is able to successfully perform complex manipulation tasks with a wide range of objects not seen during training.}
}