% corl2018.bib (forked from mlresearch/v87): BibTeX entries for the Proceedings of The 2nd Conference on Robot Learning (CoRL 2018), PMLR volume 87.
@Proceedings{CoRL2018,
title = {Proceedings of The 2nd Conference on Robot Learning},
booktitle = {Proceedings of The 2nd Conference on Robot Learning},
editor = {Aude Billard and Anca Dragan and Jan Peters and Jun Morimoto},
publisher = {PMLR},
series = {Proceedings of Machine Learning Research},
volume = {87},
year = {2018}
}
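% A minimal usage sketch for this file (assumptions: it is saved as corl2018.bib
% next to the .tex source and compiled with latex + bibtex; the natbib package,
% the plainnat style, and the cited key are illustrative choices, not dictated
% by this file):
%
%   \documentclass{article}
%   \usepackage{natbib}
%   \begin{document}
%   Driving policies can be transferred from simulation to reality via
%   modularity and abstraction \citep{mueller18a}.
%   \bibliographystyle{plainnat}
%   \bibliography{corl2018}
%   \end{document}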
@InProceedings{mueller18a,
title = {Driving Policy Transfer via Modularity and Abstraction},
author = {Müller, Matthias and Dosovitskiy, Alexey and Ghanem, Bernard and Koltun, Vladlen},
booktitle = {Proceedings of The 2nd Conference on Robot Learning},
pages = {1--15},
year = {2018},
editor = {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
volume = {87},
series = {Proceedings of Machine Learning Research},
address = {},
month = {29--31 Oct},
publisher = {PMLR},
youtube = {https://www.youtube.com/watch?v=BrMDJqI6H5U},
code = {https://github.com/thias15/driving_policy_transfer},
pdf = {http://proceedings.mlr.press/v87/mueller18a/mueller18a.pdf},
url = {http://proceedings.mlr.press/v87/mueller18a.html},
abstract = {End-to-end approaches to autonomous driving have high sample complexity and are difficult to scale to realistic urban driving. Simulation can help end-to-end driving systems by providing a cheap, safe, and diverse training environment. Yet training driving policies in simulation brings up the problem of transferring such policies to the real world. We present an approach to transferring driving policies from simulation to reality via modularity and abstraction. Our approach is inspired by classic driving systems and aims to combine the benefits of modular architectures and end-to-end deep learning approaches. The key idea is to encapsulate the driving policy such that it is not directly exposed to raw perceptual input or low-level vehicle dynamics. We evaluate the presented approach in simulated urban environments and in the real world. In particular, we transfer a driving policy trained in simulation to a 1/5-scale robotic truck that is deployed in a variety of conditions, with no finetuning, on two continents.}
}
@InProceedings{ohnbar18a,
title = {Personalized Dynamics Models for Adaptive Assistive Navigation Systems},
author = {Ohn-Bar, Eshed and Kitani, Kris and Asakawa, Chieko},
booktitle = {Proceedings of The 2nd Conference on Robot Learning},
pages = {16--39},
year = {2018},
editor = {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
volume = {87},
series = {Proceedings of Machine Learning Research},
address = {},
month = {29--31 Oct},
publisher = {PMLR},
youtube = {},
code = {},
pdf = {http://proceedings.mlr.press/v87/ohnbar18a/ohnbar18a.pdf},
url = {http://proceedings.mlr.press/v87/ohnbar18a.html},
abstract = {Consider an assistive system that guides visually impaired users through speech and haptic feedback to their destination. Existing robotic and ubiquitous navigation technologies (e.g., portable, ground, or wearable systems) often operate in a generic, user-agnostic manner. However, to minimize confusion and navigation errors, our real-world analysis reveals a crucial need to adapt the instructional guidance across different end-users with diverse mobility skills. To address this practical issue in scalable system design, we propose a novel model-based reinforcement learning framework for personalizing the system-user interaction experience. When incrementally adapting the system to new users, we propose to use a weighted experts model for addressing data-efficiency limitations in transfer learning with deep models. A real-world dataset of navigation by blind users is used to show that the proposed approach allows for (1) more accurate long-term human behavior prediction (up to 20 seconds into the future) through improved reasoning over personal mobility characteristics, interaction with surrounding obstacles, and the current navigation goal, and (2) quick adaptation at the onset of learning, when data is limited.}
}
@InProceedings{xie18a,
title = {Few-Shot Goal Inference for Visuomotor Learning and Planning},
author = {Xie, Annie and Singh, Avi and Levine, Sergey and Finn, Chelsea},
booktitle = {Proceedings of The 2nd Conference on Robot Learning},
pages = {40--52},
year = {2018},
editor = {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
volume = {87},
series = {Proceedings of Machine Learning Research},
address = {},
month = {29--31 Oct},
publisher = {PMLR},
youtube = {https://www.youtube.com/watch?v=kR-Y1GK7MHA&feature=youtu.be},
code = {https://github.com/anxie/meta_classifier},
pdf = {http://proceedings.mlr.press/v87/xie18a/xie18a.pdf},
url = {http://proceedings.mlr.press/v87/xie18a.html},
abstract = {Reinforcement learning and planning methods require an objective or reward function that encodes the desired behavior. Yet, in practice, there is a wide range of scenarios where an objective is difficult to provide programmatically, such as tasks with visual observations involving unknown object positions or deformable objects. In these cases, prior methods use engineered problem-specific solutions, e.g., by instrumenting the environment with additional sensors to measure a proxy for the objective. Such solutions require a significant engineering effort on a per-task basis, and make it impractical for robots to continuously learn complex skills outside of laboratory settings. We aim to find a more general and scalable solution for specifying goals for robot learning in unconstrained environments. To that end, we formulate the few-shot objective learning problem, where the goal is to learn a task objective from only a few example images of successful end states for that task. We propose a simple solution to this problem: meta-learn a classifier that can recognize new goals from a few examples. We show how this approach can be used with both model-free reinforcement learning and visual model-based planning and show results in three domains: rope manipulation from images in simulation, visual navigation in a simulated 3D environment, and object arrangement into user-specified configurations on a real robot.}
}
@InProceedings{das18a,
title = {Neural Modular Control for Embodied Question Answering},
author = {Das, Abhishek and Gkioxari, Georgia and Lee, Stefan and Parikh, Devi and Batra, Dhruv},
booktitle = {Proceedings of The 2nd Conference on Robot Learning},
pages = {53--62},
year = {2018},
editor = {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
volume = {87},
series = {Proceedings of Machine Learning Research},
address = {},
month = {29--31 Oct},
publisher = {PMLR},
youtube = {},
code = {https://github.com/facebookresearch/EmbodiedQA},
pdf = {http://proceedings.mlr.press/v87/das18a/das18a.pdf},
url = {http://proceedings.mlr.press/v87/das18a.html},
abstract = {We present a modular approach for learning policies for navigation over long planning horizons from language input. Our hierarchical policy operates at multiple timescales, where the higher-level master policy proposes subgoals to be executed by specialized sub-policies. Our choice of subgoals is compositional and semantic, i.e. they can be sequentially combined in arbitrary orderings, and assume human-interpretable descriptions (e.g. ‘exit room’, ‘find kitchen’, ‘find refrigerator’, etc.). We use imitation learning to warm-start policies at each level of the hierarchy, dramatically increasing sample efficiency, followed by reinforcement learning. Independent reinforcement learning at each level of hierarchy enables sub-policies to adapt to consequences of their actions and recover from errors. Subsequent joint hierarchical training enables the master policy to adapt to the sub-policies. On the challenging EQA [1] benchmark in House3D [2], requiring navigating diverse realistic indoor environments, our approach outperforms prior work by a significant margin, both in terms of navigation and question answering.}
}
@InProceedings{yang18a,
title = {Visual Curiosity: Learning to Ask Questions to Learn Visual Recognition},
author = {Yang, Jianwei and Lu, Jiasen and Lee, Stefan and Batra, Dhruv and Parikh, Devi},
booktitle = {Proceedings of The 2nd Conference on Robot Learning},
pages = {63--80},
year = {2018},
editor = {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
volume = {87},
series = {Proceedings of Machine Learning Research},
address = {},
month = {29--31 Oct},
publisher = {PMLR},
youtube = {},
code = {https://github.com/jwyang/visual_curiosity},
pdf = {http://proceedings.mlr.press/v87/yang18a/yang18a.pdf},
url = {http://proceedings.mlr.press/v87/yang18a.html},
abstract = {In an open-world setting, it is inevitable that an intelligent agent (e.g., a robot) will encounter visual objects, attributes or relationships it does not recognize. In this work, we develop an agent empowered with visual curiosity, i.e. the ability to ask questions to an Oracle (e.g., human) about the contents in images (e.g., ‘What is the object on the left side of the red cube?’) and build visual recognition model based on the answers received (e.g., ‘Cylinder’). In order to do this, the agent must (1) understand what it recognizes and what it does not, (2) formulate a valid, unambiguous and informative ‘language’ query (a question) to ask the Oracle, (3) derive the parameters of visual classifiers from the Oracle response and (4) leverage the updated visual classifiers to ask more clarified questions. Specifically, we propose a novel framework and formulate the learning of visual curiosity as a reinforcement learning problem. In this framework, all components of our agent – visual recognition module (to see), question generation policy (to ask), answer digestion module (to understand) and graph memory module (to memorize) – are learned entirely end-to-end to maximize the reward derived from the scene graph obtained by the agent as a consequence of the dialog with the Oracle. Importantly, the question generation policy is disentangled from the visual recognition system and specifics of the ‘environment’ (scenes). Consequently, we demonstrate a sort of ‘double’ generalization – our question generation policy generalizes to new environments and a new pair of eyes, i.e., new visual system. Specifically, an agent trained on one set of environments (scenes) and with one particular visual recognition system is able to ask intelligent questions about new scenes when paired with a new visual recognition system. Trained on a synthetic dataset, our results show that our agent learns new visual concepts significantly faster than several heuristic baselines – even when tested on synthetic environments with novel objects, as well as in a realistic environment.}
}
@InProceedings{yu18a,
title = {Guided Feature Transformation (GFT): A Neural Language Grounding Module for Embodied Agents},
author = {Yu, Haonan and Lian, Xiaochen and Zhang, Haichao and Xu, Wei},
booktitle = {Proceedings of The 2nd Conference on Robot Learning},
pages = {81--98},
year = {2018},
editor = {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
volume = {87},
series = {Proceedings of Machine Learning Research},
address = {},
month = {29--31 Oct},
publisher = {PMLR},
youtube = {https://www.youtube.com/watch?v=bOBb1uhuJxg},
code = {https://github.com/idlrl/flare/blob/master/tutorial/examples/xworld3d_navigation.py},
pdf = {http://proceedings.mlr.press/v87/yu18a/yu18a.pdf},
url = {http://proceedings.mlr.press/v87/yu18a.html},
abstract = {Recently there has been a rising interest in training agents, embodied in virtual environments, to perform language-directed tasks by deep reinforcement learning. In this paper, we propose a simple but effective neural language grounding module for embodied agents that can be trained end to end from scratch taking raw pixels, unstructured linguistic commands, and sparse rewards as the inputs. We model the language grounding process as a language-guided transformation of visual features, where latent sentence embeddings are used as the transformation matrices. In several language-directed navigation tasks that feature challenging partial observability and require simple reasoning, our module significantly outperforms the state of the art. We also release XWORLD3D, an easy-to-customize 3D environment that can be modified to evaluate a variety of embodied agents.}
}
@InProceedings{jang18a,
title = {Grasp2Vec: Learning Object Representations from Self-Supervised Grasping},
author = {Jang, Eric and Devin, Coline and Vanhoucke, Vincent and Levine, Sergey},
booktitle = {Proceedings of The 2nd Conference on Robot Learning},
pages = {99--112},
year = {2018},
editor = {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
volume = {87},
series = {Proceedings of Machine Learning Research},
address = {},
month = {29--31 Oct},
publisher = {PMLR},
youtube = {},
code = {},
pdf = {http://proceedings.mlr.press/v87/jang18a/jang18a.pdf},
url = {http://proceedings.mlr.press/v87/jang18a.html},
abstract = {Well structured visual representations can make robot learning faster and can improve generalization. In this paper, we study how we can acquire effective object-centric representations for robotic manipulation tasks without human labeling by using autonomous robot interaction with the environment. Such representation learning methods can benefit from continuous refinement of the representation as the robot collects more experience, allowing them to scale effectively without human intervention. Our representation learning approach is based on object persistence: when a robot removes an object from a scene, the representation of that scene should change according to the features of the object that was removed. We formulate an arithmetic relationship between feature vectors from this observation, and use it to learn a representation of scenes and objects that can then be used to identify object instances, localize them in the scene, and perform goal-directed grasping tasks where the robot must retrieve commanded objects from a bin. The same grasping procedure can also be used to automatically collect training data for our method, by recording images of scenes, grasping and removing an object, and recording the outcome. Our experiments demonstrate that this self-supervised approach for tasked grasping substantially outperforms direct reinforcement learning from images and prior representation learning methods. }
}
@InProceedings{zhao18a,
title = {Energy-Based Hindsight Experience Prioritization},
author = {Zhao, Rui and Tresp, Volker},
booktitle = {Proceedings of The 2nd Conference on Robot Learning},
pages = {113--122},
year = {2018},
editor = {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
volume = {87},
series = {Proceedings of Machine Learning Research},
address = {},
month = {29--31 Oct},
publisher = {PMLR},
youtube = {https://youtu.be/jtsF2tTeUGQ},
code = {https://github.com/ruizhaogit/EnergyBasedPrioritization},
pdf = {http://proceedings.mlr.press/v87/zhao18a/zhao18a.pdf},
url = {http://proceedings.mlr.press/v87/zhao18a.html},
abstract = {In Hindsight Experience Replay (HER), a reinforcement learning agent is trained by treating whatever it has achieved as virtual goals. However, in previous work, the experience was replayed at random, without considering which episode might be the most valuable for learning. In this paper, we develop an energy-based framework for prioritizing hindsight experience in robotic manipulation tasks. Our approach is inspired by the work-energy principle in physics. We define a trajectory energy function as the sum of the transition energy of the target object over the trajectory. We hypothesize that replaying episodes that have high trajectory energy is more effective for reinforcement learning in robotics. To verify our hypothesis, we designed a framework for hindsight experience prioritization based on the trajectory energy of goal states. The trajectory energy function takes the potential, kinetic, and rotational energy into consideration. We evaluate our Energy-Based Prioritization (EBP) approach on four challenging robotic manipulation tasks in simulation. Our empirical results show that our proposed method surpasses state-of-the-art approaches in terms of both performance and sample-efficiency on all four tasks, without increasing computational time. A video showing experimental results is available at https://youtu.be/jtsF2tTeUGQ. }
}
@InProceedings{losey18a,
title = {Including Uncertainty when Learning from Human Corrections},
author = {Losey, Dylan P. and O'Malley, Marcia K.},
booktitle = {Proceedings of The 2nd Conference on Robot Learning},
pages = {123--132},
year = {2018},
editor = {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
volume = {87},
series = {Proceedings of Machine Learning Research},
address = {},
month = {29--31 Oct},
publisher = {PMLR},
youtube = {},
code = {},
pdf = {http://proceedings.mlr.press/v87/losey18a/losey18a.pdf},
url = {http://proceedings.mlr.press/v87/losey18a.html},
abstract = {It is difficult for humans to efficiently teach robots how to correctly perform a task. One intuitive solution is for the robot to iteratively learn the human’s preferences from corrections, where the human improves the robot’s current behavior at each iteration. When learning from corrections, we argue that while the robot should estimate the most likely human preferences, it should also know what it does not know, and integrate this uncertainty as it makes decisions. We advance the state-of-the-art by introducing a Kalman filter for learning from corrections: this approach obtains the uncertainty of the estimated human preferences. Next, we demonstrate how the estimate uncertainty can be leveraged for active learning and risk-sensitive deployment. Our results indicate that obtaining and leveraging uncertainty leads to faster learning from human corrections. }
}
@InProceedings{kaufmann18a,
title = {Deep Drone Racing: Learning Agile Flight in Dynamic Environments},
author = {Kaufmann, Elia and Loquercio, Antonio and Ranftl, René and Dosovitskiy, Alexey and Koltun, Vladlen and Scaramuzza, Davide},
booktitle = {Proceedings of The 2nd Conference on Robot Learning},
pages = {133--145},
year = {2018},
editor = {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
volume = {87},
series = {Proceedings of Machine Learning Research},
address = {},
month = {29--31 Oct},
publisher = {PMLR},
youtube = {https://www.youtube.com/watch?v=8RILnqPxo1s},
code = {},
pdf = {http://proceedings.mlr.press/v87/kaufmann18a/kaufmann18a.pdf},
url = {http://proceedings.mlr.press/v87/kaufmann18a.html},
abstract = {Autonomous agile flight brings up fundamental challenges in robotics, such as coping with unreliable state estimation, reacting optimally to dynamically changing environments, and coupling perception and action in real time under severe resource constraints. In this paper, we consider these challenges in the context of autonomous, vision-based drone racing in dynamic environments. Our approach combines a convolutional neural network (CNN) with a state-of-the-art path-planning and control system. The CNN directly maps raw images into a robust representation in the form of a waypoint and desired speed. This information is then used by the planner to generate a short, minimum-jerk trajectory segment and corresponding motor commands to reach the desired goal. We demonstrate our method in autonomous agile flight scenarios, in which a vision-based quadrotor traverses drone-racing tracks with possibly moving gates. Our method does not require any explicit map of the environment and runs fully onboard. We extensively test the precision and robustness of the approach in simulation and in the physical world. We also evaluate our method against state-of-the-art navigation approaches and professional human drone pilots. }
}
@InProceedings{yang18b,
title = {HDNET: Exploiting HD Maps for 3D Object Detection},
author = {Yang, Bin and Liang, Ming and Urtasun, Raquel},
booktitle = {Proceedings of The 2nd Conference on Robot Learning},
pages = {146--155},
year = {2018},
editor = {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
volume = {87},
series = {Proceedings of Machine Learning Research},
address = {},
month = {29--31 Oct},
publisher = {PMLR},
youtube = {},
code = {},
pdf = {http://proceedings.mlr.press/v87/yang18b/yang18b.pdf},
url = {http://proceedings.mlr.press/v87/yang18b.html},
abstract = {In this paper we show that High-Definition (HD) maps provide strong priors that can boost the performance and robustness of modern 3D object detectors. Towards this goal, we design a single stage detector that extracts geometric and semantic features from the HD maps. As maps might not be available everywhere, we also propose a map prediction module that estimates the map on the fly from raw LiDAR data. We conduct extensive experiments on KITTI [1] as well as a large-scale 3D detection benchmark containing 1 million frames, and show that the proposed map-aware detector consistently outperforms the state-of-the-art in both mapped and un-mapped scenarios. Importantly the whole framework runs at 20 frames per second. }
}
@InProceedings{amiranashvili18a,
title = {Motion Perception in Reinforcement Learning with Dynamic Objects},
author = {Amiranashvili, Artemij and Dosovitskiy, Alexey and Koltun, Vladlen and Brox, Thomas},
booktitle = {Proceedings of The 2nd Conference on Robot Learning},
pages = {156--168},
year = {2018},
editor = {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
volume = {87},
series = {Proceedings of Machine Learning Research},
address = {},
month = {29--31 Oct},
publisher = {PMLR},
youtube = {https://youtu.be/YALmehmmu3Q},
code = {https://github.com/lmb-freiburg/flow_rl},
pdf = {http://proceedings.mlr.press/v87/amiranashvili18a/amiranashvili18a.pdf},
url = {http://proceedings.mlr.press/v87/amiranashvili18a.html},
abstract = {In dynamic environments, learned controllers are supposed to take motion into account when selecting the action to be taken. However, in existing reinforcement learning works motion is rarely treated explicitly; it is rather assumed that the controller learns the necessary motion representation from temporal stacks of frames implicitly. In this paper, we show that for continuous control tasks learning an explicit representation of motion clearly improves the quality of the learned controller in dynamic scenarios. We demonstrate this on common benchmark tasks (Walker, Swimmer, Hopper), on target reaching and ball catching tasks with simulated robotic arms, and on a dynamic single ball juggling task. Moreover, we find that when equipped with an appropriate network architecture, the agent can, on some tasks, learn motion features also with pure reinforcement learning, without additional supervision. }
}
@InProceedings{karkus18a,
title = {Particle Filter Networks with Application to Visual Localization},
author = {Karkus, Peter and Hsu, David and Lee, Wee Sun},
booktitle = {Proceedings of The 2nd Conference on Robot Learning},
pages = {169--178},
year = {2018},
editor = {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
volume = {87},
series = {Proceedings of Machine Learning Research},
address = {},
month = {29--31 Oct},
publisher = {PMLR},
youtube = {},
code = {https://github.com/AdaCompNUS/pfnet},
pdf = {http://proceedings.mlr.press/v87/karkus18a/karkus18a.pdf},
url = {http://proceedings.mlr.press/v87/karkus18a.html},
abstract = {Particle filtering is a powerful approach to sequential state estimation and finds application in many domains, including robot localization, object tracking, etc. To apply particle filtering in practice, a critical challenge is to construct probabilistic system models, especially for systems with complex dynamics or rich sensory inputs such as camera images. This paper introduces the Particle Filter Network (PF-net), which encodes both a system model and a particle filter algorithm in a single neural network. The PF-net is fully differentiable and trained end-to-end from data. Instead of learning a generic system model, it learns a model optimized for the particle filter algorithm. We apply the PF-net to a visual localization task, in which a robot must localize in a rich 3-D world, using only a schematic 2-D floor map. In simulation experiments, PF-net consistently outperforms alternative learning architectures, as well as a traditional model-based method, under a variety of sensor inputs. Further, PF-net generalizes well to new, unseen environments. }
}
@InProceedings{martin18a,
title = {Sparse Gaussian Process Temporal Difference Learning for Marine Robot Navigation},
author = {Martin, John and Wang, Jinkun and Englot, Brendan},
booktitle = {Proceedings of The 2nd Conference on Robot Learning},
pages = {179--189},
year = {2018},
editor = {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
volume = {87},
series = {Proceedings of Machine Learning Research},
address = {},
month = {29--31 Oct},
publisher = {PMLR},
youtube = {},
code = {},
pdf = {http://proceedings.mlr.press/v87/martin18a/martin18a.pdf},
url = {http://proceedings.mlr.press/v87/martin18a.html},
abstract = {We present a method for Temporal Difference (TD) learning that addresses several challenges faced by robots learning to navigate in a marine environment. For improved data efficiency, our method reduces TD updates to Gaussian Process regression. To make predictions amenable to online settings, we introduce a sparse approximation with improved quality over current rejection-based methods. We derive the predictive value function posterior and use the moments to obtain a new algorithm for model-free policy evaluation, SPGP-SARSA. With simple changes, we show SPGP-SARSA can be reduced to a model-based equivalent, SPGP-TD. We perform comprehensive simulation studies and also conduct physical learning trials with an underwater robot. Our results show SPGP-SARSA can outperform the state-of-the-art sparse method, replicate the prediction quality of its exact counterpart, and be applied to solve underwater navigation tasks. }
}
@InProceedings{guizilini18a,
title = {Fast 3D Modeling with Approximated Convolutional Kernels},
author = {Guizilini, Vitor and Ramos, Fabio},
booktitle = {Proceedings of The 2nd Conference on Robot Learning},
pages = {190--199},
year = {2018},
editor = {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
volume = {87},
series = {Proceedings of Machine Learning Research},
address = {},
month = {29--31 Oct},
publisher = {PMLR},
youtube = {},
code = {},
pdf = {http://proceedings.mlr.press/v87/guizilini18a/guizilini18a.pdf},
url = {http://proceedings.mlr.press/v87/guizilini18a.html},
abstract = {This paper introduces a novel regression methodology for 3D reconstruction, with applications in robotics tasks such as terrain modeling and implicit surface calculation. The proposed methodology is based on projections into a high-dimensional space, that is able to fit arbitrarily complex data as a continuous function using a series of kernel evaluations within a linear regression model. We avoid direct kernel calculation by employing a novel sparse random Fourier feature vector, that approximates any shift-invariant kernel as a series of dot products relative to a set of inducing points placed throughout the input space. The varying properties of these inducing points produce non-stationarity in the resulting model, and can be jointly learned alongside linear regression weights. Furthermore, we show how convolution with arbitrary kernels can be performed directly in this high-dimensional continuous space, by training a neural network to learn the Fourier transform of the convolutional output based on information from the input kernels. Experimental results in terrain modeling and implicit surface calculation show that the proposed framework is able to outperform similar techniques in terms of computational speed without sacrificing accuracy, while enabling efficient convolution with arbitrary kernels for tasks such as global localization and template matching within these applications. }
}
@InProceedings{guizilini18b,
title = {Unpaired Learning of Dense Visual Depth Estimators for Urban Environments},
author = {Guizilini, Vitor and Ramos, Fabio},
booktitle = {Proceedings of The 2nd Conference on Robot Learning},
pages = {200--212},
year = {2018},
editor = {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
volume = {87},
series = {Proceedings of Machine Learning Research},
address = {},
month = {29--31 Oct},
publisher = {PMLR},
youtube = {},
code = {},
pdf = {http://proceedings.mlr.press/v87/guizilini18b/guizilini18b.pdf},
url = {http://proceedings.mlr.press/v87/guizilini18b.html},
abstract = {This paper addresses the classical problem of learning-based monocular depth estimation in urban environments, in which a model is trained to directly map a single input image to its corresponding depth values. All currently available techniques treat monocular depth estimation as a regression problem, and thus require some sort of data pairing, either explicitly as input-output ground-truth pairs, using information from range sensors (i.e. laser), or as binocular stereo footage. We introduce a novel methodology that completely eliminates the need for data pairing, only requiring two unrelated datasets containing samples of input images and output depth values. A cycle-consistent generative adversarial network is used to learn a mapping between these two domains, based on a custom adversarial loss function specifically designed to improve performance on the task of monocular depth estimation, including local depth smoothness and boundary equilibrium. A wide range of experiments were conducted using a variety of well-known indoor and outdoor datasets, with depth estimates obtained from laser sensors, RGBD cameras and SLAM pointclouds. In all of them, the proposed CycleDepth framework reaches competitive results even under a more restricted training scenario. }
}
@InProceedings{stein18a,
title = {Learning over Subgoals for Efficient Navigation of Structured, Unknown Environments},
author = {Stein, Gregory J. and Bradley, Christopher and Roy, Nicholas},
booktitle = {Proceedings of The 2nd Conference on Robot Learning},
pages = {213--222},
year = {2018},
editor = {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
volume = {87},
series = {Proceedings of Machine Learning Research},
address = {},
month = {29--31 Oct},
publisher = {PMLR},
youtube = {https://youtu.be/hmJ-LON5mQc},
code = {},
pdf = {http://proceedings.mlr.press/v87/stein18a/stein18a.pdf},
url = {http://proceedings.mlr.press/v87/stein18a.html},
abstract = {We propose a novel technique for efficiently navigating unknown environments over long horizons by learning to predict properties of unknown space. We generate a dynamic action set defined by the current map, factor the Bellman Equation in terms of these actions, and estimate terms, such as the probability that navigating beyond a particular subgoal will lead to a dead-end, that are otherwise difficult to compute. Simulated agents navigating with our Learned Subgoal Planner in real-world floor plans demonstrate a 21% expected decrease in cost-to-go compared to standard optimistic planning techniques that rely on Dijkstra’s algorithm, and real-world agents show promising navigation performance as well. }
}
@InProceedings{subramani18a,
title = {Inferring geometric constraints in human demonstrations},
author = {Subramani, Guru and Zinn, Michael and Gleicher, Michael},
booktitle = {Proceedings of The 2nd Conference on Robot Learning},
pages = {223--236},
year = {2018},
editor = {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
volume = {87},
series = {Proceedings of Machine Learning Research},
address = {},
month = {29--31 Oct},
publisher = {PMLR},
youtube = {https://youtu.be/-rra2QA51Is},
code = {https://github.com/uwgraphics/CoRL2018Constraint},
pdf = {http://proceedings.mlr.press/v87/subramani18a/subramani18a.pdf},
url = {http://proceedings.mlr.press/v87/subramani18a.html},
abstract = {This paper presents an approach for inferring geometric constraints in human demonstrations. In our method, geometric constraint models are built to create representations of kinematic constraints such as fixed point, axial rotation, prismatic motion, planar motion and others across multiple degrees of freedom. Our method infers geometric constraints using both kinematic and force/torque information. The approach first fits all the constraint models using kinematic information and evaluates them individually using position, force and moment criteria. Our approach does not require information about the constraint type or contact geometry; it can determine both simultaneously. We present experimental evaluations using instrumented tongs that show how constraints can be robustly inferred in recordings of human demonstrations. }
}
@InProceedings{sauer18a,
title = {Conditional Affordance Learning for Driving in Urban Environments},
author = {Sauer, Axel and Savinov, Nikolay and Geiger, Andreas},
booktitle = {Proceedings of The 2nd Conference on Robot Learning},
pages = {237--252},
year = {2018},
editor = {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
volume = {87},
series = {Proceedings of Machine Learning Research},
address = {},
month = {29--31 Oct},
publisher = {PMLR},
youtube = {https://www.youtube.com/watch?v=UtUbpigMgr0},
code = {https://github.com/xl-sr/CAL},
pdf = {http://proceedings.mlr.press/v87/sauer18a/sauer18a.pdf},
url = {http://proceedings.mlr.press/v87/sauer18a.html},
abstract = {Most existing approaches to autonomous driving fall into one of two categories: modular pipelines, that build an extensive model of the environment, and imitation learning approaches, that map images directly to control outputs. A recently proposed third paradigm, direct perception, aims to combine the advantages of both by using a neural network to learn appropriate low-dimensional intermediate representations. However, existing direct perception approaches are restricted to simple highway situations, lacking the ability to navigate intersections, stop at traffic lights or respect speed limits. In this work, we propose a direct perception approach which maps video input to intermediate representations suitable for autonomous navigation in complex urban environments given high-level directional inputs. Compared to state-of-the-art reinforcement and conditional imitation learning approaches, we achieve an improvement of up to 68 % in goal-directed navigation on the challenging CARLA simulation benchmark. In addition, our approach is the first to handle traffic lights and speed signs by using image-level labels only, as well as smooth car-following, resulting in a significant reduction of traffic accidents in simulation. }
}
@InProceedings{wenzel18a,
title = {Modular Vehicle Control for Transferring Semantic Information Between Weather Conditions Using GANs},
author = {Wenzel, Patrick and Khan, Qadeer and Cremers, Daniel and Leal-Taixé, Laura},
booktitle = {Proceedings of The 2nd Conference on Robot Learning},
pages = {253--269},
year = {2018},
editor = {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
volume = {87},
series = {Proceedings of Machine Learning Research},
address = {},
month = {29--31 Oct},
publisher = {PMLR},
youtube = {https://www.youtube.com/playlist?list=PLbT2smuiIncsR_s9YA6KFpsa8gMwus5u7},
code = {https://github.com/pmwenzel/carla-domain-adaptation},
pdf = {http://proceedings.mlr.press/v87/wenzel18a/wenzel18a.pdf},
url = {http://proceedings.mlr.press/v87/wenzel18a.html},
abstract = {Even though end-to-end supervised learning has shown promising results for sensorimotor control of self-driving cars, its performance is greatly affected by the weather conditions under which it was trained, showing poor generalization to unseen conditions. In this paper, we show how knowledge can be transferred using semantic maps to new weather conditions without the need to obtain new ground truth data. To this end, we propose to divide the task of vehicle control into two independent modules: a control module which is only trained on one weather condition for which labeled steering data is available, and a perception module which is used as an interface between new weather conditions and the fixed control module. To generate the semantic data needed to train the perception module, we propose to use a generative adversarial network (GAN)-based model to retrieve the semantic information for the new conditions in an unsupervised manner. We introduce a master-servant architecture, where the master model (semantic labels available) trains the servant model (semantic labels not available). We show that our proposed method trained with ground truth data for a single weather condition is capable of achieving similar results on the task of steering angle prediction as an end-to-end model trained with ground truth data of 15 different weather conditions. }
}
@InProceedings{liang18a,
title = {GPU-Accelerated Robotic Simulation for Distributed Reinforcement Learning},
author = {Liang, Jacky and Makoviychuk, Viktor and Handa, Ankur and Chentanez, Nuttapong and Macklin, Miles and Fox, Dieter},
booktitle = {Proceedings of The 2nd Conference on Robot Learning},
pages = {270--282},
year = {2018},
editor = {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
volume = {87},
series = {Proceedings of Machine Learning Research},
address = {},
month = {29--31 Oct},
publisher = {PMLR},
youtube = {},
code = {},
pdf = {http://proceedings.mlr.press/v87/liang18a/liang18a.pdf},
url = {http://proceedings.mlr.press/v87/liang18a.html},
abstract = {Most Deep Reinforcement Learning (Deep RL) algorithms require a prohibitively large number of training samples for learning complex tasks. Many recent works on speeding up Deep RL have focused on distributed training and simulation. While distributed training is often done on the GPU, simulation is not. In this work, we propose using GPU-accelerated RL simulations as an alternative to CPU ones. Using NVIDIA Flex, a GPU-based physics engine, we show promising speed-ups of learning various continuous-control, locomotion tasks. With one GPU and CPU core, we are able to train the Humanoid running task in less than 20 minutes, using 10-1000x fewer CPU cores than previous works. We also demonstrate the scalability of our simulator to multi-GPU settings to train more challenging locomotion tasks. }
}
@InProceedings{ushani18a,
title = {Feature Learning for Scene Flow Estimation from LIDAR},
author = {Ushani, Arash K. and Eustice, Ryan M.},
booktitle = {Proceedings of The 2nd Conference on Robot Learning},
pages = {283--292},
year = {2018},
editor = {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
volume = {87},
series = {Proceedings of Machine Learning Research},
address = {},
month = {29--31 Oct},
publisher = {PMLR},
youtube = {},
code = {https://github.com/aushani/flsf},
pdf = {http://proceedings.mlr.press/v87/ushani18a/ushani18a.pdf},
url = {http://proceedings.mlr.press/v87/ushani18a.html},
abstract = {To perform tasks in dynamic environments, many mobile robots must estimate the motion in the surrounding world. Recently, techniques have been developed to estimate scene flow directly from LIDAR scans, relying on hand-designed features. In this work, we build an encoding network to learn features from an occupancy grid. The network is trained so that these features are discriminative in finding matching or non-matching locations between successive timesteps. This learned feature space is then leveraged to estimate scene flow. We evaluate our method on the KITTI dataset and demonstrate performance that improves upon the accuracy of the current state-of-the-art. We provide an implementation of our method at https://github.com/aushani/flsf. }
}
@InProceedings{majumdar18a,
title = {PAC-Bayes Control: Synthesizing Controllers that Provably Generalize to Novel Environments},
author = {Majumdar, Anirudha and Goldstein, Maxwell},
booktitle = {Proceedings of The 2nd Conference on Robot Learning},
pages = {293--305},
year = {2018},
editor = {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
volume = {87},
series = {Proceedings of Machine Learning Research},
address = {},
month = {29--31 Oct},
publisher = {PMLR},
youtube = {https://www.youtube.com/watch?v=zu_O-lW5X_8},
code = {https://github.com/irom-lab/PAC-Bayes-Control},
pdf = {http://proceedings.mlr.press/v87/majumdar18a/majumdar18a.pdf},
url = {http://proceedings.mlr.press/v87/majumdar18a.html},
abstract = {Our goal is to synthesize controllers for robots that provably generalize well to novel environments given a dataset of example environments. The key technical idea behind our approach is to leverage tools from generalization theory in machine learning by exploiting a precise analogy (which we present in the form of a reduction) between robustness of controllers to novel environments and generalization of hypotheses in supervised learning. In particular, we utilize the Probably Approximately Correct (PAC)-Bayes framework, which allows us to obtain upper bounds (that hold with high probability) on the expected cost of (stochastic) controllers across novel environments. We propose control synthesis algorithms that explicitly seek to minimize this upper bound. The corresponding optimization problem can be solved efficiently using convex optimization (Relative Entropy Programming in particular) in the setting where we are optimizing over a finite control policy space. In the more general setting of continuously parameterized controllers, we minimize this upper bound using stochastic gradient descent. We present examples of our approach in the context of obstacle avoidance control with depth measurements. Our simulated examples demonstrate the potential of our approach to provide strong generalization guarantees on controllers for robotic systems with continuous state and action spaces, nonlinear dynamics, and partially observable state via sensor measurements.}
}
@InProceedings{tremblay18a,
title = {Deep Object Pose Estimation for Semantic Robotic Grasping of Household Objects},
author = {Tremblay, Jonathan and To, Thang and Sundaralingam, Balakumar and Xiang, Yu and Fox, Dieter and Birchfield, Stan},
booktitle = {Proceedings of The 2nd Conference on Robot Learning},
pages = {306--316},
year = {2018},
editor = {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
volume = {87},
series = {Proceedings of Machine Learning Research},
address = {},
month = {29--31 Oct},
publisher = {PMLR},
youtube = {},
code = {https://github.com/NVlabs/Deep_Object_Pose},
pdf = {http://proceedings.mlr.press/v87/tremblay18a/tremblay18a.pdf},
url = {http://proceedings.mlr.press/v87/tremblay18a.html},
abstract = {Using synthetic data for training deep neural networks for robotic manipulation holds the promise of an almost unlimited amount of pre-labeled training data, generated safely out of harm’s way. One of the key challenges of synthetic data, to date, has been to bridge the so-called reality gap, so that networks trained on synthetic data operate correctly when exposed to real-world data. We explore the reality gap in the context of 6-DoF pose estimation of known objects from a single RGB image. We show that for this problem the reality gap can be successfully spanned by a simple combination of domain randomized and photorealistic data. Using synthetic data generated in this manner, we introduce a one-shot deep neural network that is able to perform competitively against a state-of-the-art network trained on a combination of real and synthetic data. To our knowledge, this is the first deep network trained only on synthetic data that is able to achieve state-of-the-art performance on 6-DoF object pose estimation. Our network also generalizes better to novel environments including extreme lighting conditions, for which we show qualitative results. Using this network we demonstrate a real-time system estimating object poses with sufficient accuracy for real-world semantic grasping of known household objects in clutter by a real robot.}
}
@InProceedings{schenck18a,
title = {SPNets: Differentiable Fluid Dynamics for Deep Neural Networks},
author = {Schenck, Connor and Fox, Dieter},
booktitle = {Proceedings of The 2nd Conference on Robot Learning},
pages = {317--335},
year = {2018},
editor = {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
volume = {87},
series = {Proceedings of Machine Learning Research},
address = {},
month = {29--31 Oct},
publisher = {PMLR},
youtube = {https://youtu.be/hg_tXHLIZ6o},
code = {https://github.com/cschenck/SmoothParticleNets},
pdf = {http://proceedings.mlr.press/v87/schenck18a/schenck18a.pdf},
url = {http://proceedings.mlr.press/v87/schenck18a.html},
abstract = {In this paper we introduce Smooth Particle Networks (SPNets), a framework for integrating fluid dynamics with deep networks. SPNets adds two new layers to the neural network toolbox: ConvSP and ConvSDF, which enable computing physical interactions with unordered particle sets. We use these layers in combination with standard neural network layers to directly implement fluid dynamics inside a deep network, where the parameters of the network are the fluid parameters themselves (e.g., viscosity, cohesion, etc.). Because SPNets are implemented as a neural network, the resulting fluid dynamics are fully differentiable. We then show how this can be successfully used to learn fluid parameters from data, perform liquid control tasks, and learn policies to manipulate liquids. }
}
@InProceedings{bauza18a,
title = {A Data-Efficient Approach to Precise and Controlled Pushing},
author = {Bauza, Maria and Hogan, Francois R. and Rodriguez, Alberto},
booktitle = {Proceedings of The 2nd Conference on Robot Learning},
pages = {336--345},
year = {2018},
editor = {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
volume = {87},
series = {Proceedings of Machine Learning Research},
address = {},
month = {29--31 Oct},
publisher = {PMLR},
youtube = {https://www.youtube.com/watch?v=Z45O480pij0},
code = {},
pdf = {http://proceedings.mlr.press/v87/bauza18a/bauza18a.pdf},
url = {http://proceedings.mlr.press/v87/bauza18a.html},
abstract = {Decades of research in control theory have shown that simple controllers, when provided with timely feedback, can control complex systems. Pushing is an example of a complex mechanical system that is difficult to model accurately due to unknown system parameters such as coefficients of friction and pressure distributions. In this paper, we explore the data-complexity required for controlling, rather than modeling, such a system. Results show that a model-based control approach, where the dynamical model is learned from data, is capable of performing complex pushing trajectories with a minimal amount of training data (<10 data points). The dynamics of pushing interactions are modeled using a Gaussian process (GP) and are leveraged within a model predictive control approach that linearizes the GP and imposes actuator and task constraints for a planar manipulation task. }
}
@InProceedings{bruce18a,
title = {Learning Deployable Navigation Policies at Kilometer Scale from a Single Traversal},
author = {Bruce, Jake and Sünderhauf, Niko and Mirowski, Piotr and Hadsell, Raia and Milford, Michael},
booktitle = {Proceedings of The 2nd Conference on Robot Learning},
pages = {346--361},
year = {2018},
editor = {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
volume = {87},
series = {Proceedings of Machine Learning Research},
address = {},
month = {29--31 Oct},
publisher = {PMLR},
youtube = {},
code = {},
pdf = {http://proceedings.mlr.press/v87/bruce18a/bruce18a.pdf},
url = {http://proceedings.mlr.press/v87/bruce18a.html},
abstract = {Model-free reinforcement learning has recently been shown to be effective at learning navigation policies from complex image input. However, these algorithms tend to require large amounts of interaction with the environment, which can be prohibitively costly to obtain on robots in the real world. We present an approach for efficiently learning goal-directed navigation policies on a mobile robot, from only a single coverage traversal of recorded data. The navigation agent learns an effective policy over a diverse action space in a large heterogeneous environment consisting of more than 2km of travel, through buildings and outdoor regions that collectively exhibit large variations in visual appearance, self-similarity, and connectivity. We compare pretrained visual encoders that enable precomputation of visual embeddings to achieve a throughput of tens of thousands of transitions per second at training time on a commodity desktop computer, allowing agents to learn from millions of trajectories of experience in a matter of hours. We propose multiple forms of computationally efficient stochastic augmentation to enable the learned policy to generalise beyond these precomputed embeddings, and demonstrate successful deployment of the learned policy on the real robot without fine tuning, despite environmental appearance differences at test time. The dataset and code required to reproduce these results and apply the technique to other datasets and robots is made publicly available at rl-navigation.github.io/deployable. }
}
@InProceedings{brown18a,
title = {Risk-Aware Active Inverse Reinforcement Learning},
author = {Brown, Daniel S. and Cui, Yuchen and Niekum, Scott},
booktitle = {Proceedings of The 2nd Conference on Robot Learning},
pages = {362--372},
year = {2018},
editor = {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
volume = {87},
series = {Proceedings of Machine Learning Research},
address = {},
month = {29--31 Oct},
publisher = {PMLR},
youtube = {},
code = {https://github.com/Pearl-UTexas/ActiveVaR},
pdf = {http://proceedings.mlr.press/v87/brown18a/brown18a.pdf},
url = {http://proceedings.mlr.press/v87/brown18a.html},
abstract = {Active learning from demonstration allows a robot to query a human for specific types of input to achieve efficient learning. Existing work has explored a variety of active query strategies; however, to our knowledge, none of these strategies directly minimize the performance risk of the policy the robot is learning. Utilizing recent advances in performance bounds for inverse reinforcement learning, we propose a risk-aware active inverse reinforcement learning algorithm that focuses active queries on areas of the state space with the potential for large generalization error. We show that risk-aware active learning outperforms standard active IRL approaches on gridworld, simulated driving, and table setting tasks, while also providing a performance-based stopping criterion that allows a robot to know when it has received enough demonstrations to safely perform a task. }
}
@InProceedings{florence18a,
title = {Dense Object Nets: Learning Dense Visual Object Descriptors By and For Robotic Manipulation},
author = {Florence, Peter R. and Manuelli, Lucas and Tedrake, Russ},
booktitle = {Proceedings of The 2nd Conference on Robot Learning},
pages = {373--385},
year = {2018},
editor = {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
volume = {87},
series = {Proceedings of Machine Learning Research},
address = {},
month = {29--31 Oct},
publisher = {PMLR},
youtube = {https://youtu.be/L5UW1VapKNE},
code = {https://github.com/RobotLocomotion/pytorch-dense-correspondence},
pdf = {http://proceedings.mlr.press/v87/florence18a/florence18a.pdf},
url = {http://proceedings.mlr.press/v87/florence18a.html},
abstract = {What is the right object representation for manipulation? We would like robots to visually perceive scenes and learn an understanding of the objects in them that (i) is task-agnostic and can be used as a building block for a variety of manipulation tasks, (ii) is generally applicable to both rigid and non-rigid objects, (iii) takes advantage of the strong priors provided by 3D vision, and (iv) is entirely learned from self-supervision. This is hard to achieve with previous methods: much recent work in grasping does not extend to grasping specific objects or other tasks, whereas task-specific learning may require many trials to generalize well across object configurations or other tasks. In this paper we present Dense Object Nets, which build on recent developments in self-supervised dense descriptor learning, as a consistent object representation for visual understanding and manipulation. We demonstrate they can be trained quickly (approximately 20 minutes) for a wide variety of previously unseen and potentially non-rigid objects. We additionally present novel contributions to enable multi-object descriptor learning, and show that by modifying our training procedure, we can either acquire descriptors which generalize across classes of objects, or descriptors that are distinct for each object instance. Finally, we demonstrate the novel application of learned dense descriptors to robotic manipulation. We demonstrate grasping of specific points on an object across potentially deformed object configurations, and demonstrate using class general descriptors to transfer specific grasps across objects in a class. }
}
@InProceedings{morere18a,
title = {Bayesian RL for Goal-Only Rewards},
author = {Morere, Philippe and Ramos, Fabio},
booktitle = {Proceedings of The 2nd Conference on Robot Learning},
pages = {386--398},
year = {2018},
editor = {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
volume = {87},
series = {Proceedings of Machine Learning Research},
address = {},
month = {29--31 Oct},
publisher = {PMLR},
youtube = {},
code = {https://github.com/PhilippeMorere/EMU-Q},
pdf = {http://proceedings.mlr.press/v87/morere18a/morere18a.pdf},
url = {http://proceedings.mlr.press/v87/morere18a.html},
abstract = {We address the challenging problem of reinforcement learning under goal-only rewards [1], where rewards are only non-zero when the goal is achieved. This reward definition alleviates the need for cumbersome reward engineering, making the reward formulation trivial. Classic exploration heuristics such as Boltzmann or epsilon-greedy exploration are highly inefficient in domains with goal-only rewards. We solve this problem by leveraging value-function posterior variance information to direct exploration toward regions of higher uncertainty. The proposed algorithm (EMU-Q) achieves data-efficient exploration and balances exploration and exploitation explicitly at the policy level, granting users more control over the learning process. We introduce general features that approximate kernels, greatly reducing the algorithm's complexity from O(N^3) in the number of transitions to O(M^2) in the number of features. We demonstrate that EMU-Q is competitive with other exploration techniques on a variety of continuous control tasks and on a robotic manipulator. }
}
@InProceedings{vinitsky18a,
title = {Benchmarks for reinforcement learning in mixed-autonomy traffic},
author = {Vinitsky, Eugene and Kreidieh, Aboudy and Flem, Luc Le and Kheterpal, Nishant and Jang, Kathy and Wu, Cathy and Wu, Fangyu and Liaw, Richard and Liang, Eric and Bayen, Alexandre M.},
booktitle = {Proceedings of The 2nd Conference on Robot Learning},
pages = {399--409},
year = {2018},
editor = {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
volume = {87},
series = {Proceedings of Machine Learning Research},
address = {},
month = {29--31 Oct},
publisher = {PMLR},
youtube = {},
code = {},
pdf = {http://proceedings.mlr.press/v87/vinitsky18a/vinitsky18a.pdf},
url = {http://proceedings.mlr.press/v87/vinitsky18a.html},
abstract = {We release new benchmarks in the use of deep reinforcement learning (RL) to create controllers for mixed-autonomy traffic, where connected and autonomous vehicles (CAVs) interact with human drivers and infrastructure. Benchmarks, such as MuJoCo or the Arcade Learning Environment, have spurred new research by enabling researchers to effectively compare their results so that they can focus on algorithmic improvements and control techniques rather than system design. To promote similar advances in traffic control via RL, we propose four benchmarks, based on three new traffic scenarios, illustrating distinct reinforcement learning problems with applications to mixed-autonomy traffic. We provide an introduction to each control problem, an overview of their MDP structures, and preliminary performance results from commonly used RL algorithms. For reproducibility, the benchmarks, reference implementations, and tutorials are available at https://github.com/flow-project/flow.}
}
@InProceedings{wang18a,
title = {Intervention Aided Reinforcement Learning for Safe and Practical Policy Optimization in Navigation},
author = {Wang, Fan and Zhou, Bo and Chen, Ke and Fan, Tingxiang and Zhang, Xi and Li, Jiangyong and Tian, Hao and Pan, Jia},
booktitle = {Proceedings of The 2nd Conference on Robot Learning},
pages = {410--421},
year = {2018},
editor = {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
volume = {87},
series = {Proceedings of Machine Learning Research},
address = {},
month = {29--31 Oct},
publisher = {PMLR},
youtube = {https://youtu.be/jdMntfs9dYQ},
code = {https://github.com/PaddlePaddle/PARL/tree/develop/parl/examples/IARL/},
pdf = {http://proceedings.mlr.press/v87/wang18a/wang18a.pdf},
url = {http://proceedings.mlr.press/v87/wang18a.html},
abstract = {Combining deep neural networks with reinforcement learning has shown great potential in next-generation intelligent control. However, there are challenges in terms of safety and cost in practical applications. In this paper, we propose the Intervention Aided Reinforcement Learning (IARL) framework, which utilizes human-intervened robot-environment interaction to improve the policy. We use an unmanned aerial vehicle (UAV) as the test platform and build neural network policies that map sensor readings to control signals on the UAV. Our experiment scenarios cover both simulation and reality. We show that our approach substantially reduces human intervention and improves performance in autonomous navigation, while ensuring safety and keeping the training cost acceptable. }
}
@InProceedings{cheng18a,
title = {Reinforcement Learning of Active Vision for Manipulating Objects under Occlusions},
author = {Cheng, Ricson and Agarwal, Arpit and Fragkiadaki, Katerina},
booktitle = {Proceedings of The 2nd Conference on Robot Learning},
pages = {422--431},
year = {2018},
editor = {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
volume = {87},
series = {Proceedings of Machine Learning Research},
address = {},
month = {29--31 Oct},
publisher = {PMLR},
youtube = {},
code = {https://github.com/ricsonc/ActiveVisionManipulation},
pdf = {http://proceedings.mlr.press/v87/cheng18a/cheng18a.pdf},
url = {http://proceedings.mlr.press/v87/cheng18a.html},
abstract = {We consider artificial agents that learn to jointly control their gripper and camera in order to learn manipulation policies via reinforcement learning in the presence of occlusions from distractor objects. Distractors often occlude the object of interest and cause it to disappear from the field of view. We propose hand/eye controllers that learn to move the camera to keep the object within the field of view and visible, in coordination with manipulating it to achieve the desired goal, e.g., pushing it to a target location. We incorporate structural biases of object-centric attention within our actor-critic architectures, which our experiments suggest are key to good performance. Our results further highlight the importance of curriculum with respect to environment difficulty. The resulting active vision / manipulation policies outperform static camera setups for a variety of cluttered environments. }
}
@InProceedings{gehring18a,
title = {Adaptable replanning with compressed linear action models for learning from demonstrations},
author = {Gehring, Clement and Kaelbling, Leslie Pack and Lozano-Perez, Tomas},
booktitle = {Proceedings of The 2nd Conference on Robot Learning},
pages = {432--442},
year = {2018},
editor = {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
volume = {87},
series = {Proceedings of Machine Learning Research},
address = {},
month = {29--31 Oct},
publisher = {PMLR},
youtube = {},
code = {https://github.mit.edu/gehring/CLAM-SPOPT},
pdf = {http://proceedings.mlr.press/v87/gehring18a/gehring18a.pdf},
url = {http://proceedings.mlr.press/v87/gehring18a.html},
abstract = {We propose an adaptable and efficient model-based reinforcement learning approach well suited for continuous domains with sparse samples, a setting often encountered when learning from demonstrations. The flexibility of our method originates from the approximate transition models, estimated from data, and from the proposed online replanning approach. Together, these components allow for immediate adaptation to a new task, given in the form of a reward function. The efficiency of our method comes from two approximations. First, rather than representing a complete distribution over the results of taking an action, which is difficult in continuous state spaces, it learns a linear model of the expected transition for each action. Second, it uses a novel strategy for compressing these linear action models, which significantly reduces the space and time needed to learn the models and supports efficient online generation of open-loop plans. The effectiveness of these methods is demonstrated in a simulated driving domain with a 20-dimensional continuous input space. }
}
@InProceedings{senanayake18a,
title = {Automorphing Kernels for Nonstationarity in Mapping Unstructured Environments},
author = {Senanayake, Ransalu and Tompkins, Anthony and Ramos, Fabio},
booktitle = {Proceedings of The 2nd Conference on Robot Learning},
pages = {443--455},
year = {2018},
editor = {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
volume = {87},
series = {Proceedings of Machine Learning Research},
address = {},
month = {29--31 Oct},
publisher = {PMLR},
youtube = {https://youtu.be/IirUlJS49Yw},
code = {https://github.com/MushroomHunting/autormorphing-kernels},
pdf = {http://proceedings.mlr.press/v87/senanayake18a/senanayake18a.pdf},
url = {http://proceedings.mlr.press/v87/senanayake18a.html},
abstract = {In order to deploy robots in previously unseen and unstructured environments, they should have the capacity to learn on their own and adapt to changes in the environment. For instance, in mobile robotics, a robot should be able to learn a map of the environment from data alone, without human intervention to tune the parameters of the model. To this end, leveraging the latest developments in automated machine learning (AutoML), probabilistic programming, and statistical sampling, we formulate a Bayesian framework to learn all parameters of the map under the Hilbert mapping framework, which represents the occupancy of the environment as a continuous function of location. Crucially, in this way the robot can learn the optimal shapes and placements of the kernels in Hilbert maps merely by embedding high-level human knowledge of the problem through prior probability distributions. Since the proposed framework employs stochastic variational inference, the model learns tens of thousands of parameters within minutes in both big-data and data-scarce regimes. Experiments conducted on simulated and real-world datasets in static and dynamic environments indicate that the proposed method significantly outperforms existing stationary occupancy mapping techniques, verifying the importance of learning the interdependent position-shape relationship of kernels alongside the other model parameters. }
}
@InProceedings{sarlin18a,
title = {Leveraging Deep Visual Descriptors for Hierarchical Efficient Localization},
author = {Sarlin, Paul-Edouard and Debraine, Frederic and Dymczyk, Marcin and Siegwart, Roland},
booktitle = {Proceedings of The 2nd Conference on Robot Learning},
pages = {456--465},
year = {2018},
editor = {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},
volume = {87},
series = {Proceedings of Machine Learning Research},
address = {},
month = {29--31 Oct},
publisher = {PMLR},
youtube = {https://www.youtube.com/watch?v=8cg697oLUtg},
code = {https://github.com/ethz-asl/hierarchical_loc},
pdf = {http://proceedings.mlr.press/v87/sarlin18a/sarlin18a.pdf},
url = {http://proceedings.mlr.press/v87/sarlin18a.html},
abstract = {Many robotics applications require precise pose estimates despite operating in large and changing environments. This can be addressed by visual localization, using a pre-computed 3D model of the surroundings. The pose estimation then amounts to finding correspondences between 2D keypoints in a query image and 3D points in the model using local descriptors. However, computational power is often limited on robotic platforms, making this task challenging in large-scale environments. Binary feature descriptors significantly speed up this 2D-3D matching and have become popular in the robotics community, but they also strongly impair robustness to perceptual aliasing and to changes in viewpoint, illumination, and scene structure. In this work, we propose to leverage recent advances in deep learning to perform efficient hierarchical localization. We first localize at the map level using learned image-wide global descriptors, and subsequently estimate a precise pose from 2D-3D matches computed in the candidate places only. This restricts the local search and thus allows us to efficiently exploit powerful non-binary descriptors that are usually dismissed on resource-constrained devices. Our approach results in state-of-the-art localization performance while running in real-time on a popular mobile platform, enabling new prospects for robotics research.}
}
@InProceedings{richards18a,
title = {The Lyapunov Neural Network: Adaptive Stability Certification for Safe Learning of Dynamical Systems},
author = {Richards, Spencer M. and Berkenkamp, Felix and Krause, Andreas},
booktitle = {Proceedings of The 2nd Conference on Robot Learning},
pages = {466--476},
year = {2018},
editor = {Billard, Aude and Dragan, Anca and Peters, Jan and Morimoto, Jun},