@Misc{Hangl-2018-arXiv:1709.06049,
title = {{A novel Skill-based Programming Paradigm based on Autonomous
Playing and Skill-centric Testing}},
author = {Hangl, Simon and Mennel, Andreas and Piater, Justus H.},
year = 2017,
month = 9,
day = 18,
howpublished = {arXiv:1709.06049},
url = {https://iis.uibk.ac.at/public/papers/Hangl-2018-arXiv:1709.06049.pdf},
abstract = {
We introduce a novel paradigm for robot programming
with which we aim to make robot programming
more accessible for inexperienced users. In order to do so, we
incorporate two major components in one single framework:
autonomous skill acquisition by robotic playing and visual programming.
Simple robot program skeletons solving a task for one
specific situation, so-called basic behaviours, are provided by the
user. The robot then learns how to solve the same task in many
different situations by autonomous playing, which reduces the
barrier for inexperienced robot programmers. Programmers
can use a mix of visual programming and kinesthetic teaching
in order to provide these simple program skeletons. The robot
program can be implemented interactively by programming
parts with visual programming and kinesthetic teaching. We
further integrate work on experience-based skill-centric robot
software testing which enables the user to continuously test
implemented skills without having to deal with the details of
specific components.
}
}
@Article{Wachter-2018-RAS,
title = {{Integrating Multi-Purpose Natural Language Understanding,
Robot's Memory, and Symbolic Planning for Task Execution in
Humanoid Robots}},
author = {W\"{a}chter, Mirko and Ovchinnikova, Ekaterina and Wittenbeck, Valerij and Kaiser, Peter and Szedmak, Sandor and Mustafa, Wail and Kraft, Dirk and Kr\"{u}ger, Norbert and Piater, Justus and Asfour, Tamim},
journal = {{Robotics and Autonomous Systems}},
year = 2018,
month = 1,
volume = 99,
pages = {148--165},
publisher = {Elsevier},
doi = {10.1016/j.robot.2017.10.012},
url = {https://iis.uibk.ac.at/public/papers/Wachter-2018-RAS.pdf},
abstract = {We propose an approach for instructing a robot using
natural language to solve complex tasks in a dynamic
environment. We introduce a framework that allows a humanoid
robot to understand natural language, derive symbolic
representations of its sensorimotor experience, generate complex
plans according to the current world state, monitor plan
execution, replace missing objects, and suggest possible object
locations. The framework is implemented within the robot
development environment ArmarX and is based on the concept of
structural bootstrapping developed in the context of the
European project Xperience. We evaluate the framework on the
humanoid robot ARMAR-III in the context of two experiments: a
demonstration of the real execution of a complex task in the
kitchen environment on ARMAR-III and an experiment with
untrained users in a simulation environment.},
keywords = {structural bootstrapping, natural language understanding, planning, task execution, object replacement, humanoid robotics}
}
@InProceedings{Stabinger-2017-ICCV,
title = {{Evaluation of Deep Learning on an Abstract Image Classification Dataset}},
author = {Stabinger, Sebastian and Rodr\'{\i}guez-S\'{a}nchez, Antonio},
booktitle = {{Proceedings of the IEEE International Conference on Computer Vision Workshops}},
year = 2017,
month = 1,
day = 1,
pages = {2767--2772},
note = {Workshop on Mutual Benefits of Cognitive and
Computer Vision (MBCC)},
url = {https://arxiv.org/abs/1708.07770},
abstract = {
Convolutional Neural Networks have become state-of-the-art
methods for image classification over the last couple of
years. By now they perform better than human subjects on many
of the image classification datasets. Most of these datasets
are based on the notion of concrete classes (i.e. images are
classified by the type of object in the image). In this paper
we present a novel image classification dataset, using
abstract classes, which should be easy to solve for humans,
but variations of it are challenging for CNNs. The
classification performance of popular CNN architectures is
evaluated on this dataset and variations of the dataset that
might be interesting for further research are identified.
}
}
@Article{Hangl-2017-EI,
title = {{Autonomous robots: potential, advances and future direction}},
author = {Hangl, Simon and Ugur, Emre and Piater, Justus},
journal = {{e\&i Elektrotechnik und Informationstechnik}},
year = 2017,
month = 9,
day = 6,
volume = 134,
number = 6,
pages = {293--298},
publisher = {Springer},
doi = {10.1007/s00502-017-0516-0},
url = {https://iis.uibk.ac.at/public/papers/Hangl-2017-EI.pdf},
abstract = {Recent advances in machine learning, such as deep neural networks, have caused a huge boost in many different areas of artificial intelligence and robotics. These methods typically require a large corpus of well-prepared and labelled training data, which limits the applicability to robotics. In our opinion, a fundamental challenge in autonomous robotics is to design systems that are simple enough to solve simple tasks. These systems should grow in complexity step by step and more complex models like neural networks should be trained by re-using the information acquired over the robot's lifetime. Ultimately, high-level abstractions should be generated from these models, bridging the gap from low-level sensor data to high-level AI systems. We present first steps into this direction and analyse their limitations and future extensions in order to achieve the goal of designing autonomous agents.}
}
@InProceedings{Shukla-2017-ASL4GUP,
title = {{Supervised learning of gesture-action associations for human-robot collaboration}},
author = {Shukla, Dadhichi and Erkent, \"{O}zg\"{u}r and Piater, Justus},
booktitle = {{1st International Workshop on Adaptive Shot Learning for Gesture Understanding and Production}},
year = 2017,
month = 05,
pages = {5--10},
publisher = {IEEE},
doi = {10.1109/FG.2017.97},
note = {Workshop at the 12th IEEE International Conference on Automatic Face and Gesture Recognition (FG 2017), Washington D.C., USA},
url = {https://iis.uibk.ac.at/public/papers/Shukla-2017-ASL4GUP.pdf},
abstract = {As human-robot collaboration methodologies develop, robots need to adapt fast learning methods in domestic scenarios. The paper presents a novel approach to learning associations between human hand gestures and the robot's manipulation actions. The role of the robot is to operate as an assistant to the user. In this context we propose a supervised learning framework to explore the gesture-action space for a human-robot collaboration scenario. The framework enables the robot to learn gesture-action associations on the fly while performing the task with the user; an example of zero-shot learning. We discuss the effect of accurate gesture detection on performing the task. The accuracy of the gesture detection system directly accounts for the amount of effort put in by the user and the number of actions performed by the robot.}
}
@Article{Savarimuthu-2017-TSMCS,
title = {{Teaching a Robot the Semantics of Assembly Tasks}},
author = {Savarimuthu, Thiusius and Buch, Anders and Schlette, Christian and Wantia, Nils and Rossmann, J\"{u}rgen and Mart\'{\i}nez, David and Aleny\`{a}, Guillem and Torras, Carme and Ude, Ale\v{s} and Nemec, Bojan and Kramberger, Alja\v{z} and W\"{o}rg\"{o}tter, Florentin and Aksoy, Eren and Papon, Jeremie and Haller, Simon and Piater, Justus and Kr\"{u}ger, Norbert},
journal = {{IEEE Transactions on Systems, Man,
and Cybernetics: Systems}},
year = 2017,
month = 01,
volume = {PP},
number = 99,
pages = {1--23},
publisher = {IEEE Systems, Man, and Cybernetics Society},
doi = {10.1109/TSMC.2016.2635479},
url = {https://iis.uibk.ac.at/public/papers/Savarimuthu-2017-TSMCS.pdf},
abstract = {
We present a three-level cognitive system in a learning by demonstration context.
The system allows for learning and transfer on the sensorimotor level as well as
the planning level. The fundamentally different data structures associated with
these two levels are connected by an efficient mid-level representation based on
so-called "semantic event chains." We describe details of the representations and
quantify the effect of the associated learning procedures for each level under
different amounts of noise. Moreover, we demonstrate the performance of the overall
system by three demonstrations that have been performed at a project review. The
described system has a technology readiness level (TRL) of 4, which in an ongoing
follow-up project will be raised to TRL 6.
},
keywords = {robot sensing systems, planning, vision, learning by demonstration (LbD), object recognition, robotic assembly}
}
@Book{Piater-2013-OAGM,
title = {{Proceedings of the 37th Annual Workshop of the Austrian
Association for Pattern Recognition (\"{O}AGM/AAPR)}},
editor = {Piater, Justus and Rodr\'{\i}guez S\'{a}nchez, Antonio},
year = 2013,
month = 4,
day = 6,
url = {https://arxiv.org/abs/1304.1876},
abstract = {This volume represents the proceedings of the 37th
Annual Workshop of the Austrian Association for Pattern
Recognition (\"{O}AGM/AAPR), held May 23--24, 2013, in
Innsbruck, Austria.}
}
@InCollection{Rodriguez-2012-ShapeTunedComputations,
title = {{The roles of endstopped and curvature tuned computations
in a hierarchical representation of 2D shape}},
author = {Rodr\'{\i}guez-S\'{a}nchez, Antonio and Tsotsos, John},
booktitle = {{Developing and Applying Biologically-inspired
Vision Systems: Interdisciplinary concepts}},
editor = {Pomplun, M. and Suzuki, J.},
year = 2012,
month = 11,
pages = {184--207},
publisher = {IGI Global},
doi = {10.4018/978-1-4666-2539-6.ch008},
url = {http://dx.doi.org/10.4018/978-1-4666-2539-6.ch008},
abstract = {Computational models of visual processes are of
interest in fields such as cybernetics, robotics, computer
vision and others. This chapter argues for the importance of
intermediate representation layers in the visual cortex that
have direct impact on the next generation of object recognition
strategies in computer vision. Biological inspiration - and even
biological realism - is currently of great interest in the
computer vision community. We propose that endstopping and
curvature cells are of great importance for shape selectivity
and show how their combination can lead to shape selective
neurons, providing an approach that does not require learning
between early stages based on Gabor or Difference of Gaussian
filters and later stages closer to object
representations.}
}
@InCollection{Detry-2010-MotorInteractionLearning,
title = {{Learning Continuous Grasp Affordances by
Sensorimotor Exploration}},
author = {Detry, Renaud and Ba\c{s}eski, Emre and Popovi\'{c}, Mila and Touati, Younes and Kr\"{u}ger, Norbert and Kroemer, Oliver and Peters, Jan and Piater, Justus},
booktitle = {{From Motor to Interaction Learning in
Robots}},
editor = {Sigaud, Olivier and Peters, Jan},
year = 2010,
month = 1,
day = 1,
volume = {264/2010},
pages = {451--465},
publisher = {Springer},
address = {{Berlin, Heidelberg, New York}},
doi = {10.1007/978-3-642-05181-4_19},
url = {https://iis.uibk.ac.at/public/papers/Detry-2010-MotorInteractionLearning.pdf},
abstract = {We develop means of learning and representing object
grasp affordances probabilistically. By grasp affordance, we
refer to an entity that is able to assess whether a given
relative object-gripper configuration will yield a stable
grasp. These affordances are represented with grasp densities,
continuous probability density functions defined on the space of
3D positions and orientations. Grasp densities are registered
with a visual model of the object they characterize. They are
exploited by aligning them to a target object using visual pose
estimation. Grasp densities are refined through experience: A
robot ``plays'' with an object by executing grasps
drawn randomly from the object's grasp density. The robot then
uses the outcomes of these grasps to build a richer density
through an importance sampling mechanism. Initial grasp
densities, called hypothesis densities, are bootstrapped from
grasps collected using a motion capture system, or from grasps
generated from the visual model of the object. Refined
densities, called empirical densities, represent affordances
that have been confirmed through physical experience. The
applicability of our method is demonstrated by producing
empirical densities for two objects with a real robot and its
3-finger hand. Hypothesis densities are created from visual cues
and human demonstration.}
}
@Book{Fritz-2009-ICVS,
title = {{Computer Vision Systems: Seventh
International Conference}},
editor = {Fritz, Mario and Schiele, Bernt and Piater, Justus},
year = 2009,
month = 10,
volume = 5815,
publisher = {Springer},
address = {{Berlin, Heidelberg, New York}},
doi = {10.1007/978-3-642-04667-4},
note = {October 13--15, Li\`{e}ge, Belgium},
url = {http://dx.doi.org/10.1007/978-3-642-04667-4},
series = {LNCS},
keywords = {computer vision}
}
@Article{Crowley-2004-MVA,
title = {{Introduction to the special issue:
International Conference on Vision Systems}},
author = {Crowley, James and Piater, Justus},
journal = {{Machine Vision and Applications}},
year = 2004,
volume = 16,
number = 1,
pages = {4--5},
publisher = {Springer},
address = {{Berlin, Heidelberg, New York}},
doi = {10.1007/s00138-004-0158-1},
url = {http://dx.doi.org/10.1007/s00138-004-0158-1},
note = {editorial}
}
@Unpublished{Piater-2002-EM,
title = {{Mixture Models and
Expectation-Maximization}},
author = {Piater, Justus},
year = 2002,
note = {Tutorial article, evolving from a lecture given at
ENSIMAG, INPG, Grenoble, France},
url = {http://www.montefiore.ulg.ac.be/~piater/courses/EM.pdf},
keywords = {maximum-likelihood estimation}
}
@PhdThesis{Piater-2001-diss,
title = {{Visual Feature Learning}},
author = {Piater, Justus},
year = 2001,
month = 2,
school = {Computer Science Department, University of
Massachusetts Amherst},
url = {https://iis.uibk.ac.at/public/papers/Piater-2001-diss.pdf},
series = {Doctoral Dissertation},
abstract = {Humans learn robust and efficient strategies for
visual tasks through interaction with their environment. In
contrast, most current computer vision systems have no such
learning capabilities. Motivated by insights from psychology and
neurobiology, I combine machine learning and computer vision
techniques to develop algorithms for visual learning in
open-ended tasks. Learning is incremental and makes only weak
assumptions about the task environment. I begin by introducing
an infinite feature space that contains combinations of local
edge and texture signatures not unlike those represented in the
human visual cortex. Such features can express distinctions over
a wide range of specificity or generality. The learning
objective is to select a small number of highly useful features
from this space in a task-driven manner. Features are learned by
general-to-specific random sampling. This is illustrated on two
different tasks, for which I give very similar learning
algorithms based on the same principles and the same feature
space. The first system incrementally learns to discriminate
visual scenes. Whenever it fails to recognize a scene, new
features are sought that improve discrimination. Highly
distinctive features are incorporated into dynamically updated
Bayesian network classifiers. Even after all recognition errors
have been eliminated, the system can continue to learn better
features, resembling mechanisms underlying human visual
expertise. This tends to improve classification accuracy on
independent test images, while reducing the number of features
used for recognition. In the second task, the visual system
learns to anticipate useful hand configurations for a
haptically-guided dextrous robotic grasping system, much like
humans do when they pre-shape their hand during a reach. Visual
features are learned that correlate reliably with the
orientation of the hand. A finger configuration is recommended
based on the expected grasp quality achieved by each
configuration. The results demonstrate how a largely
uncommitted visual system can adapt and specialize to solve
particular visual tasks. Such visual learning systems have great
potential in application scenarios that are hard to model in
advance, e.g. autonomous robots operating in natural
environments. Moreover, this dissertation contributes to our
understanding of human visual learning by providing a
computational model of task-driven development of feature
detectors.}
}
@TechReport{Piater-1999-LVDS,
title = {{Toward Learning Visual Discrimination
Strategies}},
author = {Piater, Justus and Grupen, Roderic},
year = 1998,
month = 12,
number = {99-01},
institution = {Computer Science Department, University of
Massachusetts Amherst},
type = {Technical Report}
}