Publications – Laboratory for Machine Intelligence

2022

Tomašević, Darian; Peer, Peter; Solina, Franc; Jaklič, Aleš; Štruc, Vitomir

Reconstructing Superquadrics from Intensity and Color Images Journal Article

In: Sensors, vol. 22, iss. 14, no. 5332, 2022.

Abstract | Links | BibTeX | Tags: arrs, CNN, depth data, depth estimation, depth sensing, intensity images, superquadric, superquadrics

@comment{TomasevicSensors: journal article in Sensors, volume 22, issue 14, article number 5332 (issue and article number match the URL path and the DOI suffix). The DOI is stored bare, without a resolver prefix. pubstate/tppubtype are kept as exported by the publication manager.}
@article{TomasevicSensors,
  author     = {Tomašević, Darian and Peer, Peter and Solina, Franc and Jaklič, Aleš and Štruc, Vitomir},
  title      = {Reconstructing Superquadrics from Intensity and Color Images},
  journal    = {Sensors},
  volume     = {22},
  number     = {14},
  pages      = {5332},
  year       = {2022},
  date       = {2022-07-16},
  doi        = {10.3390/s22145332},
  url        = {https://www.mdpi.com/1424-8220/22/14/5332/pdf?version=1658380987},
  abstract   = {The task of reconstructing 3D scenes based on visual data represents a longstanding problem in computer vision. Common reconstruction approaches rely on the use of multiple volumetric primitives to describe complex objects. Superquadrics (a class of volumetric primitives) have shown great promise due to their ability to describe various shapes with only a few parameters. Recent research has shown that deep learning methods can be used to accurately reconstruct random superquadrics from both 3D point cloud data and simple depth images. In this paper, we extended these reconstruction methods to intensity and color images. Specifically, we used a dedicated convolutional neural network (CNN) model to reconstruct a single superquadric from the given input image. We analyzed the results in a qualitative and quantitative manner, by visualizing reconstructed superquadrics as well as observing error and accuracy distributions of predictions. We showed that a CNN model designed around a simple ResNet backbone can be used to accurately reconstruct superquadrics from images containing one object, but only if one of the spatial parameters is fixed or if it can be determined from other image characteristics, e.g., shadows. Furthermore, we experimented with images of increasing complexity, for example, by adding textures, and observed that the results degraded only slightly. In addition, we show that our model outperforms the current state-of-the-art method on the studied task. Our final result is a highly accurate superquadric reconstruction model, which can also reconstruct superquadrics from real images of simple objects, without additional training.},
  keywords   = {arrs, CNN, depth data, depth estimation, depth sensing, intensity images, superquadric, superquadrics},
  pubstate   = {published},
  tppubtype  = {article}
}

2019

Križaj, Janez; Peer, Peter; Štruc, Vitomir; Dobrišek, Simon

Simultaneous multi-descent regression and feature learning for landmarking in depth image Journal Article

In: Neural Computing and Applications, 2019, ISSN: 0941-0643.

Abstract | Links | BibTeX | Tags: 3d, biometrics, depth data, face alignment, face analysis, landmarking

@comment{Krizaj3Docalization: journal article in Neural Computing and Applications (2019). The serial identifier 0941-0643 is stored in issn (it identifies the journal, not a book). The DOI is stored bare, without a resolver prefix. pubstate/tppubtype are kept as exported by the publication manager.}
@article{Krizaj3Docalization,
  author     = {Križaj, Janez and Peer, Peter and Štruc, Vitomir and Dobrišek, Simon},
  title      = {Simultaneous multi-descent regression and feature learning for landmarking in depth image},
  journal    = {Neural Computing and Applications},
  year       = {2019},
  date       = {2019-10-01},
  issn       = {0941-0643},
  doi        = {10.1007/s00521-019-04529-7},
  url        = {https://link.springer.com/content/pdf/10.1007%2Fs00521-019-04529-7.pdf},
  abstract   = {Face alignment (or facial landmarking) is an important task in many face-related applications, ranging from registration, tracking, and animation to higher-level classification problems such as face, expression, or attribute recognition. While several solutions have been presented in the literature for this task so far, reliably locating salient facial features across a wide range of poses still remains challenging. To address this issue, we propose in this paper a novel method for automatic facial landmark localization in 3D face data designed specifically to address appearance variability caused by significant pose variations. Our method builds on recent cascaded regression-based methods to facial landmarking and uses a gating mechanism to incorporate multiple linear cascaded regression models each trained for a limited range of poses into a single powerful landmarking model capable of processing arbitrary-posed input data. We develop two distinct approaches around the proposed gating mechanism: (1) the first uses a gated multiple ridge descent mechanism in conjunction with established (hand-crafted) histogram of gradients features for face alignment and achieves state-of-the-art landmarking performance across a wide range of facial poses and (2) the second simultaneously learns multiple-descent directions as well as binary features that are optimal for the alignment tasks and in addition to competitive landmarking results also ensures extremely rapid processing. We evaluate both approaches in rigorous experiments on several popular datasets of 3D face images, i.e., the FRGCv2 and Bosphorus 3D face datasets and image collections F and G from the University of Notre Dame. The results of our evaluation show that both approaches compare favorably to the state-of-the-art, while exhibiting considerable robustness to pose variations.},
  keywords   = {3d, biometrics, depth data, face alignment, face analysis, landmarking},
  pubstate   = {published},
  tppubtype  = {article}
}