2023
Journal Articles
Martin Pernuš; Vitomir Štruc; Simon Dobrišek
MaskFaceGAN: High Resolution Face Editing With Masked GAN Latent Code Optimization Journal Article
In: IEEE Transactions on Image Processing, 2023.
@article{MaskFaceGAN,
title = {MaskFaceGAN: High Resolution Face Editing With Masked GAN Latent Code Optimization},
author = {Martin Pernuš and Vitomir Štruc and Simon Dobrišek},
url = {https://lmi.fe.uni-lj.si/wp-content/uploads/2023/02/MaskFaceGAN_compressed.pdf
https://arxiv.org/pdf/2103.11135.pdf},
year = {2023},
date = {2023-01-02},
journal = {IEEE Transactions on Image Processing},
abstract = {Face editing represents a popular research topic within the computer vision and image processing communities. While significant progress has been made recently in this area, existing solutions: (i) are still largely focused on low-resolution images, (ii) often generate editing results with visual artefacts, or (iii) lack fine-grained control over the editing procedure and alter multiple (entangled) attributes simultaneously, when trying to generate the desired facial semantics. In this paper, we aim to address these issues through a novel editing approach, called MaskFaceGAN, that focuses on local attribute editing. The proposed approach is based on an optimization procedure that directly optimizes the latent code of a pre-trained (state-of-the-art) Generative Adversarial Network (i.e., StyleGAN2) with respect to several constraints that ensure: (i) preservation of relevant image content, (ii) generation of the targeted facial attributes, and (iii) spatially-selective treatment of local image regions. The constraints are enforced with the help of a (differentiable) attribute classifier and face parser that provide the necessary reference information for the optimization procedure.
MaskFaceGAN is evaluated in extensive experiments on the CelebA-HQ, Helen and SiblingsDB-HQf datasets and in comparison with several state-of-the-art techniques from the literature. Our experimental results show that the proposed approach is able to edit face images with respect to several local facial attributes with unprecedented image quality and at high resolutions (1024x1024), while exhibiting considerably fewer problems with attribute entanglement than competing solutions. The source code is publicly available from: https://github.com/MartinPernus/MaskFaceGAN.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
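Illustrative sketch: the abstract's core idea, optimizing a GAN latent code under content-preservation and attribute constraints with a spatial mask, can be outlined in a few lines of PyTorch. Everything below (generator, classifier, mask, loss weights) is a dummy stand-in under assumed names, not the released MaskFaceGAN code; see the GitHub link above for the actual implementation.

import torch

# Hypothetical stand-ins: the real method uses a pre-trained StyleGAN2
# generator, a differentiable attribute classifier and a face parser.
G = lambda w: torch.tanh(w.view(1, 3, 8, 8))              # latent code -> image
C = lambda x: torch.sigmoid(x.mean())                      # target-attribute probability
image = torch.rand(1, 3, 8, 8)                             # input face image
mask = torch.zeros_like(image)
mask[..., 2:6, 2:6] = 1.0                                  # region to edit (parser output in the real method)

w = torch.randn(1, 192, requires_grad=True)                # latent code being optimized
opt = torch.optim.Adam([w], lr=0.05)

for _ in range(200):
    x = G(w)
    loss_rec = ((1 - mask) * (x - image)).pow(2).mean()    # (i) preserve unedited content
    loss_attr = -torch.log(C(mask * x) + 1e-8)             # (ii) generate the target attribute locally
    loss = loss_rec + 0.1 * loss_attr                      # 0.1: assumed loss weighting
    opt.zero_grad()
    loss.backward()
    opt.step()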
Fevziye Irem Eyiokur; Alperen Kantarci; Mustafa Ekrem Erakin; Naser Damer; Ferda Ofli; Muhammad Imran; Janez Križaj; Albert Ali Salah; Alexander Waibel; Vitomir Štruc; Hazim K. Ekenel
A Survey on Computer Vision based Human Analysis in the COVID-19 Era Journal Article
In: Image and Vision Computing, vol. 130, no. 104610, pp. 1-19, 2023.
@article{IVC2023,
title = {A Survey on Computer Vision based Human Analysis in the COVID-19 Era},
author = {Fevziye Irem Eyiokur and Alperen Kantarci and Mustafa Ekrem Erakin and Naser Damer and Ferda Ofli and Muhammad Imran and Janez Križaj and Albert Ali Salah and Alexander Waibel and Vitomir Štruc and Hazim K. Ekenel },
url = {https://lmi.fe.uni-lj.si/wp-content/uploads/2023/01/FG4COVID19_PAPER_compressed.pdf
https://authors.elsevier.com/a/1gKOyxnVK7RBS},
doi = {https://doi.org/10.1016/j.imavis.2022.104610},
year = {2023},
date = {2023-01-01},
journal = {Image and Vision Computing},
volume = {130},
number = {104610},
pages = {1-19},
abstract = {The emergence of COVID-19 has had a global and profound impact, not only on society as a whole, but also on the lives of individuals. Various prevention measures were introduced around the world to limit the transmission of the disease, including
face masks, mandates for social distancing and regular disinfection in public spaces, and the use of screening applications. These developments also triggered the need for novel and improved computer vision techniques capable of (i) providing support to the prevention measures through an automated analysis of visual data, on the one hand, and (ii) facilitating normal operation of existing vision-based services, such as biometric authentication schemes, on the other. Especially important here are computer vision techniques that focus on the analysis of people and faces in visual data and have been affected the most by the partial occlusions introduced by the mandates for facial masks.
Such computer vision based human analysis techniques include face and face-mask detection approaches, face recognition techniques, crowd counting solutions, age and expression estimation procedures, models for detecting face-hand interactions and many others, and have seen considerable attention over recent years. The goal of this survey is to provide an introduction to the problems induced by COVID-19 into such research and to present a comprehensive review of the work done in the computer vision based human analysis field. Particular attention is paid to the impact of facial masks on the performance of various methods and recent solutions to mitigate this problem. Additionally, a detailed review of existing datasets useful for the development and evaluation of methods for COVID-19 related applications is also provided. Finally, to help advance the field further, a discussion on the main open challenges and future research direction is given at the end of the survey. This work is intended to have a broad appeal and be useful not only for computer vision researchers but also the general public.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Anja Hrovatič; Peter Peer; Vitomir Štruc; Žiga Emeršič
Efficient ear alignment using a two-stack hourglass network Journal Article
In: IET Biometrics, pp. 1-14, 2023, ISSN: 2047-4938.
@article{UhljiIETZiga,
title = {Efficient ear alignment using a two-stack hourglass network},
author = {Anja Hrovatič and Peter Peer and Vitomir Štruc and Žiga Emeršič},
url = {https://ietresearch.onlinelibrary.wiley.com/doi/epdf/10.1049/bme2.12109},
doi = {10.1049/bme2.12109},
issn = {2047-4938},
year = {2023},
date = {2023-01-01},
journal = {IET Biometrics},
pages = {1-14},
abstract = {Ear images have been shown to be a reliable modality for biometric recognition with desirable characteristics, such as high universality, distinctiveness, measurability and permanence. While a considerable amount of research has been directed towards ear recognition techniques, the problem of ear alignment is still under-explored in the open literature. Nonetheless, accurate alignment of ear images, especially in unconstrained acquisition scenarios, where the ear appearance is expected to vary widely due to pose and viewpoint variations, is critical for the performance of all downstream tasks, including ear recognition. Here, the authors address this problem and present a framework for ear alignment that relies on a two-step procedure: (i) automatic landmark detection and (ii) fiducial point alignment. For the first (landmark detection) step, the authors implement and train a Two-Stack Hourglass model (2-SHGNet) capable of accurately predicting 55 landmarks on diverse ear images captured in uncontrolled conditions. For the second (alignment) step, the authors use the Random Sample Consensus (RANSAC) algorithm to align the estimated landmark/fiducial points with a pre-defined ear shape (i.e. a collection of average ear landmark positions). The authors evaluate the proposed framework in comprehensive experiments on the AWEx and ITWE datasets and show that the 2-SHGNet model leads to more accurate landmark predictions than competing state-of-the-art models from the literature. Furthermore, the authors also demonstrate that the alignment step significantly improves recognition accuracy with ear images from unconstrained environments compared to unaligned imagery.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Inproceedings
Richard Plesh; Peter Peer; Vitomir Štruc
GlassesGAN: Eyewear Personalization using Synthetic Appearance Discovery and Targeted Subspace Modeling Inproceedings
In: Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 2023.
@inproceedings{PleshCVPR2023,
title = {GlassesGAN: Eyewear Personalization using Synthetic Appearance Discovery and Targeted Subspace Modeling},
author = {Richard Plesh and Peter Peer and Vitomir Štruc},
year = {2023},
date = {2023-06-18},
booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
abstract = {We present GlassesGAN, a novel image editing framework for custom design of glasses, that sets a new standard in terms of image quality, edit realism, and continuous multi-style edit capability. To facilitate the editing process with GlassesGAN, we propose a Targeted Subspace Modeling (TSM) procedure that, based on a novel mechanism for (synthetic) appearance discovery in the latent space of a pre-trained GAN generator, constructs an eyeglasses-specific (latent) subspace that the editing framework can utilize. Additionally, we also introduce an appearance-constrained subspace initialization (SI) technique that centers the latent representation of the given input image in the well-defined part of the constructed subspace to improve the reliability of the learned edits. We test GlassesGAN on two (diverse) high-resolution datasets (CelebA-HQ and SiblingsDB-HQf) and compare it to three state-of-the-art competitors, i.e., InterfaceGAN, GANSpace, and MaskGAN. The reported results show that GlassesGAN convincingly outperforms all competing techniques, while offering additional functionality (e.g., fine-grained multi-style editing) not available with any of the competitors. The source code will be made freely available.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Marija Ivanovska; Vitomir Štruc
Face Morphing Attack Detection with Denoising Diffusion Probabilistic Models Inproceedings
In: Proceedings of the International Workshop on Biometrics and Forensics (IWBF), pp. 1-6, 2023.
@inproceedings{IWBF2023_Marija,
title = {Face Morphing Attack Detection with Denoising Diffusion Probabilistic Models},
author = {Marija Ivanovska and Vitomir Štruc},
url = {https://lmi.fe.uni-lj.si/wp-content/uploads/2023/03/IWBF2023_Morphing.pdf},
year = {2023},
date = {2023-02-28},
booktitle = {Proceedings of the International Workshop on Biometrics and Forensics (IWBF)},
pages = {1-6},
abstract = {Morphed face images have recently become a growing concern for existing face verification systems, as they are relatively easy to generate and can be used to impersonate someone's identity for various malicious purposes. Efficient Morphing Attack Detection (MAD) that generalizes well across different morphing techniques is, therefore, of paramount importance. Existing MAD techniques predominantly rely on discriminative models that learn from examples of bona fide and morphed images and, as a result, often exhibit sub-optimal generalization performance when confronted with unknown types of morphing attacks. To address this problem, we propose a novel, diffusion-based MAD method in this paper that learns only from the characteristics of bona fide images. Various forms of morphing attacks are then detected by our model as out-of-distribution samples. We perform rigorous experiments over four different datasets (CASIA-WebFace, FRLL-Morphs, FERET-Morphs and FRGC-Morphs) and compare the proposed solution to both discriminatively-trained and one-class MAD models. The experimental results show that our MAD model achieves highly competitive results on all considered datasets.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Žiga Babnik; Naser Damer; Vitomir Štruc
Optimization-Based Improvement of Face Image Quality Assessment Techniques Inproceedings
In: Proceedings of the International Workshop on Biometrics and Forensics (IWBF), 2023.
@inproceedings{iwbf2023babnik,
title = {Optimization-Based Improvement of Face Image Quality Assessment Techniques},
author = {Žiga Babnik and Naser Damer and Vitomir Štruc},
url = {https://lmi.fe.uni-lj.si/wp-content/uploads/2023/03/IWBF_23___paper-1.pdf},
year = {2023},
date = {2023-02-28},
booktitle = {Proceedings of the International Workshop on Biometrics and Forensics (IWBF)},
abstract = {Contemporary face recognition (FR) models achieve near-ideal recognition performance in constrained settings, yet do not fully translate the performance to unconstrained (real-world) scenarios. To help improve the performance and stability of FR systems in such unconstrained settings, face image quality assessment (FIQA) techniques try to infer sample-quality information from the input face images that can aid with the recognition process. While existing FIQA techniques are able to efficiently capture the differences between high and low quality images, they typically cannot fully distinguish between images of similar quality, leading to lower performance in many scenarios. To address this issue, we present in this paper a supervised quality-label optimization approach, aimed at improving the performance of existing FIQA techniques. The developed optimization procedure infuses additional information (computed with a selected FR model) into the initial quality scores generated with a given FIQA technique to produce better estimates of the "actual" image quality. We evaluate the proposed approach in comprehensive experiments with six state-of-the-art FIQA approaches (CR-FIQA, FaceQAN, SER-FIQ, PCNet, MagFace, SDD-FIQA) on five commonly used benchmarks (LFW, CFP-FP, CPLFW, CALFW, XQLFW) using three targeted FR models (ArcFace, ElasticFace, CurricularFace) with highly encouraging results.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
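Illustrative sketch: one minimal, assumed reading of the quality-label optimization idea is to blend normalized FIQA scores with mated similarity scores computed by an FR model. The blending scheme and weight below are illustrative assumptions, not the procedure from the paper.

import numpy as np

rng = np.random.default_rng(0)
fiqa_scores = rng.random(8)        # initial quality scores from some FIQA technique (stand-ins)
mated_sims = rng.random(8)         # per-sample mated-similarity information from an FR model (stand-ins)

def minmax(v):
    # rescale scores to a common [0, 1] range before blending
    return (v - v.min()) / (v.max() - v.min() + 1e-8)

alpha = 0.5                        # assumed blending weight (hypothetical hyper-parameter)
optimized = (1 - alpha) * minmax(fiqa_scores) + alpha * minmax(mated_sims)
print(optimized)                   # refined pseudo ground-truth quality labels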
Klemen Grm; Berk Ozata; Vitomir Struc; Hazim K. Ekenel
Meet-in-the-middle: Multi-scale upsampling and matching for cross-resolution face recognition Inproceedings
In: WACV workshops, pp. 120-129, 2023.
@inproceedings{WACVW2023,
title = {Meet-in-the-middle: Multi-scale upsampling and matching for cross-resolution face recognition},
author = {Klemen Grm and Berk Ozata and Vitomir Struc and Hazim K. Ekenel},
url = {https://lmi.fe.uni-lj.si/wp-content/uploads/2023/01/Meet_in_the_middle.pdf
https://arxiv.org/abs/2211.15225
https://openaccess.thecvf.com/content/WACV2023W/RWS/papers/Grm_Meet-in-the-Middle_Multi-Scale_Upsampling_and_Matching_for_Cross-Resolution_Face_Recognition_WACVW_2023_paper.pdf
},
year = {2023},
date = {2023-01-06},
booktitle = {WACV workshops},
pages = {120-129},
abstract = {In this paper, we aim to address the large domain gap between high-resolution face images, e.g., from professional portrait photography, and low-quality surveillance images, e.g., from security cameras. Establishing an identity match between disparate sources like this is a classical surveillance face identification scenario, which continues to be a challenging problem for modern face recognition techniques. To that end, we propose a method that combines face super-resolution, resolution matching, and multi-scale template accumulation to reliably recognize faces from long-range surveillance footage, including from low quality sources. The proposed approach does not require training or fine-tuning on the target dataset of real surveillance images. Extensive experiments show that our proposed method is able to outperform even existing methods fine-tuned to the SCFace dataset.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
2022
Journal Articles
Chenquan Gan; Yucheng Yang; Qingyi Zhu; Deepak Kumar Jain; Vitomir Struc
DHF-Net: A hierarchical feature interactive fusion network for dialogue emotion recognition Journal Article
In: Expert Systems with Applications, vol. 210, 2022.
@article{TextEmotionESWA,
title = {DHF-Net: A hierarchical feature interactive fusion network for dialogue emotion recognition},
author = {Chenquan Gan and Yucheng Yang and Qingyi Zhu and Deepak Kumar Jain and Vitomir Struc},
url = {https://www.sciencedirect.com/science/article/pii/S0957417422016025?via%3Dihub},
doi = {https://doi.org/10.1016/j.eswa.2022.118525},
year = {2022},
date = {2022-12-30},
urldate = {2022-08-01},
journal = {Expert Systems with Applications},
volume = {210},
abstract = {To balance the trade-off between contextual information and fine-grained information in identifying specific emotions during a dialogue and combine the interaction of hierarchical feature related information, this paper proposes a hierarchical feature interactive fusion network (named DHF-Net), which not only can retain the integrity of the context sequence information but also can extract more fine-grained information. To obtain a deep semantic information, DHF-Net processes the task of recognizing dialogue emotion and dialogue act/intent separately, and then learns the cross-impact of two tasks through collaborative attention. Also, a bidirectional gated recurrent unit (Bi-GRU) connected hybrid convolutional neural network (CNN) group method is designed, by which the sequence information is smoothly sent to the multi-level local information layers for feature extraction. Experimental results show that, on two open session datasets, the performance of DHF-Net is improved by 1.8% and 1.2%, respectively.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Matej Vitek; Abhijit Das; Diego Rafael Lucio; Luiz Antonio Zanlorensi Jr.; David Menotti; Jalil Nourmohammadi Khiarak; Mohsen Akbari Shahpar; Meysam Asgari-Chenaghlu; Farhang Jaryani; Juan E. Tapia; Andres Valenzuela; Caiyong Wang; Yunlong Wang; Zhaofeng He; Zhenan Sun; Fadi Boutros; Naser Damer; Jonas Henry Grebe; Arjan Kuijper; Kiran Raja; Gourav Gupta; Georgios Zampoukis; Lazaros Tsochatzidis; Ioannis Pratikakis; S. V. Aruna Kumar; B. S. Harish; Umapada Pal; Peter Peer; Vitomir Štruc
Exploring Bias in Sclera Segmentation Models: A Group Evaluation Approach Journal Article
In: IEEE Transactions on Information Forensics and Security, 2022, ISSN: 1556-6013.
@article{TIFS_Sclera2022,
title = {Exploring Bias in Sclera Segmentation Models: A Group Evaluation Approach},
author = {Matej Vitek and Abhijit Das and Diego Rafael Lucio and Luiz Antonio Zanlorensi Jr. and David Menotti and Jalil Nourmohammadi Khiarak and Mohsen Akbari Shahpar and Meysam Asgari-Chenaghlu and Farhang Jaryani and Juan E. Tapia and Andres Valenzuela and Caiyong Wang and Yunlong Wang and Zhaofeng He and Zhenan Sun and Fadi Boutros and Naser Damer and Jonas Henry Grebe and Arjan Kuijper and Kiran Raja and Gourav Gupta and Georgios Zampoukis and Lazaros Tsochatzidis and Ioannis Pratikakis and S. V. Aruna Kumar and B. S. Harish and Umapada Pal and Peter Peer and Vitomir Štruc},
url = {https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=9926136},
doi = {10.1109/TIFS.2022.3216468},
issn = {1556-6013},
year = {2022},
date = {2022-10-18},
urldate = {2022-10-18},
journal = {IEEE Transactions on Information Forensics and Security},
abstract = {Bias and fairness of biometric algorithms have been key topics of research in recent years, mainly due to the societal, legal and ethical implications of potentially unfair decisions made by automated decision-making models. A considerable amount of work has been done on this topic across different biometric modalities, aiming at better understanding the main sources of algorithmic bias or devising mitigation measures. In this work, we contribute to these efforts and present the first study investigating bias and fairness of sclera segmentation models. Although sclera segmentation techniques represent a key component of sclera-based biometric systems with a considerable impact on the overall recognition performance, the presence of different types of biases in sclera segmentation methods is still underexplored. To address this limitation, we describe the results of a group evaluation effort (involving seven research groups), organized to explore the performance of recent sclera segmentation models within a common experimental framework and study performance differences (and bias), originating from various demographic as well as environmental factors. Using five diverse datasets, we analyze seven independently developed sclera segmentation models in different experimental configurations. The results of our experiments suggest that there are significant differences in the overall segmentation performance across the seven models and that among the considered factors, ethnicity appears to be the biggest cause of bias. Additionally, we observe that training with representative and balanced data does not necessarily lead to less biased results. Finally, we find that in general there appears to be a negative correlation between the amount of bias observed (due to eye color, ethnicity and acquisition device) and the overall segmentation performance, suggesting that advances in the field of semantic segmentation may also help with mitigating bias.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Darian Tomašević; Peter Peer; Franc Solina; Aleš Jaklič; Vitomir Štruc
Reconstructing Superquadrics from Intensity and Color Images Journal Article
In: Sensors, vol. 22, iss. 14, no. 5332, 2022.
@article{TomasevicSensors,
title = {Reconstructing Superquadrics from Intensity and Color Images},
author = {Darian Tomašević and Peter Peer and Franc Solina and Aleš Jaklič and Vitomir Štruc},
url = {https://www.mdpi.com/1424-8220/22/14/5332/pdf?version=1658380987},
doi = {https://doi.org/10.3390/s22145332},
year = {2022},
date = {2022-07-16},
journal = {Sensors},
volume = {22},
number = {5332},
issue = {14},
abstract = {The task of reconstructing 3D scenes based on visual data represents a longstanding problem in computer vision. Common reconstruction approaches rely on the use of multiple volumetric primitives to describe complex objects. Superquadrics (a class of volumetric primitives) have shown great promise due to their ability to describe various shapes with only a few parameters. Recent research has shown that deep learning methods can be used to accurately reconstruct random superquadrics from both 3D point cloud data and simple depth images. In this paper, we extended these reconstruction methods to intensity and color images. Specifically, we used a dedicated convolutional neural network (CNN) model to reconstruct a single superquadric from the given input image. We analyzed the results in a qualitative and quantitative manner, by visualizing reconstructed superquadrics as well as observing error and accuracy distributions of predictions. We showed that a CNN model designed around a simple ResNet backbone can be used to accurately reconstruct superquadrics from images containing one object, but only if one of the spatial parameters is fixed or if it can be determined from other image characteristics, e.g., shadows. Furthermore, we experimented with images of increasing complexity, for example, by adding textures, and observed that the results degraded only slightly. In addition, we show that our model outperforms the current state-of-the-art method on the studied task. Our final result is a highly accurate superquadric reconstruction model, which can also reconstruct superquadrics from real images of simple objects, without additional training.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Daile Osorio-Roig; Christian Rathgeb; Pawel Drozdowski; Philipp Terhörst; Vitomir Štruc; Christoph Busch
An Attack on Feature Level-based Facial Soft-biometric Privacy Enhancement Journal Article
In: IEEE Transactions on Biometrics, Behavior, and Identity Science (TBIOM), 2022.
@article{TBIOM_2022,
title = {An Attack on Feature Level-based Facial Soft-biometric Privacy Enhancement},
author = {Daile Osorio-Roig and Christian Rathgeb and Pawel Drozdowski and Philipp Terhörst and Vitomir Štruc and Christoph Busch},
url = {https://arxiv.org/pdf/2111.12405.pdf},
year = {2022},
date = {2022-05-02},
journal = {IEEE Transactions on Biometrics, Behavior, and Identity Science (TBIOM)},
abstract = {In the recent past, different researchers have proposed novel privacy-enhancing face recognition systems designed to conceal soft-biometric information at feature level. These works have reported impressive results, but usually do not consider specific attacks in their analysis of privacy protection. In most cases, the privacy protection capabilities of these schemes are tested through simple machine learning-based classifiers and visualisations of dimensionality reduction tools. In this work, we introduce an attack on feature level-based facial soft–biometric privacy-enhancement techniques. The attack is based on two observations: (1) to achieve high recognition accuracy, certain similarities between facial representations have to be retained in their privacy-enhanced versions; (2) highly similar facial representations usually originate from face images with similar soft-biometric attributes. Based on these observations, the proposed attack compares a privacy-enhanced face representation against a set of privacy-enhanced face representations with known soft-biometric attributes. Subsequently, the best obtained similarity scores are analysed to infer the unknown soft-biometric attributes of the attacked privacy-enhanced face representation. That is, the attack only requires a relatively small database of arbitrary face images and the privacy-enhancing face recognition algorithm as a black-box. In the experiments, the attack is applied to two representative approaches which have previously been reported to reliably conceal the gender in privacy-enhanced face representations. It is shown that the presented attack is able to circumvent the privacy enhancement to a considerable degree and is able to correctly classify gender with an accuracy of up to approximately 90% for both of the analysed privacy-enhancing face recognition systems. Future works on privacy-enhancing face recognition are encouraged to include the proposed attack in evaluations on privacy protection.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Janez Križaj; Simon Dobrišek; Vitomir Štruc
Making the most of single sensor information: a novel fusion approach for 3D face recognition using region covariance descriptors and Gaussian mixture models Journal Article
In: Sensors, iss. 6, no. 2388, pp. 1-26, 2022.
@article{KrizajSensors2022,
title = {Making the most of single sensor information : a novel fusion approach for 3D face recognition using region covariance descriptors and Gaussian mixture models},
author = {Janez Križaj and Simon Dobrišek and Vitomir Štruc},
url = {https://www.mdpi.com/1424-8220/22/6/2388},
doi = {10.3390/s22062388},
year = {2022},
date = {2022-03-01},
journal = {Sensors},
number = {2388},
issue = {6},
pages = {1-26},
abstract = {Most commercially successful face recognition systems combine information from multiple sensors (2D and 3D, visible light and infrared, etc.) to achieve reliable recognition in various environments. When only a single sensor is available, the robustness as well as efficacy of the recognition process suffer. In this paper, we focus on face recognition using images captured by a single 3D sensor and propose a method based on the use of region covariance matrixes and Gaussian mixture models (GMMs). All steps of the proposed framework are automated, and no metadata, such as pre-annotated eye, nose, or mouth positions is required, while only a very simple clustering-based face detection is performed. The framework computes a set of region covariance descriptors from local regions of different face image representations and then uses the unscented transform to derive low-dimensional feature vectors, which are finally modeled by GMMs. In the last step, a support vector machine classification scheme is used to make a decision about the identity of the input 3D facial image. The proposed framework has several desirable characteristics, such as an inherent mechanism for data fusion/integration (through the region covariance matrixes), the ability to explore facial images at different levels of locality, and the ability to integrate a domain-specific prior knowledge into the modeling procedure. Several normalization techniques are incorporated into the proposed framework to further improve performance. Extensive experiments are performed on three prominent databases (FRGC v2, CASIA, and UMB-DB) yielding competitive results.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Marjan Stoimchev; Marija Ivanovska; Vitomir Štruc
Learning to Combine Local and Global Image Information for Contactless Palmprint Recognition Journal Article
In: Sensors, vol. 22, no. 1, pp. 1-26, 2022.
@article{Stoimchev2022,
title = {Learning to Combine Local and Global Image Information for Contactless Palmprint Recognition},
author = {Marjan Stoimchev and Marija Ivanovska and Vitomir Štruc},
url = {https://lmi.fe.uni-lj.si/wp-content/uploads/2022/03/sensors-22-00073_reduced.pdf},
doi = {https://doi.org/10.3390/s22010073},
year = {2022},
date = {2022-01-01},
journal = {Sensors},
volume = {22},
number = {1},
pages = {1-26},
abstract = {In the past few years, there has been a leap from traditional palmprint recognition methodologies, which use handcrafted features, to deep-learning approaches that are able to automatically learn feature representations from the input data. However, the information that is extracted from such deep-learning models typically corresponds to the global image appearance, where only the most discriminative cues from the input image are considered. This characteristic is especially problematic when data is acquired in unconstrained settings, as in the case of contactless palmprint recognition systems, where visual artifacts caused by elastic deformations of the palmar surface are typically present in spatially local parts of the captured images. In this study we address the problem of elastic deformations by introducing a new approach to contactless palmprint recognition based on a novel CNN model, designed as a two-path architecture, where one path processes the input in a holistic manner, while the second path extracts local information from smaller image patches sampled from the input image. As elastic deformations can be assumed to most significantly affect the global appearance, while having a lesser impact on spatially local image areas, the local processing path addresses the issues related to elastic deformations thereby supplementing the information from the global processing path. The model is trained with a learning objective that combines the Additive Angular Margin (ArcFace) Loss and the well-known center loss. By using the proposed model design, the discriminative power of the learned image representation is significantly enhanced compared to standard holistic models, which, as we show in the experimental section, leads to state-of-the-art performance for contactless palmprint recognition. Our approach is tested on two publicly available contactless palmprint datasets—namely, IITD and CASIA—and is demonstrated to perform favorably against state-of-the-art methods from the literature. The source code for the proposed model is made publicly available.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Incollections
Peter Rot; Peter Peer; Vitomir Štruc
Detecting Soft-Biometric Privacy Enhancement Incollection
In: Rathgeb, Christian; Tolosana, Ruben; Vera-Rodriguez, Ruben; Busch, Christoph (Ed.): Handbook of Digital Face Manipulation and Detection, 2022.
@incollection{RotManipulationBook,
title = {Detecting Soft-Biometric Privacy Enhancement},
author = {Peter Rot and Peter Peer and Vitomir Štruc},
editor = {Christian Rathgeb and Ruben Tolosana and Ruben Vera-Rodriguez and Christoph Busch},
url = {https://link.springer.com/chapter/10.1007/978-3-030-87664-7_18},
year = {2022},
date = {2022-01-01},
urldate = {2022-01-01},
booktitle = {Handbook of Digital Face Manipulation and Detection},
keywords = {},
pubstate = {published},
tppubtype = {incollection}
}
Ruben Tolosana; Christian Rathgeb; Ruben Vera-Rodriguez; Christoph Busch; Luisa Verdoliva; Siwei Lyu; Huy H. Nguyen; Junichi Yamagishi; Isao Echizen; Peter Rot; Klemen Grm; Vitomir Štruc; Antitza Dantcheva; Zahid Akhtar; Sergio Romero-Tapiador; Julian Fierrez; Aythami Morales; Javier Ortega-Garcia; Els Kindt; Catherine Jasserand; Tarmo Kalvet; Marek Tiits
Future Trends in Digital Face Manipulation and Detection Incollection
In: Rathgeb, Christian; Tolosana, Ruben; Vera-Rodriguez, Ruben; Busch, Christoph (Ed.): Handbook of Digital Face Manipulation and Detection, pp. 463–482, 2022, ISBN: 978-3-030-87663-0.
@incollection{ManipulationFace2022,
title = {Future Trends in Digital Face Manipulation and Detection},
author = {Ruben Tolosana and Christian Rathgeb and Ruben Vera-Rodriguez and Christoph Busch and Luisa Verdoliva and Siwei Lyu and Huy H. Nguyen and Junichi Yamagishi and Isao Echizen and Peter Rot and Klemen Grm and Vitomir Štruc and Antitza Dantcheva and Zahid Akhtar and Sergio Romero-Tapiador and Julian Fierrez and Aythami Morales and Javier Ortega-Garcia and Els Kindt and Catherine Jasserand and Tarmo Kalvet and Marek Tiits},
editor = {Christian Rathgeb and Ruben Tolosana and Ruben Vera-Rodriguez and Christoph Busch},
url = {https://link.springer.com/chapter/10.1007/978-3-030-87664-7_21},
doi = {https://doi.org/10.1007/978-3-030-87664-7_21},
isbn = {978-3-030-87663-0},
year = {2022},
date = {2022-01-01},
urldate = {2022-01-01},
booktitle = {Handbook of Digital Face Manipulation and Detection},
pages = {463--482},
abstract = {Recently, digital face manipulation and its detection have sparked large interest in industry and academia around the world. Numerous approaches have been proposed in the literature to create realistic face manipulations, such as DeepFakes and face morphs. To the human eye manipulated images and videos can be almost indistinguishable from real content. Although impressive progress has been reported in the automatic detection of such face manipulations, this research field is often considered to be a cat and mouse game. This chapter briefly discusses the state of the art of digital face manipulation and detection. Issues and challenges that need to be tackled by the research community are summarized, along with future trends in the field.},
keywords = {},
pubstate = {published},
tppubtype = {incollection}
}
Inproceedings
Darian Tomašević; Peter Peer; Vitomir Štruc
BiOcularGAN: Bimodal Synthesis and Annotation of Ocular Images Inproceedings
In: IEEE/IAPR International Joint Conference on Biometrics (IJCB 2022), pp. 1-10, 2022.
@inproceedings{TomasevicIJCBBiOcular,
title = {BiOcularGAN: Bimodal Synthesis and Annotation of Ocular Images},
author = {Darian Tomašević and Peter Peer and Vitomir Štruc },
url = {https://lmi.fe.uni-lj.si/wp-content/uploads/2022/12/BiModal_StyleGAN.pdf
https://arxiv.org/pdf/2205.01536.pdf},
year = {2022},
date = {2022-10-20},
urldate = {2022-10-20},
booktitle = {IEEE/IAPR International Joint Conference on Biometrics (IJCB 2022) },
pages = {1-10},
abstract = {Current state-of-the-art segmentation techniques for ocular images are critically dependent on large-scale annotated datasets, which are labor-intensive to gather and often raise privacy concerns. In this paper, we present a novel framework, called BiOcularGAN, capable of generating synthetic large-scale datasets of photorealistic (visible light and near-infrared) ocular images, together with corresponding segmentation labels to address these issues. At its core, the framework relies on a novel Dual-Branch StyleGAN2 (DB-StyleGAN2) model that facilitates bimodal image generation, and a Semantic Mask Generator (SMG) component that produces semantic annotations by exploiting latent features of the DB-StyleGAN2 model. We evaluate BiOcularGAN through extensive experiments across five diverse ocular datasets and analyze the effects of bimodal data generation on image quality and the produced annotations. Our experimental results show that BiOcularGAN is able to produce high-quality matching bimodal images and annotations (with minimal manual intervention) that can be used to train highly competitive (deep) segmentation models (in a privacy-aware manner) that perform well across multiple real-world datasets. The source code for the BiOcularGAN framework is publicly available at: https://github.com/dariant/BiOcularGAN.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Marija Ivanovska; Andrej Kronovšek; Peter Peer; Vitomir Štruc; Borut Batagelj
Face Morphing Attack Detection Using Privacy-Aware Training Data Inproceedings
In: Proceedings of ERK 2022, pp. 1-4, 2022.
@inproceedings{MarijaMorphing,
title = {Face Morphing Attack Detection Using Privacy-Aware Training Data},
author = {Marija Ivanovska and Andrej Kronovšek and Peter Peer and Vitomir Štruc and Borut Batagelj },
url = {https://lmi.fe.uni-lj.si/wp-content/uploads/2022/08/2022_ERK__Face_Morphing_Attack_Detecton_Using_Privacy_Aware_Training_Data.pdf},
year = {2022},
date = {2022-08-01},
urldate = {2022-08-01},
booktitle = {Proceedings of ERK 2022},
pages = {1-4},
abstract = {Images of morphed faces pose a serious threat to face recognition-based security systems, as they can be used to illegally verify the identity of multiple people with a single morphed image. Modern detection algorithms learn to identify such morphing attacks using authentic images of real individuals. This approach raises various privacy concerns and limits the amount of publicly available training data. In this paper, we explore the efficacy of detection algorithms that are trained only on faces of non-existing people and their respective morphs. To this end, two dedicated algorithms are trained with synthetic data and then evaluated on three real-world datasets, i.e., FRLL-Morphs, FERET-Morphs and FRGC-Morphs. Our results show that synthetic facial images can be successfully employed for the training process of the detection algorithms and generalize well to real-world scenarios.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Jaka Šircelj; Peter Peer; Franc Solina; Vitomir Štruc
Hierarchical Superquadric Decomposition with Implicit Space Separation Inproceedings
In: Proceedings of ERK 2022, pp. 1-4, 2022.
@inproceedings{SirceljSuperQuadrics,
title = {Hierarchical Superquadric Decomposition with Implicit Space Separation},
author = {Jaka Šircelj and Peter Peer and Franc Solina and Vitomir Štruc},
url = {https://lmi.fe.uni-lj.si/wp-content/uploads/2022/08/sq_erk.pdf},
year = {2022},
date = {2022-08-01},
urldate = {2022-08-01},
booktitle = {Proceedings of ERK 2022},
pages = {1-4},
abstract = {We introduce a new method to reconstruct 3D objects using a set of volumetric primitives, i.e., superquadrics. The method hierarchically decomposes a target 3D object into pairs of superquadrics recovering finer and finer details. While such hierarchical methods have been studied before, we introduce a new way of splitting the object space using only properties of the predicted superquadrics. The method is trained and evaluated on the ShapeNet dataset. The results of our experiments suggest that reasonable reconstructions can be obtained with the proposed approach for a diverse set of objects with complex geometry.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Klemen Grm; Vitomir Štruc
Optimization-based Image Filter Design for Self-supervised Super-resolution Training Inproceedings
In: Proceedings of ERK 2022, 2022.
@inproceedings{Grm2022Erk,
title = {Optimization-based Image Filter Design for Self-supervised Super-resolution Training},
author = {Klemen Grm and Vitomir Štruc},
url = {https://lmi.fe.uni-lj.si/wp-content/uploads/2022/08/erk22_filtri.pdf},
year = {2022},
date = {2022-08-01},
booktitle = {Proceedings of ERK 2022},
abstract = {Single-image super-resolution can be posed as a self-supervised machine learning task, where the training inputs and targets are derived from an unlabelled dataset of high-resolution images. For super-resolution training, the derivation takes the form of a degradation function that yields low-resolution images given high-resolution ones. Typically, the degradation function is selected manually based on heuristics, such as the desired magnification ratio of the super-resolution method being trained. In this paper, we instead propose principled, optimization-based methods for picking the image filter of the degradation function based on its desired properties in the frequency domain. We develop implicit and explicit methods for filter optimization and demonstrate the resulting filters are better at rejecting aliasing and matching the frequency domain characteristics of real-life low-resolution images than commonly used heuristic picks.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Žiga Babnik; Vitomir Štruc
Iterativna optimizacija ocen kakovosti slikovnih podatkov v sistemih za razpoznavanje obrazov Inproceedings
In: Proceedings of ERK 2022, pp. 1-4, 2022.
@inproceedings{BabnikErk2022,
title = {Iterativna optimizacija ocen kakovosti slikovnih podatkov v sistemih za razpoznavanje obrazov},
author = {Žiga Babnik and Vitomir Štruc},
url = {https://lmi.fe.uni-lj.si/wp-content/uploads/2022/08/ERK_2022.pdf},
year = {2022},
date = {2022-08-01},
booktitle = {Proceedings of ERK 2022},
pages = {1-4},
abstract = {While recent face recognition (FR) systems achieve excellent results in many deployment scenarios, their performance in challenging real-world settings is still under question. For this reason, face image quality assessment (FIQA) techniques aim to support FR systems, by providing them with sample quality information that can be used to reject poor quality data unsuitable for recognition purposes. Several groups of FIQA methods relying on different concepts have been proposed in the literature, all of which can be used for generating quality scores of facial images that can serve as pseudo ground-truth (quality) labels and be exploited for training (regression-based) quality estimation models. Several FIQA approaches show that a significant amount of sample-quality information can be extracted from mated similarity-score distributions generated with some face matcher. Based on this insight, we propose in this paper a quality label optimization approach, which incorporates sample-quality information from mated-pair similarities into quality predictions of existing off-the-shelf FIQA techniques. We evaluate the proposed approach using three state-of-the-art FIQA methods over three diverse datasets. The results of our experiments show that the proposed optimization procedure heavily depends on the number of executed optimization iterations. At ten iterations, the approach seems to perform the best, consistently outperforming the base quality scores of the three FIQA methods, chosen for the experiments.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Žiga Babnik; Peter Peer; Vitomir Štruc
FaceQAN: Face Image Quality Assessment Through Adversarial Noise Exploration Inproceedings
In: IAPR International Conference on Pattern Recognition (ICPR), 2022.
@inproceedings{ICPR2022,
title = {FaceQAN: Face Image Quality Assessment Through Adversarial Noise Exploration},
author = {Žiga Babnik and Peter Peer and Vitomir Štruc},
url = {https://lmi.fe.uni-lj.si/wp-content/uploads/2022/06/ICPR_2022___paper-17.pdf},
year = {2022},
date = {2022-05-17},
urldate = {2022-05-17},
booktitle = {IAPR International Conference on Pattern Recognition (ICPR)},
abstract = {Recent state-of-the-art face recognition (FR) approaches have achieved impressive performance, yet unconstrained face recognition still represents an open problem. Face image quality assessment (FIQA) approaches aim to estimate the quality of the input samples that can help provide information on the confidence of the recognition decision and eventually lead to improved results in challenging scenarios. While much progress has been made in face image quality assessment in recent years, computing reliable quality scores for diverse facial images and FR models remains challenging. In this paper, we propose a novel approach to face image quality assessment, called FaceQAN, that is based on adversarial examples and relies on the analysis of adversarial noise which can be calculated with any FR model learned by using some form of gradient descent. As such, the proposed approach is the first to link image quality to adversarial attacks. Comprehensive (cross-model as well as model-specific) experiments are conducted with four benchmark datasets, i.e., LFW, CFP–FP, XQLFW and IJB–C, four FR models, i.e., CosFace, ArcFace, CurricularFace and ElasticFace and in comparison to seven state-of-the-art FIQA methods to demonstrate the performance of FaceQAN. Experimental results show that FaceQAN achieves competitive results, while exhibiting several desirable characteristics. The source code for FaceQAN will be made publicly available.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
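Illustrative sketch: the link between adversarial noise and quality that FaceQAN exploits can be toy-modeled by perturbing an image with one FGSM-style step and measuring how much its embedding moves. The embedding network, perturbation budget, and single-step setup below are assumptions for illustration, not the published FaceQAN algorithm.

import torch

embed = torch.nn.Sequential(torch.nn.Flatten(), torch.nn.Linear(3 * 8 * 8, 16))  # FR-model stand-in
x = torch.rand(1, 3, 8, 8, requires_grad=True)       # input face image (stand-in)

# One FGSM-style step: perturb the image so as to move its embedding
loss = embed(x).norm()
loss.backward()
x_adv = (x + 0.03 * x.grad.sign()).detach()          # 0.03: assumed noise budget

# Quality proxy: embeddings of high-quality samples should resist the perturbation,
# so higher clean-vs-adversarial similarity suggests higher sample quality.
q = torch.nn.functional.cosine_similarity(embed(x).detach(), embed(x_adv)).item()
print(q)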
Žiga Babnik; Vitomir Štruc
Assessing Bias in Face Image Quality Assessment Inproceedings
In: EUSIPCO 2022, 2022.
@inproceedings{EUSIPCO_2022,
title = {Assessing Bias in Face Image Quality Assessment},
author = {Žiga Babnik and Vitomir Štruc},
url = {https://lmi.fe.uni-lj.si/wp-content/uploads/2022/06/EUSIPCO_2022___paper.pdf},
year = {2022},
date = {2022-05-16},
urldate = {2022-05-16},
booktitle = {EUSIPCO 2022},
abstract = {Face image quality assessment (FIQA) attempts to improve face recognition (FR) performance by providing additional information about sample quality.
Because FIQA methods attempt to estimate the utility of a sample for face recognition, it is reasonable to assume that these methods are heavily influenced by the underlying face recognition system. Although modern face recognition systems are known to perform well, several studies have found that such systems often exhibit problems with demographic bias. It is therefore likely that such problems are also present with FIQA techniques. To investigate the demographic biases associated with FIQA approaches, this paper presents a comprehensive study involving a variety of quality assessment methods (general-purpose image quality assessment, supervised face quality assessment, and unsupervised face quality assessment methods) and three diverse state-of-the-art FR models.
Our analysis on the Balanced Faces in the Wild (BFW) dataset shows that all techniques considered are affected more by variations in race than sex. While the general-purpose image quality assessment methods appear to be less biased with respect to the two demographic factors considered, the supervised and unsupervised face image quality assessment methods both show strong bias with a tendency to favor white individuals (of either sex). In addition, we found that methods that are less racially biased perform worse overall. This suggests that the observed bias in FIQA methods is to a significant extent related to the underlying face recognition system.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Grega Dvoršak; Ankita Dwivedi; Vitomir Štruc; Peter Peer; Žiga Emeršič
Kinship Verification from Ear Images: An Explorative Study with Deep Learning Models Inproceedings
In: International Workshop on Biometrics and Forensics (IWBF), pp. 1–6, 2022.
@inproceedings{KinEars,
title = {Kinship Verification from Ear Images: An Explorative Study with Deep Learning Models},
author = {Grega Dvoršak and Ankita Dwivedi and Vitomir Štruc and Peter Peer and Žiga Emeršič},
url = {https://lmi.fe.uni-lj.si/wp-content/uploads/2022/03/Gregovi_Uhlji_Template-2.pdf},
year = {2022},
date = {2022-04-21},
urldate = {2022-04-21},
booktitle = {International Workshop on Biometrics and Forensics (IWBF)},
pages = {1--6},
abstract = {The analysis of kin relations from visual data represents a challenging research problem with important real-world applications. However, research in this area has mostly been limited to the analysis of facial images, despite the potential of other physical (human) characteristics for this task. In this paper, we therefore study the problem of kinship verification from ear images and investigate whether salient appearance characteristics, useful for this task, can be extracted from ear data. To facilitate the study, we introduce a novel dataset, called KinEar, that contains data from 19 families with each family member having from 15 to 31 ear images. Using the KinEar data, we conduct experiments using a Siamese training setup and 5 recent deep learning backbones. The results of our experiments suggest that ear images represent a viable alternative to other modalities for kinship verification, as 4 out of 5 considered models reach a performance of over 60% in terms of the Area Under the Receiver Operating Characteristics (ROC-AUC).},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Julijan Jug; Ajda Lampe; Peter Peer; Vitomir Štruc
Segmentacija telesa z uporabo večciljnega učenja Inproceedings
In: Proceedings of Rosus 2022, 2022.
@inproceedings{Rosus2022,
title = {Segmentacija telesa z uporabo večciljnega učenja},
author = {Julijan Jug and Ajda Lampe and Peter Peer and Vitomir Štruc},
url = {https://lmi.fe.uni-lj.si/wp-content/uploads/2022/03/Rosus2020.pdf},
year = {2022},
date = {2022-03-17},
booktitle = {Proceedings of Rosus 2022},
abstract = {Segmentation is an important part of many computer vision problems involving human images and is one of the key components that affects the performance of all downstream tasks. Several prior works have approached this problem using a multi-task model that exploits correlations between different tasks to improve segmentation performance. Building on the success of such solutions, this paper presents a novel multi-task model for human segmentation/parsing that involves three tasks, i.e., (i) skeletal keypoint prediction, (ii) dense-pose prediction, and (iii) human-body segmentation. The main idea of the proposed Segmentation-Skeleton-DensePose model (SPD for short) is to learn a better segmentation model by sharing knowledge across different, yet related tasks. SPD is based on a shared deep neural network backbone that branches into three task-specific model heads and is learned using a multi-task optimization objective. The performance of the model is analysed through rigorous experiments on the LIP and ATR datasets and in comparison to a recent (state-of-the-art) multi-task body-segmentation model. Ablation studies are also presented. Our experimental results show that the proposed multi-task (segmentation) model is highly competitive and that the introduction of additional tasks contributes to a higher overall segmentation performance.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Julijan Jug; Ajda Lampe; Vitomir Štruc; Peter Peer
Body Segmentation Using Multi-task Learning Inproceedings
In: International Conference on Artificial Intelligence in Information and Communication (ICAIIC), IEEE, 2022, ISBN: 978-1-6654-5818-4.
@inproceedings{JulijanJugBody,
title = {Body Segmentation Using Multi-task Learning},
author = {Julijan Jug and Ajda Lampe and Vitomir Štruc and Peter Peer},
url = {https://lmi.fe.uni-lj.si/wp-content/uploads/2022/03/ICAIIC_paper.pdf},
doi = {10.1109/ICAIIC54071.2022.9722662},
isbn = {978-1-6654-5818-4},
year = {2022},
date = {2022-01-20},
urldate = {2022-01-20},
booktitle = {International Conference on Artificial Intelligence in Information and Communication (ICAIIC)},
publisher = {IEEE},
abstract = {Body segmentation is an important step in many computer vision problems involving human images and one of the key components that affects the performance of all downstream tasks. Several prior works have approached this problem using a multi-task model that exploits correlations between different tasks to improve segmentation performance. Based on the success of such solutions, we present in this paper a novel multi-task model for human segmentation/parsing that involves three tasks, i.e., (i) keypoint-based skeleton estimation, (ii) dense pose prediction, and (iii) human-body segmentation. The main idea behind the proposed Segmentation-Pose-DensePose model (or SPD for short) is to learn a better segmentation model by sharing knowledge across different, yet related tasks. SPD is based on a shared deep neural network backbone that branches off into three task-specific model heads and is learned using a multi-task optimization objective. The performance of the model is analysed through rigorous experiments on the LIP and ATR datasets and in comparison to a recent (state-of-the-art) multi-task body-segmentation model. Comprehensive ablation studies are also presented. Our experimental results show that the proposed multi-task (segmentation) model is highly competitive and that the introduction of additional tasks contributes towards a higher overall segmentation performance.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
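For readers interested in the mechanics of the approach, the shared-backbone design sketched in the abstract can be illustrated with a few lines of hypothetical PyTorch. The layer choices, head designs and loss weights below are our own illustrative assumptions, not the authors' actual SPD configuration:

# Minimal sketch of a shared-backbone multi-task model in the spirit of SPD.
# All layer choices and loss weights are illustrative assumptions.
import torch.nn as nn
import torch.nn.functional as F

class MultiTaskBody(nn.Module):
    def __init__(self, num_keypoints=17, num_densepose_ch=25):
        super().__init__()
        # Shared backbone (stand-in for a real encoder such as a ResNet).
        self.backbone = nn.Sequential(
            nn.Conv2d(3, 64, 3, padding=1), nn.ReLU(),
            nn.Conv2d(64, 128, 3, padding=1), nn.ReLU(),
        )
        # Three task-specific heads branching off the shared features.
        self.keypoint_head = nn.Conv2d(128, num_keypoints, 1)      # skeleton keypoints
        self.densepose_head = nn.Conv2d(128, num_densepose_ch, 1)  # dense pose
        self.segmentation_head = nn.Conv2d(128, 2, 1)              # body vs. background

    def forward(self, x):
        f = self.backbone(x)
        return self.keypoint_head(f), self.densepose_head(f), self.segmentation_head(f)

def multi_task_loss(outputs, targets, weights=(1.0, 1.0, 1.0)):
    """Weighted sum of per-task losses, as in a generic multi-task objective."""
    kp, dp, seg = outputs
    kp_t, dp_t, seg_t = targets
    return (weights[0] * F.mse_loss(kp, kp_t)
            + weights[1] * F.cross_entropy(dp, dp_t)
            + weights[2] * F.cross_entropy(seg, seg_t))

The key point of such designs is that all three heads consume the same backbone features, so gradients from the auxiliary tasks regularize the segmentation branch.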
Benjamin Fele; Ajda Lampe; Peter Peer; Vitomir Štruc
C-VTON: Context-Driven Image-Based Virtual Try-On Network Inproceedings
In: IEEE/CVF Winter Conference on Applications of Computer Vision (WACV), pp. 1–10, 2022.
@inproceedings{WACV2022_Fele,
title = {C-VTON: Context-Driven Image-Based Virtual Try-On Network},
author = {Benjamin Fele and Ajda Lampe and Peter Peer and Vitomir Štruc},
url = {https://lmi.fe.uni-lj.si/wp-content/uploads/2022/12/WACV2022_Benjamin_compressed-1.pdf},
year = {2022},
date = {2022-01-04},
urldate = {2022-01-04},
booktitle = {IEEE/CVF Winter Conference on Applications of Computer Vision (WACV)},
pages = {1--10},
abstract = {Image-based virtual try-on techniques have shown great promise for enhancing the user-experience and improving customer satisfaction on fashion-oriented e-commerce platforms. However, existing techniques are currently still limited in the quality of the try-on results they are able to produce from input images of diverse characteristics. In this work, we propose a Context-Driven Virtual Try-On Network (C-VTON) that addresses these limitations and convincingly transfers selected clothing items to the target subjects even under challenging pose configurations and in the presence of self-occlusions. At the core of the C-VTON pipeline are: (i) a geometric matching procedure that efficiently aligns the target clothing with the pose of the person in the input images, and (ii) a powerful image generator that utilizes various types of contextual information when synthesizing the final try-on result. C-VTON is evaluated in rigorous experiments on the VITON and MPV datasets and in comparison to state-of-the-art techniques from the literature. Experimental results show that the proposed approach is able to produce photo-realistic and visually convincing results and significantly improves on the existing state-of-the-art.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
2021
Journal Articles
Žiga Emeršič; Diego Sušanj; Blaž Meden; Peter Peer; Vitomir Štruc
ContexedNet : Context-Aware Ear Detection in Unconstrained Settings Journal Article
In: IEEE Access, pp. 1–17, 2021, ISSN: 2169-3536.
@article{ContexedNet_Emersic_2021,
title = {ContexedNet : Context-Aware Ear Detection in Unconstrained Settings},
author = {Žiga Emeršič and Diego Sušanj and Blaž Meden and Peter Peer and Vitomir Štruc},
url = {https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=9583244},
issn = {2169-3536},
year = {2021},
date = {2021-10-20},
urldate = {2021-10-20},
journal = {IEEE Access},
pages = {1--17},
abstract = {Ear detection represents one of the key components of contemporary ear recognition systems. While significant progress has been made in the area of ear detection over recent years, most of the improvements are direct results of advances in the field of visual object detection. Only a limited number of techniques presented in the literature are domain--specific and designed explicitly with ear detection in mind. In this paper, we aim to address this gap and present a novel detection approach that does not rely only on general ear (object) appearance, but also exploits contextual information, i.e., face--part locations, to ensure accurate and robust ear detection with images captured in a wide variety of imaging conditions. The proposed approach is based on a Context--aware Ear Detection Network (ContexedNet) and poses ear detection as a semantic image segmentation problem. ContexedNet consists of two processing paths: 1) a context--provider that extracts probability maps corresponding to the locations of facial parts from the input image, and 2) a dedicated ear segmentation model that integrates the computed probability maps into a context--aware segmentation-based ear detection procedure. ContexedNet is evaluated in rigorous experiments on the AWE and UBEAR datasets and shown to ensure competitive performance when evaluated against state--of--the--art ear detection models from the literature. Additionally, because the proposed contextualization is model agnostic, it can also be utilized with other ear detection techniques to improve performance.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
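The two-path design described above can be summarized as follows: a context provider produces face-part probability maps, which are then fused with the input before segmentation. A minimal sketch under an assumed fusion-by-concatenation reading; ContexedNet's exact integration may differ:

# Minimal sketch of context-aware segmentation: face-part probability maps
# are concatenated with the RGB input before segmentation. The fusion shown
# here is an assumption; the paper's exact integration may differ.
import torch

def contextual_ear_segmentation(image, context_net, seg_net):
    """image: (N, 3, H, W) tensor; context_net and seg_net are trained models."""
    with torch.no_grad():
        part_probs = context_net(image)                # (N, P, H, W) face-part probability maps
    seg_input = torch.cat([image, part_probs], dim=1)  # context-aware input, (N, 3+P, H, W)
    return seg_net(seg_input)                          # per-pixel ear probability map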
Blaz Meden; Peter Rot; Philipp Terhorst; Naser Damer; Arjan Kuijper; Walter J. Scheirer; Arun Ross; Peter Peer; Vitomir Struc
Privacy-Enhancing Face Biometrics: A Comprehensive Survey Journal Article
In: IEEE Transactions on Information Forensics and Security, vol. 16, pp. 4147-4183, 2021.
@article{TIFS_PrivacySurveyb,
title = {Privacy-Enhancing Face Biometrics: A Comprehensive Survey},
author = {Blaz Meden and Peter Rot and Philipp Terhorst and Naser Damer and Arjan Kuijper and Walter J. Scheirer and Arun Ross and Peter Peer and Vitomir Struc},
url = {https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=9481149
https://lmi.fe.uni-lj.si/en/visual_privacy_of_faces__a_survey_preprint-compressed/},
doi = {10.1109/TIFS.2021.3096024},
year = {2021},
date = {2021-07-12},
journal = {IEEE Transactions on Information Forensics and Security},
volume = {16},
pages = {4147-4183},
abstract = {Biometric recognition technology has made significant advances over the last decade and is now used across a number of services and applications. However, this widespread deployment has also resulted in privacy concerns and evolving societal expectations about the appropriate use of the technology. For example, the ability to automatically extract age, gender, race, and health cues from biometric data has heightened concerns about privacy leakage. Face recognition technology, in particular, has been in the spotlight, and is now seen by many as posing a considerable risk to personal privacy. In response to these and similar concerns, researchers have intensified efforts towards developing techniques and computational models capable of ensuring privacy to individuals, while still facilitating the utility of face recognition technology in several application scenarios. These efforts have resulted in a multitude of privacy--enhancing techniques that aim at addressing privacy risks originating from biometric systems and providing technological solutions for legislative requirements set forth in privacy laws and regulations, such as GDPR. The goal of this overview paper is to provide a comprehensive introduction into privacy--related research in the area of biometrics and review existing work on Biometric Privacy--Enhancing Techniques (B--PETs) applied to face biometrics. To make this work useful for as wide an audience as possible, several key topics are covered as well, including evaluation strategies used with B--PETs, existing datasets, relevant standards, and regulations and critical open issues that will have to be addressed in the future.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Klemen Pevec; Klemen Grm; Vitomir Štruc
Benchmarking Crowd-Counting Techniques across Image Characteristics Journal Article
In: Elektrotehniski Vestnik, vol. 88, iss. 5, pp. 227-235, 2021.
@article{CrowdCountingPevec,
title = {Benchmarking Crowd-Counting Techniques across Image Characteristics},
author = {Klemen Pevec and Klemen Grm and Vitomir Štruc},
url = {https://lmi.fe.uni-lj.si/wp-content/uploads/2022/10/Pevec.pdf
https://ev.fe.uni-lj.si/5-2021/Pevec.pdf},
year = {2021},
date = {2021-05-01},
journal = {Elektrotehniski Vestnik},
volume = {88},
issue = {5},
pages = {227-235},
abstract = {Crowd--counting is a longstanding computer vision task used in estimating crowd sizes for security purposes at public protests in streets and public gatherings, for collecting crowd statistics at airports, malls, concerts, conferences, and other similar venues, and for monitoring people and crowds during public health crises (such as the one caused by COVID-19). Recently, the performance of automated methods for crowd--counting from single images has improved particularly due to the introduction of deep learning techniques and large labelled training datasets. However, the robustness of these methods to varying imaging conditions, such as weather, image perspective, and large variations in the crowd size has not been studied in-depth in the open literature. To address this gap, a systematic study on the robustness of four recently developed crowd--counting methods is performed in this paper to evaluate their performance with respect to variable (real-life) imaging scenarios that include different event types, weather conditions, image sources and crowd sizes. It is shown that the performance of the tested techniques is degraded in unclear weather conditions (i.e., fog, rain, snow) and also on images taken from large distances by drones. In clear weather conditions, on the other hand, crowd--counting methods can provide accurate and usable results.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Borut Batagelj; Peter Peer; Vitomir Štruc; Simon Dobrišek
How to correctly detect face-masks for COVID-19 from visual information? Journal Article
In: Applied sciences, vol. 11, no. 5, pp. 1-24, 2021, ISSN: 2076-3417.
@article{Batagelj2021,
title = {How to correctly detect face-masks for COVID-19 from visual information?},
author = {Borut Batagelj and Peter Peer and Vitomir Štruc and Simon Dobrišek},
url = {https://www.mdpi.com/2076-3417/11/5/2070/pdf},
doi = {10.3390/app11052070},
issn = {2076-3417},
year = {2021},
date = {2021-03-01},
journal = {Applied sciences},
volume = {11},
number = {5},
pages = {1-24},
abstract = {The new Coronavirus disease (COVID-19) has seriously affected the world. By the end of November 2020, the global number of new coronavirus cases had already exceeded 60 million and the number of deaths 1,410,378 according to information from the World Health Organization (WHO). To limit the spread of the disease, mandatory face-mask rules are now becoming common in public settings around the world. Additionally, many public service providers require customers to wear face-masks in accordance with predefined rules (e.g., covering both mouth and nose) when using public services. These developments inspired research into automatic (computer-vision-based) techniques for face-mask detection that can help monitor public behavior and contribute towards constraining the COVID-19 pandemic. Although existing research in this area has resulted in efficient techniques for face-mask detection, these usually operate under the assumption that modern face detectors provide perfect detection performance (even for masked faces) and that the main goal of the techniques is to detect the presence of face-masks only. In this study, we revisit these common assumptions and explore the following research questions: (i) How well do existing face detectors perform with masked-face images? (ii) Is it possible to detect a proper (regulation-compliant) placement of facial masks? and (iii) How useful are existing face-mask detection techniques for monitoring applications during the COVID-19 pandemic? To answer these and related questions we conduct a comprehensive experimental evaluation of several recent face detectors for their performance with masked-face images. Furthermore, we investigate the usefulness of multiple off-the-shelf deep-learning models for recognizing correct face-mask placement. Finally, we design a complete pipeline for recognizing whether face-masks are worn correctly or not and compare the performance of the pipeline with standard face-mask detection models from the literature. To facilitate the study, we compile a large dataset of facial images from the publicly available MAFA and Wider Face datasets and annotate it with compliant and non-compliant labels. The annotated dataset, called the Face-Mask-Label Dataset (FMLD), is made publicly available to the research community.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
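The pipeline described in the abstract decomposes into two stages: face detection followed by placement classification. Below is a hypothetical sketch of this two-stage structure; the detector and classifier are stand-ins for whichever concrete models the study actually uses:

# Hypothetical sketch of a face-mask compliance pipeline: detect faces first,
# then classify each face crop as compliant / non-compliant mask placement.
def check_mask_compliance(image, face_detector, placement_classifier):
    """image: H x W x 3 array; the two callables are assumed pre-trained models."""
    results = []
    for box in face_detector(image):          # each box: (x, y, w, h) face region
        x, y, w, h = box
        crop = image[y:y + h, x:x + w]
        label = placement_classifier(crop)    # e.g., "compliant" or "non-compliant"
        results.append((box, label))
    return results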
Tim Oblak; Jaka Šircelj; Vitomir Struc; Peter Peer; Franc Solina; Aleš Jaklic
Learning to predict superquadric parameters from depth images with explicit and implicit supervision Journal Article
In: IEEE Access, pp. 1-16, 2021, ISSN: 2169-3536.
@article{Oblak2021,
title = {Learning to predict superquadric parameters from depth images with explicit and implicit supervision},
author = {Tim Oblak and Jaka Šircelj and Vitomir Struc and Peter Peer and Franc Solina and Aleš Jaklic},
url = {https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=9274424},
doi = {10.1109/ACCESS.2020.3041584},
issn = {2169-3536},
year = {2021},
date = {2021-01-01},
journal = {IEEE Access},
pages = {1-16},
abstract = {Reconstruction of 3D space from visual data has always been a significant challenge in the field of computer vision. A popular approach to address this problem can be found in the form of bottom-up reconstruction techniques which try to model complex 3D scenes through a constellation of volumetric primitives. Such techniques are inspired by the current understanding of the human visual system and are, therefore, strongly related to the way humans process visual information, as suggested by recent visual neuroscience literature. While advances have been made in recent years in the area of 3D reconstruction, the problem remains challenging due to the many possible ways of representing 3D data, the ambiguity of determining the shape and general position in 3D space and the difficulty to train efficient models for the prediction of volumetric primitives. In this paper, we address these challenges and present a novel solution for recovering volumetric primitives from depth images. Specifically, we focus on the recovery of superquadrics, a special type of parametric models able to describe a wide array of 3D shapes using only a few parameters. We present a new learning objective that relies on the superquadric (inside-outside) function and develop two learning strategies for training convolutional neural networks (CNN) capable of predicting superquadric parameters. The first uses explicit supervision and penalizes the difference between the predicted and reference superquadric parameters. The second strategy uses implicit supervision and penalizes differences between the input depth images and depth images rendered from the predicted parameters. CNN predictors for superquadric parameters are trained with both strategies and evaluated on a large dataset of synthetic and real-world depth images. Experimental results show that both strategies compare favourably to the existing state-of-the-art and result in high quality 3D reconstructions of the modelled scenes at a much shorter processing time.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
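The inside-outside function mentioned in the abstract is a standard superquadric construct: it evaluates to 1 on the surface, below 1 inside the shape and above 1 outside it. The sketch below implements this function (pose omitted for brevity) together with a hypothetical, simplified surface loss in its spirit; it is not the authors' exact objective:

# Sketch of the superquadric inside-outside function underlying the learning
# objectives described above. Parameterization: sizes (a1, a2, a3) and shape
# exponents (eps1, eps2); rotation/translation are omitted for brevity.
import numpy as np

def inside_outside(x, y, z, a1, a2, a3, eps1, eps2):
    """Returns F(x, y, z): ~1 on the superquadric surface, <1 inside, >1 outside."""
    # Absolute values keep the fractional powers real-valued.
    t = (np.abs(x / a1) ** (2.0 / eps2) + np.abs(y / a2) ** (2.0 / eps2)) ** (eps2 / eps1)
    return t + np.abs(z / a3) ** (2.0 / eps1)

def surface_loss(points, params):
    """Hypothetical, simplified loss: penalize deviation of F from 1 at
    observed surface points. points: (N, 3) array; params: (a1, a2, a3, eps1, eps2)."""
    x, y, z = points.T
    f = inside_outside(x, y, z, *params)
    return np.mean((f - 1.0) ** 2)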
Martin Pernus; Vitomir Struc; Simon Dobrisek
High Resolution Face Editing with Masked GAN Latent Code Optimization Journal Article
In: CoRR, vol. abs/2103.11135, 2021.
@article{DBLP:journals/corr/abs-2103-11135,
title = {High Resolution Face Editing with Masked GAN Latent Code Optimization},
author = {Martin Pernus and Vitomir Struc and Simon Dobrisek},
url = {https://arxiv.org/abs/2103.11135},
year = {2021},
date = {2021-01-01},
journal = {CoRR},
volume = {abs/2103.11135},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Inproceedings
Marija Ivanovska; Vitomir Štruc
A Comparative Study on Discriminative and One--Class Learning Models for Deepfake Detection Inproceedings
In: Proceedings of ERK 2021, pp. 1–4, 2021.
@inproceedings{ERK_Marija_2021,
title = {A Comparative Study on Discriminative and One--Class Learning Models for Deepfake Detection},
author = {Marija Ivanovska and Vitomir Štruc},
url = {https://lmi.fe.uni-lj.si/wp-content/uploads/2021/10/ERK_2021__A_Comparative_Study_of_Discriminative_and_One__Class_Learning_Models_for_Deepfake_Detection.pdf},
year = {2021},
date = {2021-09-20},
booktitle = {Proceedings of ERK 2021},
pages = {1--4},
abstract = {Deepfakes or manipulated face images, where a donor's face is swapped with the face of a target person, have gained enormous popularity among the general public recently. With the advancements in artificial intelligence and generative modeling, such images can nowadays be easily generated and used to spread misinformation and harm individuals, businesses or society. As the tools for generating deepfakes are rapidly improving, it is critical for deepfake detection models to be able to recognize advanced, sophisticated data manipulations, including those that have not been seen during training. In this paper, we explore the use of one--class learning models as an alternative to discriminative methods for the detection of deepfakes. We conduct a comparative study with three popular deepfake datasets and investigate the performance of selected (discriminative and one-class) detection models in matched- and cross-dataset experiments. Our results show that discriminative models significantly outperform one-class models when training and testing data come from the same dataset, but degrade considerably when the characteristics of the testing data deviate from the training setting. In such cases, one-class models tend to generalize much better.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Klemen Grm; Vitomir Štruc
Frequency Band Encoding for Face Super-Resolution Inproceedings
In: Proceedings of ERK 2021, pp. 1-4, 2021.
@inproceedings{Grm-SuperResolution_ERK2021,
title = {Frequency Band Encoding for Face Super-Resolution},
author = {Klemen Grm and Vitomir Štruc},
url = {https://lmi.fe.uni-lj.si/wp-content/uploads/2021/10/SRAE_ERK21.pdf},
year = {2021},
date = {2021-09-10},
booktitle = {Proceedings of ERK 2021},
pages = {1-4},
abstract = {In this paper, we present a novel method for face super-resolution based on an encoder-decoder architecture. Unlike previous approaches, which focused primarily on directly reconstructing the high-resolution face appearance from low-resolution images, our method relies on a multi-stage approach where we learn a face representation in different frequency bands, followed by decoding the representation into a high-resolution image. Using quantitative experiments, we are able to demonstrate that this approach results in better face image reconstruction, as well as aiding in downstream semantic tasks such as face recognition and face verification.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Fadi Boutros; Naser Damer; Jan Niklas Kolf; Kiran Raja; Florian Kirchbuchner; Raghavendra Ramachandra; Arjan Kuijper; Pengcheng Fang; Chao Zhang; Fei Wang; David Montero; Naiara Aginako; Basilio Sierra; Marcos Nieto; Mustafa Ekrem Erakin; Ugur Demir; Hazım Kemal Ekenel; Asaki Kataoka; Kohei Ichikawa; Shizuma Kubo; Jie Zhang; Mingjie He; Dan Han; Shiguang Shan; Klemen Grm; Vitomir Štruc; Sachith Seneviratne; Nuran Kasthuriarachchi; Sanka Rasnayaka; Pedro C. Neto; Ana F. Sequeira; Joao Ribeiro Pinto; Mohsen Saffari; Jaime S. Cardoso
MFR 2021: Masked Face Recognition Competition Inproceedings
In: Proceedings of the IEEE International Joint Conference on Biometrics (IJCB 2021), 2021.
@inproceedings{MFR_IJCB2021,
title = {MFR 2021: Masked Face Recognition Competition},
author = {Fadi Boutros and Naser Damer and Jan Niklas Kolf and Kiran Raja and Florian Kirchbuchner and Raghavendra Ramachandra and Arjan Kuijper and Pengcheng Fang and Chao Zhang and Fei Wang and David Montero and Naiara Aginako and Basilio Sierra and Marcos Nieto and Mustafa Ekrem Erakin and Ugur Demir and Hazım Kemal Ekenel and Asaki Kataoka and Kohei Ichikawa and Shizuma Kubo and Jie Zhang and Mingjie He and Dan Han and Shiguang Shan and Klemen Grm and Vitomir Štruc and Sachith Seneviratne and Nuran Kasthuriarachchi and Sanka Rasnayaka and Pedro C. Neto and Ana F. Sequeira and Joao Ribeiro Pinto and Mohsen Saffari and Jaime S. Cardoso},
url = {https://ieeexplore.ieee.org/iel7/9484326/9484328/09484337.pdf?casa_token=OOL4s274P0YAAAAA:XE7ga2rP_wNom2Zeva75ZwNwN-HKz6kF1HZtkpzrdTdz36eaGcLffWkzOgIe3xU2PqaU30qTLws},
doi = {10.1109/IJCB52358.2021.9484337},
year = {2021},
date = {2021-08-01},
booktitle = {Proceedings of the IEEE International Joint Conference on Biometrics (IJCB 2021)},
abstract = {This paper presents a summary of the Masked Face Recognition Competitions (MFR) held within the 2021 International Joint Conference on Biometrics (IJCB 2021). The competition attracted a total of 10 participating teams with valid submissions. The affiliations of these teams are diverse and associated with academia and industry in nine different countries. These teams successfully submitted 18 valid solutions. The competition is designed to motivate solutions aiming at enhancing the face recognition accuracy of masked faces. Moreover, the competition considered the deployability of the proposed solutions by taking the compactness of the face recognition models into account. A private dataset representing a collaborative, multi-session, real masked, capture scenario is used to evaluate the submitted solutions. In comparison to one of the top-performing academic face recognition solutions, 10 out of the 18 submitted solutions scored a higher masked face verification accuracy.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Caiyong Wang; Yunlong Wang; Kunbo Zhang; Jawad Muhammad; Tianhao Lu; Qi Zhang; Qichuan Tian; Zhaofeng He; Zhenan Sun; Yiwen Zhang; Tianbao Liu; Wei Yang; Dongliang Wu; Yingfeng Liu; Ruiye Zhou; Huihai Wu; Hao Zhang; Junbao Wang; Jiayi Wang; Wantong Xiong; Xueyu Shi; Shao Zeng; Peihua Li; Haodong Sun; Jing Wang; Jiale Zhang; Qi Wang; Huijie Wu; Xinhui Zhang; Haiqing Li; Yu Chen; Liang Chen; Menghan Zhang; Ye Sun; Zhiyong Zhou; Fadi Boutros; Naser Damer; Arjan Kuijper; Juan Tapia; Andres Valenzuela; Christoph Busch; Gourav Gupta; Kiran Raja; Xi Wu; Xiaojie Li; Jingfu Yang; Hongyan Jing; Xin Wang; Bin Kong; Youbing Yin; Qi Song; Siwei Lyu; Shu Hu; Leon Premk; Matej Vitek; Vitomir Štruc; Peter Peer; Jalil Nourmohammadi Khiarak; Farhang Jaryani; Samaneh Salehi Nasab; Seyed Naeim Moafinejad; Yasin Amini; Morteza Noshad
NIR Iris Challenge Evaluation in Non-cooperative Environments: Segmentation and Localization Inproceedings
In: Proceedings of the IEEE International Joint Conference on Biometrics (IJCB 2021), 2021.
@inproceedings{NIR_IJCB2021,
title = {NIR Iris Challenge Evaluation in Non-cooperative Environments: Segmentation and Localization},
author = {Caiyong Wang and Yunlong Wang and Kunbo Zhang and Jawad Muhammad and Tianhao Lu and Qi Zhang and Qichuan Tian and Zhaofeng He and Zhenan Sun and Yiwen Zhang and Tianbao Liu and Wei Yang and Dongliang Wu and Yingfeng Liu and Ruiye Zhou and Huihai Wu and Hao Zhang and Junbao Wang and Jiayi Wang and Wantong Xiong and Xueyu Shi and Shao Zeng and Peihua Li and Haodong Sun and Jing Wang and Jiale Zhang and Qi Wang and Huijie Wu and Xinhui Zhang and Haiqing Li and Yu Chen and Liang Chen and Menghan Zhang and Ye Sun and Zhiyong Zhou and Fadi Boutros and Naser Damer and Arjan Kuijper and Juan Tapia and Andres Valenzuela and Christoph Busch and Gourav Gupta and Kiran Raja and Xi Wu and Xiaojie Li and Jingfu Yang and Hongyan Jing and Xin Wang and Bin Kong and Youbing Yin and Qi Song and Siwei Lyu and Shu Hu and Leon Premk and Matej Vitek and Vitomir Štruc and Peter Peer and Jalil Nourmohammadi Khiarak and Farhang Jaryani and Samaneh Salehi Nasab and Seyed Naeim Moafinejad and Yasin Amini and Morteza Noshad},
url = {https://ieeexplore.ieee.org/iel7/9484326/9484328/09484336.pdf?casa_token=FOKx4ltO-hYAAAAA:dCkNHfumDzPGkAipRdbppNWpzAiUYUrJL6OrAjNmimTxUA0Vmx311-3-J3ej7YQc_zONxEO-XKo},
doi = {10.1109/IJCB52358.2021.9484336},
year = {2021},
date = {2021-08-01},
booktitle = {Proceedings of the IEEE International Joint Conference on Biometrics (IJCB 2021)},
abstract = {For iris recognition in non-cooperative environments, iris segmentation has been regarded as the first most important challenge still open to the biometric community, affecting all downstream tasks from normalization to recognition. In recent years, deep learning technologies have gained significant popularity among various computer vision tasks and also been introduced in iris biometrics, especially iris segmentation. To investigate recent developments and attract more interest of researchers in the iris segmentation method, we organized the 2021 NIR Iris Challenge Evaluation in Non-cooperative Environments: Segmentation and Localization (NIR-ISL 2021) at the 2021 International Joint Conference on Biometrics (IJCB 2021). The challenge was used as a public platform to assess the performance of iris segmentation and localization methods on Asian and African NIR iris images captured in non-cooperative environments. The three best-performing entries achieved solid and satisfactory iris segmentation and localization results in most cases, and their code and models have been made publicly available for reproducibility research.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
2020
Journal Articles
Philipp Terhorst; Kevin Riehl; Naser Damer; Peter Rot; Blaz Bortolato; Florian Kirchbuchner; Vitomir Struc; Arjan Kuijper
PE-MIU: a training-free privacy-enhancing face recognition approach based on minimum information units Journal Article
In: IEEE Access, vol. 2020, 2020.
@article{PEMIU_Access2020,
title = {PE-MIU: a training-free privacy-enhancing face recognition approach based on minimum information units},
author = {Philipp Terhorst and Kevin Riehl and Naser Damer and Peter Rot and Blaz Bortolato and Florian Kirchbuchner and Vitomir Struc and Arjan Kuijper},
url = {https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=9094207},
year = {2020},
date = {2020-06-02},
journal = {IEEE Access},
volume = {2020},
abstract = {Research on soft-biometrics showed that privacy-sensitive information can be deduced from biometric data. Utilizing biometric templates only, information about a person's gender, age, ethnicity, sexual orientation, and health state can be deduced. For many applications, these templates are expected to be used for recognition purposes only. Thus, extracting this information raises major privacy issues. Previous work proposed two kinds of learning-based solutions for this problem. The first ones provide strong privacy-enhancements, but are limited to pre-defined attributes. The second ones achieve more comprehensive but weaker privacy-improvements. In this work, we propose a Privacy-Enhancing face recognition approach based on Minimum Information Units (PE-MIU). PE-MIU, as we demonstrate in this work, is a privacy-enhancement approach for face recognition templates that achieves strong privacy-improvements and is not limited to pre-defined attributes. We exploit the structural differences between face recognition and facial attribute estimation by creating templates in a mixed representation of minimal information units. These representations contain patterns of privacy-sensitive attributes in a highly randomized form. Therefore, the estimation of these attributes becomes hard for function creep attacks. During verification, these units of a probe template are assigned to the units of a reference template by solving an optimal best-matching problem. This allows our approach to maintain a high recognition ability. The experiments are conducted on three publicly available datasets and with five state-of-the-art approaches. Moreover, we conduct the experiments simulating an attacker that knows and adapts to the system's privacy mechanism. The experiments demonstrate that PE-MIU is able to suppress privacy-sensitive information to a significantly higher degree than previous work in all investigated scenarios. At the same time, our solution is able to achieve a verification performance close to that of the unmodified recognition system. Unlike previous works, our approach offers a strong and comprehensive privacy-enhancement without the need for training.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
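The "optimal best-matching problem" from the abstract is a classical linear assignment between blocks of the probe and reference templates. A minimal sketch, assuming Euclidean block distances and an illustrative block size (both our assumptions, not the paper's exact configuration):

# Sketch of the verification step described above: blocks ("minimum information
# units") of a probe template are optimally assigned to blocks of a reference
# template before comparison. Block size and Euclidean costs are assumptions.
import numpy as np
from scipy.optimize import linear_sum_assignment

def split_into_units(template, unit_size):
    # Assumes the template length is divisible by unit_size.
    return template.reshape(-1, unit_size)            # (num_units, unit_size)

def pe_miu_distance(probe, reference, unit_size=16):
    p = split_into_units(probe, unit_size)
    r = split_into_units(reference, unit_size)
    # Pairwise distances between probe and reference units.
    cost = np.linalg.norm(p[:, None, :] - r[None, :, :], axis=-1)
    rows, cols = linear_sum_assignment(cost)          # optimal best-matching problem
    return cost[rows, cols].mean()                    # lower = more likely same identity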
Klemen Grm; Walter J. Scheirer; Vitomir Štruc
Face hallucination using cascaded super-resolution and identity priors Journal Article
In: IEEE Transactions on Image Processing, 2020.
@article{TIPKlemen_2020,
title = {Face hallucination using cascaded super-resolution and identity priors},
author = {Klemen Grm and Walter J. Scheirer and Vitomir Štruc},
url = {https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8866753
https://lmi.fe.uni-lj.si/wp-content/uploads/2023/02/IEEET_face_hallucination_compressed.pdf},
doi = {10.1109/TIP.2019.2945835},
year = {2020},
date = {2020-01-01},
urldate = {2020-01-01},
journal = {IEEE Transactions on Image Processing},
abstract = {In this paper we address the problem of hallucinating high-resolution facial images from low-resolution inputs at high magnification factors. We approach this task with convolutional neural networks (CNNs) and propose a novel (deep) face hallucination model that incorporates identity priors into the learning procedure. The model consists of two main parts: i) a cascaded super-resolution network that upscales the low-resolution facial images, and ii) an ensemble of face recognition models that act as identity priors for the super-resolution network during training. Different from most competing super-resolution techniques that rely on a single model for upscaling (even with large magnification factors), our network uses a cascade of multiple SR models that progressively upscale the low-resolution images using steps of 2×. This characteristic allows us to apply supervision signals (target appearances) at different resolutions and incorporate identity constraints at multiple scales. The proposed C-SRIP model (Cascaded Super Resolution with Identity Priors) is able to upscale (tiny) low-resolution images captured in unconstrained conditions and produce visually convincing results for diverse low-resolution inputs. We rigorously evaluate the proposed model on the Labeled Faces in the Wild (LFW), Helen and CelebA datasets and report superior performance compared to the existing state-of-the-art.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
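The cascaded design described above applies supervision at every ×2 stage and adds identity constraints from face recognition models on the final output. Below is a minimal sketch of such a training objective, with L1 appearance losses and an illustrative identity-loss weight; the concrete losses and weights in C-SRIP may differ:

# Sketch of a cascaded super-resolution objective with identity priors.
# Module internals and the weighting are illustrative assumptions.
import torch.nn.functional as F

def cascade_loss(sr_stages, id_models, lr_img, hr_targets, id_weight=0.1):
    """sr_stages: list of x2 upscaling networks; hr_targets: ground truth at each scale;
    id_models: frozen face recognition models acting as identity priors."""
    x, loss = lr_img, 0.0
    for stage, target in zip(sr_stages, hr_targets):
        x = stage(x)                            # upscale by 2x
        loss = loss + F.l1_loss(x, target)      # appearance supervision at this scale
    for id_model in id_models:                  # identity constraints on the final output
        loss = loss + id_weight * F.mse_loss(id_model(x), id_model(hr_targets[-1]))
    return loss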
Matej Vitek; Peter Rot; Vitomir Struc; Peter Peer
A comprehensive investigation into sclera biometrics: a novel dataset and performance study Journal Article
In: Neural Computing and Applications, pp. 1-15, 2020.
@article{vitek2020comprehensive,
title = {A comprehensive investigation into sclera biometrics: a novel dataset and performance study},
author = {Matej Vitek and Peter Rot and Vitomir Struc and Peter Peer},
url = {https://link.springer.com/epdf/10.1007/s00521-020-04782-1},
doi = {https://doi.org/10.1007/s00521-020-04782-1},
year = {2020},
date = {2020-01-01},
journal = {Neural Computing and Applications},
pages = {1-15},
abstract = {The area of ocular biometrics is among the most popular branches of biometric recognition technology. This area has long been dominated by iris recognition research, while other ocular modalities such as the periocular region or the vasculature of the sclera have received significantly less attention in the literature. Consequently, ocular modalities beyond the iris are not well studied and their characteristics are today still not as well understood. While recent needs for more secure authentication schemes have considerably increased the interest in competing ocular modalities, progress in these areas is still held back by the lack of publicly available datasets that would allow for more targeted research into specific ocular characteristics next to the iris. In this paper, we aim to bridge this gap for the case of sclera biometrics and introduce a novel dataset designed for research into ocular biometrics and most importantly for research into the vasculature of the sclera. Our dataset, called Sclera Blood Vessels, Periocular and Iris (SBVPI), is, to the best of our knowledge, the first publicly available dataset designed specifically with research in sclera biometrics in mind. The dataset contains high-quality RGB ocular images, captured in the visible spectrum, belonging to 55 subjects. Unlike competing datasets, it comes with manual markups of various eye regions, such as the iris, pupil, canthus or eyelashes and a detailed pixel-wise annotation of the complete sclera vasculature for a subset of the images. Additionally, the dataset ships with gender and age labels. The unique characteristics of the dataset allow us to study aspects of sclera biometrics technology that have not been studied before in the literature (e.g. vasculature segmentation techniques) as well as issues that are of key importance for practical recognition systems. Thus, next to the SBVPI dataset we also present in this paper a comprehensive investigation into sclera biometrics and the main covariates that affect the performance of sclera segmentation and recognition techniques, such as gender, age, gaze direction or image resolution. Our experiments not only demonstrate the usefulness of the newly introduced dataset, but also contribute to a better understanding of sclera biometrics in general.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Incollections
Dejan Stepec; Ziga Emersic; Peter Peer; Vitomir Struc
Constellation-Based Deep Ear Recognition Incollection
In: Jiang, R.; Li, CT.; Crookes, D.; Meng, W.; Rosenberger, C. (Ed.): Deep Biometrics: Unsupervised and Semi-Supervised Learning, Springer, 2020, ISBN: 978-3-030-32582-4.
@incollection{Stepec2020COMEar,
title = {Constellation-Based Deep Ear Recognition},
author = {Dejan Stepec and Ziga Emersic and Peter Peer and Vitomir Struc},
editor = {R. Jiang and CT. Li and D. Crookes and W. Meng and C. Rosenberger},
url = {https://link.springer.com/chapter/10.1007/978-3-030-32583-1_8
https://lmi.fe.uni-lj.si/wp-content/uploads/2020/02/DeepBio2019___REMIX.pdf},
doi = {https://doi.org/10.1007/978-3-030-32583-1_8},
isbn = {978-3-030-32582-4},
year = {2020},
date = {2020-01-29},
booktitle = {Deep Biometrics: Unsupervised and Semi-Supervised Learning},
publisher = {Springer},
abstract = {This chapter introduces COM-Ear, a deep constellation model for ear recognition. Different from competing solutions, COM-Ear encodes global as well as local characteristics of ear images and generates descriptive ear representations that ensure competitive recognition performance. The model is designed as a dual-path convolutional neural network (CNN), where one path processes the input in a holistic manner, and the second captures local image characteristics from image patches sampled from the input image. A novel pooling operation, called patch-relevant-information pooling, is also proposed and integrated into the COM-Ear model. The pooling operation helps to select features from the input patches that are locally important and to focus the attention of the network on image regions that are descriptive and important for representation purposes. The model is trained in an end-to-end manner using a combined cross-entropy and center loss. Extensive experiments on the recently introduced Extended Annotated Web Ears (AWEx).},
keywords = {},
pubstate = {published},
tppubtype = {incollection}
}
Inproceedings
Blaž Bortolato; Marija Ivanovska; Peter Rot; Janez Križaj; Philipp Terhorst; Naser Damer; Peter Peer; Vitomir Štruc
Learning privacy-enhancing face representations through feature disentanglement Inproceedings
In: Proceedings of FG 2020, IEEE, 2020.
@inproceedings{BortolatoFG2020,
title = {Learning privacy-enhancing face representations through feature disentanglement},
author = {Blaž Bortolato and Marija Ivanovska and Peter Rot and Janez Križaj and Philipp Terhorst and Naser Damer and Peter Peer and Vitomir Štruc},
url = {https://lmi.fe.uni-lj.si/wp-content/uploads/2020/07/FG2020___Learning_privacy_enhancing_face_representations_through_feature_disentanglement-1.pdf
},
year = {2020},
date = {2020-11-04},
booktitle = {Proceedings of FG 2020},
publisher = {IEEE},
abstract = {Convolutional Neural Networks (CNNs) are today the de-facto standard for extracting compact and discriminative face representations (templates) from images in automatic face recognition systems. Due to the characteristics of CNN models, the generated representations typically encode a multitude of information ranging from identity to soft-biometric attributes, such as age, gender or ethnicity. However, since these representations were computed for the purpose of identity recognition only, the soft-biometric information contained in the templates represents a serious privacy risk. To mitigate this problem, we present in this paper a privacy-enhancing approach capable of suppressing potentially sensitive soft-biometric information in face representations without significantly compromising identity information. Specifically, we introduce a Privacy-Enhancing Face-Representation learning Network (PFRNet) that disentangles identity from attribute information in face representations and consequently allows to efficiently suppress soft-biometrics in face templates. We demonstrate the feasibility of PFRNet on the problem of gender suppression and show through rigorous experiments on the CelebA, Labeled Faces in the Wild (LFW) and Adience datasets that the proposed disentanglement-based approach is highly effective and improves significantly on the existing state-of-the-art.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
M. Vitek; A. Das; Y. Pourcenoux; A. Missler; C. Paumier; S. Das; I. De Ghosh; D. R. Lucio; L. A. Zanlorensi Jr.; D. Menotti; F. Boutros; N. Damer; J. H. Grebe; A. Kuijper; J. Hu; Y. He; C. Wang; H. Liu; Y. Wang; Z. Sun; D. Osorio-Roig; C. Rathgeb; C. Busch; J. Tapia; A.~Valenzuela; G. Zampoukis; L. Tsochatzidis; I. Pratikakis; S. Nathan; R. Suganya; V. Mehta; A. Dhall; K. Raja; G. Gupta; J. N. Khiarak; M. Akbari-Shahper; F. Jaryani; M. Asgari-Chenaghlu; R. Vyas; S. Dakshit; S. Dakshit; P. Peer; U. Pal; V. Štruc
SSBC 2020: Sclera Segmentation Benchmarking Competition in the Mobile Environment Inproceedings
In: International Joint Conference on Biometrics (IJCB 2020), pp. 1–10, 2020.
@inproceedings{SSBC2020,
title = {SSBC 2020: Sclera Segmentation Benchmarking Competition in the Mobile Environment},
author = {M. Vitek and A. Das and Y. Pourcenoux and A. Missler and C. Paumier and S. Das and I. De Ghosh and D. R. Lucio and L. A. Zanlorensi Jr. and D. Menotti and F. Boutros and N. Damer and J. H. Grebe and A. Kuijper and J. Hu and Y. He and C. Wang and H. Liu and Y. Wang and Z. Sun and D. Osorio-Roig and C. Rathgeb and C. Busch and J. Tapia and A.~Valenzuela and G. Zampoukis and L. Tsochatzidis and I. Pratikakis and S. Nathan and R. Suganya and V. Mehta and A. Dhall and K. Raja and G. Gupta and J. N. Khiarak and M. Akbari-Shahper and F. Jaryani and M. Asgari-Chenaghlu and R. Vyas and S. Dakshit and S. Dakshit and P. Peer and U. Pal and V. Štruc},
url = {https://lmi.fe.uni-lj.si/wp-content/uploads/2020/11/IJCB_SSBC_2020.pdf},
year = {2020},
date = {2020-09-28},
booktitle = {International Joint Conference on Biometrics (IJCB 2020)},
pages = {1--10},
abstract = {The paper presents a summary of the 2020 Sclera Segmentation Benchmarking Competition (SSBC), the 7th in the series of group benchmarking efforts centred around the problem of sclera segmentation. Different from previous editions, the goal of SSBC 2020 was to evaluate the performance of sclera-segmentation models on images captured with mobile devices. The competition was used as a platform to assess the sensitivity of existing models to i) differences in mobile devices used for image capture and ii) changes in the ambient acquisition conditions. 26 research groups registered for SSBC 2020, out of which 13 took part in the final round and submitted a total of 16 segmentation models for scoring. These included a wide variety of deep-learning solutions as well as one approach based on standard image processing techniques. Experiments were conducted with three recent datasets. Most of the segmentation models achieved relatively consistent performance across images captured with different mobile devices (with slight differences across devices), but struggled most with low-quality images captured in challenging ambient conditions, i.e., in an indoor environment and with poor lighting. },
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Philipp Terhörst; Marco Huber; Naser Damer; Peter Rot; Florian Kirchbuchner; Vitomir Struc; Arjan Kuijper
Privacy Evaluation Protocols for the Evaluation of Soft-Biometric Privacy-Enhancing Technologies Inproceedings
In: Proceedings of the International Conference of the Biometrics Special Interest Group (BIOSIG) 2020, pp. 1-5, IEEE, 2020, ISSN: 1617-5468.
@inproceedings{Biosig_naser_2020,
title = {Privacy Evaluation Protocols for the Evaluation of Soft-Biometric Privacy-Enhancing Technologies},
author = {Philipp Terhörst and Marco Huber and Naser Damer and Peter Rot and Florian Kirchbuchner and Vitomir Struc and Arjan Kuijper},
url = {https://lmi.fe.uni-lj.si/wp-content/uploads/2020/11/Biosig_privacy.pdf},
issn = {1617-5468},
year = {2020},
date = {2020-09-16},
booktitle = {Proceedings of the International Conference of the Biometrics Special Interest Group (BIOSIG) 2020},
pages = {1-5},
publisher = {IEEE},
abstract = {Biometric data includes privacy-sensitive information, such as soft-biometrics. Soft-biometric privacy enhancing technologies aim at limiting the possibility of deducing such information. Previous works proposed several solutions to this problem using several different evaluation processes, metrics, and attack scenarios. The absence of a standardized evaluation protocol makes a meaningful comparison of these solutions difficult. In this work, we propose privacy evaluation protocols (PEPs) for privacy-enhancing technologies (PETs) dealing with soft-biometric privacy. Our framework evaluates PETs in the most critical scenario of an attacker that knows and adapts to the system's privacy mechanism. Moreover, our PEPs differentiate between PETs of learning-based or training-free nature. To ensure that our protocol meets the highest standards in both cases, it is based on Kerckhoffs's principle of cryptography.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Andraž Puc; Vitomir Štruc; Klemen Grm
Analysis of Race and Gender Bias in Deep Age Estimation Models Inproceedings
In: Proceedings of EUSIPCO 2020, 2020.
@inproceedings{GrmEUSIPCO2020,
title = {Analysis of Race and Gender Bias in Deep Age Estimation Models},
author = {Andraž Puc and Vitomir Štruc and Klemen Grm},
url = {https://lmi.fe.uni-lj.si/wp-content/uploads/2020/07/race_and_gender_bias_eusipco-2.pdf},
year = {2020},
date = {2020-09-01},
booktitle = {Proceedings of EUSIPCO 2020},
abstract = {Due to advances in deep learning and convolutional neural networks (CNNs) there has been significant progress in the field of visual age estimation from face images over recent years. While today's models are able to achieve considerable age estimation accuracy, their behaviour, especially with respect to specific demographic groups, is still not well understood. In this paper, we take a deeper look at CNN-based age estimation models and analyze their performance across different race and gender groups. We use two publicly available off-the-shelf age estimation models, i.e., FaceNet and WideResNet, for our study and analyze their performance on the UTKFace and APPA-REAL datasets. We partition face images into sub-groups based on race, gender and combinations of race and gender. We then compare age estimation results and find that there are noticeable differences in performance across demographics. Specifically, our results show that age estimation accuracy is consistently higher for men than for women, while race does not appear to have consistent effects on the tested models across different test datasets.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Jaka Šircelj; Tim Oblak; Klemen Grm; Uroš Petković; Aleš Jaklič; Peter Peer; Vitomir Štruc; Franc Solina
Segmentation and Recovery of Superquadric Models using Convolutional Neural Networks Inproceedings
In: 25th Computer Vision Winter Workshop (CVWW 2020), 2020.
@inproceedings{sircelj2020sqcnn,
title = {Segmentation and Recovery of Superquadric Models using Convolutional Neural Networks},
author = {Jaka Šircelj and Tim Oblak and Klemen Grm and Uroš Petković and Aleš Jaklič and Peter Peer and Vitomir Štruc and Franc Solina},
url = {https://lmi.fe.uni-lj.si/en/sircelj2020cvww/
https://arxiv.org/abs/2001.10504},
year = {2020},
date = {2020-02-03},
booktitle = {25th Computer Vision Winter Workshop (CVWW 2020)},
abstract = {In this paper we address the problem of representing 3D visual data with parameterized volumetric shape primitives. Specifically, we present a (two-stage) approach built around convolutional neural networks (CNNs) capable of segmenting complex depth scenes into the simpler geometric structures that can be represented with superquadric models. In the first stage, our approach uses a Mask RCNN model to identify superquadric-like structures in depth scenes and then fits superquadric models to the segmented structures using a specially designed CNN regressor. Using our approach we are able to describe complex structures with a small number of interpretable parameters. We evaluated the proposed approach on synthetic as well as real-world depth data and show that our solution does not only result in competitive performance in comparison to the state-of-the-art, but is able to decompose scenes into a number of superquadric models at a fraction of the time required by competing approaches. We make all data and models used in the paper available from https://lmi.fe.uni-lj.si/en/research/resources/sq-seg.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
2019
Journal Articles
Janez Krizaj; Peter Peer; Vitomir Struc; Simon Dobrisek
Simultaneous multi-descent regression and feature learning for landmarking in depth images Journal Article
In: Neural Computing and Applications, 2019, ISSN: 0941-0643.
@article{Krizaj3Docalization,
title = {Simultaneous multi-descent regression and feature learning for landmarking in depth images},
author = {Janez Krizaj and Peter Peer and Vitomir Struc and Simon Dobrisek},
url = {https://link.springer.com/content/pdf/10.1007%2Fs00521-019-04529-7.pdf},
doi = {https://doi.org/10.1007/s00521-019-04529-7},
issn = {0941-0643},
year = {2019},
date = {2019-10-01},
journal = {Neural Computing and Applications},
abstract = {Face alignment (or facial landmarking) is an important task in many face-related applications, ranging from registration, tracking, and animation to higher-level classification problems such as face, expression, or attribute recognition. While several solutions have been presented in the literature for this task so far, reliably locating salient facial features across a wide range of poses still remains challenging. To address this issue, we propose in this paper a novel method for automatic facial landmark localization in 3D face data designed specifically to address appearance variability caused by significant pose variations. Our method builds on recent cascaded regression-based methods to facial landmarking and uses a gating mechanism to incorporate multiple linear cascaded regression models, each trained for a limited range of poses, into a single powerful landmarking model capable of processing arbitrary-posed input data. We develop two distinct approaches around the proposed gating mechanism: (1) the first uses a gated multiple ridge descent mechanism in conjunction with established (hand-crafted) histogram of gradients features for face alignment and achieves state-of-the-art landmarking performance across a wide range of facial poses, and (2) the second simultaneously learns multiple descent directions as well as binary features that are optimal for the alignment tasks and in addition to competitive landmarking results also ensures extremely rapid processing. We evaluate both approaches in rigorous experiments on several popular datasets of 3D face images, i.e., the FRGCv2 and Bosphorus 3D face datasets and image collections F and G from the University of Notre Dame. The results of our evaluation show that both approaches compare favorably to the state-of-the-art, while exhibiting considerable robustness to pose variations.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
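The cascaded-regression core of the method can be written compactly. In a generic formulation (notation ours, for illustration only), each stage refines the landmark estimate with a pose-specific regressor chosen by the gating function:

% Generic cascaded regression with gating (notation is ours, for illustration).
% p_k: current landmark estimate, \phi: feature extraction (e.g., histogram-of-gradients
% or learned binary features), R_k^{(j)}: stage-k regressor trained for pose range j,
% g(\cdot): gating function selecting the pose-specific model.
\begin{align*}
  j^{*} &= g\big(\phi(I, \mathbf{p}_k)\big), \\
  \mathbf{p}_{k+1} &= \mathbf{p}_k + R_k^{(j^{*})}\, \phi(I, \mathbf{p}_k).
\end{align*}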
Jure Kovač; Vitomir Štruc; Peter Peer
Frame-based classification for cross-speed gait recognition Journal Article
In: Multimedia Tools and Applications, vol. 78, no. 5, pp. 5621–5643, 2019, ISSN: 1573-7721.
@article{kovavc2019frame,
title = {Frame-based classification for cross-speed gait recognition},
author = {Jure Kovač and Vitomir Štruc and Peter Peer},
url = {http://rdcu.be/BfJP},
doi = {https://doi.org/10.1007/s11042-017-5469-0},
issn = {1573-7721},
year = {2019},
date = {2019-03-01},
journal = {Multimedia Tools and Applications},
volume = {78},
number = {5},
pages = {5621--5643},
publisher = {Springer},
abstract = {The use of human gait as a means of biometric identification has gained a lot of attention in the past few years, mostly due to its enormous potential. Such biometrics can be captured at public places from a distance without the subjects' collaboration, awareness and even consent. However, there are still numerous challenges caused by the influence of covariate factors like changes of walking speed, view, clothing, footwear etc., that have a negative impact on recognition performance. In this paper we tackle walking-speed changes with a skeleton model-based gait recognition system, focusing on improving algorithm robustness and performance at larger walking-speed changes. We achieve this by proposing a frame-based classification method, which overcomes the main shortcoming of distance-based classification methods, namely their sensitivity to the detection of the gait-cycle starting point. The proposed technique is invariant with respect to gait-cycle starts and as such ensures that classification is independent of gait-cycle start positions. Additionally, we propose a wavelet-transform-based signal approximation, which enables the analysis of feature signals at different frequency-space resolutions and diminishes the need for feature transformations that require training. With the evaluation on the OU-ISIR gait dataset, we demonstrate state-of-the-art performance of the proposed methods.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Janez Križaj; Janez Perš; Simon Dobrišek; Vitomir Štruc
Sistem nadgrajene resničnosti za verifikacijo predmetov v skladiščnih okoljih (An Augmented-Reality System for Object Verification in Warehouse Environments) Journal Article
In: Elektrotehniski Vestnik, vol. 86, no. 1/2, pp. 1–6, 2019.
@article{krivzaj2019sistem,
title = {Sistem nadgrajene resničnosti za verifikacijo predmetov v skladiščnih okoljih},
author = {Janez Križaj and Janez Perš and Simon Dobrišek and Vitomir Štruc},
url = {https://ev.fe.uni-lj.si/1-2-2019/Krizaj.pdf},
year = {2019},
date = {2019-01-01},
journal = {Elektrotehniski Vestnik},
volume = {86},
number = {1/2},
pages = {1--6},
publisher = {Elektrotehniski Vestnik},
abstract = {The paper proposes an augmented reality system for visual object verification that helps warehouse workers perform their work. The system sequentially captures images of objects that the warehouse workers encounter during their work and verifies whether the objects are the ones that the workers are supposed to fetch from storage. The system uses Android-powered smart glasses to capture image data and display results to the user, whereas the computationally-intensive verification task is carried out in the cloud and is implemented using recent deep-learning techniques. By doing so, the system is able to process images in near real-time and achieves a high verification accuracy, as shown by the experimental results.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Incollections
Peter Rot; Matej Vitek; Klemen Grm; Žiga Emeršič; Peter Peer and Vitomir Štruc
Deep Sclera Segmentation and Recognition Incollection
In: Uhl, Andreas; Busch, Christoph; Marcel, Sebastien; Veldhuis, Rainer (Ed.): Handbook of Vascular Biometrics, pp. 395-432, Springer, 2019, ISBN: 978-3-030-27731-4.
@incollection{ScleraNetChapter,
title = {Deep Sclera Segmentation and Recognition},
author = {Peter Rot and Matej Vitek and Klemen Grm and Žiga Emeršič and Peter Peer
and Vitomir Štruc},
editor = {Andreas Uhl and Christoph Busch and Sebastien Marcel and Rainer Veldhuis},
url = {https://link.springer.com/content/pdf/10.1007%2F978-3-030-27731-4_13.pdf},
doi = {https://doi.org/10.1007/978-3-030-27731-4_13},
isbn = {978-3-030-27731-4},
year = {2019},
date = {2019-11-14},
booktitle = {Handbook of Vascular Biometrics},
pages = {395-432},
publisher = {Springer},
chapter = {13},
series = {Advances in Computer Vision and Pattern Recognition},
abstract = {In this chapter, we address the problem of biometric identity recognition from the vasculature of the human sclera. Specifically, we focus on the challenging task of multi-view sclera recognition, where the visible part of the sclera vasculature changes from image to image due to varying gaze (or view) directions. We propose a complete solution for this task built around Convolutional Neural Networks (CNNs) and make several contributions that result in state-of-the-art recognition performance, i.e.: (i) we develop a cascaded CNN assembly that is able to robustly segment the sclera vasculature from the input images regardless of gaze direction, and (ii) we present ScleraNET, a CNN model trained in a multi-task manner (combining losses pertaining to identity and view-direction recognition) that allows for the extraction of discriminative vasculature descriptors that can be used for identity inference. To evaluate the proposed contributions, we also introduce a new dataset of ocular images, called the Sclera Blood Vessels, Periocular and Iris (SBVPI) dataset, which represents one of the few publicly available datasets suitable for research in multi-view sclera segmentation and recognition. The dataset comes with a rich set of annotations, such as a per-pixel markup of various eye parts (including the sclera vasculature), identity, gaze-direction and gender labels. We conduct rigorous experiments on SBVPI with competing techniques from the literature and show that the combination of the proposed segmentation and descriptor-computation models results in highly competitive recognition performance.},
keywords = {},
pubstate = {published},
tppubtype = {incollection}
}
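The multi-task training scheme behind ScleraNET (joint identity and view-direction losses over a shared descriptor) can be illustrated with a short PyTorch sketch. The backbone, head sizes and loss weighting below are stand-ins for the example, not the published architecture.

import torch
import torch.nn as nn

# A shared backbone produces a vasculature descriptor; two heads are trained
# jointly on identity and view-direction labels (toy backbone, illustrative).

class MultiTaskScleraNet(nn.Module):
    def __init__(self, n_identities, n_views, feat_dim=256):
        super().__init__()
        self.backbone = nn.Sequential(
            nn.Conv2d(3, 32, 3, stride=2, padding=1), nn.ReLU(),
            nn.Conv2d(32, 64, 3, stride=2, padding=1), nn.ReLU(),
            nn.AdaptiveAvgPool2d(1), nn.Flatten(),
            nn.Linear(64, feat_dim),
        )
        self.id_head = nn.Linear(feat_dim, n_identities)
        self.view_head = nn.Linear(feat_dim, n_views)

    def forward(self, x):
        f = self.backbone(x)                  # descriptor used for matching
        return f, self.id_head(f), self.view_head(f)

model = MultiTaskScleraNet(n_identities=55, n_views=4)
criterion = nn.CrossEntropyLoss()
x = torch.randn(8, 3, 128, 128)
id_labels = torch.randint(0, 55, (8,))
view_labels = torch.randint(0, 4, (8,))
_, id_logits, view_logits = model(x)
# Combined multi-task objective; the 0.5 weight is an assumed value.
loss = criterion(id_logits, id_labels) + 0.5 * criterion(view_logits, view_labels)
loss.backward()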
Žiga Emeršič; Janez Križaj; Vitomir Štruc; Peter Peer
Deep ear recognition pipeline Incollection
In: Hassaballah, Mahmoud; Hosny, Khalid M. (Ed.): Recent advances in computer vision: theories and applications, vol. 804, Springer, 2019, ISSN: 1860-9503.
@incollection{ZigaBook2019,
title = {Deep ear recognition pipeline},
author = {Žiga Emeršič and Janez Križaj and Vitomir Štruc and Peter Peer},
editor = {Mahmoud Hassaballah and Khalid M. Hosny},
url = {https://lmi.fe.uni-lj.si/wp-content/uploads/2019/08/Emeršič2019_Chapter_DeepEarRecognitionPipeline_submitted.pdf},
doi = {10.1007/978-3-030-03000-1_14},
issn = {1860-9503},
year = {2019},
date = {2019-01-01},
booktitle = {Recent advances in computer vision: theories and applications},
volume = {804},
publisher = {Springer},
abstract = {Ear recognition has seen multiple improvements in recent years and still remains a very active field today. However, recognition and detection have so far mostly been approached separately. Furthermore, deep-learning-based approaches that are popular in other domains have seen limited use in ear recognition, and even less in ear detection. Moreover, to obtain a usable recognition system, a unified pipeline is needed: the input to such a system should be plain images of subjects, and the output should be identities based only on ear biometrics. We conduct separate analyses through detection and identification experiments on a challenging dataset and, using the best-performing approaches, present a novel, unified pipeline. The pipeline is based on convolutional neural networks (CNNs) and presents, to the best of our knowledge, the first CNN-based ear recognition pipeline. The pipeline incorporates both the detection of ears in arbitrary images of people and recognition on the segmented ear regions. The experiments show that the presented system is a state-of-the-art system and, thus, a good foundation for future real-world ear recognition systems.},
keywords = {},
pubstate = {published},
tppubtype = {incollection}
}
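The unified detection-plus-recognition pipeline described in the abstract can be sketched as two chained networks. The fragment below is purely illustrative (stand-in models, hypothetical relative-box format); it shows the flow from a plain image to an identity descriptor, not the published system.

import torch
import torch.nn as nn

detector = nn.Sequential(                      # predicts a crude ear box
    nn.Conv2d(3, 16, 3, stride=2, padding=1), nn.ReLU(),
    nn.AdaptiveAvgPool2d(1), nn.Flatten(), nn.Linear(16, 4), nn.Sigmoid(),
)
recognizer = nn.Sequential(                    # embeds the cropped ear
    nn.Conv2d(3, 32, 3, stride=2, padding=1), nn.ReLU(),
    nn.AdaptiveAvgPool2d(1), nn.Flatten(), nn.Linear(32, 128),
)

def pipeline(image):
    """Full image in, identity descriptor out (illustrative only)."""
    h, w = image.shape[-2:]
    box = detector(image.unsqueeze(0))[0]      # (x, y, w, h) in [0, 1]
    x1 = min(int(box[0] * w), w - 8)           # keep the crop non-empty
    y1 = min(int(box[1] * h), h - 8)
    x2 = min(w, x1 + max(8, int(box[2] * w)))
    y2 = min(h, y1 + max(8, int(box[3] * h)))
    crop = image[:, y1:y2, x1:x2].unsqueeze(0)
    crop = nn.functional.interpolate(crop, size=(64, 64), mode="bilinear",
                                     align_corners=False)
    return recognizer(crop)[0]                 # 128-D identity descriptor

descriptor = pipeline(torch.rand(3, 256, 256))
print(descriptor.shape)   # torch.Size([128])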
Inproceedings
Tim Oblak; Klemen Grm; Aleš Jaklič; Peter Peer; Vitomir Štruc; Franc Solina
Recovery of Superquadrics from Range Images using Deep Learning: A Preliminary Study Inproceedings
In: 2019 IEEE International Work Conference on Bioinspired Intelligence (IWOBI), pp. 45-52, IEEE, 2019.
@inproceedings{oblak2019recovery,
title = {Recovery of Superquadrics from Range Images using Deep Learning: A Preliminary Study},
author = {Tim Oblak and Klemen Grm and Aleš Jaklič and Peter Peer and Vitomir Štruc and Franc Solina},
url = {https://lmi.fe.uni-lj.si/wp-content/uploads/2019/08/Superkvadriki_draft.pdf},
year = {2019},
date = {2019-06-01},
booktitle = {2019 IEEE International Work Conference on Bioinspired Intelligence (IWOBI)},
journal = {arXiv preprint arXiv:1904.06585},
pages = {45-52},
publisher = {IEEE},
abstract = {It has been a longstanding goal in computer vision to describe the 3D physical space in terms of parameterized volumetric models that would allow autonomous machines to understand and interact with their surroundings. Such models are typically motivated by human visual perception and aim to represent all elements of the physical world, ranging from individual objects to complex scenes, using a small set of parameters. One of the de facto standards for approaching this problem is superquadrics - volumetric models that define various 3D shape primitives and can be fitted to actual 3D data (either in the form of point clouds or range images). However, existing solutions to superquadric recovery involve costly iterative fitting procedures, which limit the applicability of such techniques in practice. To alleviate this problem, we explore in this paper the possibility of recovering superquadrics from range images without time-consuming iterative parameter estimation techniques by using contemporary deep-learning models, more specifically, convolutional neural networks (CNNs). We pose the superquadric recovery problem as a regression task and develop a CNN regressor that is able to estimate the parameters of a superquadric model from a given range image. We train the regressor on a large set of synthetic range images, each containing a single (unrotated) superquadric shape, and evaluate the learned model in comparative experiments with the current state-of-the-art. Additionally, we also present a qualitative analysis involving a dataset of real-world objects. The results of our experiments show that the proposed regressor not only outperforms the existing state-of-the-art, but also ensures a 270x faster execution time.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
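Posing superquadric recovery as regression, as the abstract describes, amounts to mapping a range image to a fixed-length parameter vector. Below is a minimal PyTorch sketch, assuming an 8-parameter unrotated superquadric (three sizes, two shape exponents, three position coordinates) and an illustrative network, not the paper's architecture.

import torch
import torch.nn as nn

# A small CNN that regresses superquadric parameters from a range image,
# trained with a plain MSE loss on synthetic ground-truth parameters.

class SuperquadricRegressor(nn.Module):
    def __init__(self, n_params=8):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 32, 5, stride=2, padding=2), nn.ReLU(),
            nn.Conv2d(32, 64, 3, stride=2, padding=1), nn.ReLU(),
            nn.Conv2d(64, 128, 3, stride=2, padding=1), nn.ReLU(),
            nn.AdaptiveAvgPool2d(4), nn.Flatten(),
        )
        self.regressor = nn.Linear(128 * 4 * 4, n_params)

    def forward(self, depth):
        return self.regressor(self.features(depth))

model = SuperquadricRegressor()
depth = torch.randn(16, 1, 256, 256)   # stand-ins for synthetic range images
target = torch.rand(16, 8)             # stand-ins for ground-truth parameters
loss = nn.functional.mse_loss(model(depth), target)
loss.backward()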
Žiga Emeršič; A. Kumar S. V.; B. S. Harish; W. Gutfeter; J. N. Khiarak; A. Pacut; E. Hansley; M. Pamplona Segundo; S. Sarkar; H. Park; G. Pyo Nam; I. J. Kim; S.G. Sangodkar; U. Kacar; M. Kirci; L. Yuan; J. Yuan; H. Zhao; F. Lu; J. Mao; X. Zhang; D. Yaman; F. I. Eyiokur; K. B. Ozler; H. K. Ekenel; D. Paul Chowdhury; S. Bakshi; P. K. Sa; B. Majhni; P. Peer; V. Štruc
The Unconstrained Ear Recognition Challenge 2019 Inproceedings
In: International Conference on Biometrics (ICB 2019), 2019.
@inproceedings{emervsivc2019unconstrained,
title = {The Unconstrained Ear Recognition Challenge 2019},
author = {Žiga Emeršič and A. Kumar S. V. and B. S. Harish and W. Gutfeter and J. N. Khiarak and A. Pacut and E. Hansley and M. Pamplona Segundo and S. Sarkar and H. Park and G. Pyo Nam and I. J. Kim and S.G. Sangodkar and U. Kacar and M. Kirci and L. Yuan and J. Yuan and H. Zhao and F. Lu and J. Mao and X. Zhang and D. Yaman and F. I. Eyiokur and K. B. Ozler and H. K. Ekenel and D. Paul Chowdhury and S. Bakshi and P. K. Sa and B. Majhni and P. Peer and V. Štruc},
url = {https://arxiv.org/pdf/1903.04143.pdf},
year = {2019},
date = {2019-06-01},
booktitle = {International Conference on Biometrics (ICB 2019)},
journal = {arXiv preprint arXiv:1903.04143},
abstract = {This paper presents a summary of the 2019 Unconstrained Ear Recognition Challenge (UERC), the second in a series of group benchmarking efforts centered around the problem of person recognition from ear images captured in uncontrolled settings. The goal of the challenge is to assess the performance of existing ear recognition techniques on a challenging large-scale ear dataset and to analyze the performance of the technology from various viewpoints, such as generalization abilities to unseen data characteristics, sensitivity to rotations, occlusions and image resolution, and performance bias on sub-groups of subjects, selected based on demographic criteria, i.e. gender and ethnicity. Research groups from 12 institutions entered the competition and submitted a total of 13 recognition approaches ranging from descriptor-based methods to deep-learning models. The majority of submissions focused on ensemble-based methods combining either representations from multiple deep models or hand-crafted with learned image descriptors. Our analysis shows that methods incorporating deep learning models clearly outperform techniques relying solely on hand-crafted descriptors, even though both groups of techniques exhibit similar behaviour when it comes to robustness to various covariates, such as the presence of occlusions, changes in (head) pose, or variability in image resolution. The results of the challenge also show that there has been considerable progress since the first UERC in 2017, but that there is still ample room for further research in this area.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Klemen Grm; Martin Pernus; Leo Cluzel; Walter J. Scheirer; Simon Dobrisek; Vitomir Struc
Face Hallucination Revisited: An Exploratory Study on Dataset Bias Inproceedings
In: IEEE Conference on Computer Vision and Pattern Recognition Workshops, 2019.
@inproceedings{grm2019face,
title = {Face Hallucination Revisited: An Exploratory Study on Dataset Bias},
author = {Klemen Grm and Martin Pernus and Leo Cluzel and Walter J. Scheirer and Simon Dobrisek and Vitomir Struc},
url = {http://openaccess.thecvf.com/content_CVPRW_2019/papers/Biometrics/Grm_Face_Hallucination_Revisited_An_Exploratory_Study_on_Dataset_Bias_CVPRW_2019_paper.pdf
https://arxiv.org/pdf/1812.09010.pdf},
year = {2019},
date = {2019-06-01},
booktitle = {IEEE Conference on Computer Vision and Pattern Recognition Workshops},
abstract = {Contemporary face hallucination (FH) models exhibit considerable ability to reconstruct high-resolution (HR) details from low-resolution (LR) face images. This ability is commonly learned from examples of corresponding HR-LR image pairs, created by artificially down-sampling the HR ground truth data. This down-sampling (or degradation) procedure not only defines the characteristics of the LR training data, but also determines the type of image degradations the learned FH models are eventually able to handle. If the image characteristics encountered with real-world LR images differ from the ones seen during training, FH models are still expected to perform well, but in practice may not produce the desired results. In this paper we study this problem and explore the bias introduced into FH models by the characteristics of the training data. We systematically analyze the generalization capabilities of several FH models in various scenarios where the degradation function does not match the training setup and conduct experiments with synthetically downgraded as well as real-life low-quality images. We make several interesting findings that provide insight into existing problems with FH models and point to future research directions.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
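The train/test degradation mismatch studied in the paper can be reproduced in miniature: HR-LR training pairs are created with one down-sampling function, while test images are degraded differently. The sketch below uses bicubic versus nearest-neighbor down-sampling plus noise as stand-in degradations; the paper's exact kernels and parameters may differ.

import torch
import torch.nn.functional as F

# Create an LR image from an HR face crop with a configurable degradation.

def degrade(hr, scale=4, mode="bicubic", noise_sigma=0.0):
    lr = F.interpolate(hr, scale_factor=1.0 / scale, mode=mode,
                       align_corners=False if mode != "nearest" else None)
    if noise_sigma > 0:
        lr = lr + noise_sigma * torch.randn_like(lr)
    return lr.clamp(0, 1)

hr = torch.rand(1, 3, 128, 128)            # stand-in for an HR face crop
lr_train = degrade(hr, mode="bicubic")     # degradation seen during training
lr_test = degrade(hr, mode="nearest", noise_sigma=0.05)  # unseen degradation
print(lr_train.shape, lr_test.shape)       # both (1, 3, 32, 32)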
Juš Lozej; Dejan Štepec; Vitomir Štruc; Peter Peer
Influence of segmentation on deep iris recognition performance Inproceedings
In: 7th IAPR/IEEE International Workshop on Biometrics and Forensics (IWBF 2019), 2019.
@inproceedings{lozej2019influence,
title = {Influence of segmentation on deep iris recognition performance},
author = {Juš Lozej and Dejan Štepec and Vitomir Štruc and Peter Peer},
url = {https://arxiv.org/pdf/1901.10431.pdf},
year = {2019},
date = {2019-03-01},
booktitle = {7th IAPR/IEEE International Workshop on Biometrics and Forensics (IWBF 2019)},
journal = {arXiv preprint arXiv:1901.10431},
abstract = {Despite the rise of deep learning in numerous areas of computer vision and image processing, iris recognition has not benefited considerably from these trends so far. Most of the existing research on deep iris recognition is focused on new models for generating discriminative and robust iris representations and relies on methodologies akin to traditional iris recognition pipelines. Hence, the proposed models do not approach iris recognition in an end-to-end manner, but rather use standard heuristic iris segmentation (and unwrapping) techniques to produce normalized inputs for the deep learning models. However, because deep learning is able to model very complex data distributions and nonlinear data changes, an obvious question arises: how important is the use of traditional segmentation methods in a deep learning setting? To answer this question, we present in this paper an empirical analysis of the impact of iris segmentation on the performance of deep learning models using a simple two-stage pipeline consisting of a segmentation and a recognition step. We evaluate how the accuracy of segmentation influences recognition performance, but also examine whether segmentation is needed at all. We use the CASIA Thousand and SBVPI datasets for the experiments and report several interesting findings.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
2018
Journal Articles
Klemen Grm; Vitomir Štruc
Deep face recognition for surveillance applications Journal Article
In: IEEE Intelligent Systems, vol. 33, no. 3, pp. 46–50, 2018.
@article{GrmIEEE2018,
title = {Deep face recognition for surveillance applications},
author = {Klemen Grm and Vitomir Štruc},
url = {https://lmi.fe.uni-lj.si/wp-content/uploads/2019/08/UniversityOfLjubljana_IEEE_IS_Submission.pdf},
year = {2018},
date = {2018-05-01},
journal = {IEEE Intelligent Systems},
volume = {33},
number = {3},
pages = {46--50},
abstract = {Automated person recognition from surveillance quality footage is an open research problem with many potential application areas. In this paper, we aim at addressing this problem by presenting a face recognition approach tailored towards surveillance applications. The presented approach is based on domain-adapted convolutional neural networks and ranked second in the International Challenge on Biometric Recognition in the Wild (ICB-RW) 2016. We evaluate the performance of the presented approach on part of the Quis-Campi dataset and compare it against several existing face recognition techniques and one (state-of-the-art) commercial system. We find that the domain-adapted convolutional network outperforms all other assessed techniques, but is still inferior to human performance.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Žiga Emeršič; Blaž Meden; Peter Peer; Vitomir Štruc
Evaluation and analysis of ear recognition models: performance, complexity and resource requirements Journal Article
In: Neural Computing and Applications, pp. 1–16, 2018, ISBN: 0941-0643.
@article{emervsivc2018evaluation,
title = {Evaluation and analysis of ear recognition models: performance, complexity and resource requirements},
author = {Žiga Emeršič and Blaž Meden and Peter Peer and Vitomir Štruc},
url = {https://rdcu.be/Os7a},
doi = {https://doi.org/10.1007/s00521-018-3530-1},
isbn = {0941-0643},
year = {2018},
date = {2018-05-01},
journal = {Neural Computing and Applications},
pages = {1--16},
publisher = {Springer},
abstract = {Ear recognition technology has long been dominated by (local) descriptor-based techniques due to their formidable recognition performance and robustness to various sources of image variability. While deep-learning-based techniques have started to appear in this field only recently, they have already shown potential for further boosting the performance of ear recognition technology and dethroning descriptor-based methods as the current state of the art. However, while recognition performance is often the key factor when selecting recognition models for biometric technology, it is equally important that the behavior of the models is understood and their sensitivity to different covariates is known and well explored. Other factors, such as the train- and test-time complexity or resource requirements, are also paramount and need to be considered when designing recognition systems. To explore these issues, we present in this paper a comprehensive analysis of several descriptor- and deep-learning-based techniques for ear recognition. Our goal is to discover weak points of contemporary techniques, study the characteristics of the existing technology and identify open problems worth exploring in the future. We conduct our analysis through identification experiments on the challenging Annotated Web Ears (AWE) dataset and report our findings. The results of our analysis show that the presence of accessories and high degrees of head movement significantly impact the identification performance of all types of recognition models, whereas mild degrees of the listed factors and other covariates, such as gender and ethnicity, impact the identification performance only to a limited extent. From a test-time-complexity point of view, the results suggest that lightweight deep models can be as fast as descriptor-based methods given appropriate computing hardware, but require significantly more resources during training, where descriptor-based methods have a clear advantage. As an additional contribution, we also introduce a novel dataset of ear images, called AWE Extended (AWEx), which we collected from the web for the training of the deep models used in our experiments. AWEx contains 4104 images of 346 subjects and represents one of the largest and most challenging (publicly available) datasets of unconstrained ear images at the disposal of the research community.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Žiga Emeršič; Luka Gabriel; Vitomir Štruc; Peter Peer
Convolutional encoder--decoder networks for pixel-wise ear detection and segmentation Journal Article
In: IET Biometrics, vol. 7, no. 3, pp. 175–184, 2018.
@article{emervsivc2018convolutional,
title = {Convolutional encoder--decoder networks for pixel-wise ear detection and segmentation},
author = {Žiga Emeršič and Luka Gabriel and Vitomir Štruc and Peter Peer},
url = {https://arxiv.org/pdf/1702.00307.pdf},
year = {2018},
date = {2018-03-01},
journal = {IET Biometrics},
volume = {7},
number = {3},
pages = {175--184},
publisher = {IET},
abstract = {Object detection and segmentation represent the basis for many tasks in computer and machine vision. In biometric recognition systems, the detection of the region-of-interest (ROI) is one of the most crucial steps in the processing pipeline, significantly impacting the performance of the entire recognition system. Existing approaches to ear detection are commonly susceptible to the presence of severe occlusions, ear accessories or variable illumination conditions, and often deteriorate in their performance when applied to ear images captured in unconstrained settings. To address these shortcomings, we present a novel ear detection technique based on convolutional encoder-decoder networks (CEDs). We formulate the problem of ear detection as a two-class segmentation problem and design and train a CED-network architecture to distinguish between image pixels belonging to the ear and the non-ear class. Unlike competing techniques, our approach does not simply return a bounding box around the detected ear, but provides detailed, pixel-wise information about the location of the ears in the image. Experiments on a dataset gathered from the web (a.k.a. in the wild) show that the proposed technique ensures good detection results in the presence of various covariate factors and significantly outperforms competing methods from the literature.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
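The two-class segmentation formulation of ear detection can be sketched with a toy convolutional encoder-decoder. Layer sizes below are illustrative, not the published CED architecture; the point is the per-pixel ear/non-ear classification objective.

import torch
import torch.nn as nn

# A tiny encoder-decoder that outputs per-pixel class logits, trained with a
# standard cross-entropy loss against a binary ear mask.

class TinyCED(nn.Module):
    def __init__(self, n_classes=2):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 32, 3, stride=2, padding=1), nn.ReLU(),
            nn.Conv2d(32, 64, 3, stride=2, padding=1), nn.ReLU(),
        )
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(64, 32, 4, stride=2, padding=1), nn.ReLU(),
            nn.ConvTranspose2d(32, n_classes, 4, stride=2, padding=1),
        )

    def forward(self, x):
        return self.decoder(self.encoder(x))   # (N, 2, H, W) logits

model = TinyCED()
img = torch.rand(2, 3, 128, 128)
mask = torch.randint(0, 2, (2, 128, 128))      # stand-in ground-truth masks
logits = model(img)
loss = nn.functional.cross_entropy(logits, mask)
loss.backward()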
Blaž Meden; Žiga Emeršič; Vitomir Štruc; Peter Peer
k-Same-Net: k-Anonymity with Generative Deep Neural Networks for Face Deidentification Journal Article
In: Entropy, vol. 20, no. 1, pp. 60, 2018.
@article{meden2018k,
title = {k-Same-Net: k-Anonymity with Generative Deep Neural Networks for Face Deidentification},
author = {Blaž Meden and Žiga Emeršič and Vitomir Štruc and Peter Peer},
url = {https://www.mdpi.com/1099-4300/20/1/60/pdf},
year = {2018},
date = {2018-01-01},
journal = {Entropy},
volume = {20},
number = {1},
pages = {60},
publisher = {Multidisciplinary Digital Publishing Institute},
abstract = {Image and video data are today being shared between government entities and other relevant stakeholders on a regular basis and require careful handling of the personal information contained therein. A popular approach to ensure privacy protection in such data is the use of deidentification techniques, which aim at concealing the identity of individuals in the imagery while still preserving certain aspects of the data after deidentification. In this work, we propose a novel approach towards face deidentification, called k-Same-Net, which combines recent Generative Neural Networks (GNNs) with the well-known k-Anonymity mechanism and provides formal guarantees regarding privacy protection on a closed set of identities. Our GNN is able to generate synthetic surrogate face images for deidentification by seamlessly combining features of identities used to train the GNN model. Furthermore, it allows us to control the image-generation process with a small set of appearance-related parameters that can be used to alter specific aspects (e.g., facial expressions, age, gender) of the synthesized surrogate images. We demonstrate the feasibility of k-Same-Net in comprehensive experiments on the XM2VTS and CK+ datasets. We evaluate the efficacy of the proposed approach through reidentification experiments with recent recognition models and compare our results with competing deidentification techniques from the literature. We also present facial expression recognition experiments to demonstrate the utility-preservation capabilities of k-Same-Net. Our experimental results suggest that k-Same-Net is a viable option for facial deidentification that exhibits several desirable characteristics when compared to existing solutions in this area.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
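The k-Anonymity principle underlying k-Same methods can be illustrated with a few lines of NumPy: each face (represented here as a feature vector) is replaced by a surrogate computed from a group of k identities, so a deidentified image maps back to at least k possible subjects. k-Same-Net itself feeds such mixed identity codes to a generative network; the greedy averaging below is only a sketch of the anonymity mechanism.

import numpy as np

def k_same_surrogates(gallery, k=3):
    """Greedily group faces by similarity and replace each with the group mean.
    Note: the final group may hold fewer than k members; a production k-Same
    implementation folds the remainder into an earlier group to preserve the
    k-anonymity guarantee."""
    remaining = list(range(len(gallery)))
    surrogate = np.empty_like(gallery)
    while remaining:
        anchor = remaining.pop(0)
        dists = [(np.linalg.norm(gallery[anchor] - gallery[j]), j)
                 for j in remaining]
        group = [anchor] + [j for _, j in sorted(dists)[: k - 1]]
        mean = gallery[group].mean(axis=0)
        for j in group:
            surrogate[j] = mean
            if j in remaining:
                remaining.remove(j)
    return surrogate

faces = np.random.default_rng(0).random((10, 64))  # stand-in identity codes
deidentified = k_same_surrogates(faces, k=3)
print(len(np.unique(deidentified, axis=0)))        # number of distinct surrogates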
Robert Šket; Tadej Debevec; Susanne Kublik; Michael Schloter; Anne Schoeller; Boštjan Murovec; Katarina Vogel Mikuš; Damjan Makuc; Klemen Pečnik; Janez Plavec; Igor B Mekjavić; Ola Eiken; Zala Prevoršek; Blaž Stres
Intestinal Metagenomes and Metabolomes in Healthy Young Males: Inactivity and Hypoxia Generated Negative Physiological Symptoms Precede Microbial Dysbiosis Journal Article
In: Frontiers in Physiology, vol. 9, pp. 198, 2018, ISSN: 1664-042X.
@article{10.3389/fphys.2018.00198,
title = {Intestinal Metagenomes and Metabolomes in Healthy Young Males: Inactivity and Hypoxia Generated Negative Physiological Symptoms Precede Microbial Dysbiosis},
author = {Robert Šket and Tadej Debevec and Susanne Kublik and Michael Schloter and Anne Schoeller and Boštjan Murovec and Katarina Vogel Mikuš and Damjan Makuc and Klemen Pečnik and Janez Plavec and Igor B Mekjavić and Ola Eiken and Zala Prevoršek and Blaž Stres},
url = {https://www.frontiersin.org/article/10.3389/fphys.2018.00198},
doi = {10.3389/fphys.2018.00198},
issn = {1664-042X},
year = {2018},
date = {2018-01-01},
journal = {Frontiers in Physiology},
volume = {9},
pages = {198},
abstract = {We explored the metagenomic, metabolomic and trace metal makeup of intestinal microbiota and environment in healthy male participants during the run-in (5 day) and the following three 21-day interventions: normoxic bedrest (NBR), hypoxic bedrest (HBR) and hypoxic ambulation (HAmb) which were carried out within a controlled laboratory environment (circadian rhythm, fluid and dietary intakes, microbial bioburden, oxygen level, exercise). The fraction of inspired O2 (FiO2) and partial pressure of inspired O2 (PiO2) were 0.209 and 133.1 ± 0.3 mmHg for the NBR and 0.141 ± 0.004 and 90.0 ± 0.4 mmHg (~4000 m simulated altitude) for HBR and HAmb interventions, respectively. Shotgun metagenomes were analyzed at various taxonomic and functional levels, 1H- and 13C -metabolomes were processed using standard quantitative and human expert approaches, whereas metals were assessed using X-ray fluorescence spectrometry. Inactivity and hypoxia resulted in a significant increase in the genus Bacteroides in HBR, in genes coding for proteins involved in iron acquisition and metabolism, cell wall, capsule, virulence, defense and mucin degradation, such as beta-galactosidase (EC3.2.1.23), α-L-fucosidase (EC3.2.1.51), Sialidase (EC3.2.1.18) and α-N-acetylglucosaminidase (EC3.2.1.50). In contrast, the microbial metabolomes, intestinal element and metal profiles, the diversity of bacterial, archaeal and fungal microbial communities were not significantly affected. The observed progressive decrease in defecation frequency and concomitant increase in the electrical conductivity (EC) preceded or took place in absence of significant changes at the taxonomic, functional gene, metabolome and intestinal metal profile levels. The fact that the genus Bacteroides and proteins involved in iron acquisition and metabolism, cell wall, capsule, virulence and mucin degradation were enriched at the end of HBR suggest that both constipation and EC decreased intestinal metal availability leading to modified expression of co-regulated genes in Bacteroides genomes. Bayesian network analysis was used to derive the first hierarchical model of initial inactivity mediated deconditioning steps over time. The PlanHab wash-out period corresponded to a profound life-style change (i.e. reintroduction of exercise) that resulted in stepwise amelioration of the negative physiological symptoms, indicating that exercise apparently prevented the crosstalk between the microbial physiology, mucin degradation and proinflammatory immune activities in the host.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Boštjan Murovec; Damjan Makuc; Sabina Kolbl Repinc; Zala Prevoršek; Domen Zavec; Robert Šket; Klemen Pečnik; Janez Plavec; Blaž Stres
1H NMR metabolomics of microbial metabolites in the four MW agricultural biogas plant reactors: A case study of inhibition mirroring the acute rumen acidosis symptoms Journal Article
In: Journal of Environmental Management, vol. 222, pp. 428 - 435, 2018, ISSN: 0301-4797.
@article{MUROVEC2018428,
title = {1H NMR metabolomics of microbial metabolites in the four MW agricultural biogas plant reactors: A case study of inhibition mirroring the acute rumen acidosis symptoms},
author = {Boštjan Murovec and Damjan Makuc and Sabina Kolbl Repinc and Zala Prevoršek and Domen Zavec and Robert Šket and Klemen Pečnik and Janez Plavec and Blaž Stres},
url = {http://www.sciencedirect.com/science/article/pii/S0301479718305991},
doi = {https://doi.org/10.1016/j.jenvman.2018.05.068},
issn = {0301-4797},
year = {2018},
date = {2018-01-01},
journal = {Journal of Environmental Management},
volume = {222},
pages = {428 - 435},
abstract = {In this study, nuclear magnetic resonance (1H NMR) spectroscopic profiling was used to provide a more comprehensive view of microbial metabolites associated with poor reactor performance in a full-scale 4 MW mesophilic agricultural biogas plant under fully operational and also under inhibited conditions. Multivariate analyses were used to assess the significance of differences between reactors whereas artificial neural networks (ANN) were used to identify the key metabolites responsible for inhibition and their network of interaction. Based on the results of nm-MDS ordination the subsamples of each reactor were similar, but not identical, despite homogenization of the full-scale reactors before sampling. Hence, a certain extent of variability due to the size of the system under analysis was transferred into metabolome analysis. Multivariate analysis showed that fully active reactors were clustered separately from those containing inhibited reactor metabolites and were significantly different. Furthermore, the three distinct inhibited states were significantly different from each other. The inhibited metabolomes were enriched in acetate, caprylate, trimethylamine, thymine, pyruvate, alanine, xanthine and succinate. The differences in the metabolic fingerprint between inactive and fully active reactors observed in this study resembled closely the metabolites differentiating the (sub) acute rumen acidosis inflicted and healthy rumen metabolomes, creating thus favorable conditions for the growth and activity of pathogenic bacteria. The consistency of our data with those reported before for rumen ecosystems shows that 1H NMR based metabolomics is a reliable approach for the evaluation of metabolic events at full-scale biogas reactors.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Inproceedings
Janez Križaj; Žiga Emeršič; Simon Dobrišek; Peter Peer; Vitomir Štruc
Localization of Facial Landmarks in Depth Images Using Gated Multiple Ridge Descent Inproceedings
In: 2018 IEEE International Work Conference on Bioinspired Intelligence (IWOBI), pp. 1–8, IEEE 2018.
@inproceedings{krivzaj2018localization,
title = {Localization of Facial Landmarks in Depth Images Using Gated Multiple Ridge Descent},
author = {Janez Križaj and Žiga Emeršič and Simon Dobrišek and Peter Peer and Vitomir Štruc},
url = {https://ieeexplore.ieee.org/abstract/document/8464215},
year = {2018},
date = {2018-09-01},
booktitle = {2018 IEEE International Work Conference on Bioinspired Intelligence (IWOBI)},
pages = {1--8},
organization = {IEEE},
abstract = {A novel method for automatic facial landmark localization is presented. The method builds on the supervised descent framework, which was shown to successfully localize landmarks in the presence of large expression variations and mild occlusions, but struggles when localizing landmarks on faces with large pose variations. We propose an extension of the supervised descent framework that trains multiple descent maps and results in increased robustness to pose variations. The performance of the proposed method is demonstrated on the Bosphorus, the FRGC and the UND data sets for the problem of facial landmark localization from 3D data. Our experimental results show that the proposed method exhibits increased robustness to pose variations, while retaining high performance in the case of expression and occlusion variations.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Matej Kristan; Ales Leonardis; Jiri Matas; Michael Felsberg; Roman Pflugfelder; Luka Cehovin Zajc; Tomas Vojir; Goutam Bhat; Alan Lukezic; Abdelrahman Eldesokey; Vitomir Štruc; Klemen Grm; others
The sixth visual object tracking VOT2018 challenge results Inproceedings
In: European Conference on Computer Vision Workshops (ECCV-W 2018), 2018.
@inproceedings{kristan2018sixth,
title = {The sixth visual object tracking VOT2018 challenge results},
author = {Matej Kristan and Ales Leonardis and Jiri Matas and Michael Felsberg and Roman Pflugfelder and Luka Cehovin Zajc and Tomas Vojir and Goutam Bhat and Alan Lukezic and Abdelrahman Eldesokey and Vitomir Štruc and Klemen Grm and others},
url = {http://openaccess.thecvf.com/content_ECCVW_2018/papers/11129/Kristan_The_sixth_Visual_Object_Tracking_VOT2018_challenge_results_ECCVW_2018_paper.pdf},
year = {2018},
date = {2018-09-01},
booktitle = {European Conference on Computer Vision Workshops (ECCV-W 2018)},
abstract = {The Visual Object Tracking challenge VOT2018 is the sixth annual tracker benchmarking activity organized by the VOT initiative. Results of over eighty trackers are presented; many are state-of-the-art trackers published at major computer vision conferences or in journals in recent years. The evaluation included the standard VOT and other popular methodologies for short-term tracking analysis and a “real-time” experiment simulating a situation where a tracker processes images as if provided by a continuously running sensor. A long-term tracking sub-challenge has been introduced to the set of standard VOT sub-challenges. The new sub-challenge focuses on long-term tracking properties, namely coping with target disappearance and reappearance. A new dataset has been compiled and a performance evaluation methodology that focuses on long-term tracking capabilities has been adopted. The VOT toolkit has been updated to support both the standard short-term and the new long-term tracking sub-challenges. Performance of the tested trackers typically by far exceeds standard baselines. The source code for most of the trackers is publicly available from the VOT page. The dataset, the evaluation kit and the results are publicly available at the challenge website.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Peter Rot; Žiga Emeršič; Vitomir Struc; Peter Peer
Deep multi-class eye segmentation for ocular biometrics Inproceedings
In: 2018 IEEE International Work Conference on Bioinspired Intelligence (IWOBI), pp. 1–8, IEEE 2018.
@inproceedings{rot2018deep,
title = {Deep multi-class eye segmentation for ocular biometrics},
author = {Peter Rot and Žiga Emeršič and Vitomir Struc and Peter Peer},
url = {https://lmi.fe.uni-lj.si/wp-content/uploads/2019/08/MultiClassReduced.pdf},
year = {2018},
date = {2018-07-01},
booktitle = {2018 IEEE International Work Conference on Bioinspired Intelligence (IWOBI)},
pages = {1--8},
organization = {IEEE},
abstract = {Segmentation techniques for ocular biometrics typically focus on finding a single eye region in the input image at a time. Only limited work has been done on multi-class eye segmentation, despite a number of obvious advantages. In this paper we address this gap and present a deep multi-class eye segmentation model built around the SegNet architecture. We train the model on a small dataset (of 120 samples) of eye images and observe it to generalize well to unseen images and to ensure highly accurate segmentation results. We evaluate the model on the Multi-Angle Sclera Database (MASD) dataset and describe comprehensive experiments focusing on: i) segmentation performance, ii) error analysis, iii) the sensitivity of the model to changes in view direction, and iv) comparisons with competing single-class techniques. Our results show that the proposed model is a viable solution for multi-class eye segmentation suitable for recognition (multi-biometric) pipelines based on ocular characteristics.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Juš Lozej; Blaž Meden; Vitomir Struc; Peter Peer
End-to-end iris segmentation using U-Net Inproceedings
In: 2018 IEEE International Work Conference on Bioinspired Intelligence (IWOBI), pp. 1–6, IEEE 2018.
@inproceedings{lozej2018end,
title = {End-to-end iris segmentation using U-Net},
author = {Juš Lozej and Blaž Meden and Vitomir Struc and Peter Peer},
url = {https://lmi.fe.uni-lj.si/wp-content/uploads/2019/08/IWOBI_2018_paper_15.pdf},
year = {2018},
date = {2018-07-01},
booktitle = {2018 IEEE International Work Conference on Bioinspired Intelligence (IWOBI)},
pages = {1--6},
organization = {IEEE},
abstract = {Iris segmentation is an important research topic that received significant attention from the research community over the years. Traditional iris segmentation techniques have typically been focused on hand-crafted procedures that, nonetheless, achieved remarkable segmentation performance even with images captured in difficult settings. With the success of deep-learning models, researchers are increasingly looking towards convolutional neural networks (CNNs) to further improve on the accuracy of existing iris segmentation techniques and several CNN-based techniques have already been presented recently in the literature. In this paper we also consider deep-learning models for iris segmentation and present an iris segmentation approach based on the popular U-Net architecture. Our model is trainable end-to-end and, hence, avoids the need for hand-designing the segmentation procedure. We evaluate the model on the CASIA dataset and report encouraging results in comparison to existing techniques used in this area.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
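A minimal U-Net-style model for end-to-end iris segmentation might look as follows. This toy version has a single skip connection and is far shallower than the actual U-Net, but it reproduces the encoder-decoder-with-skips pattern and the per-pixel training objective.

import torch
import torch.nn as nn

def block(cin, cout):
    return nn.Sequential(nn.Conv2d(cin, cout, 3, padding=1), nn.ReLU(),
                         nn.Conv2d(cout, cout, 3, padding=1), nn.ReLU())

class TinyUNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.enc1 = block(3, 32)
        self.enc2 = block(32, 64)
        self.pool = nn.MaxPool2d(2)
        self.up = nn.ConvTranspose2d(64, 32, 2, stride=2)
        self.dec1 = block(64, 32)              # 64 = 32 skip + 32 upsampled
        self.out = nn.Conv2d(32, 1, 1)         # iris vs. background logit

    def forward(self, x):
        e1 = self.enc1(x)
        e2 = self.enc2(self.pool(e1))
        d1 = self.dec1(torch.cat([self.up(e2), e1], dim=1))
        return self.out(d1)

model = TinyUNet()
img = torch.rand(2, 3, 64, 64)
target = torch.randint(0, 2, (2, 1, 64, 64)).float()  # stand-in iris masks
loss = nn.functional.binary_cross_entropy_with_logits(model(img), target)
loss.backward()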
Blaz Meden; Peter Peer; Vitomir Struc
Selective Face Deidentification with End-to-End Perceptual Loss Learning Inproceedings
In: 2018 IEEE International Work Conference on Bioinspired Intelligence (IWOBI), pp. 1–7, IEEE 2018.
@inproceedings{meden2018selective,
title = {Selective Face Deidentification with End-to-End Perceptual Loss Learning},
author = {Blaz Meden and Peter Peer and Vitomir Struc},
url = {https://lmi.fe.uni-lj.si/wp-content/uploads/2019/08/Selective_Face_Deidentification_with_End_to_End_Perceptual_Loss_Learning.pdf},
year = {2018},
date = {2018-06-01},
booktitle = {2018 IEEE International Work Conference on Bioinspired Intelligence (IWOBI)},
pages = {1--7},
organization = {IEEE},
abstract = {Privacy is a highly debatable topic in the modern technological era. With the advent of massive video and image data (which in a lot of cases contains personal information on the recorded subjects), there is an imminent need for efficient privacy protection mechanisms. To this end, we develop in this work a novel Face Deidentification Network (FaDeNet) that is able to alter the input faces in such a way that automated recognition techniques fail to recognize the subjects in the images, while this is still possible for human observers. FaDeNet is based on an encoder-decoder architecture that is trained to auto-encode the input image, while (at the same time) minimizing the recognition performance of a secondary network that is used as a so-called identity critic in FaDeNet. We present experiments on the Radboud Faces Dataset and observe encouraging results.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
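The identity-critic training signal described in the abstract combines a reconstruction term with a term that penalizes recognizability. Below is a minimal sketch with stand-in modules and an assumed loss weighting, not the published FaDeNet.

import torch
import torch.nn as nn

autoencoder = nn.Sequential(                   # stand-in encoder-decoder
    nn.Conv2d(3, 32, 3, stride=2, padding=1), nn.ReLU(),
    nn.ConvTranspose2d(32, 3, 4, stride=2, padding=1), nn.Sigmoid(),
)
critic = nn.Sequential(                        # frozen identity classifier
    nn.Conv2d(3, 16, 3, stride=2, padding=1), nn.ReLU(),
    nn.AdaptiveAvgPool2d(1), nn.Flatten(), nn.Linear(16, 100),
)
for p in critic.parameters():                  # only the autoencoder learns
    p.requires_grad_(False)

x = torch.rand(4, 3, 64, 64)
identity = torch.randint(0, 100, (4,))
out = autoencoder(x)
recon = nn.functional.mse_loss(out, x)         # keep the image plausible
# Maximize the critic's loss on the true identity by minimizing its negative.
fool = -nn.functional.cross_entropy(critic(out), identity)
(recon + 0.1 * fool).backward()                # 0.1 is an assumed weight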
Sandipan Banerjee; Joel Brogan; Janez Krizaj; Aparna Bharati; Brandon RichardWebster; Vitomir Struc; Patrick J. Flynn; Walter J. Scheirer
To frontalize or not to frontalize: Do we really need elaborate pre-processing to improve face recognition? Inproceedings
In: 2018 IEEE Winter Conference on Applications of Computer Vision (WACV), pp. 20–29, IEEE 2018.
@inproceedings{banerjee2018frontalize,
title = {To frontalize or not to frontalize: Do we really need elaborate pre-processing to improve face recognition?},
author = {Sandipan Banerjee and Joel Brogan and Janez Krizaj and Aparna Bharati and Brandon RichardWebster and Vitomir Struc and Patrick J. Flynn and Walter J. Scheirer},
url = {https://lmi.fe.uni-lj.si/wp-content/uploads/2019/08/To_Frontalize_or_Not_To_Frontalize_Do_We_Really_Ne.pdf},
year = {2018},
date = {2018-05-01},
booktitle = {2018 IEEE Winter Conference on Applications of Computer Vision (WACV)},
pages = {20--29},
organization = {IEEE},
abstract = {Face recognition performance has improved remarkably in the last decade. Much of this success can be attributed to the development of deep learning techniques such as convolutional neural networks (CNNs). While CNNs have pushed the state-of-the-art forward, their training process requires a large amount of clean and correctly labelled training data. If a CNN is intended to tolerate facial pose, then we face an important question: should this training data be diverse in its pose distribution, or should face images be normalized to a single pose in a pre-processing step? To address this question, we evaluate a number of facial landmarking algorithms and a popular frontalization method to understand their effect on facial recognition performance. Additionally, we introduce a new, automatic, single-image frontalization scheme that exceeds the performance of the reference frontalization algorithm for video-to-video face matching on the Point and Shoot Challenge (PaSC) dataset. Furthermore, we investigate failure modes of each frontalization method at different facial yaw angles using the CMU Multi-PIE dataset. We assert that the subsequent recognition and verification performance serves to quantify the effectiveness of each pose correction scheme.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Žiga Emeršič; Nil Oleart Playa; Vitomir Štruc; Peter Peer
Towards Accessories-Aware Ear Recognition Inproceedings
In: 2018 IEEE International Work Conference on Bioinspired Intelligence (IWOBI), pp. 1–8, IEEE 2018.
@inproceedings{emervsivc2018towards,
title = {Towards Accessories-Aware Ear Recognition},
author = {Žiga Emeršič and Nil Oleart Playa and Vitomir Štruc and Peter Peer},
url = {https://lmi.fe.uni-lj.si/wp-content/uploads/2019/08/iwobi-2018-inpaint-1.pdf},
doi = {10.1109/IWOBI.2018.8464138},
year = {2018},
date = {2018-03-01},
booktitle = {2018 IEEE International Work Conference on Bioinspired Intelligence (IWOBI)},
pages = {1--8},
organization = {IEEE},
abstract = {Automatic ear recognition is gaining popularity within the research community due to numerous desirable properties, such as high recognition performance, the possibility of capturing ear images at a distance and in a covert manner, etc. Despite this popularity and the corresponding research effort that is being directed towards ear recognition technology, open problems still remain. One of the most important issues preventing ear recognition systems from being widely available is ear occlusions and accessories. Ear accessories not only mask biometric features and thereby reduce the overall recognition performance, but also introduce new non-biometric features that can be exploited for spoofing purposes. Ignoring ear accessories during recognition can, therefore, present a security threat to ear recognition and also adversely affect performance. Despite the importance of this topic, there have been, to the best of our knowledge, no ear recognition studies that would address these problems. In this work we try to close this gap and study the impact of ear accessories on the recognition performance of several state-of-the-art ear recognition techniques. We consider ear accessories as a tool for spoofing attacks and show that CNN-based recognition approaches are more susceptible to spoofing attacks than traditional descriptor-based approaches. Furthermore, we demonstrate that using inpainting techniques or average coloring can mitigate the problems caused by ear accessories and slightly outperforms the (standard) black color used to mask ear accessories.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Rosaura G. Vidal; Sreya Banerjee; Klemen Grm; Vitomir Struc; Walter J. Scheirer
UG^2: A Video Benchmark for Assessing the Impact of Image Restoration and Enhancement on Automatic Visual Recognition Inproceedings
In: 2018 IEEE Winter Conference on Applications of Computer Vision (WACV), pp. 1597–1606, IEEE 2018.
@inproceedings{vidal2018ug,
title = {UG^2: A Video Benchmark for Assessing the Impact of Image Restoration and Enhancement on Automatic Visual Recognition},
author = {Rosaura G. Vidal and Sreya Banerjee and Klemen Grm and Vitomir Struc and Walter J. Scheirer},
url = {https://arxiv.org/pdf/1710.02909.pdf},
year = {2018},
date = {2018-02-01},
booktitle = {2018 IEEE Winter Conference on Applications of Computer Vision (WACV)},
pages = {1597--1606},
organization = {IEEE},
abstract = {Advances in image restoration and enhancement techniques have led to discussion about how such algorithms can be applied as a pre-processing step to improve automatic visual recognition. In principle, techniques like deblurring and super-resolution should yield improvements by de-emphasizing noise and increasing signal in an input image. But the historically divergent goals of computational photography and visual recognition communities have created a significant need for more work in this direction. To facilitate new research, we introduce a new benchmark dataset called UG2, which contains three difficult real-world scenarios: uncontrolled videos taken by UAVs and manned gliders, as well as controlled videos taken on the ground. Over 150,000 annotated frames for hundreds of ImageNet classes are available, which are used for baseline experiments that assess the impact of known and unknown image artifacts and other conditions on common deep learning-based object classification approaches. Further, current image restoration and enhancement techniques are evaluated by determining whether or not they improve baseline classification performance. Results show that there is plenty of room for algorithmic innovation, making this dataset a useful tool going forward.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Abhijit Das; Umapada Pal; Miguel A. Ferrer; Michael Blumenstein; Dejan Štepec; Peter Rot; Žiga Emeršič; Peter Peer; Vitomir Štruc
SSBC 2018: Sclera Segmentation Benchmarking Competition Inproceedings
In: 2018 International Conference on Biometrics (ICB), 2018.
@inproceedings{Dasicb2018,
title = {SSBC 2018: Sclera Segmentation Benchmarking Competition},
author = {Abhijit Das and Umapada Pal and Miguel A. Ferrer and Michael Blumenstein and Dejan Štepec and Peter Rot and Žiga Emeršič and Peter Peer and Vitomir Štruc},
url = {https://lmi.fe.uni-lj.si/wp-content/uploads/2019/08/icb2018_sserbc.pdf},
year = {2018},
date = {2018-02-01},
booktitle = {2018 International Conference on Biometrics (ICB)},
abstract = {This paper summarises the results of the Sclera Segmentation Benchmarking Competition (SSBC 2018). It was organised in the context of the 11th IAPR International Conference on Biometrics (ICB 2018). The aim of this competition was to record the developments on sclera segmentation in the cross-sensor environment (sclera trait captured using multiple acquiring sensors). Additionally, the competition also aimed to gain the attention of researchers on this subject of research. For the purpose of benchmarking, we have developed two datasets of sclera images captured using different sensors. The first dataset was collected using a DSLR camera and the second one was collected using a mobile phone camera. The first dataset is the Multi-Angle Sclera Dataset (MASD version 1), which was used in the context of the previous versions of sclera segmentation competitions. The images in the second dataset were captured using an 8-megapixel mobile phone rear camera. As a baseline, manual segmentation masks of the sclera images from both datasets were developed. Precision and recall-based statistical measures were employed to evaluate the effectiveness of the submitted segmentation techniques and to rank them. Six algorithms were submitted towards the segmentation task. This paper analyses the results produced by these algorithms/systems and defines a way forward for this subject of research. Both the datasets, along with some of the accompanying ground truth/baseline masks, will be freely available for research purposes upon request to the authors by email.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
2017
Journal Articles
Žiga Emeršič; Vitomir Štruc; Peter Peer
Ear recognition: More than a survey Journal Article
In: Neurocomputing, vol. 255, pp. 26–39, 2017.
@article{emervsivc2017ear,
title = {Ear recognition: More than a survey},
author = {Žiga Emeršič and Vitomir Štruc and Peter Peer},
url = {https://arxiv.org/pdf/1611.06203.pdf},
year = {2017},
date = {2017-01-01},
journal = {Neurocomputing},
volume = {255},
pages = {26--39},
publisher = {Elsevier},
abstract = {Automatic identity recognition from ear images represents an active field of research within the biometric community. The ability to capture ear images from a distance and in a covert manner makes the technology an appealing choice for surveillance and security applications as well as other application domains. Significant contributions have been made in the field over recent years, but open research problems still remain and hinder a wider (commercial) deployment of the technology. This paper presents an overview of the field of automatic ear recognition (from 2D images) and focuses specifically on the most recent, descriptor-based methods proposed in this area. Open challenges are discussed and potential research directions are outlined with the goal of providing the reader with a point of reference for issues worth examining in the future. In addition to a comprehensive review on ear recognition technology, the paper also introduces a new, fully unconstrained dataset of ear images gathered from the web and a toolbox implementing several state-of-the-art techniques for ear recognition. The dataset and toolbox are meant to address some of the open issues in the field and are made publicly available to the research community.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Blaž Meden; Refik Can Malli; Sebastjan Fabijan; Hazim Kemal Ekenel; Vitomir Štruc; Peter Peer
Face deidentification with generative deep neural networks Journal Article
In: IET Signal Processing, vol. 11, no. 9, pp. 1046–1054, 2017.
@article{meden2017face,
title = {Face deidentification with generative deep neural networks},
author = {Blaž Meden and Refik Can Malli and Sebastjan Fabijan and Hazim Kemal Ekenel and Vitomir Štruc and Peter Peer},
url = {https://lmi.fe.uni-lj.si/wp-content/uploads/2019/08/Face_Deidentification_with_Generative_Deep_Neural_Networks.pdf},
year = {2017},
date = {2017-01-01},
journal = {IET Signal Processing},
volume = {11},
number = {9},
pages = {1046--1054},
publisher = {IET},
abstract = {Face deidentification is an active topic amongst privacy and security researchers. Early deidentification methods relying on image blurring or pixelisation have been replaced in recent years with techniques based on formal anonymity models that provide privacy guarantees and retain certain characteristics of the data even after deidentification. The latter aspect is important, as it allows the deidentified data to be used in applications for which identity information is irrelevant. In this work, the authors present a novel face deidentification pipeline, which ensures anonymity by synthesising artificial surrogate faces using generative neural networks (GNNs). The generated faces are used to deidentify subjects in images or videos, while preserving non-identity-related aspects of the data and consequently enabling data utilisation. Since generative networks are highly adaptive and can utilise diverse parameters (pertaining to the appearance of the generated output in terms of facial expressions, gender, race etc.), they represent a natural choice for the problem of face deidentification. To demonstrate the feasibility of the authors’ approach, they perform experiments using automated recognition tools and human annotators. Their results show that the recognition performance on deidentified images is close to chance, suggesting that the deidentification process based on GNNs is effective.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Klemen Grm; Vitomir Štruc; Anais Artiges; Matthieu Caron; Hazim K. Ekenel
Strengths and weaknesses of deep learning models for face recognition against image degradations Journal Article
In: IET Biometrics, vol. 7, no. 1, pp. 81–89, 2017.
@article{grm2017strengths,
title = {Strengths and weaknesses of deep learning models for face recognition against image degradations},
author = {Klemen Grm and Vitomir Štruc and Anais Artiges and Matthieu Caron and Hazim K. Ekenel},
url = {https://arxiv.org/pdf/1710.01494.pdf},
year = {2017},
date = {2017-01-01},
journal = {IET Biometrics},
volume = {7},
number = {1},
pages = {81--89},
publisher = {IET},
abstract = {Convolutional neural network (CNN) based approaches are the state of the art in various computer vision tasks including face recognition. Considerable research effort is currently being directed toward further improving CNNs by focusing on model architectures and training techniques. However, studies systematically exploring the strengths and weaknesses of existing deep models for face recognition are still relatively scarce. In this paper, we try to fill this gap and study the effects of different covariates on the verification performance of four recent CNN models using the Labelled Faces in the Wild dataset. Specifically, we investigate the influence of covariates related to image quality and model characteristics, and analyse their impact on the face verification performance of different deep CNN models. Based on comprehensive and rigorous experimentation, we identify the strengths and weaknesses of the deep learning models, and present key areas for potential future research. Our results indicate that high levels of noise, blur, missing pixels, and brightness have a detrimental effect on the verification performance of all models, whereas the impact of contrast changes and compression artefacts is limited. We find that the descriptor-computation strategy and colour information do not have a significant influence on performance.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
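The degradation battery used in such robustness studies is easy to sketch. The functions below implement illustrative versions of four of the covariates mentioned in the abstract (noise, blur, missing pixels, brightness); the paper's exact parameterizations may differ.

import torch

def gaussian_noise(img, sigma=0.1):
    return (img + sigma * torch.randn_like(img)).clamp(0, 1)

def missing_pixels(img, frac=0.2):
    mask = (torch.rand_like(img[:, :1]) > frac).float()
    return img * mask                          # zero out a fraction of pixels

def brightness(img, delta=0.3):
    return (img + delta).clamp(0, 1)

def box_blur(img, k=5):
    # Depthwise box filter as a simple stand-in for Gaussian blur.
    kernel = torch.ones(img.shape[1], 1, k, k) / (k * k)
    return torch.nn.functional.conv2d(img, kernel, padding=k // 2,
                                      groups=img.shape[1])

face = torch.rand(1, 3, 112, 112)              # stand-in face crop
for name, fn in [("noise", gaussian_noise), ("holes", missing_pixels),
                 ("bright", brightness), ("blur", box_blur)]:
    print(name, fn(face).shape)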
Robert Šket; Nicole Treichel; Susanne Kublik; Tadej Debevec; Ola Eiken; Igor Mekjavić; Michael Schloter; Marius Vital; Jenna Chandler; James M Tiedje; Boštjan Murovec; Zala Prevoršek; Matevž Likar; Blaž Stres
Hypoxia and inactivity related physiological changes precede or take place in absence of significant rearrangements in bacterial community structure: The PlanHab randomized trial pilot study Journal Article
In: PLOS ONE, vol. 12, no. 12, pp. 1-26, 2017.
@article{10.1371/journal.pone.0188556,
title = {Hypoxia and inactivity related physiological changes precede or take place in absence of significant rearrangements in bacterial community structure: The PlanHab randomized trial pilot study},
author = {Robert Šket and Nicole Treichel and Susanne Kublik and Tadej Debevec and Ola Eiken and Igor Mekjavić and Michael Schloter and Marius Vital and Jenna Chandler and James M Tiedje and Boštjan Murovec and Zala Prevoršek and Matevž Likar and Blaž Stres},
url = {https://doi.org/10.1371/journal.pone.0188556},
doi = {10.1371/journal.pone.0188556},
year = {2017},
date = {2017-01-01},
journal = {PLOS ONE},
volume = {12},
number = {12},
pages = {1-26},
publisher = {Public Library of Science},
abstract = {We explored the assembly of intestinal microbiota in healthy male participants during the randomized crossover design of run-in (5 day) and experimental phases (21-day normoxic bed rest (NBR), hypoxic bed rest (HBR) and hypoxic ambulation (HAmb) in a strictly controlled laboratory environment, with balanced fluid and dietary intakes, controlled circadian rhythm, microbial ambiental burden and 24/7 medical surveillance. The fraction of inspired O2 (FiO2) and partial pressure of inspired O2 (PiO2) were 0.209 and 133.1 ± 0.3 mmHg for NBR and 0.141 ± 0.004 and 90.0 ± 0.4 mmHg for both hypoxic variants (HBR and HAmb; ~4000 m simulated altitude), respectively. A number of parameters linked to intestinal environment such as defecation frequency, intestinal electrical conductivity (IEC), sterol and polyphenol content and diversity, indole, aromaticity and spectral characteristics of dissolved organic matter (DOM) were measured (64 variables). The structure and diversity of bacterial microbial community was assessed using 16S rRNA amplicon sequencing. Inactivity negatively affected frequency of defecation and in combination with hypoxia increased IEC (p < 0.05). In contrast, sterol and polyphenol diversity and content, various characteristics of DOM and aromatic compounds, the structure and diversity of bacterial microbial community were not significantly affected over time. A new in-house PlanHab database was established to integrate all measured variables on host physiology, diet, experiment, immune and metabolic markers (n = 231). The observed progressive decrease in defecation frequency and concomitant increase in IEC suggested that the transition from healthy physiological state towards the developed symptoms of low magnitude obesity-related syndromes was dose dependent on the extent of time spent in inactivity and preceded or took place in absence of significant rearrangements in bacterial microbial community. Species B. thetaiotaomicron, B. fragilis, B. dorei and other Bacteroides with reported relevance for dysbiotic medical conditions were significantly enriched in HBR, characterized with most severe inflammation symptoms, indicating a shift towards host mucin degradation and proinflammatory immune crosstalk.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Robert Šket; Nicole Treichel; Tadej Debevec; Ola Eiken; Igor Mekjavic; Michael Schloter; Marius Vital; Jenna Chandler; James M Tiedje; Boštjan Murovec; Zala Prevoršek; Blaž Stres
Hypoxia and Inactivity Related Physiological Changes (Constipation, Inflammation) Are Not Reflected at the Level of Gut Metabolites and Butyrate Producing Microbial Community: The PlanHab Study Journal Article
In: Frontiers in Physiology, vol. 8, pp. 250, 2017, ISSN: 1664-042X.
@article{10.3389/fphys.2017.00250,
title = {Hypoxia and Inactivity Related Physiological Changes (Constipation, Inflammation) Are Not Reflected at the Level of Gut Metabolites and Butyrate Producing Microbial Community: The PlanHab Study},
author = {Robert Šket and Nicole Treichel and Tadej Debevec and Ola Eiken and Igor Mekjavic and Michael Schloter and Marius Vital and Jenna Chandler and James M Tiedje and Boštjan Murovec and Zala Prevoršek and Blaž Stres},
url = {https://www.frontiersin.org/article/10.3389/fphys.2017.00250},
doi = {10.3389/fphys.2017.00250},
issn = {1664-042X},
year = {2017},
date = {2017-01-01},
journal = {Frontiers in Physiology},
volume = {8},
pages = {250},
abstract = {We explored the assembly of intestinal microbiota in healthy male participants during the run-in (5 day) and experimental phases (21-day normoxic bed rest (NBR), hypoxic bed rest (HBR) and hypoxic ambulation (HAmb)) in a strictly controlled laboratory environment, with balanced fluid and dietary intakes, controlled circadian rhythm, controlled ambient microbial burden and 24/7 medical surveillance. The fraction of inspired O2 (FiO2) and partial pressure of inspired O2 (PiO2) were 0.209 and 133.1 ± 0.3 mmHg for NBR and 0.141 ± 0.004 and 90.0 ± 0.4 mmHg for both hypoxic variants (HBR and HAmb; ~4000 m simulated altitude), respectively. A number of parameters linked to intestinal transit, spanning the Bristol Stool Scale, defecation rates, zonulin, α1-antitrypsin, eosinophil-derived neurotoxin, bile acids, reducing sugars, short-chain fatty acids, total soluble organic carbon, water content, diet composition and food intake, were measured (167 variables). The abundance, structure and diversity of the butyrate-producing microbial community were assessed using the genes of the two primary bacterial butyrate synthesis pathways, butyryl-CoA:acetate CoA-transferase (but) and butyrate kinase (buk). Inactivity negatively affected fecal consistency and in combination with hypoxia aggravated the state of gut inflammation (p < 0.05). In contrast, gut permeability, various metabolic markers, and the structure, diversity and abundance of the butyrate-producing microbial community were not significantly affected. Rearrangements in the butyrate-producing microbial community structure were explained by experimental setup (13.4%), experimentally structured metabolites (12.8%) and gut metabolite-immunological markers (11.9%), with 61.9% remaining unexplained. Many of the measured parameters were found to be correlated and were hence omitted from further analyses. The observed progressive increase in two immunological intestinal markers suggested that the transition from a healthy physiological state towards the developed symptoms of low-magnitude obesity-related syndromes was primarily driven by the onset of inactivity (lack of exercise in NBR), which was exacerbated by systemic hypoxia (HBR) and significantly alleviated by exercise, despite hypoxia (HAmb). The butyrate-producing community in the colon exhibited apparent resilience towards short-term modifications in host exercise or hypoxia. Progressive constipation (decreased intestinal motility) and an increased local inflammation marker suggest that changes in microbial colonization and metabolism were taking place in the small intestine.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Inproceedings
Primož Lavrič; Žiga Emeršič; Blaž Meden; Vitomir Štruc; Peter Peer
Do it Yourself: Building a Low-Cost Iris Recognition System at Home Using Off-The-Shelf Components Inproceedings
In: Electrotechnical and Computer Science Conference ERK 2017, 2017.
@inproceedings{ERK2017,
title = {Do it Yourself: Building a Low-Cost Iris Recognition System at Home Using Off-The-Shelf Components},
author = {Primož Lavrič and Žiga Emeršič and Blaž Meden and Vitomir Štruc and Peter Peer},
url = {https://lmi.fe.uni-lj.si/wp-content/uploads/2019/08/lavricdo_it.pdf},
year = {2017},
date = {2017-09-01},
booktitle = {Electrotechnical and Computer Science Conference ERK 2017},
abstract = {Among the different biometric traits that can be used for person recognition, the human iris is generally considered to be among the most accurate. However, despite a plethora of desirable characteristics, iris recognition is not as widely used as competing biometric modalities, likely due to the high cost of existing commercial iris-recognition systems. In this paper we contribute towards the availability of low-cost iris recognition systems and present a prototype system built using off-the-shelf components. We describe the prototype device and the pipeline used for iris recognition, evaluate the performance of our solution on a small in-house dataset and discuss directions for future work. The current version of our prototype includes complete hardware and software implementations and has a combined bill-of-materials of 110 EUR.
},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
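The paper itself describes the full pipeline; as a hedged illustration of the matching stage typically used in such systems, the sketch below compares binary iris codes with the normalized Hamming distance and compensates for eye rotation via circular bit shifts (the classic Daugman-style matcher, not necessarily the exact method of this prototype). The random codes are stand-ins for Gabor-phase iris codes.

import numpy as np

def hamming(code_a, code_b, max_shift=8):
    # code_*: 2D boolean arrays (rows = radial bands, cols = angular samples).
    # Try a range of circular shifts to compensate for eye rotation and keep
    # the best (lowest) normalized Hamming distance.
    best = 1.0
    for s in range(-max_shift, max_shift + 1):
        shifted = np.roll(code_b, s, axis=1)   # rotation = circular column shift
        best = min(best, float(np.mean(code_a != shifted)))
    return best

rng = np.random.default_rng(1)
probe = rng.random((8, 256)) > 0.5
same = np.roll(probe, 3, axis=1)               # rotated copy of the same "iris"
same[rng.random(same.shape) < 0.05] ^= True    # plus 5% bit noise
other = rng.random((8, 256)) > 0.5             # unrelated "iris"
print("genuine HD:", round(hamming(probe, same), 3))    # expect well below 0.5
print("impostor HD:", round(hamming(probe, other), 3))  # expect near 0.45-0.5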
Grm Klemen; Dobrišek Simon; Štruc Vitomir
Evaluating image superresolution algorithms for cross-resolution face recognition Inproceedings
In: Proceedings of the Twenty-sixth International Electrotechnical and Computer Science Conference ERK 2017, 2017.
@inproceedings{ERK2017Grm,
title = {Evaluating image superresolution algorithms for cross-resolution face recognition},
author = {Grm Klemen and Dobrišek Simon and Štruc Vitomir},
url = {https://lmi.fe.uni-lj.si/wp-content/uploads/2019/08/review_submission.pdf},
year = {2017},
date = {2017-09-01},
booktitle = {Proceedings of the Twenty-sixth International Electrotechnical and Computer Science Conference ERK 2017},
abstract = {With recent advancements in deep learning and convolutional neural networks (CNNs), face recognition has seen significant performance improvements over the last few years. However, low-resolution images still remain challenging, with CNNs performing relatively poorly compared to humans. One possibility to improve performance in these settings often advocated in the literature is the use of super-resolution (SR). In this paper, we explore the usefulness of SR algorithms for cross-resolution face recognition in experiments on the Labeled Faces in the Wild (LFW) and SCface datasets using four recent deep CNN models. We conduct experiments with synthetically down-sampled images as well as real-life low-resolution imagery captured by surveillance cameras. Our experiments show that image super-resolution can improve face recognition performance considerably on very low-resolution images (of size 24 x 24 or 32 x 32 pixels), when images are artificially down-sampled, but has a lesser (or sometimes even a detrimental) effect with real-life images leaving significant room for further research in this area.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
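A minimal sketch of the synthetic down-sampling protocol used in such cross-resolution experiments: a probe is downsampled to 24x24 or 32x32 pixels and upscaled back with bicubic interpolation. PSNR against the original stands in here for a face-matcher score, and the random image is a placeholder; both are assumptions made for illustration only.

import numpy as np
from PIL import Image

def psnr(a, b):
    # Peak signal-to-noise ratio between two 8-bit images
    mse = np.mean((a.astype(np.float64) - b.astype(np.float64)) ** 2)
    return 10 * np.log10(255.0 ** 2 / mse)

rng = np.random.default_rng(2)
hi = Image.fromarray((rng.random((112, 112)) * 255).astype(np.uint8))
for size in (24, 32, 64):
    lo = hi.resize((size, size), Image.BICUBIC)   # simulate a low-res capture
    up = lo.resize(hi.size, Image.BICUBIC)        # naive upscaling baseline
    print(f"{size}x{size} -> PSNR {psnr(np.asarray(hi), np.asarray(up)):.1f} dB")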
Novosel Rok; Meden Blaž; Emeršič Žiga; Štruc Vitomir; Peter Peer
Face recognition with Raspberry Pi for IoT Environments. Inproceedings
In: Proceedings of the Twenty-sixth International Electrotechnical and Computer Science Conference ERK 2017, 2017.
@inproceedings{ERK2017c,
title = {Face recognition with Raspberry Pi for IoT Environments.},
author = {Novosel Rok and Meden Blaž and Emeršič Žiga and Štruc Vitomir and Peter Peer},
url = {https://lmi.fe.uni-lj.si/wp-content/uploads/2019/08/novoselface_recognition.pdf},
year = {2017},
date = {2017-09-01},
booktitle = {Proceedings of the Twenty-sixth International Electrotechnical and Computer Science Conference ERK 2017},
abstract = {IoT has seen steady growth over recent years – smart home appliances, smart personal gear, personal assistants and many more. The same is true for the field of biometrics, where the need for automatic and secure recognition schemes has spurred the development of fingerprint- and face-recognition mechanisms found today in most smartphones and similar hand-held devices. Devices used in the Internet of Things (IoT) are often low-powered with limited computational resources. This means that biometric recognition pipelines aimed at IoT need to be streamlined and as efficient as possible. Towards this end, we describe in this paper how image-based biometrics can be leveraged in an IoT environment using a Raspberry Pi. We present a proof-of-concept web-based information system, secured by a face-recognition procedure, that gives authorized users access to potentially sensitive information.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Žiga Emeršič; Dejan Štepec; Vitomir Štruc; Peter Peer; Anjith George; Adii Ahmad; Elshibani Omar; Terrance E. Boult; Reza Safdaii; Yuxiang Zhou; Stefanos Zafeiriou; Dogucan Yaman; Fevziye I. Eyiokur; Hazim K. Ekenel; others
The unconstrained ear recognition challenge Inproceedings
In: 2017 IEEE International Joint Conference on Biometrics (IJCB), pp. 715–724, IEEE 2017.
@inproceedings{emervsivc2017unconstrained,
title = {The unconstrained ear recognition challenge},
author = {Žiga Emeršič and Dejan Štepec and Vitomir Štruc and Peter Peer and Anjith George and Adii Ahmad and Elshibani Omar and Terrance E. Boult and Reza Safdaii and Yuxiang Zhou and Stefanos Zafeiriou and Dogucan Yaman and Fevziye I. Eyiokur and Hazim K. Ekenel and others},
url = {https://arxiv.org/pdf/1708.06997.pdf},
year = {2017},
date = {2017-09-01},
booktitle = {2017 IEEE International Joint Conference on Biometrics (IJCB)},
pages = {715--724},
organization = {IEEE},
abstract = {In this paper we present the results of the Unconstrained Ear Recognition Challenge (UERC), a group benchmarking effort centered around the problem of person recognition from ear images captured in uncontrolled conditions. The goal of the challenge was to assess the performance of existing ear recognition techniques on a challenging large-scale dataset and identify open problems that need to be addressed in the future. Five groups from three continents participated in the challenge and contributed six ear recognition techniques for the evaluation, while multiple baselines were made available for the challenge by the UERC organizers. A comprehensive analysis was conducted with all participating approaches addressing essential research questions pertaining to the sensitivity of the technology to head rotation, flipping, gallery size, large-scale recognition and others. The top performer of the UERC was found to ensure robust performance on a smaller part of the dataset (with 180 subjects) regardless of image characteristics, but still exhibited a significant performance drop when the entire dataset comprising 3,704 subjects was used for testing.
},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Žiga Emeršič; Dejan Štepec; Vitomir Štruc; Peter Peer
Training convolutional neural networks with limited training data for ear recognition in the wild Inproceedings
In: IEEE International Conference on Automatic Face and Gesture Recognition, Workshop on Biometrics in the Wild 2017, 2017.
@inproceedings{emervsivc2017training,
title = {Training convolutional neural networks with limited training data for ear recognition in the wild},
author = {Žiga Emeršič and Dejan Štepec and Vitomir Štruc and Peter Peer},
url = {https://arxiv.org/pdf/1711.09952.pdf},
year = {2017},
date = {2017-05-01},
booktitle = {IEEE International Conference on Automatic Face and Gesture Recognition, Workshop on Biometrics in the Wild 2017},
journal = {arXiv preprint arXiv:1711.09952},
abstract = {Identity recognition from ear images is an active field of research within the biometric community. The ability to capture ear images from a distance and in a covert manner makes ear recognition technology an appealing choice for surveillance and security applications as well as related application domains. In contrast to other biometric modalities, where large datasets captured in uncontrolled settings are readily available, datasets of ear images are still limited in size and mostly of laboratory-like quality. As a consequence, ear recognition technology has not benefited yet from advances in deep learning and convolutional neural networks (CNNs) and is still lacking behind other modalities that experienced significant performance gains owing to deep recognition technology. In this paper we address this problem and aim at building a CNNbased ear recognition model. We explore different strategies towards model training with limited amounts of training data and show that by selecting an appropriate model architecture, using aggressive data augmentation and selective learning on existing (pre-trained) models, we are able to learn an effective CNN-based model using a little more than 1300 training images. The result of our work is the first CNN-based approach to ear recognition that is also made publicly available to the research community. With our model we are able to improve on the rank one recognition rate of the previous state-of-the-art by more than 25% on a challenging dataset of ear images captured from the web (a.k.a. in the wild).},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Ziga Emersic; Blaz Meden; Peter Peer; Vitomir Struc
Covariate analysis of descriptor-based ear recognition techniques Inproceedings
In: 2017 international conference and workshop on bioinspired intelligence (IWOBI), pp. 1–9, IEEE 2017.
@inproceedings{emersic2017covariate,
title = {Covariate analysis of descriptor-based ear recognition techniques},
author = {Ziga Emersic and Blaz Meden and Peter Peer and Vitomir Struc},
url = {https://lmi.fe.uni-lj.si/wp-content/uploads/2019/08/Covariate_Analysis_of_Descriptor_based_Ear_Recognition_Techniques.pdf},
year = {2017},
date = {2017-01-01},
booktitle = {2017 international conference and workshop on bioinspired intelligence (IWOBI)},
pages = {1--9},
organization = {IEEE},
abstract = {Dense descriptor-based feature extraction techniques represent a popular choice for implementing biometric ear recognition systems and are in general considered to be the current state-of-the-art in this area. In this paper, we study the impact of various factors (i.e., head rotation, presence of occlusions, gender and ethnicity) on the performance of 8 state-of-the-art descriptor-based ear recognition techniques. Our goal is to pinpoint the weak points of the existing technology and identify open problems worth exploring in the future. We conduct our covariate analysis through identification experiments on the challenging AWE (Annotated Web Ears) dataset and report our findings. The results of our study show that high degrees of head movement and the presence of accessories significantly impact the identification performance, whereas mild degrees of the listed factors and other covariates, such as gender and ethnicity, impact the identification performance only to a limited extent.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
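As a rough illustration of the descriptor-based techniques compared in this study, the following sketch computes a basic 8-neighbour LBP code image, summarizes it as a normalized histogram, and compares two images with the chi-square distance. The random arrays stand in for cropped ear images; real systems typically use block-wise histograms and more elaborate descriptor variants.

import numpy as np

def lbp(img):
    # Basic 8-neighbour LBP: threshold each neighbour against the centre
    # pixel and pack the comparison results into an 8-bit code per pixel.
    c = img[1:-1, 1:-1].astype(np.int32)
    code = np.zeros_like(c)
    h, w = img.shape
    shifts = [(-1, -1), (-1, 0), (-1, 1), (0, 1), (1, 1), (1, 0), (1, -1), (0, -1)]
    for bit, (dy, dx) in enumerate(shifts):
        nb = img[1 + dy:h - 1 + dy, 1 + dx:w - 1 + dx].astype(np.int32)
        code |= (nb >= c).astype(np.int32) << bit
    return code

def hist(code):
    h = np.bincount(code.ravel(), minlength=256).astype(float)
    return h / h.sum()

def chi2(h1, h2):
    # Chi-square histogram distance, a common matcher for LBP descriptors
    return 0.5 * np.sum((h1 - h2) ** 2 / (h1 + h2 + 1e-12))

rng = np.random.default_rng(5)
a = rng.integers(0, 256, (64, 64))       # stand-ins for cropped ear images
b = rng.integers(0, 256, (64, 64))
print("chi-square distance:", round(chi2(hist(lbp(a)), hist(lbp(b))), 4))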
Blaz Meden; Ziga Emersic; Vitomir Struc; Peter Peer
k-Same-Net: Neural-Network-Based Face Deidentification Inproceedings
In: 2017 International Conference and Workshop on Bioinspired Intelligence (IWOBI), pp. 1–7, IEEE 2017.
@inproceedings{meden2017kappa,
title = {k-Same-Net: Neural-Network-Based Face Deidentification},
author = {Blaz Meden and Ziga Emersic and Vitomir Struc and Peter Peer},
url = {https://lmi.fe.uni-lj.si/wp-content/uploads/2019/08/k-same-net.pdf},
year = {2017},
date = {2017-01-01},
booktitle = {2017 International Conference and Workshop on Bioinspired Intelligence (IWOBI)},
pages = {1--7},
organization = {IEEE},
abstract = {An increasing amount of video and image data is being shared between government entities and other relevant stakeholders and requires careful handling of personal information. A popular approach for privacy protection in such data is the use of deidentification techniques, which aim at concealing the identity of individuals in the imagery while still preserving certain aspects of the data. In this work, we propose a novel approach towards face deidentification, called k-Same-Net, which combines recent generative neural networks (GNNs) with the well-known k-anonymity mechanism and provides formal guarantees regarding privacy protection on a closed set of identities. Our GNN is able to generate synthetic surrogate face images for deidentification by seamlessly combining features of identities used to train the GNN model. Furthermore, it allows us to guide the image-generation process with a small set of appearance-related parameters that can be used to alter specific aspects (e.g., facial expressions, age, gender) of the synthesized surrogate images. We demonstrate the feasibility of k-Same-Net in comparative experiments with competing techniques on the XM2VTS dataset and discuss the main characteristics of our approach.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
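For intuition about the k-anonymity mechanism that k-Same-Net builds on, the sketch below implements the classic k-Same averaging scheme on feature vectors: each face is replaced by the mean of a group of k faces, so any surrogate traces back to at least k identities. This is the traditional mechanism, not the paper's GNN-based synthesis; the greedy grouping and random data are illustrative assumptions.

import numpy as np

def k_same(faces, k=3):
    # faces: (N, D) array, one row per identity. Greedily group each
    # unassigned face with its k-1 nearest remaining neighbours and replace
    # the whole group by its mean (a leftover group may be smaller than k).
    out = np.empty_like(faces)
    remaining = list(range(len(faces)))
    while remaining:
        seed = remaining.pop(0)
        dists = [(np.linalg.norm(faces[seed] - faces[j]), j) for j in remaining]
        group = [seed] + [j for _, j in sorted(dists)[:k - 1]]
        remaining = [j for j in remaining if j not in group]
        out[group] = faces[group].mean(axis=0)  # shared surrogate for the group
    return out

rng = np.random.default_rng(3)
faces = rng.random((10, 64))                    # stand-ins for face features
deid = k_same(faces, k=3)
print("distinct surrogates:", len({tuple(np.round(r, 6)) for r in deid}))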
Abhijit Das; Umapada Pal; Miguel A Ferrer; Michael Blumenstein; Dejan Štepec; Peter Rot; Ziga Emeršič; Peter Peer; Vitomir Štruc; SV Aruna Kumar; Harish B S
SSERBC 2017: Sclera segmentation and eye recognition benchmarking competition Inproceedings
In: 2017 IEEE International Joint Conference on Biometrics (IJCB), pp. 742–747, IEEE 2017.
@inproceedings{das2017sserbc,
title = {SSERBC 2017: Sclera segmentation and eye recognition benchmarking competition},
author = {Abhijit Das and Umapada Pal and Miguel A Ferrer and Michael Blumenstein and Dejan Štepec and Peter Rot and Ziga Emeršič and Peter Peer and Vitomir Štruc and SV Aruna Kumar and Harish B S},
url = {https://lmi.fe.uni-lj.si/wp-content/uploads/2019/08/SSERBC2017.pdf},
year = {2017},
date = {2017-01-01},
booktitle = {2017 IEEE International Joint Conference on Biometrics (IJCB)},
pages = {742--747},
organization = {IEEE},
abstract = {This paper summarises the results of the Sclera Segmentation and Eye Recognition Benchmarking Competition (SSERBC 2017). It was organised in the context of the International Joint Conference on Biometrics (IJCB 2017). The aim of this competition was to record the recent developments in sclera segmentation and eye recognition in the visible spectrum (using iris, sclera and periocular regions, and their fusion), and also to draw researchers' attention to this subject.
In this regard, we used the Multi-Angle Sclera Dataset (MASD version 1). It comprises 2624 images taken from both eyes of 82 identities, i.e., images of 164 (82*2) eyes. Manual segmentation masks of these images were created to baseline both tasks.
Precision- and recall-based statistical measures were employed to evaluate the effectiveness of the segmentation and to rank the segmentation submissions, while a recognition accuracy measure was employed for the recognition task. Manually segmented sclera, iris and periocular regions were used in the recognition task. Sixteen teams registered for the competition; among them, six teams submitted their algorithms or systems for the segmentation task and two submitted their recognition algorithms or systems.
The results produced by these algorithms or systems reflect current developments in the literature on sclera segmentation and eye recognition, employing cutting-edge techniques. The MASD version 1 dataset, with some of the ground truth, will be freely available for research purposes. The success of the competition also demonstrates the recent interest of researchers from academia as well as industry in this subject.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
2016
Journal Articles
Jaka Kravanja; Mario Žganec; Jerneja Žganec-Gros; Simon Dobrišek; Vitomir Štruc
Robust Depth Image Acquisition Using Modulated Pattern Projection and Probabilistic Graphical Models Journal Article
In: Sensors, vol. 16, no. 10, pp. 1740, 2016.
@article{kravanja2016robust,
title = {Robust Depth Image Acquisition Using Modulated Pattern Projection and Probabilistic Graphical Models},
author = {Jaka Kravanja and Mario Žganec and Jerneja Žganec-Gros and Simon Dobrišek and Vitomir Štruc},
url = {http://luks.fe.uni-lj.si/nluks/wp-content/uploads/2016/11/sensors-16-01740-1.pdf},
doi = {10.3390/s16101740},
year = {2016},
date = {2016-10-20},
journal = {Sensors},
volume = {16},
number = {10},
pages = {1740},
publisher = {Multidisciplinary Digital Publishing Institute},
abstract = {Depth image acquisition with structured light approaches in outdoor environments is a challenging problem due to external factors, such as ambient sunlight, which commonly affect the acquisition procedure. This paper presents a novel structured light sensor designed specifically for operation in outdoor environments. The sensor exploits a modulated sequence of structured light projected onto the target scene to counteract environmental factors and estimate a spatial distortion map in a robust manner. The correspondence between the projected pattern and the estimated distortion map is then established using a probabilistic framework based on graphical models. Finally, the depth image of the target scene is reconstructed using a number of reference frames recorded during the calibration process. We evaluate the proposed sensor on experimental data in indoor and outdoor environments and present comparative experiments with other existing methods, as well as commercial sensors.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
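The depth reconstruction in structured-light sensors of this kind ultimately rests on triangulation: once a projected feature is matched to its observed image position, depth follows from the projector-camera baseline, the focal length and the measured disparity. The sketch below evaluates the standard relation Z = f * B / d with assumed calibration values, purely for illustration.

import numpy as np

focal_px = 600.0      # focal length in pixels (assumed calibration value)
baseline_m = 0.075    # projector-camera baseline in metres (assumed)

disparity_px = np.array([40.0, 20.0, 10.0, 5.0])   # matched-pattern disparities
depth_m = focal_px * baseline_m / disparity_px      # Z = f * B / d
for d, z in zip(disparity_px, depth_m):
    print(f"disparity {d:5.1f} px -> depth {z:5.2f} m")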
Jaka Kravanja; Mario Žganec; Jerneja Žganec-Gros; Simon Dobrišek; Vitomir Štruc
Exploiting Spatio-Temporal Information for Light-Plane Labeling in Depth-Image Sensors Using Probabilistic Graphical Models Journal Article
In: Informatica, vol. 27, no. 1, pp. 67–84, 2016.
@article{kravanja2016exploiting,
title = {Exploiting Spatio-Temporal Information for Light-Plane Labeling in Depth-Image Sensors Using Probabilistic Graphical Models},
author = {Jaka Kravanja and Mario Žganec and Jerneja Žganec-Gros and Simon Dobrišek and Vitomir Štruc},
url = {http://luks.fe.uni-lj.si/nluks/wp-content/uploads/2016/11/jaka_informatica_camera.pdf},
year = {2016},
date = {2016-03-30},
journal = {Informatica},
volume = {27},
number = {1},
pages = {67--84},
publisher = {Vilnius University Institute of Mathematics and Informatics},
abstract = {This paper proposes a novel approach to light plane labeling in depth-image sensors relying on “uncoded” structured light. The proposed approach adopts probabilistic graphical models (PGMs) to solve the correspondence problem between the projected and the detected light patterns. The procedure for solving the correspondence problem is designed to take the spatial relations between the parts of the projected pattern and prior knowledge about the structure of the pattern into account, but it also exploits temporal information to achieve reliable light-plane labeling. The procedure is assessed on a database of light patterns detected with a specially developed imaging sensor that, unlike most existing solutions on the market, was shown to work reliably in outdoor environments as well as in the presence of other identical (active) sensors directed at the same scene. The results of our experiments show that the proposed approach is able to reliably solve the correspondence problem and assign light-plane labels to the detected pattern with a high accuracy, even when large spatial discontinuities are present in the observed scene.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Inproceedings
Walter Scheirer; Patrick Flynn; Changxing Ding; Guodong Guo; Vitomir Štruc; Mohamad Al Jazaery; Simon Dobrišek; Klemen Grm; Dacheng Tao; Yu Zhu; Joel Brogan; Sandipan Banerjee; Aparna Bharati; Brandon Richard Webster
Report on the BTAS 2016 Video Person Recognition Evaluation Inproceedings
In: Proceedings of the IEEE International Conference on Biometrics: Theory, Applications and Systems (BTAS), IEEE, 2016.
@inproceedings{BTAS2016,
title = {Report on the BTAS 2016 Video Person Recognition Evaluation},
author = {Walter Scheirer and Patrick Flynn and Changxing Ding and Guodong Guo and Vitomir Štruc and Mohamad Al Jazaery and Simon Dobrišek and Klemen Grm and Dacheng Tao and Yu Zhu and Joel Brogan and Sandipan Banerjee and Aparna Bharati and Brandon Richard Webster},
year = {2016},
date = {2016-10-05},
booktitle = {Proceedings of the IEEE International Conference on Biometrics: Theory, Applications and Systems (BTAS)},
publisher = {IEEE},
abstract = {This report presents results from the Video Person Recognition Evaluation held in conjunction with the 8th IEEE International Conference on Biometrics: Theory, Applications, and Systems (BTAS). Two experiments required algorithms to recognize people in videos from the Point-and-Shoot Face Recognition Challenge Problem (PaSC). The first consisted of videos from a tripod-mounted high quality video camera. The second contained videos acquired from 5 different handheld video cameras. There were 1,401 videos in each experiment of 265 subjects. The subjects, the scenes, and the actions carried out by the people are the same in both experiments. An additional experiment required algorithms to recognize people in videos from the Video Database of Moving Faces and People (VDMFP). There were 958 videos in this experiment of 297 subjects. Four groups from around the world participated in the evaluation. The top verification rate for PaSC from this evaluation is 0.98 at a false accept rate of 0.01 — a remarkable advancement in performance from the competition held at FG 2015.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Janez Križaj; Simon Dobrišek; France Mihelič; Vitomir Štruc
Facial Landmark Localization from 3D Images Inproceedings
In: Proceedings of the Electrotechnical and Computer Science Conference (ERK), Portorož, Slovenia, 2016.
@inproceedings{ERK2016Janez,
title = {Facial Landmark Localization from 3D Images},
author = {Janez Križaj and Simon Dobrišek and France Mihelič and Vitomir Štruc},
year = {2016},
date = {2016-09-20},
booktitle = {Proceedings of the Electrotechnical and Computer Science Conference (ERK)},
address = {Portorož, Slovenia},
abstract = {A novel method for automatic facial landmark localization is presented. The method builds on the supervised descent framework, which was shown to successfully localize landmarks in the presence of large expression variations and mild occlusions, but struggles when localizing landmarks on faces with large pose variations. We propose an extension of the supervised descent framework which trains multiple descent maps and results in increased robustness to pose variations. The performance of the proposed method is demonstrated on the Bosphorus database for the problem of facial landmark localization from 3D data. Our experimental results show that the proposed method exhibits increased robustness to pose variations, while retaining high performance in the case of expression and occlusion variations.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Sebastjan Fabijan; Vitomir Štruc
Vpliv registracije obraznih področij na učinkovitost samodejnega razpoznavanja obrazov: študija z OpenBR Inproceedings
In: Proceedings of the Electrotechnical and Computer Science Conference (ERK), 2016.
@inproceedings{ERK2016_Seba,
title = {Vpliv registracije obraznih področij na učinkovitost samodejnega razpoznavanja obrazov: študija z OpenBR},
author = {Sebastjan Fabijan and Vitomir Štruc},
url = {http://luks.fe.uni-lj.si/nluks/wp-content/uploads/2016/09/erk_2016_08_22.pdf},
year = {2016},
date = {2016-09-20},
booktitle = {Proceedings of the Electrotechnical and Computer Science Conference (ERK)},
abstract = {Face recognition has in recent years become one of the most successful areas of automatic, computer-supported image analysis, with a variety of practical applications. One of the key steps for successful recognition is the alignment of the faces in the images. Alignment aims to make recognition independent of the changes in viewing angle at image capture, which introduce a high degree of variability into the image data. In this paper we present three face-alignment procedures (from the literature) and examine their influence on the recognition performance of the methods implemented in the open-source framework Open Source Biometric Recognition (OpenBR). All experiments are conducted on the Labeled Faces in the Wild (LFW) dataset.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Žiga Stržinar; Klemen Grm; Vitomir Štruc
Učenje podobnosti v globokih nevronskih omrežjih za razpoznavanje obrazov Inproceedings
In: Proceedings of the Electrotechnical and Computer Science Conference (ERK), Portorož, Slovenia, 2016.
@inproceedings{ERK2016_sebastjan,
title = {Učenje podobnosti v globokih nevronskih omrežjih za razpoznavanje obrazov},
author = {Žiga Stržinar and Klemen Grm and Vitomir Štruc},
url = {http://luks.fe.uni-lj.si/nluks/wp-content/uploads/2016/09/erk_ziga_Vziga.pdf},
year = {2016},
date = {2016-09-20},
booktitle = {Proceedings of the Electrotechnical and Computer Science Conference (ERK)},
address = {Portorož, Slovenia},
abstract = {Learning the similarity between pairs of input images is one of the most popular approaches to recognition in the field of deep learning. In this approach, a deep neural network receives a pair of (face) images at its input and returns a similarity measure between the two images at its output, which can be used for recognition. The similarity computation can be implemented entirely by the deep network, or the network can be used only to compute a representation of the input image pair, with the mapping from this representation to a similarity measure performed by a different, potentially more suitable model. In this paper we evaluate 5 different models for the mapping between the computed representation and the similarity measure, using a neural network of our own design for the experiments. The results of our experiments on the face recognition problem show the importance of choosing a suitable model, as the differences in recognition performance between the models are considerable.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Simon Dobrišek; David Čefarin; Vitomir Štruc; France Mihelič
Assessment of the Google Speech Application Programming Interface for Automatic Slovenian Speech Recognition Inproceedings
In: Jezikovne Tehnologije in Digitalna Humanistika, 2016.
@inproceedings{SJDT,
title = {Assessment of the Google Speech Application Programming Interface for Automatic Slovenian Speech Recognition},
author = {Simon Dobrišek and David Čefarin and Vitomir Štruc and France Mihelič},
url = {http://luks.fe.uni-lj.si/nluks/wp-content/uploads/2016/09/jtdh16-ulfe-luks-sd-final-pdfa.pdf},
year = {2016},
date = {2016-09-20},
booktitle = {Jezikovne Tehnologije in Digitalna Humanistika},
abstract = {Automatic speech recognizers are slowly maturing into technologies that enable humans to communicate more naturally and effectively with a variety of smart devices and information-communication systems. Large global companies such as Google, Microsoft, Apple, IBM and Baidu compete in developing the most reliable speech recognizers, supporting as many of the main world languages as possible. Due to the relatively small number of speakers, the support for the Slovenian spoken language is lagging behind, and among the major global companies only Google has recently supported our spoken language. The paper presents the results of our independent assessment of the Google speech-application programming interface for automatic Slovenian speech recognition. For the experiments, we used speech databases that are otherwise used for the development and assessment of Slovenian speech recognizers.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Metod Ribič; Žiga Emeršič; Vitomir Štruc; Peter Peer
Influence of alignment on ear recognition: case study on AWE Dataset Inproceedings
In: Proceedings of the Electrotechnical and Computer Science Conference (ERK), pp. 131-134, Portorož, Slovenia, 2016.
@inproceedings{RibicERK2016,
title = {Influence of alignment on ear recognition: case study on AWE Dataset},
author = {Metod Ribič and Žiga Emeršič and Vitomir Štruc and Peter Peer},
url = {http://luks.fe.uni-lj.si/nluks/wp-content/uploads/2016/09/Influence_of_Alignment_on_Ear_Recognitio.pdf},
year = {2016},
date = {2016-09-20},
booktitle = {Proceedings of the Electrotechnical and Computer Science Conference (ERK)},
pages = {131-134},
address = {Portorož, Slovenia},
abstract = {The ear as a biometric modality presents a viable source for automatic human recognition. In recent years, local description methods have been gaining popularity due to their invariance to illumination and occlusion. However, these methods require that images are well aligned and preprocessed as well as possible. This gives rise to one of the greatest challenges of ear recognition: sensitivity to pose variations. Recently, we presented the Annotated Web Ears (AWE) dataset that opens new challenges in ear recognition. In this paper we test the influence of alignment on recognition performance and show that, even with alignment, the dataset remains very challenging, even though the recognition rate is improved by the alignment. We also show that more sophisticated alignment methods are needed to address the AWE dataset efficiently.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Simon Dobrišek; David Čefarin; Vitomir Štruc; France Mihelič
Preizkus Googlovega govornega programskega vmesnika pri samodejnem razpoznavanju govorjene slovenščine Inproceedings
In: Jezikovne tehnologije in digitalna humanistika, pp. 47-51, 2016.
@inproceedings{dobrivsekpreizkus,
title = {Preizkus Googlovega govornega programskega vmesnika pri samodejnem razpoznavanju govorjene slovenščine},
author = {Simon Dobrišek and David Čefarin and Vitomir Štruc and France Mihelič},
url = {http://www.sdjt.si/wp/wp-content/uploads/2016/09/JTDH-2016_Dobrisek-et-al_Preizkus-Googlovega-govornega-programskega-vmesnika.pdf},
year = {2016},
date = {2016-09-01},
booktitle = {Jezikovne tehnologije in digitalna humanistika},
pages = {47-51},
abstract = {Automatic speech recognizers are slowly maturing into technologies that enable humans to communicate more naturally and effectively with a variety of smart devices and information-communication systems. Large global companies such as Google, Microsoft, Apple, IBM and Baidu compete in developing the most reliable speech recognizers, supporting as many of the main world languages as possible. Due to the relatively small number of speakers, the support for the Slovenian spoken language is lagging behind, and among the major global companies only Google has recently supported our spoken language. The paper presents the results of our independent assessment of the Google speech-application programming interface for automatic Slovenian speech recognition. For the experiments, we used speech databases that are otherwise used for the development and assessment of Slovenian speech recognizers.
},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Klemen Grm; Simon Dobrišek; Vitomir Štruc
Deep pair-wise similarity learning for face recognition Inproceedings
In: 4th International Workshop on Biometrics and Forensics (IWBF), pp. 1–6, IEEE 2016.
@inproceedings{grm2016deep,
title = {Deep pair-wise similarity learning for face recognition},
author = {Klemen Grm and Simon Dobrišek and Vitomir Štruc},
url = {http://luks.fe.uni-lj.si/nluks/wp-content/uploads/2016/09/IWBF_2016.pdf},
year = {2016},
date = {2016-01-01},
booktitle = {4th International Workshop on Biometrics and Forensics (IWBF)},
pages = {1--6},
organization = {IEEE},
abstract = {Recent advances in deep learning made it possible to build deep hierarchical models capable of delivering state-of-the-art performance in various vision tasks, such as object recognition, detection or tracking. For recognition tasks the most common approach when using deep models is to learn object representations (or features) directly from raw image-input and then feed the learned features to a suitable classifier. Deep models used in this pipeline are typically heavily parameterized and require enormous amounts of training data to deliver competitive recognition performance. Despite the use of data augmentation techniques, many application domains, predefined experimental protocols or specifics of the recognition problem limit the amount of available training data and make training an effective deep hierarchical model a difficult task. In this paper, we present a novel, deep pair-wise similarity learning (DPSL) strategy for deep models, developed specifically to overcome the problem of insufficient training data, and demonstrate its usage on the task of face recognition. Unlike existing (deep) learning strategies, DPSL operates on image-pairs and tries to learn pair-wise image similarities that can be used for recognition purposes directly instead of feature representations that need to be fed to appropriate classification techniques, as with traditional deep learning pipelines. Since our DPSL strategy assumes an image pair as the input to the learning procedure, the amount of training data available to train deep models is quadratic in the number of available training images, which is of paramount importance for models with a large number of parameters. We demonstrate the efficacy of the proposed learning strategy by developing a deep model for pose-invariant face recognition, called Pose-Invariant Similarity Index (PISI), and presenting comparative experimental results on the FERET and IJB-A datasets.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
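The data-efficiency argument behind DPSL is easy to see in code: from N labelled images one can form on the order of N^2 training pairs with genuine/impostor labels, which a pair-wise similarity learner consumes directly. A minimal sketch of this pair construction follows (toy labels, not the paper's data):

from itertools import combinations

labels = ["A", "A", "B", "B", "C"]                 # identity label per image
pairs = [(i, j, int(labels[i] == labels[j]))       # 1 = genuine, 0 = impostor
         for i, j in combinations(range(len(labels)), 2)]
print(len(pairs), "pairs from", len(labels), "images")   # 10 pairs from 5 images
print(pairs)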
Žiga Golob; Jerneja Žganec Gros; Vitomir Štruc; France Mihelič; Simon Dobrišek
A Composition Algorithm of Compact Finite-State Super Transducers for Grapheme-to-Phoneme Conversion Inproceedings
In: International Conference on Text, Speech, and Dialogue, pp. 375–382, Springer 2016.
@inproceedings{golob2016composition,
title = {A Composition Algorithm of Compact Finite-State Super Transducers for Grapheme-to-Phoneme Conversion},
author = {Žiga Golob and Jerneja Žganec Gros and Vitomir Štruc and France Mihelič and Simon Dobrišek},
year = {2016},
date = {2016-01-01},
booktitle = {International Conference on Text, Speech, and Dialogue},
pages = {375--382},
organization = {Springer},
abstract = {Minimal deterministic finite-state transducers (MDFSTs) are powerful models that can be used to represent pronunciation dictionaries in a compact form. Intuitively, we would assume that by increasing the size of the dictionary, the size of the MDFSTs would increase as well. However, as we show in the paper, this intuition does not hold for highly inflected languages. With such languages the size of the MDFSTs begins to decrease once the number of words in the represented dictionary reaches a certain threshold. Motivated by this observation, we have developed a new type of FST, called a finite-state super transducer (FSST), and show experimentally that the FSST is capable of representing pronunciation dictionaries with fewer states and transitions than MDFSTs. Furthermore, we show that (unlike MDFSTs) our FSSTs can also accept words that are not part of the represented dictionary. The phonetic transcriptions of these out-of-dictionary words may not always be correct, but the observed error rates are comparable to the error rates of the traditional methods for grapheme-to-phoneme conversion.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
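A toy illustration of the property the paper highlights, namely that the transducer also accepts out-of-dictionary words at the cost of possibly imperfect transcriptions: the dictionary path is exact, while unseen words fall back to letter-level rules. The tiny lexicon and rules below are invented for illustration and are not the paper's FSST construction.

# In-dictionary words get their exact pronunciation; out-of-dictionary words
# get a fallback transcription assembled from per-letter rules.
lexicon = {"cat": "k ae t", "dog": "d ao g"}        # tiny stand-in dictionary
letter_rules = {"c": "k", "a": "ae", "t": "t", "d": "d",
                "o": "ao", "g": "g", "s": "s"}

def g2p(word):
    if word in lexicon:                             # exact dictionary path
        return lexicon[word]
    # fallback: concatenate per-letter outputs (may be wrong, but never empty)
    return " ".join(letter_rules.get(ch, ch) for ch in word)

print(g2p("cat"))    # k ae t    (in-dictionary)
print(g2p("cats"))   # k ae t s  (out-of-dictionary fallback)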
2015
Journal Articles
Boštjan Murovec
Job-shop local-search move evaluation without direct consideration of the criterion’s value Journal Article
In: European Journal of Operational Research, vol. 241, no. 2, pp. 320-329, 2015, ISSN: 0377-2217.
@article{MUROVEC2015320,
title = {Job-shop local-search move evaluation without direct consideration of the criterion’s value},
author = {Boštjan Murovec},
url = {http://www.sciencedirect.com/science/article/pii/S0377221714007309},
doi = {https://doi.org/10.1016/j.ejor.2014.08.044},
issn = {0377-2217},
year = {2015},
date = {2015-01-01},
journal = {European Journal of Operational Research},
volume = {241},
number = {2},
pages = {320-329},
abstract = {This article focuses on the evaluation of moves for the local search of the job-shop problem with the makespan criterion. We reason that the omnipresent ranking of moves according to their resulting value of a criterion function makes the local search unnecessarily myopic. Consequently, we introduce an alternative evaluation that relies on a surrogate quantity of the move’s potential, which is related to, but not strongly coupled with, the bare criterion. The approach is confirmed by empirical tests, where the proposed evaluator delivers a new upper bound on the well-known benchmark test yn2. The line of the argumentation also shows that by sacrificing accuracy the established makespan estimators unintentionally improve on the move evaluation in comparison to the exact makespan calculation, in contrast to the belief that the reliance on estimation degrades the optimization results.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
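For context, the following sketch shows the conventional, "myopic" move evaluation the article argues against, on a deliberately simplified 2-machine flow-shop instance rather than a full job shop: every adjacent-swap neighbour is ranked by its exact makespan. The paper's surrogate evaluator replaces exactly this ranking; the instance data here are invented for illustration.

def makespan(order, p):
    # p[j] = (processing time on machine 1, on machine 2) for job j;
    # classic 2-machine flow-shop recursion for the completion time.
    m1 = m2 = 0
    for j in order:
        m1 += p[j][0]
        m2 = max(m2, m1) + p[j][1]
    return m2

p = [(3, 6), (5, 2), (1, 2), (6, 6)]
order = [0, 1, 2, 3]
print("current makespan:", makespan(order, p))
for i in range(len(order) - 1):          # rank all adjacent-swap moves
    cand = order[:i] + [order[i + 1], order[i]] + order[i + 2:]
    print("swap", (order[i], order[i + 1]), "->", makespan(cand, p))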
Boštjan Murovec; Sabina Kolbl; Blaž Stres
Methane Yield Database: Online infrastructure and bioresource for methane yield data and related metadata Journal Article
In: Bioresource Technology, vol. 189, pp. 217-223, 2015, ISSN: 0960-8524.
@article{MUROVEC2015217,
title = {Methane Yield Database: Online infrastructure and bioresource for methane yield data and related metadata},
author = {Boštjan Murovec and Sabina Kolbl and Blaž Stres},
url = {http://www.sciencedirect.com/science/article/pii/S0960852415005040},
doi = {https://doi.org/10.1016/j.biortech.2015.04.021},
issn = {0960-8524},
year = {2015},
date = {2015-01-01},
journal = {Bioresource Technology},
volume = {189},
pages = {217-223},
abstract = {The aim of this study was to develop and validate a community supported online infrastructure and bioresource for methane yield data and accompanying metadata collected from published literature. In total, 1164 entries described by 15,749 data points were assembled. Analysis of data collection showed little congruence in reporting of methodological approaches. The largest identifiable source of variation in reported methane yields was represented by authorship (i.e. substrate batches within particular substrate class) within which experimental scales (volumes (0.02–5l), incubation temperature (34–40°C) and % VS of substrate played an important role (p<0.0},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Gemma Henderson; Faith Cox; Siva Ganesh; Arjan Jonker; Wayne Young; Peter H Janssen
Rumen microbial community composition varies with diet and host, but a core microbiome is found across a wide geographical range Journal Article
In: Scientific Reports, vol. 5, art. no. 14567, pp. 1–13, 2015, ISSN: 2045-2322.
@article{Henderson_Cox_Ganesh_Jonker_Young_Janssen_2015,
title = {Rumen microbial community composition varies with diet and host, but a core microbiome is found across a wide geographical range},
author = {Gemma Henderson and Faith Cox and Siva Ganesh and Arjan Jonker and Wayne Young and Peter H Janssen},
url = {http://www.nature.com/articles/srep14567},
doi = {10.1038/srep14567},
issn = {2045-2322},
year = {2015},
date = {2015-01-01},
journal = {Scientific reports},
volume = {5},
number = {art. 14567},
pages = {1–13},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Inproceedings
Klemen Grm; Simon Dobrišek; Vitomir Štruc
The pose-invariant similarity index for face recognition Inproceedings
In: Proceedings of the Electrotechnical and Computer Science Conference (ERK), Portorož, Slovenia, 2015.
@inproceedings{ERK2015Klemen,
title = {The pose-invariant similarity index for face recognition},
author = {Klemen Grm and Simon Dobrišek and Vitomir Štruc},
year = {2015},
date = {2015-04-20},
booktitle = {Proceedings of the Electrotechnical and Computer Science Conference (ERK)},
address = {Portorož, Slovenia},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Vitomir Štruc; Janez Križaj; Simon Dobrišek
Modest face recognition Inproceedings
In: Proceedings of the International Workshop on Biometrics and Forensics (IWBF), pp. 1–6, IEEE, 2015.
@inproceedings{struc2015modest,
title = {Modest face recognition},
author = {Vitomir Štruc and Janez Križaj and Simon Dobrišek},
url = {http://luks.fe.uni-lj.si/nluks/wp-content/uploads/2016/09/IWBF2015.pdf},
year = {2015},
date = {2015-01-01},
booktitle = {Proceedings of the International Workshop on Biometrics and Forensics (IWBF)},
pages = {1--6},
publisher = {IEEE},
abstract = {The facial imagery usually at the disposal of forensic investigations is commonly of poor quality due to the unconstrained settings in which it was acquired. The captured faces are typically non-frontal, partially occluded and of a low resolution, which makes the recognition task extremely difficult. In this paper we try to address this problem by presenting a novel framework for face recognition that combines diverse feature sets (Gabor features, local binary patterns, local phase quantization features and pixel intensities), probabilistic linear discriminant analysis (PLDA) and data fusion based on linear logistic regression. With the proposed framework a matching score for the given pair of probe and target images is produced by applying PLDA on each of the four feature sets independently - producing a (partial) matching score for each of the PLDA-based feature vectors - and then combining the partial matching results at the score level to generate a single matching score for recognition. We make two main contributions in the paper: i) we introduce a novel framework for face recognition that relies on probabilistic MOdels of Diverse fEature SeTs (MODEST) to facilitate the recognition process and ii) we benchmark it against the existing state-of-the-art. We demonstrate the feasibility of our MODEST framework on the FRGCv2 and PaSC databases and present comparative results with the state-of-the-art recognition techniques, which demonstrate the efficacy of our framework.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
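The fusion step of MODEST, score-level combination by linear logistic regression, can be sketched compactly. Below, four random score columns stand in for the partial PLDA matching scores of the Gabor, LBP, LPQ and intensity feature sets (an assumption made for illustration), and scikit-learn's LogisticRegression learns the fusion weights.

import numpy as np
from sklearn.linear_model import LogisticRegression

rng = np.random.default_rng(4)
n = 500
y = rng.integers(0, 2, n)                       # 1 = genuine pair, 0 = impostor
# Four partial matching scores per pair: genuine pairs get a noisy positive
# shift, impostors only noise (synthetic stand-in data).
scores = y[:, None] * rng.normal(1.0, 1.0, (n, 4)) + rng.normal(0.0, 1.0, (n, 4))

fusion = LogisticRegression().fit(scores, y)    # learn linear fusion weights
fused = fusion.decision_function(scores)        # one fused score per pair
print("fusion weights:", np.round(fusion.coef_.ravel(), 2))
print("train accuracy:", fusion.score(scores, y))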
Ross Beveridge; Hao Zhang; Bruce A Draper; Patrick J Flynn; Zhenhua Feng; Patrik Huber; Josef Kittler; Zhiwu Huang; Shaoxin Li; Yan Li; Vitomir Štruc; Janez Križaj; others
Report on the FG 2015 video person recognition evaluation Inproceedings
In: 11th IEEE International Conference and Workshops on Automatic Face and Gesture Recognition (IEEE FG), pp. 1–8, IEEE 2015.
@inproceedings{beveridge2015report,
title = {Report on the FG 2015 video person recognition evaluation},
author = {Ross Beveridge and Hao Zhang and Bruce A Draper and Patrick J Flynn and Zhenhua Feng and Patrik Huber and Josef Kittler and Zhiwu Huang and Shaoxin Li and Yan Li and Vitomir Štruc and Janez Križaj and others},
url = {http://luks.fe.uni-lj.si/nluks/wp-content/uploads/2016/09/fg2015videoEvalPreprint.pdf},
year = {2015},
date = {2015-01-01},
booktitle = {11th IEEE International Conference and Workshops on Automatic Face and Gesture Recognition (IEEE FG)},
volume = {1},
pages = {1--8},
organization = {IEEE},
abstract = {This report presents results from the Video Person Recognition Evaluation held in conjunction with the 11th IEEE International Conference on Automatic Face and Gesture Recognition. Two experiments required algorithms to recognize people in videos from the Point-and-Shoot Face Recognition Challenge Problem (PaSC). The first consisted of videos from a tripod-mounted high quality video camera. The second contained videos acquired from 5 different handheld video cameras. There were 1,401 videos in each experiment of 265 subjects. The subjects, the scenes, and the actions carried out by the people are the same in both experiments. Five groups from around the world participated in the evaluation. The video handheld experiment was included in the International Joint Conference on Biometrics (IJCB) 2014 Handheld Video Face and Person Recognition Competition. The top verification rate from this evaluation is double that of the top performer in the IJCB competition. Analysis shows that the factor most affecting algorithm performance is the combination of location and action: where the video was acquired and what the person was doing.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Tadej Justin; Vitomir Štruc; Simon Dobrišek; Boštjan Vesnicer; Ivo Ipšić; France Mihelič
Speaker de-identification using diphone recognition and speech synthesis Inproceedings
In: 11th IEEE International Conference and Workshops on Automatic Face and Gesture Recognition (IEEE FG): DeID 2015, pp. 1–7, IEEE 2015.
@inproceedings{justin2015speaker,
title = {Speaker de-identification using diphone recognition and speech synthesis},
author = {Tadej Justin and Vitomir Štruc and Simon Dobrišek and Boštjan Vesnicer and Ivo Ipšić and France Mihelič},
url = {http://luks.fe.uni-lj.si/nluks/wp-content/uploads/2016/09/Deid2015.pdf},
year = {2015},
date = {2015-01-01},
booktitle = {11th IEEE International Conference and Workshops on Automatic Face and Gesture Recognition (IEEE FG): DeID 2015},
volume = {4},
pages = {1--7},
organization = {IEEE},
abstract = {The paper addresses the problem of speaker (or voice) de-identification by presenting a novel approach for concealing the identity of speakers in their speech. The proposed technique first recognizes the input speech with a diphone recognition system and then transforms the obtained phonetic transcription into the speech of another speaker with a speech synthesis system. Due to the fact that a Diphone RecOgnition step and a sPeech SYnthesis step are used during the de-identification, we refer to the developed technique as DROPSY. With this approach the acoustical models of the recognition and synthesis modules are completely independent of each other, which ensures the highest level of input-speaker de-identification. The proposed DROPSY-based de-identification approach is language-dependent, text-independent and capable of running in real-time due to the relatively simple computing methods used. When designing speaker de-identification technology, two requirements are typically imposed on the de-identification techniques: i) it should not be possible to establish the identity of the speakers based on the de-identified speech, and ii) the processed speech should still sound natural and be intelligible. This paper, therefore, implements the proposed DROPSY-based approach with two different speech synthesis techniques (i.e., with the HMM-based and the diphone TD-PSOLA-based technique). The obtained de-identified speech is evaluated for intelligibility and evaluated in speaker verification experiments with a state-of-the-art (i-vector/PLDA) speaker recognition system. The comparison of both speech synthesis modules integrated in the proposed method reveals that both can efficiently de-identify the input speakers while still producing intelligible speech.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}