Publications – Laboratory for Machine Intelligence

Babnik, Žiga; Peer, Peter; Štruc, Vitomir

UVFace: Utility Driven Video-based Face Recognition Journal Article

In: ICT Express, pp. 1–6, 2026.

Abstract | Links | BibTeX | Tags: biometrics, CNN, deep learning, face image quality assessment, face images, face recognition, video based recognition

Babnik, Žiga; Boutros, Fadi; Damer, Naser; Jain, Deepak Kumar; Peer, Peter; Štruc, Vitomir

FunFace: Feature Utility and Norm Estimation for Face Recognition Proceedings Article

In: Proceedings of the IEEE International Conference on Automatic Face and Gesture Recognition, pp. 1–10, 2026.

Abstract | Links | BibTeX | Tags: face image processing, face image quality assessment, face image quality estimation, face images, face recognition

@inproceedings{FG2026_FunFace,
  title     = {{FunFace}: Feature Utility and Norm Estimation for Face Recognition},
  author    = {Žiga Babnik and Fadi Boutros and Naser Damer and Deepak Kumar Jain and Peter Peer and Vitomir Štruc},
  url       = {https://lmi.fe.uni-lj.si/wp-content/uploads/2026/04/qFR_paper.pdf},
  year      = {2026},
  date      = {2026-05-24},
  urldate   = {2026-05-24},
  booktitle = {Proceedings of the {IEEE} International Conference on Automatic Face and Gesture Recognition},
  pages     = {1--10},
  abstract  = {Face Recognition (FR) is used in a variety of application domains, from entertainment and banking to security, and surveillance. Such applications rely on the FR model to be robust and perform well in a variety of settings. To achieve this, state-of-the-art FR models typically use expressive adaptive margin loss functions, which tie the feature norm to concepts related to sample quality, such as recognizability and perceptual image quality. Recently, through the development of Face Image Quality Assessment (FIQA) techniques, biometric utility has become the preferred measure of face-image quality and has been shown to be a better predictor of the usefulness of samples for face recognition compared to more human-centric aspects, such as resolution, blur, and lighting, tied to general image quality. While image quality expressed through feature norms exhibits a certain level of correlation with biometric utility, it does not fully encapsulate all aspects of utility. To address this point, we propose a new adaptive margin loss, FunFace (Face Recognition Through Utility and Norm Estimation), which incorporates biometric utility, estimated by the Certainty Ratio, into the adaptive margin, taking inspiration from AdaFace. We show that FunFace (when used to train a face recognition model) achieves competitive results to other state-of-the-art FR models on benchmarks containing high-quality samples, while surpassing them on low quality benchmarks.},
  keywords  = {face image processing, face image quality assessment, face image quality estimation, face images, face recognition},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close

Babnik, Žiga; Štruc, Vitomir

Delno nadzorovano ocenjevanje kakovosti obraznih slik Proceedings Article

In: Proceedings of ERK 2025, 2025.

Abstract | Links | BibTeX | Tags: face analysis, face image quality assessment, face images, face recognition

@inproceedings{Babnik_ERK25,
  title     = {Delno nadzorovano ocenjevanje kakovosti obraznih slik},
  author    = {Žiga Babnik and Vitomir Štruc},
  url       = {https://lmi.fe.uni-lj.si/wp-content/uploads/2025/11/ERK25.pdf},
  year      = {2025},
  date      = {2025-09-25},
  booktitle = {Proceedings of ERK 2025},
  abstract  = {Important security and surveillance applications often depend on reliable predictions from the underlying face recognition (FR) models. Due to the nature of such applications FR models have to perform well in various unconstrained conditions. While state-of-the-art FR models achieve excellent results on large and varied closed set benchmarks, their performance depends heavily on the quality of the input face samples. Low-quality samples can cause critical false-match errors, lowering the trustworthiness of FR models, and furthermore lead to monetary or privacy issues. Face Image Quality Assessment (FIQA) techniques offer the FR model an estimate of the sample’s quality, allowing the system to reject samples of poor quality. Supervised state-of-the-art FIQA techniques rely on extensive training to accurately assess the sample quality. Alternatively, unsupervised techniques extract the quality directly from the input sample, achieving higher runtime complexity and worse performance. In this paper, we present a technique for quality estimation, combining desired characteristics of both supervised and unsupervised methods. Our technique is able to quickly estimate the quality using a single forward pass of the sample through the model needed also for recognition, without any prior supervised training. Comprehensive experiments on a varied set of benchmark datasets and face recognition models show that our method outperforms all existing unsupervised techniques and performs similarly to current state-of-the-art supervised techniques, while achieving excellent runtime.},
  keywords  = {face analysis, face image quality assessment, face images, face recognition},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close

Tomašević, Darian; Boutros, Fadi; Lin, Chenhao; Damer, Naser; Štruc, Vitomir; Peer, Peter

ID-Booth: Identity-consistent Face Generation with Diffusion Models Proceedings Article

In: IEEE International Conference on Automatic Face and Gesture Recognition 2025, pp. 1–10, 2025.

Abstract | Links | BibTeX | Tags: data synthesis, diffusion, face, face images, face recognition, generative AI, generative models, synthetic data

@inproceedings{DarianFG2025,
  title     = {{ID-Booth}: Identity-consistent Face Generation with Diffusion Models},
  author    = {Darian Tomašević and Fadi Boutros and Chenhao Lin and Naser Damer and Vitomir Štruc and Peter Peer},
  url       = {https://lmi.fe.uni-lj.si/wp-content/uploads/2025/04/ID_Booth__2025_.pdf},
  year      = {2025},
  date      = {2025-05-27},
  booktitle = {{IEEE} International Conference on Automatic Face and Gesture Recognition 2025},
  pages     = {1--10},
  abstract  = {Recent advances in generative modeling have enabled the generation of high-quality synthetic data that is applicable in a variety of domains, including face recognition. Here, state-of-the-art generative models typically rely on conditioning and fine-tuning of powerful pretrained diffusion models to facilitate the synthesis of realistic images of a desired identity. Yet, these models often do not consider the identity of subjects during training, leading to poor consistency between generated and intended identities. In contrast, methods that employ identity-based training objectives tend to overfit on various aspects of the identity, and in turn, lower the diversity of images that can be generated. To address these issues, we present in this paper a novel generative diffusion-based framework, called ID-Booth. ID-Booth consists of a denoising network responsible for data generation, a variational auto-encoder for mapping images to and from a lower-dimensional latent space and a text encoder that allows for prompt-based control over the generation procedure. The framework utilizes a novel triplet identity training objective and enables identity-consistent image generation while retaining the synthesis capabilities of pretrained diffusion models. Experiments with a state-of-the-art latent diffusion model and diverse prompts reveal that our method facilitates better intra-identity consistency and inter-identity separability than competing methods, while achieving higher image diversity. In turn, the produced data allows for effective augmentation of small-scale datasets and training of better-performing recognition models in a privacy-preserving manner. The source code for the ID-Booth framework is publicly available at https://github.com/dariant/ID-Booth.},
  keywords  = {data synthesis, diffusion, face, face images, face recognition, generative AI, generative models, synthetic data},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close

Rot, Peter; Terhorst, Philipp; Peer, Peter; Štruc, Vitomir

ASPECD: Adaptable Soft-Biometric Privacy-Enhancement Using Centroid Decoding for Face Verification Proceedings Article

In: Proceedings of the IEEE International Conference on Automatic Face and Gesture Recognition (FG), pp. 1–9, 2024.

Abstract | Links | BibTeX | Tags: deepfake, deepfakes, face, face analysis, face deidentification, face image processing, face images, face synthesis, face verification, privacy, privacy enhancement, privacy protection, privacy-enhancing techniques, soft biometric privacy, soft biometrics

@inproceedings{Rot_FG2024,
  title     = {{ASPECD}: Adaptable Soft-Biometric Privacy-Enhancement Using Centroid Decoding for Face Verification},
  author    = {Peter Rot and Philipp Terhorst and Peter Peer and Vitomir Štruc},
  url       = {https://lmi.fe.uni-lj.si/wp-content/uploads/2024/03/PeterRot_FG2024.pdf},
  year      = {2024},
  date      = {2024-05-28},
  booktitle = {Proceedings of the {IEEE} International Conference on Automatic Face and Gesture Recognition ({FG})},
  pages     = {1--9},
  abstract  = {State-of-the-art face recognition models commonly extract information-rich biometric templates from the input images that are then used for comparison purposes and identity inference. While these templates encode identity information in a highly discriminative manner, they typically also capture other potentially sensitive facial attributes, such as age, gender or ethnicity. To address this issue, Soft-Biometric Privacy-Enhancing Techniques (SB-PETs) were proposed in the literature that aim to suppress such attribute information, and, in turn, alleviate the privacy risks associated with the extracted biometric templates. While various SB-PETs were presented so far, existing approaches do not provide dedicated mechanisms to determine which soft-biometrics to exclude and which to retain. In this paper, we address this gap and introduce ASPECD, a modular framework designed to selectively suppress binary and categorical soft-biometrics based on users' privacy preferences. ASPECD consists of multiple sequentially connected components, each dedicated for privacy-enhancement of an individual soft-biometric attribute. The proposed framework suppresses attribute information using a Moment-based Disentanglement process coupled with a centroid decoding procedure, ensuring that the privacy-enhanced templates are directly comparable to the templates in the original embedding space, regardless of the soft-biometric modality being suppressed. To validate the performance of ASPECD, we conduct experiments on a large-scale face dataset and with five state-of-the-art face recognition models, demonstrating the effectiveness of the proposed approach in suppressing single and multiple soft-biometric attributes. Our approach achieves a competitive privacy-utility trade-off compared to the state-of-the-art methods in scenarios that involve enhancing privacy w.r.t. gender and ethnicity attributes. Source code will be made publicly available.},
  keywords  = {deepfake, deepfakes, face, face analysis, face deidentification, face image processing, face images, face synthesis, face verification, privacy, privacy enhancement, privacy protection, privacy-enhancing techniques, soft biometric privacy, soft biometrics},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close

Tomašević, Darian; Boutros, Fadi; Damer, Naser; Peer, Peter; Štruc, Vitomir

Generating bimodal privacy-preserving data for face recognition Journal Article

In: Engineering Applications of Artificial Intelligence, vol. 133, iss. E, pp. 1–25, 2024.

Abstract | Links | BibTeX | Tags: CNN, face, face generation, face images, face recognition, generative AI, StyleGAN2, synthetic data

@article{Darian2024,
  title     = {Generating bimodal privacy-preserving data for face recognition},
  author    = {Darian Tomašević and Fadi Boutros and Naser Damer and Peter Peer and Vitomir Štruc},
  url       = {https://lmi.fe.uni-lj.si/wp-content/uploads/2024/05/PapersDarian.pdf},
  doi       = {10.1016/j.engappai.2024.108495},
  year      = {2024},
  date      = {2024-05-01},
  journal   = {Engineering Applications of Artificial Intelligence},
  volume    = {133},
  issue     = {E},
  pages     = {1--25},
  abstract  = {The performance of state-of-the-art face recognition systems depends crucially on the availability of large-scale training datasets. However, increasing privacy concerns nowadays accompany the collection and distribution of biometric data, which has already resulted in the retraction of valuable face recognition datasets. The use of synthetic data represents a potential solution, however, the generation of privacy-preserving facial images useful for training recognition models is still an open problem. Generative methods also remain bound to the visible spectrum, despite the benefits that multispectral data can provide. To address these issues, we present a novel identity-conditioned generative framework capable of producing large-scale recognition datasets of visible and near-infrared privacy-preserving face images. The framework relies on a novel identity-conditioned dual-branch style-based generative adversarial network to enable the synthesis of aligned high-quality samples of identities determined by features of a pretrained recognition model. In addition, the framework incorporates a novel filter to prevent samples of privacy-breaching identities from reaching the generated datasets and improve both identity separability and intra-identity diversity. Extensive experiments on six publicly available datasets reveal that our framework achieves competitive synthesis capabilities while preserving the privacy of real-world subjects. The synthesized datasets also facilitate training more powerful recognition models than datasets generated by competing methods or even small-scale real-world datasets. Employing both visible and near-infrared data for training also results in higher recognition accuracy on real-world visible spectrum benchmarks. Therefore, training with multispectral data could potentially improve existing recognition systems that utilize only the visible spectrum, without the need for additional sensors.},
  keywords  = {CNN, face, face generation, face images, face recognition, generative AI, StyleGAN2, synthetic data},
  pubstate  = {published},
  tppubtype = {article}
}

Close

Babnik, Žiga; Boutros, Fadi; Damer, Naser; Peer, Peter; Štruc, Vitomir

AI-KD: Towards Alignment Invariant Face Image Quality Assessment Using Knowledge Distillation Proceedings Article

In: Proceedings of the International Workshop on Biometrics and Forensics (IWBF), pp. 1–6, 2024.

Abstract | Links | BibTeX | Tags: ai, CNN, deep learning, face, face image quality assessment, face image quality estimation, face images, face recognition, face verification

Križaj, Janez; Plesh, Richard O.; Banavar, Mahesh; Schuckers, Stephanie; Štruc, Vitomir

Deep Face Decoder: Towards understanding the embedding space of convolutional networks through visual reconstruction of deep face templates Journal Article

In: Engineering Applications of Artificial Intelligence, vol. 132, iss. 107941, pp. 1–20, 2024.

Abstract | Links | BibTeX | Tags: CNN, embedding space, face, face images, face recognition, face synthesis, template reconstruction, xai

@article{KrizajEAAI2024,
  title         = {{Deep Face Decoder}: Towards understanding the embedding space of convolutional networks through visual reconstruction of deep face templates},
  author        = {Janez Križaj and Richard O. Plesh and Mahesh Banavar and Stephanie Schuckers and Vitomir Štruc},
  url           = {https://lmi.fe.uni-lj.si/wp-content/uploads/2025/02/Deep_Face_Decoder__Elsevier_template_.pdf},
  publisher-url = {https://www.sciencedirect.com/science/article/abs/pii/S095219762400099X},
  doi           = {10.1016/j.engappai.2024.107941},
  year          = {2024},
  date          = {2024-01-30},
  urldate       = {2024-01-30},
  journal       = {Engineering Applications of Artificial Intelligence},
  volume        = {132},
  issue         = {107941},
  pages         = {1--20},
  abstract      = {Advances in deep learning and convolutional neural networks (ConvNets) have driven remarkable face recognition (FR) progress recently. However, the black-box nature of modern ConvNet-based face recognition models makes it challenging to interpret their decision-making process, to understand the reasoning behind specific success and failure cases, or to predict their responses to unseen data characteristics. It is, therefore, critical to design mechanisms that explain the inner workings of contemporary FR models and offer insight into their behavior. To address this challenge, we present in this paper a novel template-inversion approach capable of reconstructing high-fidelity face images from the embeddings (templates, feature-space representations) produced by modern FR techniques. Our approach is based on a novel Deep Face Decoder (DFD) trained in a regression setting to visualize the information encoded in the embedding space with the goal of fostering explainability. We utilize the developed DFD model in comprehensive experiments on multiple unconstrained face datasets, namely Visual Geometry Group Face dataset 2 (VGGFace2), Labeled Faces in the Wild (LFW), and Celebrity Faces Attributes Dataset High Quality (CelebA-HQ). Our analysis focuses on the embedding spaces of two distinct face recognition models with backbones based on the Visual Geometry Group 16-layer model (VGG-16) and the 50-layer Residual Network (ResNet-50). The results reveal how information is encoded in the two considered models and how perturbations in image appearance due to rotations, translations, scaling, occlusion, or adversarial attacks, are propagated into the embedding space. Our study offers researchers a deeper comprehension of the underlying mechanisms of ConvNet-based FR models, ultimately promoting advancements in model design and explainability.},
  keywords      = {CNN, embedding space, face, face images, face recognition, face synthesis, template reconstruction, xai},
  pubstate      = {published},
  tppubtype     = {article}
}

Close

Babnik, Žiga; Damer, Naser; Štruc, Vitomir

Optimization-Based Improvement of Face Image Quality Assessment Techniques Proceedings Article

In: Proceedings of the International Workshop on Biometrics and Forensics (IWBF), 2023.

Abstract | Links | BibTeX | Tags: distillation, face, face image quality assessment, face image quality estimation, face images, optimization, quality, transfer learning

@inproceedings{iwbf2023babnik,
  title     = {Optimization-Based Improvement of Face Image Quality Assessment Techniques},
  author    = {Žiga Babnik and Naser Damer and Vitomir Štruc},
  url       = {https://lmi.fe.uni-lj.si/wp-content/uploads/2023/03/IWBF_23___paper-1.pdf},
  year      = {2023},
  date      = {2023-02-28},
  booktitle = {Proceedings of the International Workshop on Biometrics and Forensics (IWBF)},
  abstract  = {Contemporary face recognition~(FR) models achieve near-ideal recognition performance in constrained settings, yet do not fully translate the performance to unconstrained (real-world) scenarios. To help improve the performance and stability of FR systems in such unconstrained settings, face image quality assessment (FIQA) techniques try to infer sample-quality information from the input face images that can aid with the recognition process. While existing FIQA techniques are able to efficiently capture the differences between high and low quality images, they typically cannot fully distinguish between images of similar quality, leading to lower performance in many scenarios. To address this issue, we present in this paper a supervised quality-label optimization approach, aimed at improving the performance of existing FIQA techniques. The developed optimization procedure infuses additional information (computed with a selected FR model) into the initial quality scores generated with a given FIQA technique to produce better estimates of the ``actual'' image quality. We evaluate the proposed approach in comprehensive experiments with six state-of-the-art FIQA approaches (CR-FIQA, FaceQAN, SER-FIQ, PCNet, MagFace, SER-FIQ) on five commonly used benchmarks (LFW, CFP-FP, CPLFW, CALFW, XQLFW) using three targeted FR models (ArcFace, ElasticFace, CurricularFace) with highly encouraging results.},
  keywords  = {distillation, face, face image quality assessment, face image quality estimation, face images, optimization, quality, transfer learning},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close

Križaj, Janez; Dobrišek, Simon; Štruc, Vitomir

Making the most of single sensor information: a novel fusion approach for 3D face recognition using region covariance descriptors and Gaussian mixture models Journal Article

In: Sensors, vol. 22, iss. 6, no. 2388, pp. 1–26, 2022.

Abstract | Links | BibTeX | Tags: 3d face, biometrics, face, face analysis, face images, face recognition

@article{KrizajSensors2022,
  title     = {Making the most of single sensor information: a novel fusion approach for {3D} face recognition using region covariance descriptors and {Gaussian} mixture models},
  author    = {Janez Križaj and Simon Dobrišek and Vitomir Štruc},
  url       = {https://www.mdpi.com/1424-8220/22/6/2388},
  doi       = {10.3390/s22062388},
  year      = {2022},
  date      = {2022-03-01},
  journal   = {Sensors},
  volume    = {22},
  number    = {2388},
  issue     = {6},
  pages     = {1--26},
  abstract  = {Most commercially successful face recognition systems combine information from multiple sensors (2D and 3D, visible light and infrared, etc.) to achieve reliable recognition in various environments. When only a single sensor is available, the robustness as well as efficacy of the recognition process suffer. In this paper, we focus on face recognition using images captured by a single 3D sensor and propose a method based on the use of region covariance matrixes and Gaussian mixture models (GMMs). All steps of the proposed framework are automated, and no metadata, such as pre-annotated eye, nose, or mouth positions is required, while only a very simple clustering-based face detection is performed. The framework computes a set of region covariance descriptors from local regions of different face image representations and then uses the unscented transform to derive low-dimensional feature vectors, which are finally modeled by GMMs. In the last step, a support vector machine classification scheme is used to make a decision about the identity of the input 3D facial image. The proposed framework has several desirable characteristics, such as an inherent mechanism for data fusion/integration (through the region covariance matrixes), the ability to explore facial images at different levels of locality, and the ability to integrate a domain-specific prior knowledge into the modeling procedure. Several normalization techniques are incorporated into the proposed framework to further improve performance. Extensive experiments are performed on three prominent databases (FRGC v2, CASIA, and UMB-DB) yielding competitive results.},
  keywords  = {3d face, biometrics, face, face analysis, face images, face recognition},
  pubstate  = {published},
  tppubtype = {article}
}

Close