Publications – Laboratory for Machine Intelligence

Ivanovska, Marija; Todorov, Leon; Damer, Naser; Jain, Deepak Kumar; Peer, Peter; Štruc, Vitomir

SelfMAD: Enhancing Generalization and Robustness in Morphing Attack Detection via Self-Supervised Learning Proceedings Article

In: IEEE International Conference on Automatic Face and Gesture Recognition 2025, pp. 1-10, 2025.

Abstract | Links | BibTeX | Tags: biometrics, face, face morphing, face morphing attack, face morphing detection, self-supervised learning, selfMAD

% SelfMAD conference paper (FG 2025). The arXiv preprint is recorded through
% the eprint fields so that the url field holds exactly one URL.
@inproceedings{MarijaFG2025,
  author     = {Ivanovska, Marija and Todorov, Leon and Damer, Naser and Jain, Deepak Kumar and Peer, Peter and Štruc, Vitomir},
  title      = {{SelfMAD}: Enhancing Generalization and Robustness in Morphing Attack Detection via Self-Supervised Learning},
  booktitle  = {IEEE International Conference on Automatic Face and Gesture Recognition 2025},
  pages      = {1--10},
  year       = {2025},
  date       = {2025-05-27},
  url        = {https://lmi.fe.uni-lj.si/wp-content/uploads/2025/04/FG2025__SelfMAD.pdf},
  eprint     = {2504.05504},
  eprinttype = {arXiv},
  abstract   = {With the continuous advancement of generative models, face morphing attacks have become a significant challenge for existing face verification systems due to their potential use in identity fraud and other malicious activities. Contemporary Morphing Attack Detection (MAD) approaches frequently rely on supervised, discriminative models trained on examples of bona fide and morphed images. These models typically perform well with morphs generated with techniques seen during training, but often lead to suboptimal performance when subjected to novel unseen morphing techniques. While unsupervised models have been shown to perform better in terms of generalizability, they typically result in higher error rates, as they struggle to effectively capture features of subtle artifacts. To address these shortcomings, we present SelfMAD, a novel self-supervised approach that simulates general morphing attack artifacts, allowing classifiers to learn generic and robust decision boundaries without overfitting to the specific artifacts induced by particular face morphing methods. Through extensive experiments on widely used datasets, we demonstrate that SelfMAD significantly outperforms current state-of-the-art MADs, reducing the detection error by more than 64\% in terms of EER when compared to the strongest unsupervised competitor, and by more than 66\%, when compared to the best performing discriminative MAD model, tested in cross-morph settings. The source code for SelfMAD is available at https://github.com/LeonTodorov/SelfMAD.},
  keywords   = {biometrics, face, face morphing, face morphing attack, face morphing detection, self-supervised learning, selfMAD},
  pubstate   = {published},
  tppubtype  = {inproceedings},
}

Close

Tomašević, Darian; Boutros, Fadi; Lin, Chenhao; Damer, Naser; Štruc, Vitomir; Peer, Peter

ID-Booth: Identity-consistent Face Generation with Diffusion Models Proceedings Article

In: IEEE International Conference on Automatic Face and Gesture Recognition 2025, pp. 1-10, 2025.

Abstract | Links | BibTeX | Tags: data synthesis, diffusion, face, face images, face recognition, generative AI, generative models, synthetic data

% ID-Booth conference paper (FG 2025).
@inproceedings{DarianFG2025,
  author    = {Tomašević, Darian and Boutros, Fadi and Lin, Chenhao and Damer, Naser and Štruc, Vitomir and Peer, Peter},
  title     = {{ID-Booth}: Identity-consistent Face Generation with Diffusion Models},
  booktitle = {IEEE International Conference on Automatic Face and Gesture Recognition 2025},
  pages     = {1--10},
  year      = {2025},
  date      = {2025-05-27},
  url       = {https://lmi.fe.uni-lj.si/wp-content/uploads/2025/04/ID_Booth__2025_.pdf},
  abstract  = {Recent advances in generative modeling have enabled the generation of high-quality synthetic data that is applicable in a variety of domains, including face recognition. Here, state-of-the-art generative models typically rely on conditioning and fine-tuning of powerful pretrained diffusion models to facilitate the synthesis of realistic images of a desired identity. Yet, these models often do not consider the identity of subjects during training, leading to poor consistency between generated and intended identities. In contrast, methods that employ identity-based training objectives tend to overfit on various aspects of the identity, and in turn, lower the diversity of images that can be generated. To address these issues, we present in this paper a novel generative diffusion-based framework, called ID-Booth. ID-Booth consists of a denoising network responsible for data generation, a variational auto-encoder for mapping images to and from a lower-dimensional latent space and a text encoder that allows for prompt-based control over the generation procedure. The framework utilizes a novel triplet identity training objective and enables identity-consistent image generation while retaining the synthesis capabilities of pretrained diffusion models. Experiments with a state-of-the-art latent diffusion model and diverse prompts reveal that our method facilitates better intra-identity consistency and inter-identity separability than competing methods, while achieving higher image diversity. In turn, the produced data allows for effective augmentation of small-scale datasets and training of better-performing recognition models in a privacy-preserving manner. The source code for the ID-Booth framework is publicly available at https://github.com/dariant/ID-Booth.},
  keywords  = {data synthesis, diffusion, face, face images, face recognition, generative AI, generative models, synthetic data},
  pubstate  = {published},
  tppubtype = {inproceedings},
}

Close

DeAndres-Tame, Ivan; Tolosana, Ruben; Melzi, Pietro; Vera-Rodriguez, Ruben; Kim, Minchul; Rathgeb, Christian; Liu, Xiaoming; Gomez, Luis F.; Morales, Aythami; Fierrez, Julian; Ortega-Garcia, Javier; Zhong, Zhizhou; Huang, Yuge; Mi, Yuxi; Ding, Shouhong; Zhou, Shuigeng; He, Shuai; Fu, Lingzhi; Cong, Heng; Zhang, Rongyu; Xiao, Zhihong; Smirnov, Evgeny; Pimenov, Anton; Grigorev, Aleksei; Timoshenko, Denis; Asfaw, Kaleb Mesfin; Low, Cheng Yaw; Liu, Hao; Wang, Chuyi; Zuo, Qing; He, Zhixiang; Shahreza, Hatef Otroshi; George, Anjith; Unnervik, Alexander; Rahimi, Parsa; Marcel, Sebastien; Neto, Pedro C.; Huber, Marco; Kolf, Jan Niklas; Damer, Naser; Boutros, Fadi; Cardoso, Jaime S.; Sequeira, Ana F.; Atzori, Andrea; Fenu, Gianni; Marras, Mirko; Štruc, Vitomir; Yu, Jiang; Li, Zhangjie; Li, Jichun; Zhao, Weisong; Lei, Zhen; Zhu, Xiangyu; Zhang, Xiao-Yu; Biesseck, Bernardo; Vidal, Pedro; Coelho, Luiz; Granada, Roger; Menotti, David

Second FRCSyn-onGoing: Winning solutions and post-challenge analysis to improve face recognition with synthetic data Journal Article

In: Information Fusion, no. 103099, 2025.

Abstract | Links | BibTeX | Tags: biometrics, data synthesis, face, face recognition, face synthesis, synthetic data

% Second FRCSyn-onGoing journal article (Information Fusion, 2025).
% 103099 is the article number (it matches the DOI suffix), so it is stored in
% pages rather than in number, which denotes an issue within a volume.
@article{Synth_InfoFUS2025,
  author    = {DeAndres-Tame, Ivan and Tolosana, Ruben and Melzi, Pietro and Vera-Rodriguez, Ruben and Kim, Minchul and Rathgeb, Christian and Liu, Xiaoming and Gomez, Luis F. and Morales, Aythami and Fierrez, Julian and Ortega-Garcia, Javier and Zhong, Zhizhou and Huang, Yuge and Mi, Yuxi and Ding, Shouhong and Zhou, Shuigeng and He, Shuai and Fu, Lingzhi and Cong, Heng and Zhang, Rongyu and Xiao, Zhihong and Smirnov, Evgeny and Pimenov, Anton and Grigorev, Aleksei and Timoshenko, Denis and Asfaw, Kaleb Mesfin and Low, Cheng Yaw and Liu, Hao and Wang, Chuyi and Zuo, Qing and He, Zhixiang and Shahreza, Hatef Otroshi and George, Anjith and Unnervik, Alexander and Rahimi, Parsa and Marcel, Sebastien and Neto, Pedro C. and Huber, Marco and Kolf, Jan Niklas and Damer, Naser and Boutros, Fadi and Cardoso, Jaime S. and Sequeira, Ana F. and Atzori, Andrea and Fenu, Gianni and Marras, Mirko and Štruc, Vitomir and Yu, Jiang and Li, Zhangjie and Li, Jichun and Zhao, Weisong and Lei, Zhen and Zhu, Xiangyu and Zhang, Xiao-Yu and Biesseck, Bernardo and Vidal, Pedro and Coelho, Luiz and Granada, Roger and Menotti, David},
  title     = {Second {FRCSyn-onGoing}: Winning solutions and post-challenge analysis to improve face recognition with synthetic data},
  journal   = {Information Fusion},
  pages     = {103099},
  year      = {2025},
  date      = {2025-03-14},
  doi       = {10.1016/j.inffus.2025.103099},
  url       = {https://www.sciencedirect.com/science/article/pii/S1566253525001721},
  urldate   = {2025-03-14},
  abstract  = {Synthetic data is gaining increasing popularity for face recognition technologies, mainly due to the privacy concerns and challenges associated with obtaining real data, including diverse scenarios, quality, and demographic groups, among others. It also offers some advantages over real data, such as the large amount of data that can be generated or the ability to customize it to adapt to specific problem-solving needs. To effectively use such data, face recognition models should also be specifically designed to exploit synthetic data to its fullest potential. In order to promote the proposal of novel Generative AI methods and synthetic data, and investigate the application of synthetic data to better train face recognition systems, we introduce the 2nd FRCSyn-on-Going challenge, based on the 2nd Face Recognition Challenge in the Era of Synthetic Data (FRCSyn), originally launched at CVPR 2024. This is an ongoing challenge that provides researchers with an accessible platform to benchmark (i) the proposal of novel Generative AI methods and synthetic data, and (ii) novel face recognition systems that are specifically proposed to take advantage of synthetic data. We focus on exploring the use of synthetic data both individually and in combination with real data to solve current challenges in face recognition such as demographic bias, domain adaptation, and performance constraints in demanding situations, such as age disparities between training and testing, changes in the pose, or occlusions. Very interesting findings are obtained in this second edition, including a direct comparison with the first one, in which synthetic databases were restricted to DCFace and GANDiffFace.},
  keywords  = {biometrics, data synthesis, face, face recognition, face synthesis, synthetic data},
  pubstate  = {published},
  tppubtype = {article},
}

Close

Boutros, Fadi; Štruc, Vitomir; Damer, Naser

AdaDistill: Adaptive Knowledge Distillation for Deep Face Recognition Proceedings Article

In: Proceedings of the European Conference on Computer Vision (ECCV 2024), pp. 1-20, 2024.

Abstract | Links | BibTeX | Tags: adaptive distillation, biometrics, CNN, deep learning, face, face recognition, knowledge distillation

DeAndres-Tame, Ivan; Tolosana, Ruben; Melzi, Pietro; Vera-Rodriguez, Ruben; Kim, Minchul; Rathgeb, Christian; Liu, Xiaoming; Morales, Aythami; Fierrez, Julian; Ortega-Garcia, Javier; Zhong, Zhizhou; Huang, Yuge; Mi, Yuxi; Ding, Shouhong; Zhou, Shuigeng; He, Shuai; Fu, Lingzhi; Cong, Heng; Zhang, Rongyu; Xiao, Zhihong; Smirnov, Evgeny; Pimenov, Anton; Grigorev, Aleksei; Timoshenko, Denis; Asfaw, Kaleb Mesfin; Low, Cheng Yaw; Liu, Hao; Wang, Chuyi; Zuo, Qing; He, Zhixiang; Shahreza, Hatef Otroshi; George, Anjith; Unnervik, Alexander; Rahimi, Parsa; Marcel, Sébastien; Neto, Pedro C; Huber, Marco; Kolf, Jan Niklas; Damer, Naser; Boutros, Fadi; Cardoso, Jaime S; Sequeira, Ana F; Atzori, Andrea; Fenu, Gianni; Marras, Mirko; Štruc, Vitomir; Yu, Jiang; Li, Zhangjie; Li, Jichun; Zhao, Weisong; Lei, Zhen; Zhu, Xiangyu; Zhang, Xiao-Yu; Biesseck, Bernardo; Vidal, Pedro; Coelho, Luiz; Granada, Roger; Menotti, David

Second Edition FRCSyn Challenge at CVPR 2024: Face Recognition Challenge in the Era of Synthetic Data Proceedings Article

In: Proceedings of CVPR Workshops (CVPRW 2024), pp. 1-11, 2024.

Abstract | Links | BibTeX | Tags: competition, face, face recognition, synthetic data

% Second Edition FRCSyn Challenge overview paper (CVPR Workshops 2024).
@inproceedings{CVPR_synth2024,
  author    = {DeAndres-Tame, Ivan and Tolosana, Ruben and Melzi, Pietro and Vera-Rodriguez, Ruben and Kim, Minchul and Rathgeb, Christian and Liu, Xiaoming and Morales, Aythami and Fierrez, Julian and Ortega-Garcia, Javier and Zhong, Zhizhou and Huang, Yuge and Mi, Yuxi and Ding, Shouhong and Zhou, Shuigeng and He, Shuai and Fu, Lingzhi and Cong, Heng and Zhang, Rongyu and Xiao, Zhihong and Smirnov, Evgeny and Pimenov, Anton and Grigorev, Aleksei and Timoshenko, Denis and Asfaw, Kaleb Mesfin and Low, Cheng Yaw and Liu, Hao and Wang, Chuyi and Zuo, Qing and He, Zhixiang and Shahreza, Hatef Otroshi and George, Anjith and Unnervik, Alexander and Rahimi, Parsa and Marcel, Sébastien and Neto, Pedro C. and Huber, Marco and Kolf, Jan Niklas and Damer, Naser and Boutros, Fadi and Cardoso, Jaime S. and Sequeira, Ana F. and Atzori, Andrea and Fenu, Gianni and Marras, Mirko and Štruc, Vitomir and Yu, Jiang and Li, Zhangjie and Li, Jichun and Zhao, Weisong and Lei, Zhen and Zhu, Xiangyu and Zhang, Xiao-Yu and Biesseck, Bernardo and Vidal, Pedro and Coelho, Luiz and Granada, Roger and Menotti, David},
  title     = {Second Edition {FRCSyn} Challenge at {CVPR} 2024: Face Recognition Challenge in the Era of Synthetic Data},
  booktitle = {Proceedings of CVPR Workshops (CVPRW 2024)},
  pages     = {1--11},
  year      = {2024},
  date      = {2024-06-17},
  url       = {https://openaccess.thecvf.com/content/CVPR2024W/FRCSyn/papers/Deandres-Tame_Second_Edition_FRCSyn_Challenge_at_CVPR_2024_Face_Recognition_Challenge_CVPRW_2024_paper.pdf},
  urldate   = {2024-06-17},
  abstract  = {Synthetic data is gaining increasing relevance for training machine learning models. This is mainly motivated due to several factors such as the lack of real data and intraclass variability, time and errors produced in manual labeling, and in some cases privacy concerns, among others. This paper presents an overview of the 2nd edition of the Face Recognition Challenge in the Era of Synthetic Data (FRCSyn) organized at CVPR 2024. FRCSyn aims to investigate the use of synthetic data in face recognition to address current technological limitations, including data privacy concerns, demographic biases, generalization to novel scenarios, and performance constraints in challenging situations such as aging, pose variations, and occlusions. Unlike the 1st edition, in which synthetic data from DCFace and GANDiffFace methods was only allowed to train face recognition systems, in this 2nd edition we propose new subtasks that allow participants to explore novel face generative methods. The outcomes of the 2nd FRCSyn Challenge, along with the proposed experimental protocol and benchmarking contribute significantly to the application of synthetic data to face recognition.},
  keywords  = {competition, face, face recognition, synthetic data},
  pubstate  = {published},
  tppubtype = {inproceedings},
}

Close

Rot, Peter; Terhorst, Philipp; Peer, Peter; Štruc, Vitomir

ASPECD: Adaptable Soft-Biometric Privacy-Enhancement Using Centroid Decoding for Face Verification Proceedings Article

In: Proceedings of the IEEE International Conference on Automatic Face and Gesture Recognition (FG), pp. 1-9, 2024.

Abstract | Links | BibTeX | Tags: deepfake, deepfakes, face, face analysis, face deidentification, face image processing, face images, face synthesis, face verification, privacy, privacy enhancement, privacy protection, privacy-enhancing techniques, soft biometric privacy, soft biometrics

% ASPECD conference paper (FG 2024).
@inproceedings{Rot_FG2024,
  author    = {Rot, Peter and Terhorst, Philipp and Peer, Peter and Štruc, Vitomir},
  title     = {{ASPECD}: Adaptable Soft-Biometric Privacy-Enhancement Using Centroid Decoding for Face Verification},
  booktitle = {Proceedings of the IEEE International Conference on Automatic Face and Gesture Recognition (FG)},
  pages     = {1--9},
  year      = {2024},
  date      = {2024-05-28},
  url       = {https://lmi.fe.uni-lj.si/wp-content/uploads/2024/03/PeterRot_FG2024.pdf},
  abstract  = {State-of-the-art face recognition models commonly extract information-rich biometric templates from the input images that are then used for comparison purposes and identity inference. While these templates encode identity information in a highly discriminative manner, they typically also capture other potentially sensitive facial attributes, such as age, gender or ethnicity. To address this issue, Soft-Biometric Privacy-Enhancing Techniques (SB-PETs) were proposed in the literature that aim to suppress such attribute information, and, in turn, alleviate the privacy risks associated with the extracted biometric templates. While various SB-PETs were presented so far, existing approaches do not provide dedicated mechanisms to determine which soft-biometrics to exclude and which to retain. In this paper, we address this gap and introduce ASPECD, a modular framework designed to selectively suppress binary and categorical soft-biometrics based on users' privacy preferences. ASPECD consists of multiple sequentially connected components, each dedicated for privacy-enhancement of an individual soft-biometric attribute. The proposed framework suppresses attribute information using a Moment-based Disentanglement process coupled with a centroid decoding procedure, ensuring that the privacy-enhanced templates are directly comparable to the templates in the original embedding space, regardless of the soft-biometric modality being suppressed. To validate the performance of ASPECD, we conduct experiments on a large-scale face dataset and with five state-of-the-art face recognition models, demonstrating the effectiveness of the proposed approach in suppressing single and multiple soft-biometric attributes. Our approach achieves a competitive privacy-utility trade-off compared to the state-of-the-art methods in scenarios that involve enhancing privacy w.r.t. gender and ethnicity attributes. Source code will be made publicly available.},
  keywords  = {deepfake, deepfakes, face, face analysis, face deidentification, face image processing, face images, face synthesis, face verification, privacy, privacy enhancement, privacy protection, privacy-enhancing techniques, soft biometric privacy, soft biometrics},
  pubstate  = {published},
  tppubtype = {inproceedings},
}

Close

Tomašević, Darian; Boutros, Fadi; Damer, Naser; Peer, Peter; Štruc, Vitomir

Generating bimodal privacy-preserving data for face recognition Journal Article

In: Engineering Applications of Artificial Intelligence, vol. 133, iss. E, pp. 1-25, 2024.

Abstract | Links | BibTeX | Tags: CNN, face, face generation, face images, face recognition, generative AI, StyleGAN2, synthetic data

% Journal article on bimodal privacy-preserving face data
% (Engineering Applications of Artificial Intelligence, vol. 133).
% The doi field holds the bare identifier; styles add the resolver prefix.
@article{Darian2024,
  author    = {Tomašević, Darian and Boutros, Fadi and Damer, Naser and Peer, Peter and Štruc, Vitomir},
  title     = {Generating bimodal privacy-preserving data for face recognition},
  journal   = {Engineering Applications of Artificial Intelligence},
  volume    = {133},
  issue     = {E},
  pages     = {1--25},
  year      = {2024},
  date      = {2024-05-01},
  doi       = {10.1016/j.engappai.2024.108495},
  url       = {https://lmi.fe.uni-lj.si/wp-content/uploads/2024/05/PapersDarian.pdf},
  abstract  = {The performance of state-of-the-art face recognition systems depends crucially on the availability of large-scale training datasets. However, increasing privacy concerns nowadays accompany the collection and distribution of biometric data, which has already resulted in the retraction of valuable face recognition datasets. The use of synthetic data represents a potential solution, however, the generation of privacy-preserving facial images useful for training recognition models is still an open problem. Generative methods also remain bound to the visible spectrum, despite the benefits that multispectral data can provide. To address these issues, we present a novel identity-conditioned generative framework capable of producing large-scale recognition datasets of visible and near-infrared privacy-preserving face images. The framework relies on a novel identity-conditioned dual-branch style-based generative adversarial network to enable the synthesis of aligned high-quality samples of identities determined by features of a pretrained recognition model. In addition, the framework incorporates a novel filter to prevent samples of privacy-breaching identities from reaching the generated datasets and improve both identity separability and intra-identity diversity. Extensive experiments on six publicly available datasets reveal that our framework achieves competitive synthesis capabilities while preserving the privacy of real-world subjects. The synthesized datasets also facilitate training more powerful recognition models than datasets generated by competing methods or even small-scale real-world datasets. Employing both visible and near-infrared data for training also results in higher recognition accuracy on real-world visible spectrum benchmarks. Therefore, training with multispectral data could potentially improve existing recognition systems that utilize only the visible spectrum, without the need for additional sensors.},
  keywords  = {CNN, face, face generation, face images, face recognition, generative AI, StyleGAN2, synthetic data},
  pubstate  = {published},
  tppubtype = {article},
}

Close

Babnik, Žiga; Boutros, Fadi; Damer, Naser; Peer, Peter; Štruc, Vitomir

AI-KD: Towards Alignment Invariant Face Image Quality Assessment Using Knowledge Distillation Proceedings Article

In: Proceedings of the International Workshop on Biometrics and Forensics (IWBF), pp. 1-6, 2024.

Abstract | Links | BibTeX | Tags: ai, CNN, deep learning, face, face image quality assessment, face image quality estimation, face images, face recognition, face verification

Rot, Peter; Križaj, Janez; Peer, Peter; Štruc, Vitomir

Enhancing Gender Privacy with Photo-realistic Fusion of Disentangled Spatial Segments Proceedings Article

In: Proceedings of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 1-5, 2024.

Links | BibTeX | Tags: deep learning, face, privacy, privacy enhancement, privacy protection, privacy-enhancing techniques, soft biometric privacy

Babnik, Žiga; Peer, Peter; Štruc, Vitomir

eDifFIQA: Towards Efficient Face Image Quality Assessment based on Denoising Diffusion Probabilistic Models Journal Article

In: IEEE Transactions on Biometrics, Behavior, and Identity Science (TBIOM), pp. 1-16, 2024, ISSN: 2637-6407.

Abstract | Links | BibTeX | Tags: biometrics, CNN, deep learning, DifFIQA, diffusion, face, face image quality assessment, face recognition, FIQA

% eDifFIQA journal article (IEEE TBIOM, 2024).
% The url field keeps only the open-access PDF; the publisher copy is reached
% through the doi field.
@article{BabnikTBIOM2024,
  author    = {Babnik, Žiga and Peer, Peter and Štruc, Vitomir},
  title     = {{eDifFIQA}: Towards Efficient Face Image Quality Assessment based on Denoising Diffusion Probabilistic Models},
  journal   = {IEEE Transactions on Biometrics, Behavior, and Identity Science (TBIOM)},
  pages     = {1--16},
  year      = {2024},
  date      = {2024-03-07},
  issn      = {2637-6407},
  doi       = {10.1109/TBIOM.2024.3376236},
  url       = {https://lmi.fe.uni-lj.si/wp-content/uploads/2024/03/TBIOM___DifFIQAv2.pdf},
  urldate   = {2024-03-07},
  abstract  = {State-of-the-art Face Recognition (FR) models perform well in constrained scenarios, but frequently fail in difficult real-world scenarios, when no quality guarantees can be made for face samples. For this reason, Face Image Quality Assessment (FIQA) techniques are often used by FR systems, to provide quality estimates of captured face samples. The quality estimate provided by FIQA techniques can be used by the FR system to reject samples of low-quality, in turn improving the performance of the system and reducing the number of critical false-match errors. However, despite steady improvements, ensuring a good trade-off between the performance and computational complexity of FIQA methods across diverse face samples remains challenging. In this paper, we present DifFIQA, a powerful unsupervised approach for quality assessment based on the popular denoising diffusion probabilistic models (DDPMs) and the extended (eDifFIQA) approach. The main idea of the base DifFIQA approach is to utilize the forward and backward processes of DDPMs to perturb facial images and quantify the impact of these perturbations on the corresponding image embeddings for quality prediction. Because of the iterative nature of DDPMs the base DifFIQA approach is extremely computationally expensive. Using eDifFIQA we are able to improve on both the performance and computational complexity of the base DifFIQA approach, by employing label optimized knowledge distillation. In this process, quality information inferred by DifFIQA is distilled into a quality-regression model. During the distillation process, we use an additional source of quality information hidden in the relative position of the embedding to further improve the predictive capabilities of the underlying regression model. By choosing different feature extraction backbone models as the basis for the quality-regression eDifFIQA model, we are able to control the trade-off between the predictive capabilities and computational complexity of the final model. We evaluate three eDifFIQA variants of varying sizes in comprehensive experiments on 7 diverse datasets containing static-images and a separate video-based dataset, with 4 target CNN-based FR models and 2 target Transformer-based FR models and against 10 state-of-the-art FIQA techniques, as well as against the initial DifFIQA baseline and a simple regression-based predictor DifFIQA(R), distilled from DifFIQA without any additional optimization. The results show that the proposed label optimized knowledge distillation improves on the performance and computationally complexity of the base DifFIQA approach, and is able to achieve state-of-the-art performance in several distinct experimental scenarios. Furthermore, we also show that the distilled model can be used directly for face recognition and leads to highly competitive results.},
  keywords  = {biometrics, CNN, deep learning, DifFIQA, diffusion, face, face image quality assessment, face recognition, FIQA},
  pubstate  = {published},
  tppubtype = {article},
}

Close

State-of-the-art Face Recognition (FR) models perform well in constrained scenarios, but frequently fail in difficult real-world scenarios, when no quality guarantees can be made for face samples. For this reason, Face Image Quality Assessment (FIQA) techniques are often used by FR systems, to provide quality estimates of captured face samples. The quality estimate provided by FIQA techniques can be used by the FR system to reject samples of low-quality, in turn improving the performance of the system and reducing the number of critical false-match errors. However, despite steady improvements, ensuring a good trade-off between the performance and computational complexity of FIQA methods across diverse face samples remains challenging. In this paper, we present DifFIQA, a powerful unsupervised approach for quality assessment based on the popular denoising diffusion probabilistic models (DDPMs) and the extended (eDifFIQA) approach. The main idea of the base DifFIQA approach is to utilize the forward and backward processes of DDPMs to perturb facial images and quantify the impact of these perturbations on the corresponding image embeddings for quality prediction. Because of the iterative nature of DDPMs the base DifFIQA approach is extremely computationally expensive. Using eDifFIQA we are able to improve on both the performance and computational complexity of the base DifFIQA approach, by employing label optimized knowledge distillation. In this process, quality information inferred by DifFIQA is distilled into a quality-regression model. During the distillation process, we use an additional source of quality information hidden in the relative position of the embedding to further improve the predictive capabilities of the underlying regression model. 
By choosing different feature extraction backbone models as the basis for the quality-regression eDifFIQA model, we are able to control the trade-off between the predictive capabilities and computational complexity of the final model. We evaluate three eDifFIQA variants of varying sizes in comprehensive experiments on 7 diverse datasets containing static-images and a separate video-based dataset, with 4 target CNN-based FR models and 2 target Transformer-based FR models and against 10 state-of-the-art FIQA techniques, as well as against the initial DifFIQA baseline and a simple regression-based predictor DifFIQA(R), distilled from DifFIQA without any additional optimization. The results show that the proposed label optimized knowledge distillation improves on the performance and computationally complexity of the base DifFIQA approach, and is able to achieve state-of-the-art performance in several distinct experimental scenarios. Furthermore, we also show that the distilled model can be used directly for face recognition and leads to highly competitive results.

Close

Brodarič, Marko; Peer, Peter; Štruc, Vitomir

Cross-Dataset Deepfake Detection: Evaluating the Generalization Capabilities of Modern DeepFake Detectors Proceedings Article

In: Proceedings of the 27th Computer Vision Winter Workshop (CVWW), pp. 1-10, 2024.

Abstract | Links | BibTeX | Tags: data integrity, deepfake, deepfake detection, deepfakes, diffusion, face, faceforensics++, media forensics

% Cross-dataset deepfake detection study (CVWW 2024).
@inproceedings{MarkoCVWW,
  author    = {Brodarič, Marko and Peer, Peter and Štruc, Vitomir},
  title     = {Cross-Dataset Deepfake Detection: Evaluating the Generalization Capabilities of Modern {DeepFake} Detectors},
  booktitle = {Proceedings of the 27th Computer Vision Winter Workshop (CVWW)},
  pages     = {1--10},
  year      = {2024},
  date      = {2024-01-31},
  url       = {https://lmi.fe.uni-lj.si/wp-content/uploads/2024/01/MarkoCVWW24_compressed.pdf},
  abstract  = {Due to the recent advances in generative deep learning, numerous techniques have been proposed in the literature that allow for the creation of so-called deepfakes, i.e., forged facial images commonly used for malicious purposes. These developments have triggered a need for effective deepfake detectors, capable of identifying forged and manipulated imagery as robustly as possible. While a considerable number of detection techniques has been proposed over the years, generalization across a wide spectrum of deepfake-generation techniques still remains an open problem. In this paper, we study a representative set of deepfake generation methods and analyze their performance in a cross-dataset setting with the goal of better understanding the reasons behind the observed generalization performance. To this end, we conduct a comprehensive analysis on the FaceForensics++ dataset and adopt Gradient-weighted Class Activation Mappings (Grad-CAM) to provide insights into the behavior of the evaluated detectors. Since a new class of deepfake generation techniques based on diffusion models recently appeared in the literature, we introduce a new subset of the FaceForensics++ dataset with diffusion-based deepfake and include it in our analysis. The results of our experiments show that most detectors overfit to the specific image artifacts induced by a given deepfake-generation model and mostly focus on local image areas where such artifacts can be expected. Conversely, good generalization appears to be correlated with class activations that cover a broad spatial area and hence capture different image artifacts that appear in various part of the facial region.},
  keywords  = {data integrity, deepfake, deepfake detection, deepfakes, diffusion, face, faceforensics++, media forensics},
  pubstate  = {published},
  tppubtype = {inproceedings},
}

Close

Križaj, Janez; Plesh, Richard O.; Banavar, Mahesh; Schuckers, Stephanie; Štruc, Vitomir

Deep Face Decoder: Towards understanding the embedding space of convolutional networks through visual reconstruction of deep face templates Journal Article

In: Engineering Applications of Artificial Intelligence, vol. 132, iss. 107941, pp. 1-20, 2024.

Abstract | Links | BibTeX | Tags: CNN, embedding space, face, face images, face recognition, face synthesis, template reconstruction, xai

@article{KrizajEAAI2024,
  title     = {Deep Face Decoder: Towards understanding the embedding space of convolutional networks through visual reconstruction of deep face templates},
  author    = {Janez Križaj and Richard O. Plesh and Mahesh Banavar and Stephanie Schuckers and Vitomir Štruc},
  url       = {https://www.sciencedirect.com/science/article/abs/pii/S095219762400099X
https://lmi.fe.uni-lj.si/wp-content/uploads/2025/02/Deep_Face_Decoder__Elsevier_template_.pdf},
  doi       = {10.1016/j.engappai.2024.107941},
  year      = {2024},
  date      = {2024-01-30},
  urldate   = {2024-01-30},
  journal   = {Engineering Applications of Artificial Intelligence},
  volume    = {132},
  issue     = {107941},
  pages     = {1--20},
  abstract  = {Advances in deep learning and convolutional neural networks (ConvNets) have driven remarkable face recognition (FR) progress recently. However, the black-box nature of modern ConvNet-based face recognition models makes it challenging to interpret their decision-making process, to understand the reasoning behind specific success and failure cases, or to predict their responses to unseen data characteristics. It is, therefore, critical to design mechanisms that explain the inner workings of contemporary FR models and offer insight into their behavior. To address this challenge, we present in this paper a novel template-inversion approach capable of reconstructing high-fidelity face images from the embeddings (templates, feature-space representations) produced by modern FR techniques. Our approach is based on a novel Deep Face Decoder (DFD) trained in a regression setting to visualize the information encoded in the embedding space with the goal of fostering explainability. We utilize the developed DFD model in comprehensive experiments on multiple unconstrained face datasets, namely Visual Geometry Group Face dataset 2 (VGGFace2), Labeled Faces in the Wild (LFW), and Celebrity Faces Attributes Dataset High Quality (CelebA-HQ). Our analysis focuses on the embedding spaces of two distinct face recognition models with backbones based on the Visual Geometry Group 16-layer model (VGG-16) and the 50-layer Residual Network (ResNet-50). The results reveal how information is encoded in the two considered models and how perturbations in image appearance due to rotations, translations, scaling, occlusion, or adversarial attacks, are propagated into the embedding space. Our study offers researchers a deeper comprehension of the underlying mechanisms of ConvNet-based FR models, ultimately promoting advancements in model design and explainability.},
  keywords  = {CNN, embedding space, face, face images, face recognition, face synthesis, template reconstruction, xai},
  pubstate  = {published},
  tppubtype = {article}
}

Close

Ivanovska, Marija; Štruc, Vitomir

On the Vulnerability of Deepfake Detectors to Attacks Generated by Denoising Diffusion Models Proceedings Article

In: Proceedings of WACV Workshops, pp. 1051-1060, 2024.

Abstract | Links | BibTeX | Tags: deep learning, deepfake, deepfake detection, diffusion models, face, media forensics

Larue, Nicolas; Vu, Ngoc-Son; Štruc, Vitomir; Peer, Peter; Christophides, Vassilis

SeeABLE: Soft Discrepancies and Bounded Contrastive Learning for Exposing Deepfakes Proceedings Article

In: Proceedings of the International Conference on Computer Vision (ICCV), pp. 21011 - 21021, IEEE 2023.

Abstract | Links | BibTeX | Tags: CNN, deepfake detection, deepfakes, face, media forensics, one-class learning, representation learning

@inproceedings{NicolasCCV,
  title        = {SeeABLE: Soft Discrepancies and Bounded Contrastive Learning for Exposing Deepfakes},
  author       = {Nicolas Larue and Ngoc-Son Vu and Vitomir Štruc and Peter Peer and Vassilis Christophides},
  url          = {https://openaccess.thecvf.com/content/ICCV2023/papers/Larue_SeeABLE_Soft_Discrepancies_and_Bounded_Contrastive_Learning_for_Exposing_Deepfakes_ICCV_2023_paper.pdf
https://lmi.fe.uni-lj.si/wp-content/uploads/2024/01/SeeABLE_compressed.pdf
https://lmi.fe.uni-lj.si/wp-content/uploads/2024/01/SeeABLE_supplementary_compressed.pdf},
  year         = {2023},
  date         = {2023-10-01},
  urldate      = {2023-10-01},
  booktitle    = {Proceedings of the International Conference on Computer Vision (ICCV)},
  pages        = {21011--21021},
  organization = {IEEE},
  abstract     = {Modern deepfake detectors have achieved encouraging results, when training and test images are drawn from the same data collection. However, when these detectors are applied to images produced with unknown deepfake-generation techniques, considerable performance degradations are commonly observed. In this paper, we propose a novel deepfake detector, called SeeABLE, that formalizes the detection problem as a (one-class) out-of-distribution detection task and generalizes better to unseen deepfakes. Specifically, SeeABLE first generates local image perturbations (referred to as soft-discrepancies) and then pushes the perturbed faces towards predefined prototypes using a novel regression-based bounded contrastive loss. To strengthen the generalization performance of SeeABLE to unknown deepfake types, we generate a rich set of soft discrepancies and train the detector: (i) to localize, which part of the face was modified, and (ii) to identify the alteration type. To demonstrate the capabilities of SeeABLE, we perform rigorous experiments on several widely-used deepfake datasets and show that our model convincingly outperforms competing state-of-the-art detectors, while exhibiting highly encouraging generalization capabilities. The source code for SeeABLE is available from: https://github.com/anonymous-author-sub/seeable.},
  keywords     = {CNN, deepfake detection, deepfakes, face, media forensics, one-class learning, representation learning},
  pubstate     = {published},
  tppubtype    = {inproceedings}
}

Close

Rot, Peter; Grm, Klemen; Peer, Peter; Štruc, Vitomir

PrivacyProber: Assessment and Detection of Soft–Biometric Privacy–Enhancing Techniques Journal Article

In: IEEE Transactions on Dependable and Secure Computing, pp. 1-18, 2023, ISSN: 1545-5971.

Abstract | Links | BibTeX | Tags: biometrics, face, privacy, privacy enhancement, privacy protection, privacy-enhancing techniques, soft biometric privacy

@article{PrivacProberRot,
  title     = {PrivacyProber: Assessment and Detection of Soft–Biometric Privacy–Enhancing Techniques},
  author    = {Peter Rot and Klemen Grm and Peter Peer and Vitomir Štruc},
  url       = {https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=10264192},
  doi       = {10.1109/TDSC.2023.3319500},
  issn      = {1545-5971},
  year      = {2023},
  date      = {2023-09-23},
  journal   = {IEEE Transactions on Dependable and Secure Computing},
  pages     = {1--18},
  abstract  = {Soft–biometric privacy–enhancing techniques represent machine learning methods that aim to: (i) mitigate privacy concerns associated with face recognition technology by suppressing selected soft–biometric attributes in facial images (e.g., gender, age, ethnicity) and (ii) make unsolicited extraction of sensitive personal information infeasible. Because such techniques are increasingly used in real–world applications, it is imperative to understand to what extent the privacy enhancement can be inverted and how much attribute information can be recovered from privacy–enhanced images. While these aspects are critical, they have not been investigated in the literature so far. In this paper, we, therefore, study the robustness of several state–of–the–art soft–biometric privacy–enhancing techniques to attribute recovery attempts. We propose PrivacyProber, a high–level framework for restoring soft–biometric information from privacy–enhanced facial images, and apply it for attribute recovery in comprehensive experiments on three public face datasets, i.e., LFW, MUCT and Adience. Our experiments show that the proposed framework is able to restore a considerable amount of suppressed information, regardless of the privacy–enhancing technique used (e.g., adversarial perturbations, conditional synthesis, etc.), but also that there are significant differences between the considered privacy models. These results point to the need for novel mechanisms that can improve the robustness of existing privacy–enhancing techniques and secure them against potential adversaries trying to restore suppressed information. Additionally, we demonstrate that PrivacyProber can also be used to detect privacy–enhancement in facial images (under black–box assumptions) with high accuracy. Specifically, we show that a detection procedure can be developed around the proposed framework that is learning free and, therefore, generalizes well across different data characteristics and privacy–enhancing techniques.},
  keywords  = {biometrics, face, privacy, privacy enhancement, privacy protection, privacy-enhancing techniques, soft biometric privacy},
  pubstate  = {published},
  tppubtype = {article}
}

Close

Soft–biometric privacy–enhancing techniques represent machine learning methods that aim to: (i) mitigate privacy concerns associated with face recognition technology by suppressing selected soft–biometric attributes in facial images (e.g., gender, age, ethnicity) and (ii) make unsolicited extraction of sensitive personal information infeasible. Because such techniques are increasingly used in real–world applications, it is imperative to understand to what extent the privacy enhancement can be inverted and how much attribute information can be recovered from privacy–enhanced images. While these aspects are critical, they have not been investigated in the literature so far. In this paper, we, therefore, study the robustness of several state–of–the–art soft–biometric privacy–enhancing techniques to attribute recovery attempts. We propose PrivacyProber, a high–level framework for restoring soft–biometric information from privacy–enhanced facial images, and apply it for attribute recovery in comprehensive experiments on three public face datasets, i.e., LFW, MUCT and Adience. Our experiments show that the proposed framework is able to restore a considerable amount of suppressed information, regardless of the privacy–enhancing technique used (e.g., adversarial perturbations, conditional synthesis, etc.), but also that there are significant differences between the considered privacy models. These results point to the need for novel mechanisms that can improve the robustness of existing privacy–enhancing techniques and secure them against potential adversaries trying to restore suppressed information. Additionally, we demonstrate that PrivacyProber can also be used to detect privacy–enhancement in facial images (under black–box assumptions) with high accuracy. 
Specifically, we show that a detection procedure can be developed around the proposed framework that is learning free and, therefore, generalizes well across different data characteristics and privacy–enhancing techniques.

Close

Babnik, Žiga; Peer, Peter; Štruc, Vitomir

DifFIQA: Face Image Quality Assessment Using Denoising Diffusion Probabilistic Models Proceedings Article

In: IEEE International Joint Conference on Biometrics , pp. 1-10, IEEE, Ljubljana, Slovenia, 2023.

Abstract | Links | BibTeX | Tags: biometrics, deep learning, denoising diffusion probabilistic models, diffusion, face, face image quality assessment, face recognition, FIQA, quality

@inproceedings{Diffiqa_2023,
  title     = {DifFIQA: Face Image Quality Assessment Using Denoising Diffusion Probabilistic Models},
  author    = {Žiga Babnik and Peter Peer and Vitomir Štruc},
  url       = {https://lmi.fe.uni-lj.si/wp-content/uploads/2023/09/121.pdf
https://lmi.fe.uni-lj.si/wp-content/uploads/2023/09/121-supp.pdf},
  year      = {2023},
  date      = {2023-09-01},
  booktitle = {IEEE International Joint Conference on Biometrics},
  pages     = {1--10},
  publisher = {IEEE},
  address   = {Ljubljana, Slovenia},
  abstract  = {Modern face recognition (FR) models excel in constrained scenarios, but often suffer from decreased performance when deployed in unconstrained (real-world) environments due to uncertainties surrounding the quality of the captured facial data. Face image quality assessment (FIQA) techniques aim to mitigate these performance degradations by providing FR models with sample-quality predictions that can be used to reject low-quality samples and reduce false match errors. However, despite steady improvements, ensuring reliable quality estimates across facial images with diverse characteristics remains challenging. In this paper, we present a powerful new FIQA approach, named DifFIQA, which relies on denoising diffusion probabilistic models (DDPM) and ensures highly competitive results. The main idea behind the approach is to utilize the forward and backward processes of DDPMs to perturb facial images and quantify the impact of these perturbations on the corresponding image embeddings for quality prediction. Because the diffusion-based perturbations are computationally expensive, we also distill the knowledge encoded in DifFIQA into a regression-based quality predictor, called DifFIQA(R), that balances performance and execution time. We evaluate both models in comprehensive experiments on 7 diverse datasets, with 4 target FR models and against 10 state-of-the-art FIQA techniques with highly encouraging results. The source code is available from: https://github.com/LSIbabnikz/DifFIQA.},
  keywords  = {biometrics, deep learning, denoising diffusion probabilistic models, diffusion, face, face image quality assessment, face recognition, FIQA, quality},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close

Peng, Bo; Sun, Xianyun; Wang, Caiyong; Wang, Wei; Dong, Jing; Sun, Zhenan; Zhang, Rongyu; Cong, Heng; Fu, Lingzhi; Wang, Hao; Zhang, Yusheng; Zhang, HanYuan; Zhang, Xin; Liu, Boyuan; Ling, Hefei; Dragar, Luka; Batagelj, Borut; Peer, Peter; Štruc, Vitomir; Zhou, Xinghui; Liu, Kunlin; Feng, Weitao; Zhang, Weiming; Wang, Haitao; Diao, Wenxiu

DFGC-VRA: DeepFake Game Competition on Visual Realism Assessment Proceedings Article

In: IEEE International Joint Conference on Biometrics (IJCB 2023), pp. 1-9, Ljubljana, Slovenia, 2023.

Abstract | Links | BibTeX | Tags: competition IJCB, deepfake detection, deepfakes, face, realism assessment

@inproceedings{Deepfake_comp2023,
  title     = {DFGC-VRA: DeepFake Game Competition on Visual Realism Assessment},
  author    = {Bo Peng and Xianyun Sun and Caiyong Wang and Wei Wang and Jing Dong and Zhenan Sun and Rongyu Zhang and Heng Cong and Lingzhi Fu and Hao Wang and Yusheng Zhang and HanYuan Zhang and Xin Zhang and Boyuan Liu and Hefei Ling and Luka Dragar and Borut Batagelj and Peter Peer and Vitomir Štruc and Xinghui Zhou and Kunlin Liu and Weitao Feng and Weiming Zhang and Haitao Wang and Wenxiu Diao},
  url       = {https://lmi.fe.uni-lj.si/wp-content/uploads/2023/09/CameraReady-225.pdf},
  year      = {2023},
  date      = {2023-09-01},
  booktitle = {IEEE International Joint Conference on Biometrics (IJCB 2023)},
  pages     = {1--9},
  address   = {Ljubljana, Slovenia},
  abstract  = {This paper presents the summary report on the DeepFake Game Competition on Visual Realism Assessment (DFGC-VRA). Deep-learning based face-swap videos, also known as deepfakes, are becoming more and more realistic and deceiving. The malicious usage of these face-swap videos has caused wide concerns. There is a ongoing deepfake game between its creators and detectors, with the human in the loop. The research community has been focusing on the automatic detection of these fake videos, but the assessment of their visual realism, as perceived by human eyes, is still an unexplored dimension. Visual realism assessment, or VRA, is essential for assessing the potential impact that may be brought by a specific face-swap video, and it is also useful as a quality metric to compare different face-swap methods. This is the third edition of DFGC competitions, which focuses on the new visual realism assessment topic, different from previous ones that compete creators versus detectors. With this competition, we conduct a comprehensive study of the SOTA performance on the new task. We also release our MindSpore codes to further facilitate research in this field (https://github.com/bomb2peng/DFGC-VRA-benckmark).},
  keywords  = {competition IJCB, deepfake detection, deepfakes, face, realism assessment},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close

Kolf, Jan Niklas; Boutros, Fadi; Elliesen, Jurek; Theuerkauf, Markus; Damer, Naser; Alansari, Mohamad Y; Hay, Oussama Abdul; Alansari, Sara Yousif; Javed, Sajid; Werghi, Naoufel; Grm, Klemen; Štruc, Vitomir; Alonso-Fernandez, Fernando; Hernandez-Diaz, Kevin; Bigun, Josef; George, Anjith; Ecabert, Christophe; Shahreza, Hatef Otroshi; Kotwal, Ketan; Marcel, Sébastien; Medvedev, Iurii; Bo, Jin; Nunes, Diogo; Hassanpour, Ahmad; Khatiwada, Pankaj; Toor, Aafan Ahmad; Yang, Bian

EFaR 2023: Efficient Face Recognition Competition Proceedings Article

In: IEEE International Joint Conference on Biometrics (IJCB 2023), pp. 1-12, Ljubljana, Slovenia, 2023.

Abstract | Links | BibTeX | Tags: biometrics, deep learning, face, face recognition, lightweight models

@inproceedings{EFAR2023_2023,
  title     = {EFaR 2023: Efficient Face Recognition Competition},
  author    = {Jan Niklas Kolf and Fadi Boutros and Jurek Elliesen and Markus Theuerkauf and Naser Damer and Mohamad Y Alansari and Oussama Abdul Hay and Sara Yousif Alansari and Sajid Javed and Naoufel Werghi and Klemen Grm and Vitomir Štruc and Fernando Alonso-Fernandez and Kevin Hernandez-Diaz and Josef Bigun and Anjith George and Christophe Ecabert and Hatef Otroshi Shahreza and Ketan Kotwal and Sébastien Marcel and Iurii Medvedev and Jin Bo and Diogo Nunes and Ahmad Hassanpour and Pankaj Khatiwada and Aafan Ahmad Toor and Bian Yang},
  url       = {https://lmi.fe.uni-lj.si/wp-content/uploads/2023/09/CameraReady-231.pdf},
  year      = {2023},
  date      = {2023-09-01},
  booktitle = {IEEE International Joint Conference on Biometrics (IJCB 2023)},
  pages     = {1--12},
  address   = {Ljubljana, Slovenia},
  abstract  = {This paper presents the summary of the Efficient Face Recognition Competition (EFaR) held at the 2023 International Joint Conference on Biometrics (IJCB 2023). The competition received 17 submissions from 6 different teams. To drive further development of efficient face recognition models, the submitted solutions are ranked based on a weighted score of the achieved verification accuracies on a diverse set of benchmarks, as well as the deployability given by the number of floating-point operations and model size. The evaluation of submissions is extended to bias, cross-quality, and large-scale recognition benchmarks. Overall, the paper gives an overview of the achieved performance values of the submitted solutions as well as a diverse set of baselines. The submitted solutions use small, efficient network architectures to reduce the computational cost, some solutions apply model quantization. An outlook on possible techniques that are underrepresented in current solutions is given as well.},
  keywords  = {biometrics, deep learning, face, face recognition, lightweight models},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close

Ivanovska, Marija; Štruc, Vitomir

Face Morphing Attack Detection with Denoising Diffusion Probabilistic Models Proceedings Article

In: Proceedings of the International Workshop on Biometrics and Forensics (IWBF), pp. 1-6, 2023.

Abstract | Links | BibTeX | Tags: biometrics, deep learning, denoising diffusion probabilistic models, diffusion, face, face morphing attack, morphing attack, morphing attack detection

Babnik, Žiga; Damer, Naser; Štruc, Vitomir

Optimization-Based Improvement of Face Image Quality Assessment Techniques Proceedings Article

In: Proceedings of the International Workshop on Biometrics and Forensics (IWBF), 2023.

Abstract | Links | BibTeX | Tags: distillation, face, face image quality assessment, face image quality estimation, face images, optimization, quality, transfer learning

@inproceedings{iwbf2023babnik,
  author    = {Žiga Babnik and Naser Damer and Vitomir Štruc},
  title     = {Optimization-Based Improvement of Face Image Quality Assessment Techniques},
  booktitle = {Proceedings of the International Workshop on Biometrics and Forensics (IWBF)},
  year      = {2023},
  date      = {2023-02-28},
  url       = {https://lmi.fe.uni-lj.si/wp-content/uploads/2023/03/IWBF_23___paper-1.pdf},
  abstract  = {Contemporary face recognition~(FR) models achieve near-ideal recognition performance in constrained settings, yet do not fully translate the performance to unconstrained (real-world) scenarios. To help improve the performance and stability of FR systems in such unconstrained settings, face image quality assessment (FIQA) techniques try to infer sample-quality information from the input face images that can aid with the recognition process. While existing FIQA techniques are able to efficiently capture the differences between high and low quality images, they typically cannot fully distinguish between images of similar quality, leading to lower performance in many scenarios. To address this issue, we present in this paper a supervised quality-label optimization approach, aimed at improving the performance of existing FIQA techniques. The developed optimization procedure infuses additional information (computed with a selected FR model) into the initial quality scores generated with a given FIQA technique to produce better estimates of the ``actual'' image quality. We evaluate the proposed approach in comprehensive experiments with six  state-of-the-art FIQA approaches (CR-FIQA, FaceQAN, SER-FIQ, PCNet, MagFace, SER-FIQ) on five commonly used benchmarks (LFW, CFP-FP, CPLFW, CALFW, XQLFW) using three targeted FR models (ArcFace, ElasticFace, CurricularFace) with highly encouraging results. },
  keywords  = {distillation, face, face image quality assessment, face image quality estimation, face images, optimization, quality, transfer learning},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close

Grm, Klemen; Ozata, Berk; Štruc, Vitomir; Ekenel, Hazim K.

Meet-in-the-middle: Multi-scale upsampling and matching for cross-resolution face recognition Proceedings Article

In: WACV workshops, pp. 120-129, 2023.

Abstract | Links | BibTeX | Tags: deep learning, face, face recognition, multi-scale matching, smart surveillance, surveillance, surveillance technology

Eyiokur, Fevziye Irem; Kantarci, Alperen; Erakin, Mustafa Ekrem; Damer, Naser; Ofli, Ferda; Imran, Muhammad; Križaj, Janez; Salah, Albert Ali; Waibel, Alexander; Štruc, Vitomir; Ekenel, Hazim K.

A Survey on Computer Vision based Human Analysis in the COVID-19 Era Journal Article

In: Image and Vision Computing, vol. 130, no. 104610, pp. 1-19, 2023.

Abstract | Links | BibTeX | Tags: COVID-19, face, face alignment, face analysis, face image processing, face image quality assessment, face landmarking, face recognition, face verification, human analysis, masked face analysis

@article{IVC2023,
  title     = {A Survey on Computer Vision based Human Analysis in the COVID-19 Era},
  author    = {Fevziye Irem Eyiokur and Alperen Kantarci and Mustafa Ekrem Erakin and Naser Damer and Ferda Ofli and Muhammad Imran and Janez Križaj and Albert Ali Salah and Alexander Waibel and Vitomir Štruc and Hazim K. Ekenel},
  url       = {https://lmi.fe.uni-lj.si/wp-content/uploads/2023/01/FG4COVID19_PAPER_compressed.pdf
https://authors.elsevier.com/a/1gKOyxnVK7RBS},
  doi       = {10.1016/j.imavis.2022.104610},
  year      = {2023},
  date      = {2023-01-01},
  journal   = {Image and Vision Computing},
  volume    = {130},
  number    = {104610},
  pages     = {1--19},
  abstract  = {The emergence of COVID-19 has had a global and profound impact, not only on society as a whole, but also on the lives of individuals. Various prevention measures were introduced around the world to limit the transmission of the disease, including face masks, mandates for social distancing and regular disinfection in public spaces, and the use of screening applications. These developments also triggered the need for novel and improved computer vision techniques capable of (i) providing support to the prevention measures through an automated analysis of visual data, on the one hand, and (ii) facilitating normal operation of existing vision-based services, such as biometric authentication schemes, on the other. Especially important here, are computer vision techniques that focus on the analysis of people and faces in visual data and have been affected the most by the partial occlusions introduced by the mandates for facial masks.
Such computer vision based human analysis techniques include face and face-mask detection approaches, face recognition techniques, crowd counting solutions, age and expression estimation procedures, models for detecting face-hand interactions and many others, and have seen considerable attention over recent years. The goal of this survey is to provide an introduction to the problems induced by COVID-19 into such research and to present a comprehensive review of the work done in the computer vision based human analysis field. Particular attention is paid to the impact of facial masks on the performance of various methods and recent solutions to mitigate this problem. Additionally, a detailed review of existing datasets useful for the development and evaluation of methods for COVID-19 related applications is also provided. Finally, to help advance the field further, a discussion on the main open challenges and future research direction is given at the end of the survey. This work is intended to have a broad appeal and be useful not only for computer vision researchers but also the general public.},
  keywords  = {COVID-19, face, face alignment, face analysis, face image processing, face image quality assessment, face landmarking, face recognition, face verification, human analysis, masked face analysis},
  pubstate  = {published},
  tppubtype = {article}
}

Close

The emergence of COVID-19 has had a global and profound impact, not only on society as a whole, but also on the lives of individuals. Various prevention measures were introduced around the world to limit the transmission of the disease, including
face masks, mandates for social distancing and regular disinfection in public spaces, and the use of screening applications. These developments also triggered the need for novel and improved computer vision techniques capable of (i) providing support to the prevention measures through an automated analysis of visual data, on the one hand, and (ii) facilitating normal operation of existing vision-based services, such as biometric authentication schemes, on the other. Especially important here, are computer vision techniques that focus on the analysis of people and faces in visual data and have been affected the most by the partial occlusions introduced by the mandates for facial masks.
Such computer vision based human analysis techniques include face and face-mask detection approaches, face recognition techniques, crowd counting solutions, age and expression estimation procedures, models for detecting face-hand interactions and many others, and have seen considerable attention over recent years. The goal of this survey is to provide an introduction to the problems induced by COVID-19 into such research and to present a comprehensive review of the work done in the computer vision based human analysis field. Particular attention is paid to the impact of facial masks on the performance of various methods and recent solutions to mitigate this problem. Additionally, a detailed review of existing datasets useful for the development and evaluation of methods for COVID-19 related applications is also provided. Finally, to help advance the field further, a discussion on the main open challenges and future research direction is given at the end of the survey. This work is intended to have a broad appeal and be useful not only for computer vision researchers but also the general public.

Close

Huber, Marco; Boutros, Fadi; Luu, Anh Thi; Raja, Kiran; Ramachandra, Raghavendra; Damer, Naser; Neto, Pedro C.; Goncalves, Tiago; Sequeira, Ana F.; Cardoso, Jaime S.; Tremoco, João; Lourenco, Miguel; Serra, Sergio; Cermeno, Eduardo; Ivanovska, Marija; Batagelj, Borut; Kronovšek, Andrej; Peer, Peter; Štruc, Vitomir

SYN-MAD 2022: Competition on Face Morphing Attack Detection based on Privacy-aware Synthetic Training Data Proceedings Article

In: IEEE International Joint Conference on Biometrics (IJCB), pp. 1-10, 2022, ISBN: 978-1-6654-6394-2.

Links | BibTeX | Tags: data synthesis, deep learning, face, face PAD, pad, synthetic data

Ivanovska, Marija; Kronovšek, Andrej; Peer, Peter; Štruc, Vitomir; Batagelj, Borut

Face Morphing Attack Detection Using Privacy-Aware Training Data Proceedings Article

In: Proceedings of ERK 2022, pp. 1-4, 2022.

Abstract | Links | BibTeX | Tags: competition, face, face morphing, face morphing attack, face morphing detection, private data, synthetic data

Križaj, Janez; Dobrišek, Simon; Štruc, Vitomir

Making the most of single sensor information: a novel fusion approach for 3D face recognition using region covariance descriptors and Gaussian mixture models Journal Article

In: Sensors, vol. 22, iss. 6, no. 2388, pp. 1-26, 2022.

Abstract | Links | BibTeX | Tags: 3d face, biometrics, face, face analysis, face images, face recognition

@article{KrizajSensors2022,
  title     = {Making the most of single sensor information: a novel fusion approach for 3D face recognition using region covariance descriptors and Gaussian mixture models},
  author    = {Janez Križaj and Simon Dobrišek and Vitomir Štruc},
  url       = {https://www.mdpi.com/1424-8220/22/6/2388},
  doi       = {10.3390/s22062388},
  year      = {2022},
  date      = {2022-03-01},
  journal   = {Sensors},
  volume    = {22},
  number    = {6},
  eid       = {2388},
  pages     = {1--26},
  abstract  = {Most commercially successful face recognition systems combine information from multiple sensors (2D and 3D, visible light and infrared, etc.) to achieve reliable recognition in various environments. When only a single sensor is available, the robustness as well as efficacy of the recognition process suffer. In this paper, we focus on face recognition using images captured by a single 3D sensor and propose a method based on the use of region covariance matrixes and Gaussian mixture models (GMMs). All steps of the proposed framework are automated, and no metadata, such as pre-annotated eye, nose, or mouth positions is required, while only a very simple clustering-based face detection is performed. The framework computes a set of region covariance descriptors from local regions of different face image representations and then uses the unscented transform to derive low-dimensional feature vectors, which are finally modeled by GMMs. In the last step, a support vector machine classification scheme is used to make a decision about the identity of the input 3D facial image. The proposed framework has several desirable characteristics, such as an inherent mechanism for data fusion/integration (through the region covariance matrixes), the ability to explore facial images at different levels of locality, and the ability to integrate a domain-specific prior knowledge into the modeling procedure. Several normalization techniques are incorporated into the proposed framework to further improve performance. Extensive experiments are performed on three prominent databases (FRGC v2, CASIA, and UMB-DB) yielding competitive results.},
  keywords  = {3d face, biometrics, face, face analysis, face images, face recognition},
  pubstate  = {published},
  tppubtype = {article}
}

Close

Rot, Peter; Peer, Peter; Štruc, Vitomir

Detecting Soft-Biometric Privacy Enhancement Book Section

In: Rathgeb, Christian; Tolosana, Ruben; Vera-Rodriguez, Ruben; Busch, Christoph (Ed.): Handbook of Digital Face Manipulation and Detection, 2022.

Links | BibTeX | Tags: biometrics, face, privacy, privacy enhancement, privacy-enhancing techniques, soft biometric privacy

Ivanovska, Marija; Štruc, Vitomir

A Comparative Study on Discriminative and One--Class Learning Models for Deepfake Detection Proceedings Article

In: Proceedings of ERK 2021, pp. 1–4, 2021.

Abstract | Links | BibTeX | Tags: biometrics, comparative study, computer vision, deepfake detection, deepfakes, detection, face, one-class learning

Grm, Klemen; Štruc, Vitomir

Frequency Band Encoding for Face Super-Resolution Proceedings Article

In: Proceedings of ERK 2021, pp. 1-4, 2021.

Abstract | Links | BibTeX | Tags: CNN, deep learning, face, face hallucination, frequency encoding, super-resolution

Batagelj, Borut; Peer, Peter; Štruc, Vitomir; Dobrišek, Simon

How to correctly detect face-masks for COVID-19 from visual information? Journal Article

In: Applied sciences, vol. 11, no. 5, pp. 1-24, 2021, ISSN: 2076-3417.

Abstract | Links | BibTeX | Tags: computer vision, COVID-19, deep learning, detection, face, mask detection, recognition

% Journal article on face-mask detection (journal: Applied Sciences, vol. 11, no. 5).
% The serial number 2076-3417 has the NNNN-NNNN ISSN form and also appears in the
% article URL path, so it is stored in `issn` rather than `isbn`.
@article{Batagelj2021,
  title      = {How to correctly detect face-masks for {COVID-19} from visual information?},
  author     = {Borut Batagelj and Peter Peer and Vitomir Štruc and Simon Dobrišek},
  journal    = {Applied Sciences},
  volume     = {11},
  number     = {5},
  pages      = {1--24},
  year       = {2021},
  date       = {2021-03-01},
  doi        = {10.3390/app11052070},
  issn       = {2076-3417},
  url        = {https://www.mdpi.com/2076-3417/11/5/2070/pdf},
  urldate    = {2021-03-01},
  abstract   = {The new Coronavirus disease (COVID-19) has seriously affected the world. By the end of November 2020, the global number of new coronavirus cases had already exceeded 60 million and the number of deaths 1,410,378 according to information from the World Health Organization (WHO). To limit the spread of the disease, mandatory face-mask rules are now becoming common in public settings around the world. Additionally, many public service providers require customers to wear face-masks in accordance with predefined rules (e.g., covering both mouth and nose) when using public services. These developments inspired research into automatic (computer-vision-based) techniques for face-mask detection that can help monitor public behavior and contribute towards constraining the COVID-19 pandemic. Although existing research in this area resulted in efficient techniques for face-mask detection, these usually operate under the assumption that modern face detectors provide perfect detection performance (even for masked faces) and that the main goal of the techniques is to detect the presence of face-masks only. In this study, we revisit these common assumptions and explore the following research questions: (i) How well do existing face detectors perform with masked-face images? (ii) Is it possible to detect a proper (regulation-compliant) placement of facial masks? and (iii) How useful are existing face-mask detection techniques for monitoring applications during the COVID-19 pandemic? To answer these and related questions we conduct a comprehensive experimental evaluation of several recent face detectors for their performance with masked-face images. Furthermore, we investigate the usefulness of multiple off-the-shelf deep-learning models for recognizing correct face-mask placement. Finally, we design a complete pipeline for recognizing whether face-masks are worn correctly or not and compare the performance of the pipeline with standard face-mask detection models from the literature. To facilitate the study, we compile a large dataset of facial images from the publicly available MAFA and Wider Face datasets and annotate it with compliant and non-compliant labels. The annotation dataset, called Face-Mask-Label Dataset (FMLD), is made publicly available to the research community.},
  keywords   = {computer vision, COVID-19, deep learning, detection, face, mask detection, recognition},
  pubstate   = {published},
  tppubtype  = {article}
}

Close

The new Coronavirus disease (COVID-19) has seriously affected the world. By the end of November 2020, the global number of new coronavirus cases had already exceeded 60 million and the number of deaths 1,410,378 according to information from the World Health Organization (WHO). To limit the spread of the disease, mandatory face-mask rules are now becoming common in public settings around the world. Additionally, many public service providers require customers to wear face-masks in accordance with predefined rules (e.g., covering both mouth and nose) when using public services. These developments inspired research into automatic (computer-vision-based) techniques for face-mask detection that can help monitor public behavior and contribute towards constraining the COVID-19 pandemic. Although existing research in this area resulted in efficient techniques for face-mask detection, these usually operate under the assumption that modern face detectors provide perfect detection performance (even for masked faces) and that the main goal of the techniques is to detect the presence of face-masks only. In this study, we revisit these common assumptions and explore the following research questions: (i) How well do existing face detectors perform with masked-face images? (ii) Is it possible to detect a proper (regulation-compliant) placement of facial masks? and (iii) How useful are existing face-mask detection techniques for monitoring applications during the COVID-19 pandemic? To answer these and related questions we conduct a comprehensive experimental evaluation of several recent face detectors for their performance with masked-face images. Furthermore, we investigate the usefulness of multiple off-the-shelf deep-learning models for recognizing correct face-mask placement. 
Finally, we design a complete pipeline for recognizing whether face-masks are worn correctly or not and compare the performance of the pipeline with standard face-mask detection models from the literature. To facilitate the study, we compile a large dataset of facial images from the publicly available MAFA and Wider Face datasets and annotate it with compliant and non-compliant labels. The annotation dataset, called Face-Mask-Label Dataset (FMLD), is made publicly available to the research community.

Close

Grm, Klemen; Scheirer, Walter J.; Štruc, Vitomir

Face hallucination using cascaded super-resolution and identity priors Journal Article

In: IEEE Transactions on Image Processing, 2020.

Abstract | Links | BibTeX | Tags: biometrics, CNN, computer vision, deep learning, face, face hallucination, super-resolution

% Journal article on the C-SRIP face hallucination model (IEEE Transactions on Image Processing).
% The `url` field holds two links, one per line: the IEEE Xplore copy and the lab-hosted PDF.
% TODO(review): volume and pages are not recorded for this entry; add them from the DOI record.
@article{TIPKlemen_2020,
  title      = {Face hallucination using cascaded super-resolution and identity priors},
  author     = {Klemen Grm and Walter J. Scheirer and Vitomir Štruc},
  journal    = {IEEE Transactions on Image Processing},
  year       = {2020},
  date       = {2020-01-01},
  doi        = {10.1109/TIP.2019.2945835},
  url        = {https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8866753
https://lmi.fe.uni-lj.si/wp-content/uploads/2023/02/IEEET_face_hallucination_compressed.pdf},
  urldate    = {2020-01-01},
  abstract   = {In this paper we address the problem of hallucinating high-resolution facial images from low-resolution inputs at high magnification factors. We approach this task with convolutional neural networks (CNNs) and propose a novel (deep) face hallucination model that incorporates identity priors into the learning procedure. The model consists of two main parts: i) a cascaded super-resolution network that upscales the low-resolution facial images, and ii) an ensemble of face recognition models that act as identity priors for the super-resolution network during training. Different from most competing super-resolution techniques that rely on a single model for upscaling (even with large magnification factors), our network uses a cascade of multiple SR models that progressively upscale the low-resolution images using steps of 2×. This characteristic allows us to apply supervision signals (target appearances) at different resolutions and incorporate identity constraints at multiple scales. The proposed C-SRIP model (Cascaded Super Resolution with Identity Priors) is able to upscale (tiny) low-resolution images captured in unconstrained conditions and produce visually convincing results for diverse low-resolution inputs. We rigorously evaluate the proposed model on the Labeled Faces in the Wild (LFW), Helen and CelebA datasets and report superior performance compared to the existing state-of-the-art.},
  keywords   = {biometrics, CNN, computer vision, deep learning, face, face hallucination, super-resolution},
  pubstate   = {published},
  tppubtype  = {article}
}

Close

Grm, Klemen; Pernuš, Martin; Cluzel, Leo; Scheirer, Walter J.; Dobrišek, Simon; Štruc, Vitomir

Face Hallucination Revisited: An Exploratory Study on Dataset Bias Proceedings Article

In: IEEE Conference on Computer Vision and Pattern Recognition Workshops, 2019.

Abstract | Links | BibTeX | Tags: dataset bias, face, face hallucination, super-resolution

Meden, Blaž; Peer, Peter; Štruc, Vitomir

Selective Face Deidentification with End-to-End Perceptual Loss Learning Proceedings Article

In: 2018 IEEE International Work Conference on Bioinspired Intelligence (IWOBI), pp. 1–7, IEEE 2018.

Abstract | Links | BibTeX | Tags: deidentification, face, face deidentification, privacy protection

Grm, Klemen; Štruc, Vitomir

Deep face recognition for surveillance applications Journal Article

In: IEEE Intelligent Systems, vol. 33, no. 3, pp. 46–50, 2018.

Abstract | Links | BibTeX | Tags: biometrics, face, face recognition, performance evaluation, surveillance

Meden, Blaž; Emeršič, Žiga; Štruc, Vitomir; Peer, Peter

k-Same-Net: k-Anonymity with Generative Deep Neural Networks for Face Deidentification Journal Article

In: Entropy, vol. 20, no. 1, pp. 60, 2018.

Abstract | Links | BibTeX | Tags: deidentification, face, k-same, k-same-net, privacy protection

% Journal article introducing k-Same-Net (Entropy, vol. 20, no. 1).
% The lowercase-k proper names in the title are brace-protected so that
% sentence-casing bibliography styles do not flatten "k-Same-Net" / "k-Anonymity".
% NOTE(review): `pages = {60}` is presumably the article number (the URL path ends in /20/1/60) -- confirm.
@article{meden2018k,
  title      = {{k-Same-Net}: {k-Anonymity} with Generative Deep Neural Networks for Face Deidentification},
  author     = {Blaž Meden and Žiga Emeršič and Vitomir Štruc and Peter Peer},
  journal    = {Entropy},
  volume     = {20},
  number     = {1},
  pages      = {60},
  publisher  = {Multidisciplinary Digital Publishing Institute},
  year       = {2018},
  date       = {2018-01-01},
  url        = {https://www.mdpi.com/1099-4300/20/1/60/pdf},
  abstract   = {Image and video data are today being shared between government entities and other relevant stakeholders on a regular basis and require careful handling of the personal information contained therein. A popular approach to ensure privacy protection in such data is the use of deidentification techniques, which aim at concealing the identity of individuals in the imagery while still preserving certain aspects of the data after deidentification. In this work, we propose a novel approach towards face deidentification, called k-Same-Net, which combines recent Generative Neural Networks (GNNs) with the well-known k-Anonymity mechanism and provides formal guarantees regarding privacy protection on a closed set of identities. Our GNN is able to generate synthetic surrogate face images for deidentification by seamlessly combining features of identities used to train the GNN model. Furthermore, it allows us to control the image-generation process with a small set of appearance-related parameters that can be used to alter specific aspects (e.g., facial expressions, age, gender) of the synthesized surrogate images. We demonstrate the feasibility of k-Same-Net in comprehensive experiments on the XM2VTS and CK+ datasets. We evaluate the efficacy of the proposed approach through reidentification experiments with recent recognition models and compare our results with competing deidentification techniques from the literature. We also present facial expression recognition experiments to demonstrate the utility-preservation capabilities of k-Same-Net. Our experimental results suggest that k-Same-Net is a viable option for facial deidentification that exhibits several desirable characteristics when compared to existing solutions in this area.},
  keywords   = {deidentification, face, k-same, k-same-net, privacy protection},
  pubstate   = {published},
  tppubtype  = {article}
}

Close

Grm, Klemen; Dobrišek, Simon; Štruc, Vitomir

Evaluating image superresolution algorithms for cross-resolution face recognition Proceedings Article

In: Proceedings of the Twenty-sixth International Electrotechnical and Computer Science Conference ERK 2017, 2017.

Abstract | Links | BibTeX | Tags: face, face hallucination, face recognition, performance evaluation, super-resolution

Meden, Blaž; Malli, Refik Can; Fabijan, Sebastjan; Ekenel, Hazim Kemal; Štruc, Vitomir; Peer, Peter

Face deidentification with generative deep neural networks Journal Article

In: IET Signal Processing, vol. 11, no. 9, pp. 1046–1054, 2017.

Abstract | Links | BibTeX | Tags: biometrics, computer vision, deidentification, face, privacy protection

% Journal article on GNN-based face deidentification (IET Signal Processing, vol. 11, no. 9).
% Fields are grouped as: who/what, venue, dates, link, then descriptive metadata.
@article{meden2017face,
  author     = {Blaž Meden and Refik Can Malli and Sebastjan Fabijan and Hazim Kemal Ekenel and Vitomir Štruc and Peter Peer},
  title      = {Face deidentification with generative deep neural networks},
  journal    = {IET Signal Processing},
  publisher  = {IET},
  volume     = {11},
  number     = {9},
  pages      = {1046--1054},
  year       = {2017},
  date       = {2017-01-01},
  url        = {https://lmi.fe.uni-lj.si/wp-content/uploads/2019/08/Face_Deidentification_with_Generative_Deep_Neural_Networks.pdf},
  keywords   = {biometrics, computer vision, deidentification, face, privacy protection},
  abstract   = {Face deidentification is an active topic amongst privacy and security researchers. Early deidentification methods relying on image blurring or pixelisation have been replaced in recent years with techniques based on formal anonymity models that provide privacy guaranties and retain certain characteristics of the data even after deidentification. The latter aspect is important, as it allows the deidentified data to be used in applications for which identity information is irrelevant. In this work, the authors present a novel face deidentification pipeline, which ensures anonymity by synthesising artificial surrogate faces using generative neural networks (GNNs). The generated faces are used to deidentify subjects in images or videos, while preserving non-identity-related aspects of the data and consequently enabling data utilisation. Since generative networks are highly adaptive and can utilise diverse parameters (pertaining to the appearance of the generated output in terms of facial expressions, gender, race etc.), they represent a natural choice for the problem of face deidentification. To demonstrate the feasibility of the authors’ approach, they perform experiments using automated recognition tools and human annotators. Their results show that the recognition performance on deidentified images is close to chance, suggesting that the deidentification process based on GNNs is effective.},
  pubstate   = {published},
  tppubtype  = {article}
}

Close

Meden, Blaž; Emeršič, Žiga; Štruc, Vitomir; Peer, Peter

k-Same-Net: Neural-Network-Based Face Deidentification Proceedings Article

In: 2017 International Conference and Workshop on Bioinspired Intelligence (IWOBI), pp. 1–7, IEEE 2017.

Abstract | Links | BibTeX | Tags: deidentification, face, privacy protection