2025
|
Tomašević, Darian; Boutros, Fadi; Lin, Chenhao; Damer, Naser; Štruc, Vitomir; Peer, Peter ID-Booth: Identity-consistent Face Generation with Diffusion Models Proceedings Article In: IEEE International Conference on Automatic Face and Gesture Recognition 2025, pp. 1-10, 2025.
@inproceedings{DarianFG2025,
  title     = {{ID-Booth}: Identity-consistent Face Generation with Diffusion Models},
  author    = {Darian Tomašević and Fadi Boutros and Chenhao Lin and Naser Damer and Vitomir Štruc and Peter Peer},
  url       = {https://lmi.fe.uni-lj.si/wp-content/uploads/2025/04/ID_Booth__2025_.pdf},
  year      = {2025},
  date      = {2025-05-27},
  booktitle = {IEEE International Conference on Automatic Face and Gesture Recognition 2025},
  pages     = {1--10},
  abstract  = {Recent advances in generative modeling have enabled the generation of high-quality synthetic data that is applicable in a variety of domains, including face recognition.
Here, state-of-the-art generative models typically rely on conditioning and fine-tuning of powerful pretrained diffusion models to facilitate the synthesis of realistic images of a desired identity. Yet, these models often do not consider the identity of subjects during training, leading to poor consistency between generated and intended identities. In contrast, methods that employ identity-based training objectives tend to overfit on various aspects of the identity, and in turn, lower the diversity of images that can be generated. To address these issues, we present in this paper a novel generative diffusion-based framework, called ID-Booth. ID-Booth consists of a denoising network responsible for data generation, a variational auto-encoder for mapping images to and from a lower-dimensional latent space and a text encoder that allows for prompt-based control over the generation procedure. The framework utilizes a novel triplet identity training objective and enables identity-consistent image generation while retaining the synthesis capabilities of pretrained diffusion models. Experiments with a state-of-the-art latent diffusion model and diverse prompts reveal that our method facilitates better intra-identity consistency and inter-identity separability than competing methods, while achieving higher image diversity. In turn, the produced data allows for effective augmentation of small-scale datasets and training of better-performing recognition models in a privacy-preserving manner. The source code for the ID-Booth framework is publicly available at https://github.com/dariant/ID-Booth.},
  keywords  = {data synthesis, diffusion, face, face images, face recognition, generative AI, generative models, synthetic data},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Recent advances in generative modeling have enabled the generation of high-quality synthetic data that is applicable in a variety of domains, including face recognition.
Here, state-of-the-art generative models typically rely on conditioning and fine-tuning of powerful pretrained diffusion models to facilitate the synthesis of realistic images of a desired identity. Yet, these models often do not consider the identity of subjects during training, leading to poor consistency between generated and intended identities. In contrast, methods that employ identity-based training objectives tend to overfit on various aspects of the identity, and in turn, lower the diversity of images that can be generated. To address these issues, we present in this paper a novel generative diffusion-based framework, called ID-Booth. ID-Booth consists of a denoising network responsible for data generation, a variational auto-encoder for mapping images to and from a lower-dimensional latent space and a text encoder that allows for prompt-based control over the generation procedure. The framework utilizes a novel triplet identity training objective and enables identity-consistent image generation while retaining the synthesis capabilities of pretrained diffusion models. Experiments with a state-of-the-art latent diffusion model and diverse prompts reveal that our method facilitates better intra-identity consistency and inter-identity separability than competing methods, while achieving higher image diversity. In turn, the produced data allows for effective augmentation of small-scale datasets and training of better-performing recognition models in a privacy-preserving manner. The source code for the ID-Booth framework is publicly available at https://github.com/dariant/ID-Booth. |
DeAndres-Tame, Ivan; Tolosana, Ruben; Melzi, Pietro; Vera-Rodriguez, Ruben; Kim, Minchul; Rathgeb, Christian; Liu, Xiaoming; Gomez, Luis F.; Morales, Aythami; Fierrez, Julian; Ortega-Garcia, Javier; Zhong, Zhizhou; Huang, Yuge; Mi, Yuxi; Ding, Shouhong; Zhou, Shuigeng; He, Shuai; Fu, Lingzhi; Cong, Heng; Zhang, Rongyu; Xiao, Zhihong; Smirnov, Evgeny; Pimenov, Anton; Grigorev, Aleksei; Timoshenko, Denis; Asfaw, Kaleb Mesfin; Low, Cheng Yaw; Liu, Hao; Wang, Chuyi; Zuo, Qing; He, Zhixiang; Shahreza, Hatef Otroshi; George, Anjith; Unnervik, Alexander; Rahimi, Parsa; Marcel, Sebastien; Neto, Pedro C.; Huber, Marco; Kolf, Jan Niklas; Damer, Naser; Boutros, Fadi; Cardoso, Jaime S.; Sequeira, Ana F.; Atzori, Andrea; Fenu, Gianni; Marras, Mirko; Štruc, Vitomir; Yu, Jiang; Li, Zhangjie; Li, Jichun; Zhao, Weisong; Lei, Zhen; Zhu, Xiangyu; Zhang, Xiao-Yu; Biesseck, Bernardo; Vidal, Pedro; Coelho, Luiz; Granada, Roger; Menotti, David Second FRCSyn-onGoing: Winning solutions and post-challenge analysis to improve face recognition with synthetic data Journal Article In: Information Fusion, no. 103099, 2025.
@article{Synth_InfoFUS2025,
  title     = {Second {FRCSyn-onGoing}: Winning solutions and post-challenge analysis to improve face recognition with synthetic data},
  author    = {Ivan DeAndres-Tame and Ruben Tolosana and Pietro Melzi and Ruben Vera-Rodriguez and Minchul Kim and Christian Rathgeb and Xiaoming Liu and Luis F. Gomez and Aythami Morales and Julian Fierrez and Javier Ortega-Garcia and Zhizhou Zhong and Yuge Huang and Yuxi Mi and Shouhong Ding and Shuigeng Zhou and Shuai He and Lingzhi Fu and Heng Cong and Rongyu Zhang and Zhihong Xiao and Evgeny Smirnov and Anton Pimenov and Aleksei Grigorev and Denis Timoshenko and Kaleb Mesfin Asfaw and Cheng Yaw Low and Hao Liu and Chuyi Wang and Qing Zuo and Zhixiang He and Hatef Otroshi Shahreza and Anjith George and Alexander Unnervik and Parsa Rahimi and Sebastien Marcel and Pedro C. Neto and Marco Huber and Jan Niklas Kolf and Naser Damer and Fadi Boutros and Jaime S. Cardoso and Ana F. Sequeira and Andrea Atzori and Gianni Fenu and Mirko Marras and Vitomir Štruc and Jiang Yu and Zhangjie Li and Jichun Li and Weisong Zhao and Zhen Lei and Xiangyu Zhu and Xiao-Yu Zhang and Bernardo Biesseck and Pedro Vidal and Luiz Coelho and Roger Granada and David Menotti},
  url       = {https://www.sciencedirect.com/science/article/pii/S1566253525001721},
  doi       = {10.1016/j.inffus.2025.103099},
  year      = {2025},
  date      = {2025-03-14},
  urldate   = {2025-03-14},
  journal   = {Information Fusion},
  number    = {103099},
  abstract  = {Synthetic data is gaining increasing popularity for face recognition technologies, mainly due to the privacy concerns and challenges associated with obtaining real data, including diverse scenarios, quality, and demographic groups, among others. It also offers some advantages over real data, such as the large amount of data that can be generated or the ability to customize it to adapt to specific problem-solving needs. To effectively use such data, face recognition models should also be specifically designed to exploit synthetic data to its fullest potential. In order to promote the proposal of novel Generative AI methods and synthetic data, and investigate the application of synthetic data to better train face recognition systems, we introduce the 2nd FRCSyn-on-Going challenge, based on the 2nd Face Recognition Challenge in the Era of Synthetic Data (FRCSyn), originally launched at CVPR 2024. This is an ongoing challenge that provides researchers with an accessible platform to benchmark (i) the proposal of novel Generative AI methods and synthetic data, and (ii) novel face recognition systems that are specifically proposed to take advantage of synthetic data. We focus on exploring the use of synthetic data both individually and in combination with real data to solve current challenges in face recognition such as demographic bias, domain adaptation, and performance constraints in demanding situations, such as age disparities between training and testing, changes in the pose, or occlusions. Very interesting findings are obtained in this second edition, including a direct comparison with the first one, in which synthetic databases were restricted to DCFace and GANDiffFace.},
  keywords  = {biometrics, data synthesis, face, face recognition, face synthesis, synthetic data},
  pubstate  = {published},
  tppubtype = {article}
}
Synthetic data is gaining increasing popularity for face recognition technologies, mainly due to the privacy concerns and challenges associated with obtaining real data, including diverse scenarios, quality, and demographic groups, among others. It also offers some advantages over real data, such as the large amount of data that can be generated or the ability to customize it to adapt to specific problem-solving needs. To effectively use such data, face recognition models should also be specifically designed to exploit synthetic data to its fullest potential. In order to promote the proposal of novel Generative AI methods and synthetic data, and investigate the application of synthetic data to better train face recognition systems, we introduce the 2nd FRCSyn-on-Going challenge, based on the 2nd Face Recognition Challenge in the Era of Synthetic Data (FRCSyn), originally launched at CVPR 2024. This is an ongoing challenge that provides researchers with an accessible platform to benchmark (i) the proposal of novel Generative AI methods and synthetic data, and (ii) novel face recognition systems that are specifically proposed to take advantage of synthetic data. We focus on exploring the use of synthetic data both individually and in combination with real data to solve current challenges in face recognition such as demographic bias, domain adaptation, and performance constraints in demanding situations, such as age disparities between training and testing, changes in the pose, or occlusions. Very interesting findings are obtained in this second edition, including a direct comparison with the first one, in which synthetic databases were restricted to DCFace and GANDiffFace. |
2022
|
Tomašević, Darian; Peer, Peter; Štruc, Vitomir BiOcularGAN: Bimodal Synthesis and Annotation of Ocular Images Proceedings Article In: IEEE/IAPR International Joint Conference on Biometrics (IJCB 2022) , pp. 1-10, 2022.
@inproceedings{TomasevicIJCBBiOcular,
  title      = {{BiOcularGAN}: Bimodal Synthesis and Annotation of Ocular Images},
  author     = {Darian Tomašević and Peter Peer and Vitomir Štruc},
  url        = {https://lmi.fe.uni-lj.si/wp-content/uploads/2022/12/BiModal_StyleGAN.pdf},
  eprint     = {2205.01536},
  eprinttype = {arXiv},
  year       = {2022},
  date       = {2022-10-20},
  urldate    = {2022-10-20},
  booktitle  = {IEEE/IAPR International Joint Conference on Biometrics (IJCB 2022)},
  pages      = {1--10},
  abstract   = {Current state-of-the-art segmentation techniques for ocular images are critically dependent on large-scale annotated datasets, which are labor-intensive to gather and often raise privacy concerns. In this paper, we present a novel framework, called BiOcularGAN, capable of generating synthetic large-scale datasets of photorealistic (visible light and near-infrared) ocular images, together with corresponding segmentation labels to address these issues. At its core, the framework relies on a novel Dual-Branch StyleGAN2 (DB-StyleGAN2) model that facilitates bimodal image generation, and a Semantic Mask Generator (SMG) component that produces semantic annotations by exploiting latent features of the DB-StyleGAN2 model. We evaluate BiOcularGAN through extensive experiments across five diverse ocular datasets and analyze the effects of bimodal data generation on image quality and the produced annotations. Our experimental results show that BiOcularGAN is able to produce high-quality matching bimodal images and annotations (with minimal manual intervention) that can be used to train highly competitive (deep) segmentation models (in a privacy-aware manner) that perform well across multiple real-world datasets. The source code for the BiOcularGAN framework is publicly available at: https://github.com/dariant/BiOcularGAN.},
  keywords   = {biometrics, CNN, data synthesis, deep learning, ocular, segmentation, StyleGAN, synthetic data},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
Current state-of-the-art segmentation techniques for ocular images are critically dependent on large-scale annotated datasets, which are labor-intensive to gather and often raise privacy concerns. In this paper, we present a novel framework, called BiOcularGAN, capable of generating synthetic large-scale datasets of photorealistic (visible light and near-infrared) ocular images, together with corresponding segmentation labels to address these issues. At its core, the framework relies on a novel Dual-Branch StyleGAN2 (DB-StyleGAN2) model that facilitates bimodal image generation, and a Semantic Mask Generator (SMG) component that produces semantic annotations by exploiting latent features of the DB-StyleGAN2 model. We evaluate BiOcularGAN through extensive experiments across five diverse ocular datasets and analyze the effects of bimodal data generation on image quality and the produced annotations. Our experimental results show that BiOcularGAN is able to produce high-quality matching bimodal images and annotations (with minimal manual intervention) that can be used to train highly competitive (deep) segmentation models (in a privacy aware-manner) that perform well across multiple real-world datasets. The source code for the BiOcularGAN framework is publicly available at: https://github.com/dariant/BiOcularGAN. |
Huber, Marco; Boutros, Fadi; Luu, Anh Thi; Raja, Kiran; Ramachandra, Raghavendra; Damer, Naser; Neto, Pedro C.; Goncalves, Tiago; Sequeira, Ana F.; Cardoso, Jaime S.; Tremoco, João; Lourenco, Miguel; Serra, Sergio; Cermeno, Eduardo; Ivanovska, Marija; Batagelj, Borut; Kronovšek, Andrej; Peer, Peter; Štruc, Vitomir SYN-MAD 2022: Competition on Face Morphing Attack Detection based on Privacy-aware Synthetic Training Data Proceedings Article In: IEEE International Joint Conference on Biometrics (IJCB), pp. 1-10, 2022, ISBN: 978-1-6654-6394-2.
@inproceedings{IvanovskaSYNMAD,
  title     = {{SYN-MAD} 2022: Competition on Face Morphing Attack Detection based on Privacy-aware Synthetic Training Data},
  author    = {Marco Huber and Fadi Boutros and Anh Thi Luu and Kiran Raja and Raghavendra Ramachandra and Naser Damer and Pedro C. Neto and Tiago Goncalves and Ana F. Sequeira and Jaime S. Cardoso and João Tremoco and Miguel Lourenco and Sergio Serra and Eduardo Cermeno and Marija Ivanovska and Borut Batagelj and Andrej Kronovšek and Peter Peer and Vitomir Štruc},
  url       = {https://ieeexplore.ieee.org/iel7/10007927/10007928/10007950.pdf},
  doi       = {10.1109/IJCB54206.2022.10007950},
  isbn      = {978-1-6654-6394-2},
  year      = {2022},
  date      = {2022-09-01},
  urldate   = {2022-09-01},
  booktitle = {IEEE International Joint Conference on Biometrics (IJCB)},
  pages     = {1--10},
  keywords  = {data synthesis, deep learning, face, face PAD, pad, synthetic data},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|