2010
Proceedings Articles
Vitomir Štruc; Jerneja Žganec-Gros; Nikola Pavešić
Eye Localization using correlation filters Proceedings Article
In: Proceedings of the International Conference DOGS, pp. 188-191, Novi Sad, Serbia, 2010.
@inproceedings{DOGS_Struc_2010,
title = {Eye Localization using correlation filters},
author = {Vitomir Štruc and Jerneja Žganec-Gros and Nikola Pavešić},
year = {2010},
date = {2010-01-01},
booktitle = {Proceedings of the International Conference DOGS},
pages = {188--191},
address = {Novi Sad, Serbia},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
2009
Journal Articles
Vitomir Štruc; Nikola Pavešić
Gabor-based kernel-partial-least-squares discrimination features for face recognition Journal Article
In: Informatica (Vilnius), vol. 20, no. 1, pp. 115-138, 2009.
@article{Inform-Struc_2009,
title = {Gabor-based kernel-partial-least-squares discrimination features for face recognition},
author = {Vitomir Štruc and Nikola Pavešić},
url = {https://lmi.fe.uni-lj.si/en/gabor-basedkernel-partial-least-squaresdiscriminationfeaturesforfacerecognition/},
year = {2009},
date = {2009-01-01},
urldate = {2009-01-01},
journal = {Informatica (Vilnius)},
volume = {20},
number = {1},
pages = {115--138},
abstract = {The paper presents a novel method for the extraction of facial features based on the Gabor-wavelet representation of face images and the kernel partial-least-squares discrimination (KPLSD) algorithm. The proposed feature-extraction method, called the Gabor-based kernel partial-least-squares discrimination (GKPLSD), is performed in two consecutive steps. In the first step a set of forty Gabor wavelets is used to extract discriminative and robust facial features, while in the second step the kernel partial-least-squares discrimination technique is used to reduce the dimensionality of the Gabor feature vector and to further enhance its discriminatory power. For optimal performance, the KPLSD-based transformation is implemented using the recently proposed fractional-power-polynomial models. The experimental results based on the XM2VTS and ORL databases show that the GKPLSD approach outperforms feature-extraction methods such as principal component analysis (PCA), linear discriminant analysis (LDA), kernel principal component analysis (KPCA) or generalized discriminant analysis (GDA) as well as combinations of these methods with Gabor representations of the face images. Furthermore, as the KPLSD algorithm is derived from the kernel partial-least-squares regression (KPLSR) model it does not suffer from the small-sample-size problem, which is regularly encountered in the field of face recognition.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Vitomir Štruc; Janez Žibert; Nikola Pavešić
Histogram remapping as a preprocessing step for robust face recognition Journal Article
In: WSEAS transactions on information science and applications, vol. 6, no. 3, pp. 520-529, 2009.
@article{WSEAS-Struc_2009,
title = {Histogram remapping as a preprocessing step for robust face recognition},
author = {Vitomir Štruc and Janez Žibert and Nikola Pavešić},
url = {https://lmi.fe.uni-lj.si/en/histogramremappingasapreprocessingstepforrobustfacerecognition/},
year = {2009},
date = {2009-01-01},
urldate = {2009-01-01},
journal = {WSEAS transactions on information science and applications},
volume = {6},
number = {3},
pages = {520--529},
abstract = {Image preprocessing techniques represent an essential part of a face recognition systems, which has a great impact on the performance and robustness of the recognition procedure. Amongst the number of techniques already presented in the literature, histogram equalization has emerged as the dominant preprocessing technique and is regularly used for the task of face recognition. With the property of increasing the global contrast of the facial image while simultaneously compensating for the illumination conditions present at the image acquisition stage, it represents a useful preprocessing step, which can ensure enhanced and more robust recognition performance. Even though, more elaborate normalization techniques, such as the multiscale retinex technique, isotropic and anisotropic smoothing, have been introduced to field of face recognition, they have been found to be more of a complement than a real substitute for histogram equalization. However, by closer examining the characteristics of histogram equalization, one can quickly discover that it represents only a specific case of a more general concept of histogram remapping techniques (which may have similar characteristics as histogram equalization does). While histogram equalization remapps the histogram of a given facial image to a uniform distribution, the target distribution could easily be replaced with an arbitrary one. As there is no theoretical justification of why the uniform distribution should be preferred to other target distributions, the question arises: how do other (non-uniform) target distributions influence the face recognition process and are they better suited for the recognition task. To tackle this issues, we present in this paper an empirical assessment of the concept of histogram remapping with the following target distributions: the uniform, the normal, the lognormal and the exponential distribution.
We perform comparative experiments on the publicly available XM2VTS and YaleB databases and conclude that similar or even better recognition results that those ensured by histogram equalization can be achieved when other (non-uniform) target distribution are considered for the histogram remapping. This enhanced performance, however, comes at a price, as the nonuniform distributions rely on some parameters which have to be trained or selected appropriately to achieve the optimal performance.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Rok Gajšek; Vitomir Štruc; France Mihelič; Anja Podlesek; Luka Komidar; Gregor Sočan; Boštjan Bajec
Multi-modal emotional database: AvID Journal Article
In: Informatica (Ljubljana), vol. 33, no. 1, pp. 101-106, 2009.
@article{Inform-Gajsek_2009,
title = {Multi-modal emotional database: {AvID}},
author = {Rok Gajšek and Vitomir Štruc and France Mihelič and Anja Podlesek and Luka Komidar and Gregor Sočan and Boštjan Bajec},
url = {https://lmi.fe.uni-lj.si/en/multi-modalemotionaldatabaseavid/},
year = {2009},
date = {2009-01-01},
urldate = {2009-01-01},
journal = {Informatica (Ljubljana)},
volume = {33},
number = {1},
pages = {101--106},
abstract = {This paper presents our work on recording a multi-modal database containing emotional audio and video recordings. In designing the recording strategies a special attention was payed to gather data involving spontaneous emotions and therefore obtain a more realistic training and testing conditions for experiments. With specially planned scenarios including playing computer games and conducting an adaptive intelligence test different levels of arousal were induced. This will enable us to both detect different emotional states as well as experiment in speaker identification/verification of people involved in communications. So far the multi-modal database has been recorded and basic evaluation of the data was processed.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Vitomir Štruc; Rok Gajšek; France Mihelič; Nikola Pavešić
Using regression techniques for coping with the one-sample-size problem of face recognition Journal Article
In: Electrotechnical Review, vol. 76, no. 1-2, pp. 7-12, 2009.
@article{EV-Struc_2009,
title = {Using regression techniques for coping with the one-sample-size problem of face recognition},
author = {Vitomir Štruc and Rok Gajšek and France Mihelič and Nikola Pavešić},
url = {https://lmi.fe.uni-lj.si/en/usingregressiontechniquesforcopingwiththeone-sample-sizeproblemoffacerecognition/},
year = {2009},
date = {2009-01-01},
urldate = {2009-01-01},
journal = {Electrotechnical Review},
volume = {76},
number = {1--2},
pages = {7--12},
abstract = {There is a number of face recognition paradigms which ensure good recognition rates with frontal face images. However, the majority of them require an extensive training set and degrade in their performance when an insufficient number of training images is available. This is especially true for applications where only one image per subject is at hand for training. To cope with this one-sample-size (OSS) problem, we propose to employ subspace projection based regression techniques rather than modifications of the established face recognition paradigms, such as the principal component or linear discriminant analysis, as it was done in the past. Experiments performed on the XM2VTS and ORL databases show the effectiveness of the proposed approach. Also presented is a comparative assessment of several regression techniques and some popular face recognition methods.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
recognition methods.
Vitomir Štruc; Nikola Pavešić
Phase-congruency features for palm-print verification Journal Article
In: IET Signal Processing, vol. 3, no. 4, pp. 258-268, 2009.
@article{IET-Struc_2009,
title = {Phase-congruency features for palm-print verification},
author = {Vitomir Štruc and Nikola Pavešić},
url = {https://lmi.fe.uni-lj.si/en/phase-congruencyfeaturesforpalm-printverification/},
doi = {10.1049/iet-spr.2008.0152},
year = {2009},
date = {2009-01-01},
urldate = {2009-01-01},
journal = {IET Signal Processing},
volume = {3},
number = {4},
pages = {258--268},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Vitomir Štruc; Nikola Pavešić
Gaussianization of image patches for efficient palmprint recognition Journal Article
In: Electrotechnical Review, vol. 76, no. 5, pp. 245-250, 2009.
@article{EV_2009_palms,
title = {Gaussianization of image patches for efficient palmprint recognition},
author = {Vitomir Štruc and Nikola Pavešić},
url = {https://lmi.fe.uni-lj.si/en/gaussianizationofimagepatchesforefficientpalmprintrecognition/},
year = {2009},
date = {2009-01-01},
urldate = {2009-01-01},
journal = {Electrotechnical Review},
volume = {76},
number = {5},
pages = {245--250},
abstract = {In this paper we present a comparison of the two dominant image preprocessing techniques for palmprint recognition, namely, histogram equalization and mean-variance normalization. We show that both techniques pursue a similar goal and that the difference in recognition efficiency stems from the fact that not all assumptions underlying the mean-variance normalization approach are always met. We present an alternative justification of why histogram equalization ensures enhanced verification performance, and, based on the findings, propose two novel preprocessing techniques: gaussianization of the palmprint images and gaussianization of image patches. We present comparative results obtained on the PolyU database and show that the patch-based normalization technique ensures stat-of-the-art recognition results with a simple feature extraction method and the nearest neighbor classifier.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
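Blaž Stres; James M Tiedje; Boštjan Murovec
BEsTRF: a tool for optimal resolution of terminal-restriction fragment length polymorphism analysis based on user-defined primer–enzyme–sequence databases Journal Article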
In: Bioinformatics, vol. 25, no. 12, pp. 1556-1558, 2009, ISSN: 1367-4803.
@article{10.1093/bioinformatics/btp254,
title = {{BEsTRF}: a tool for optimal resolution of terminal-restriction fragment length polymorphism analysis based on user-defined primer–enzyme–sequence databases},
author = {Blaž Stres and James M Tiedje and Boštjan Murovec},
url = {https://doi.org/10.1093/bioinformatics/btp254},
doi = {10.1093/bioinformatics/btp254},
issn = {1367-4803},
year = {2009},
date = {2009-01-01},
journal = {Bioinformatics},
volume = {25},
number = {12},
pages = {1556--1558},
abstract = {Summary: BEsTRF (Best Estimated T-RF) provides a standalone environment for analyzing primers-enzymes-gene section combinations used in terminal-restriction fragment length polymorphism (T-RFLP) for its optimal resolution. User-defined sequence databases of several hundred thousand DNA sequences can be explored and the resolution of user-specified sets of primers and restriction endonucleases can be analyzed on either forward or reverse terminal fragments. Sequence quality, primer mismatches, insertions and deletions can be controlled and each primer pair-specific sequence collections can be exported for downstream analyses. The configuration for a novel T-RFLP population profiling using rpoB gene (DNA-directed RNA polymerase, beta subunit) on forward fluorescently labeled primer are presented.Availability: BEsTRF is freely available at http://lie.fe.uni-lj.si/bestrf and can be downloaded from the same site. The online protocol, numerous primer and enzyme dictionaries, sequence collections and results generated during this work for various genes are available at our website http://lie.fe.uni-lj.si/bestrf.Contact:blaz.stres@bfro.uni-lj.si},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Book Sections
Vitomir Štruc; Nikola Pavešić
Hand-Geometry Device Book Section
In: Li, Stan Z (Ed.): Encyclopedia of biometrics, pp. 693-698, Springer-Verlag, New York, 2009.
@incollection{Springer2009,
title = {Hand-Geometry Device},
author = {Vitomir Štruc and Nikola Pavešić},
editor = {Stan Z Li},
url = {https://lmi.fe.uni-lj.si/en/hand-geometrydevice/},
doi = {10.1007/978-0-387-73003-5_14},
year = {2009},
date = {2009-01-01},
urldate = {2009-01-01},
booktitle = {Encyclopedia of biometrics},
pages = {693--698},
publisher = {Springer-Verlag},
address = {New York},
abstract = {Hand-geometry devices are specially designed biometric devices used for capturing the geometric characteristics (e.g., the length, width, thickness and curvature of the fingers, the palm size, and the distances between joints) of a human hand for hand-geometry-based identity verification. A typical hand-geometry device records images of the lateral and dorsal parts of the hand with a charge-coupled device (CCD) camera that is mounted above a flat surface on which the person presented to the device places his/her hand. The set of geometrical features extracted from these images is then matched against a pre-recorded template stored in the device’s database. Depending on the result of this matching procedure, the identity of the person presented to the device is either verified or not.},
keywords = {},
pubstate = {published},
tppubtype = {incollection}
}
Proceedings Articles
Vitomir Štruc; Rok Gajšek; Nikola Pavešić
Principal Gabor Filters for Face Recognition Proceedings Article
In: Proceedings of the 3rd IEEE International Conference on Biometrics: Theory, Systems and Applications (BTAS'09), pp. 1-6, IEEE, Washington D.C., U.S.A., 2009.
@inproceedings{BTAS2009,
title = {Principal {Gabor} Filters for Face Recognition},
author = {Vitomir Štruc and Rok Gajšek and Nikola Pavešić},
url = {https://lmi.fe.uni-lj.si/en/principalgaborfiltersforfacerecognition/},
doi = {10.1109/BTAS.2009.5339020},
year = {2009},
date = {2009-09-01},
urldate = {2009-09-01},
booktitle = {Proceedings of the 3rd IEEE International Conference on Biometrics: Theory, Systems and Applications (BTAS'09)},
pages = {1--6},
publisher = {IEEE},
address = {Washington D.C., U.S.A.},
abstract = {Gabor filters have proven themselves to be a powerful tool for facial feature extraction. An abundance of recognition techniques presented in the literature exploits these filters to achieve robust face recognition. However, while exhibiting desirable properties, such as orientational selectivity or spatial locality, Gabor filters have also some shortcomings which crucially affect the characteristics and size of the Gabor representation of a given face pattern. Amongst these shortcomings the fact that the filters are not orthogonal one to another and are, hence, correlated is probably the most important. This makes the information contained in the Gabor face representation redundant and also affects the size of the representation. To overcome this problem we propose in this paper to employ orthonormal linear combinations of the original Gabor filters rather than the filters themselves for deriving the Gabor face representation. The filters, named principal Gabor filters for the fact that they are computed by means of principal component analysis, are assessed in face recognition experiments performed on the XM2VTS and YaleB databases, where encouraging results are achieved.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Rok Gajšek; Vitomir Štruc; Simon Dobrišek; France Mihelič
Emotion recognition using linear transformations in combination with video Proceedings Article
In: Speech and intelligence: proceedings of Interspeech 2009, pp. 1967-1970, Brighton, UK, 2009.
@inproceedings{InterSp2009,
title = {Emotion recognition using linear transformations in combination with video},
author = {Rok Gajšek and Vitomir Štruc and Simon Dobrišek and France Mihelič},
url = {https://lmi.fe.uni-lj.si/en/emotionrecognitionusinglineartransformationsincombinationwithvideo/},
year = {2009},
date = {2009-09-01},
urldate = {2009-09-01},
booktitle = {Speech and intelligence: proceedings of Interspeech 2009},
pages = {1967--1970},
address = {Brighton, UK},
abstract = {The paper discuses the usage of linear transformations of Hidden Markov Models, normally employed for speaker and environment adaptation, as a way of extracting the emotional components from the speech. A constrained version of Maximum Likelihood Linear Regression (CMLLR) transformation is used as a feature for classification of normal or aroused emotional state. We present a procedure of incrementally building a set of speaker independent acoustic models, that are used to estimate the CMLLR transformations for emotion classification. An audio-video database of spontaneous emotions (AvID) is briefly presented since it forms the basis for the evaluation of the proposed method. Emotion classification using the video part of the database is also described and the added value of combining the visual information with the audio features is shown.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Vitomir Štruc; Zongmin Ma; Nikola Pavešić
Nuisance Attribute Projection in the Logarithm Domain for Face Recognition under Severe Illumination Changes Proceedings Article
In: Proceedings of the IEEE International Electrotechnical and Computer Science Conference (ERK'09), pp. 279-281, Portorož, Slovenia, 2009.
@inproceedings{ERK2009N,
title = {Nuisance Attribute Projection in the Logarithm Domain for Face Recognition under Severe Illumination Changes},
author = {Vitomir Štruc and Zongmin Ma and Nikola Pavešić},
year = {2009},
date = {2009-09-01},
booktitle = {Proceedings of the IEEE International Electrotechnical and Computer Science Conference (ERK'09)},
pages = {279--281},
address = {Portorož, Slovenia},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Vitomir Štruc; Zongmin Ma; Nikola Pavešić
Face Recognition using Sparse Projection Axes Proceedings Article
In: Proceedings of the IEEE International Electrotechnical and Computer Science Conference (ERK'09), pp. 271-274, Portorož, Slovenia, 2009.
@inproceedings{ERK2009S,
title = {Face Recognition using Sparse Projection Axes},
author = {Vitomir Štruc and Zongmin Ma and Nikola Pavešić},
year = {2009},
date = {2009-09-01},
booktitle = {Proceedings of the IEEE International Electrotechnical and Computer Science Conference (ERK'09)},
pages = {271--274},
address = {Portorož, Slovenia},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Vitomir Štruc; Nikola Pavešić
A comparative assessment of appearance based feature extraction techniques and their susceptibility to image degradations in face recognition systems Proceedings Article
In: Proceedings of the International Conference on Machine Learning and Pattern Recognition (ICMLPR'09), pp. 326-334, Paris, France, 2009.
@inproceedings{FSKD208b,
title = {A comparative assessment of appearance based feature extraction techniques and their susceptibility to image degradations in face recognition systems},
author = {Vitomir Štruc and Nikola Pavešić},
url = {https://lmi.fe.uni-lj.si/en/acomparativeassessmentofappearancebasedfeatureextractiontechniquesandtheirsusceptibilitytoimagedegradationsinfacerecognitionsystems/},
year = {2009},
date = {2009-06-01},
urldate = {2009-06-01},
booktitle = {Proceedings of the International Conference on Machine Learning and Pattern Recognition (ICMLPR'09)},
volume = {54},
pages = {326--334},
address = {Paris, France},
abstract = {Over the past decades, automatic face recognition has become a highly active research area, mainly due to the countless application possibilities in both the private as well as the public sector. Numerous algorithms have been proposed in the literature to cope with the problem of face recognition, nevertheless, a group of methods commonly referred to as appearance based have emerged as the dominant solution to the face recognition problem. Many comparative studies concerned with the performance of appearance based methods have already been presented in the literature, not rarely with inconclusive and often with contradictory results. No consent has been reached within the scientific community regarding the relative ranking of the efficiency of appearance based methods for the face recognition task, let alone regarding their susceptibility to appearance changes induced by various environmental factors. To tackle these open issues, this paper assess the performance of the three dominant appearance based methods: principal component analysis, linear discriminant analysis and independent component analysis, and compares them on equal footing (i.e., with the same preprocessing procedure, with optimized parameters for the best possible performance, etc.) in face verification experiments on the publicly available XM2VTS database. In addition to the comparative analysis on the XM2VTS database, ten degraded versions of the database are also employed in the experiments to evaluate the susceptibility of the appearance based methods on various image degradations which can occur in ”real-life” operating conditions. Our experimental results suggest that linear discriminant analysis ensures the most consistent verification rates across the tested databases.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Vitomir Štruc; Nikola Pavešić
A comparison of feature normalization techniques for PCA-based palmprint recognition Proceedings Article
In: Proceedings of the International Conference on Mathematical Modeling (MATHMOD'09), pp. 2450-2453, Vienna, Austria, 2009.
@inproceedings{Mathmod09,
title = {A comparison of feature normalization techniques for {PCA}-based palmprint recognition},
author = {Vitomir Štruc and Nikola Pavešić},
url = {https://lmi.fe.uni-lj.si/en/acomparisonoffeaturenormalizationtechniquesforpca-basedpalmprintrecognition/},
year = {2009},
date = {2009-02-01},
urldate = {2009-02-01},
booktitle = {Proceedings of the International Conference on Mathematical Modeling (MATHMOD'09)},
pages = {2450--2453},
address = {Vienna, Austria},
abstract = {Computing user templates (or models) for biometric authentication systems is one of the most crucial steps towards efficient and accurate biometric recognition. The constructed templates should encode user specific information extracted from a sample of a given biometric modality, such as, for example, palmprints, and exhibit a sufficient level of dissimilarity with other templates stored in the systems database. Clearly, the characteristics of the user templates depend on the approach employed for the extraction of biometric features, as well as on the procedure used to normalize the extracted feature vectors. While feature-extraction methods are a well studied topic, for which a vast amount of comparative studies can be found in the literature, normalization techniques lack such studies and are only briefly mentioned in most cases. In this paper we, therefore, apply several normalization techniques to feature vectors extracted from palmprint images by means of principal component analysis (PCA) and perform a comparative analysis on the results. We show that the choice of an appropriate normalization technique greatly influences the performance of the palmprint-based authentication system and can result in error rate reductions of more than 30%.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Vitomir Štruc; Nikola Pavešić
Image normalization techniques for robust face recognition Proceedings Article
In: Proceedings of the International Conference on Signal Processing, Robotics and Automation (ISPRA'09), pp. 155-160, Cambridge, UK, 2009.
@inproceedings{ISPRA09,
title = {Image normalization techniques for robust face recognition},
author = {Vitomir Štruc and Nikola Pavešić},
year = {2009},
date = {2009-02-01},
booktitle = {Proceedings of the International Conference on Signal Processing, Robotics and Automation (ISPRA'09)},
pages = {155--160},
address = {Cambridge, UK},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Norman Poh; Chi Ho Chan; Josef Kittler; Sebastien Marcel; Christopher McCool; Enrique Argones-Rua; Jose Luis Alba-Castro; Mauricio Villegas; Roberto Paredes; Vitomir Štruc; Nikola Pavešić; Albert Ali Salah; Hui Fang; Nicholas Costen
Face Video Competition Proceedings Article
In: Tistarelli, Massimo; Nixon, Mark (Ed.): Proceedings of the International Conference on Biometrics (ICB), pp. 715-724, Springer-Verlag, Berlin, Heidelberg, 2009.
@inproceedings{ICB2009,
title = {Face Video Competition},
author = {Norman Poh and Chi Ho Chan and Josef Kittler and Sebastien Marcel and Christopher McCool and Enrique Argones-Rua and Jose Luis Alba-Castro and Mauricio Villegas and Roberto Paredes and Vitomir Štruc and Nikola Pavešić and Albert Ali Salah and Hui Fang and Nicholas Costen},
editor = {Massimo Tistarelli and Mark Nixon},
url = {https://lmi.fe.uni-lj.si/en/facevideocompetition/},
year = {2009},
date = {2009-01-01},
urldate = {2009-01-01},
booktitle = {Proceedings of the International Conference on Biometrics (ICB)},
volume = {5558},
pages = {715--724},
publisher = {Springer-Verlag},
address = {Berlin, Heidelberg},
series = {Lecture Notes in Computer Science},
abstract = {Person recognition using facial features, e.g., mug-shot images, has long been used in identity documents. However, due to the widespread use of web-cams and mobile devices embedded with a camera, it is now possible to realise facial video recognition, rather than resorting to just still images. In fact, facial video recognition offers many advantages over still image recognition; these include the potential of boosting the system accuracy and deterring spoof attacks. This paper presents the first known benchmarking effort of person identity verification using facial video data. The evaluation involves 18 systems submitted by seven academic institutes.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Vitomir Štruc; Nikola Pavešić
Illumination Invariant Face Recognition by Non-Local Smoothing Proceedings Article
In: Biometric ID management and multimodal communication, pp. 1-8, Springer-Verlag, Berlin, Heidelberg, 2009.
@inproceedings{BioID_Multi2009,
title = {Illumination Invariant Face Recognition by Non-Local Smoothing},
author = {Vitomir Štruc and Nikola Pavešić},
url = {https://lmi.fe.uni-lj.si/en/illuminationinvariantfacerecognitionbynon-localsmoothing/},
year = {2009},
date = {2009-01-01},
urldate = {2009-01-01},
booktitle = {Biometric ID management and multimodal communication},
volume = {5707},
pages = {1--8},
publisher = {Springer-Verlag},
address = {Berlin, Heidelberg},
series = {Lecture Notes in Computer Science},
abstract = {Existing face recognition techniques struggle with their performance when identities have to be determined (recognized) based on image data captured under challenging illumination conditions. To overcome the susceptibility of the existing techniques to illumination variations numerous normalization techniques have been proposed in the literature. These normalization techniques, however, still exhibit some shortcomings and, thus, offer room for improvement. In this paper we identify the most important weaknesses of the commonly adopted illumination normalization techniques and presents two novel approaches which make use of the recently proposed non-local means algorithm. We assess the performance of the proposed techniques on the YaleB face database and report preliminary results.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Rok Gajšek; Vitomir Štruc; Simon Dobrišek; Janez Žibert; France Mihelič; Nikola Pavešić
Combining audio and video for detection of spontaneous emotions Proceedings Article
In: Biometric ID management and multimodal communication, pp. 114-121, Springer-Verlag, Berlin, Heidelberg, 2009.
@inproceedings{BioID_Multi2009b,
title = {Combining audio and video for detection of spontaneous emotions},
author = {Rok Gajšek and Vitomir Štruc and Simon Dobrišek and Janez Žibert and France Mihelič and Nikola Pavešić},
url = {https://lmi.fe.uni-lj.si/en/combiningaudioandvideofordetectionofspontaneousemotions/},
year = {2009},
date = {2009-01-01},
urldate = {2009-01-01},
booktitle = {Biometric ID management and multimodal communication},
volume = {5707},
pages = {114--121},
publisher = {Springer-Verlag},
address = {Berlin, Heidelberg},
series = {Lecture Notes in Computer Science},
abstract = {The paper presents our initial attempts in building an audio video emotion recognition system. Both, audio and video sub-systems are discussed, and description of the database of spontaneous emotions is given. The task of labelling the recordings from the database according to different emotions is discussed and the measured agreement between multiple annotators is presented. Instead of focusing on the prosody in audio emotion recognition, we evaluate the possibility of using linear transformations (CMLLR) as features. The classification results from audio and video sub-systems are combined using sum rule fusion and the increase in recognition results, when using both modalities, is presented.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Rok Gajšek; Vitomir Štruc; Boštjan Vesnicer; Anja Podlesek; Luka Komidar; France Mihelič
Analysis and assessment of AvID: multi-modal emotional database Proceedings Article
In: Text, speech and dialogue / 12th International Conference, pp. 266-273, Springer-Verlag, Berlin, Heidelberg, 2009.
@inproceedings{TSD2009,
title = {Analysis and assessment of {AvID}: multi-modal emotional database},
author = {Rok Gajšek and Vitomir Štruc and Boštjan Vesnicer and Anja Podlesek and Luka Komidar and France Mihelič},
url = {https://lmi.fe.uni-lj.si/en/analysisandassessmentofavidmulti-modalemotionaldatabase/},
year = {2009},
date = {2009-01-01},
urldate = {2009-01-01},
booktitle = {Text, speech and dialogue / 12th International Conference},
volume = {5729},
pages = {266--273},
publisher = {Springer-Verlag},
address = {Berlin, Heidelberg},
series = {Lecture Notes in Computer Science},
abstract = {The paper deals with the recording and the evaluation of a multi modal (audio/video) database of spontaneous emotions. Firstly, motivation for this work is given and different recording strategies used are described. Special attention is given to the process of evaluating the emotional database. Different kappa statistics normally used in measuring the agreement between annotators are discussed. Following the problems of standard kappa coefficients, when used in emotional database assessment, a new time-weighted free-marginal kappa is presented. It differs from the other kappa statistics in that it weights each utterance's particular score of agreement based on the duration of the utterance. The new method is evaluated and the superiority over the standard kappa, when dealing with a database of spontaneous emotions, is demonstrated.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
2008
Journal Articles
Vitomir Štruc; France Mihelič; Nikola Pavešić
Face authentication using a hybrid approach Journal Article
In: Journal of Electronic Imaging, vol. 17, no. 1, pp. 1-11, 2008.
@article{JEI-Struc_2008,
title = {Face authentication using a hybrid approach},
author = {Vitomir Štruc and France Mihelič and Nikola Pavešić},
url = {https://lmi.fe.uni-lj.si/en/faceauthenticationusingahybridapproach/},
doi = {10.1117/1.2885149},
year = {2008},
date = {2008-01-01},
urldate = {2008-01-01},
journal = {Journal of Electronic Imaging},
volume = {17},
number = {1},
pages = {1--11},
abstract = {This paper presents a hybrid approach to face-feature extraction based on the trace transform and the novel kernel partial-least-squares discriminant analysis (KPA). The hybrid approach, called trace kernel partial-least-squares discriminant analysis (TKPA) first uses a set of fifteen trace functionals to derive robust and discriminative facial features and then applies the KPA method to reduce their dimensionality. The feasibility of the proposed approach was successfully tested on the XM2VTS database, where a false rejection rate (FRR) of 1.25\% and a false acceptance rate (FAR) of 2.11\% were achieved in our best-performing face-authentication experiment. The experimental results also show that the proposed approach can outperform kernel methods such as generalized discriminant analysis (GDA), kernel fisher analysis (KFA) and complete kernel fisher discriminant analysis (CKFA) as well as combinations of these methods with features extracted using the trace transform.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Proceedings Articles
Vitomir Štruc; Boštjan Vesnicer; Nikola Pavešić
The phase-based Gabor Fisher classifier and its application to face recognition under varying illumination conditions Proceedings Article
In: Proceedings of the IEEE International Conference on Signal Processing and Communication Systems (ICSPCS'08), pp. 1-6, IEEE, Gold Coast, Australia, 2008, ISBN: 978-1-4244-4243-0.
@inproceedings{ICSPCS08,
title = {The phase-based Gabor Fisher classifier and its application to face recognition under varying illumination conditions},
author = {Vitomir Štruc and Boštjan Vesnicer and Nikola Pavešić},
doi = {10.1109/ICSPCS.2008.4813663},
isbn = {978-1-4244-4243-0},
year = {2008},
date = {2008-12-01},
booktitle = {Proceedings of the IEEE International Conference on Signal Processing and Communication Systems (ICSPCS'08)},
pages = {1--6},
publisher = {IEEE},
address = {Gold Coast, Australia},
abstract = {The paper introduces a feature extraction technique for face recognition called the phase-based Gabor Fisher classifier (PBGFC). The PBGFC method constructs an augmented feature vector which encompasses Gabor-phase information derived from a novel representation of face images - the oriented Gabor phase congruency image (OGPCI) - and then applies linear discriminant analysis to the augmented feature vector to reduce its dimensionality. The feasibility of the proposed method was assessed in a series of face verification experiments performed on the XM2VTS database. The experimental results show that the PBGFC method performs better than other popular feature extraction techniques such as principal component analysis (PCA), the Fisherface method or the DCT-mod2 approach, while it ensures similar verification performance as the established Gabor Fisher Classifier (GFC). The results also show that the proposed phase-based Gabor Fisher classifier performs the best among the tested methods when severe illumination changes are introduced to the face images.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Vitomir Štruc; Nikola Pavešić
The corrected normalized correlation coefficient: a novel way of matching score calculation for LDA-based face verification Proceedings Article
In: Proceedings of the IEEE International Conference on Fuzzy Systems and Knowledge Discovery (FSKD'08), pp. 110-115, IEEE, Jinan, China, 2008, ISBN: 978-0-7695-3305-6.
@inproceedings{FSKD208b,
title = {The corrected normalized correlation coefficient: a novel way of matching score calculation for LDA-based face verification},
author = {Vitomir Štruc and Nikola Pavešić},
url = {https://lmi.fe.uni-lj.si/en/thecorrectednormalizedcorrelationcoefficientanovelwayofmatchingscorecalculationforlda-basedfaceverification/},
doi = {10.1109/FSKD.2008.334},
isbn = {978-0-7695-3305-6},
year = {2008},
date = {2008-10-01},
urldate = {2008-10-01},
booktitle = {Proceedings of the IEEE International Conference on Fuzzy Systems and Knowledge Discovery (FSKD'08)},
volume = {4},
pages = {110--115},
publisher = {IEEE},
address = {Jinan, China},
abstract = {The paper presents a novel way of matching score calculation for LDA-based face verification. Different from the classical matching schemes, where the decision regarding the identity of the user currently presented to the face verification system is made based on the similarity (or distance) between the "live" feature vector and the template of the claimed identity, we propose to employ a measure we named the corrected normalized correlation coefficient, which considers both the similarity with the template of the claimed identity as well as the similarity with all other templates stored in the database. The effectiveness of the proposed measure was assessed on the publicly available XM2VTS database where encouraging results were achieved.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Vitomir Štruc; France Mihelič; Rok Gajšek; Nikola Pavešić
Regression techniques versus discriminative methods for face recognition Proceedings Article
In: Proceedings of the 9th international PhD Workshop on Systems and Control, pp. 1-5, Izola, Slovenia, 2008.
@inproceedings{PHD2008,
title = {Regression techniques versus discriminative methods for face recognition},
author = {Vitomir Štruc and France Mihelič and Rok Gajšek and Nikola Pavešić},
url = {https://lmi.fe.uni-lj.si/en/regressiontechniquesversusdiscriminativemethodsforfacerecognition/},
year = {2008},
date = {2008-10-01},
urldate = {2008-10-01},
booktitle = {Proceedings of the 9th international PhD Workshop on Systems and Control},
pages = {1--5},
address = {Izola, Slovenia},
abstract = {In the field of face recognition it is generally believed that “state of the art” recognition rates can only be achieved when discriminative (e.g., linear or generalized discriminant analysis) rather than expressive (e.g., principal or kernel principal component analysis) methods are used for facial feature extraction. However, while being superior in terms of the recognition rates, the discriminative techniques still exhibit some shortcomings when compared to the expressive approaches. More specifically, they suffer from the so-called small sample size (SSS) problem which is regularly encountered in the field of face recognition and occurs when the sample dimensionality is larger than the number of available training samples per subject. In this type of problems, the discriminative techniques need modifications in order to be feasible, but even in their most elaborate forms require at least two training samples per subject. The expressive approaches, on the other hand, are not susceptible to the SSS problem and are thus applicable even in the most extreme case of the small sample size problem, i.e., when only one training sample per subject is available. Nevertheless, in this paper we will show that the recognition performance of the expressive methods can match (or in some cases surpass) that of the discriminative techniques if the expressive feature extraction approaches are used as multivariate regression techniques with a pre-designed response matrix that encodes the class membership of the training samples. The effectiveness of the regression techniques for face recognition is demonstrated in a series of experiments performed on the ORL database. Additionally a comparative assessment of the regression techniques and popular discriminative approaches is presented.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Vitomir Štruc; France Mihelič; Nikola Pavešić
Combining experts for improved face verification performance Proceedings Article
In: Proceedings of the IEEE International Electrotechnical and Computer Science Conference (ERK'08), pp. 233-236, Portorož, Slovenia, 2008.
@inproceedings{ERK2008,
title = {Combining experts for improved face verification performance},
author = {Vitomir Štruc and France Mihelič and Nikola Pavešić},
url = {https://lmi.fe.uni-lj.si/en/combiningexpertsforimprovedfaceverificationperformance/},
year = {2008},
date = {2008-09-01},
urldate = {2008-09-01},
booktitle = {Proceedings of the IEEE International Electrotechnical and Computer Science Conference (ERK'08)},
pages = {233--236},
address = {Portorož, Slovenia},
abstract = {Samodejno razpoznavanje (avtentikacija/identifikacija) obrazov predstavlja eno najaktivnejših raziskovalnih področij biometrije. Avtentikacija oz. identifikacija oseb z razpoznavanjem obrazov ponuja možen način povečanja varnosti pri različnih dejavnostih, (npr. pri elektronskem poslovanju na medmrežju, pri bančnih storitvah ali pri vstopu v določene prostore, stavbe in države). Ponuja univerzalen in nevsiljiv način razpoznavanja oseb, ki pa trenutno še ni dovolj zanesljiv. Kot možna rešitev problema zanesljivosti razpoznavanja se v literaturi vse pogosteje pojavljajo večmodalni pristopi, v katerih se razpoznavanje izvede na podlagi večjega števila postopkov razpoznavanja obrazov. V skladu z opisanim trendom, bomo v članku ovrednotili zanesljivost delovanja različnih postopkov razpoznavanja obrazov, ki jih bomo na koncu združili še v večmodalni pristop. S pomočjo eksperimentov na podatkovni zbirki XM2VTS bomo preverili zanesljivost delovanja večmodalnega pristopa in jo primerjali z zanesljivostjo uveljavljenih postopkov razpoznavanja.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Vitomir Štruc; Nikola Pavešić
Palmprint recognition using the trace transform Proceedings Article
In: Proceedings of the national conference ROSUS'08, pp. 41-48, Maribor, Slovenia, 2008.
@inproceedings{rosus08,
title = {Palmprint recognition using the trace transform},
author = {Vitomir Štruc and Nikola Pavešić},
url = {https://lmi.fe.uni-lj.si/en/palmprintrecognitionusingthetracetransform/},
year = {2008},
date = {2008-03-01},
urldate = {2008-03-01},
booktitle = {Proceedings of the national conference ROSUS'08},
pages = {41--48},
address = {Maribor, Slovenia},
abstract = {Biometrija je znanstvena veda o metodah razpoznavanja ljudi na podlagi njihovih fizioloških in/ali vedenjskih značilnosti. Sistemi, ki uporabljajo te metode, služijo kot varnostni mehanizmi za omejevanje dostopa do določenih prostorov, zgradb ali storitev ter kot pomoč pri kriminalističnih preiskavah. V članku predstavljamo primer biometričnega sistema, ki preveri identiteto uporabnika na podlagi slike njegove dlani. Sistem temelji na novem, hibridnem postopku izpeljave značilk, ki na slikovnem področju dlani najprej izvede Kadyrov-Petrouvo transformacijo, transformirane slike pa s postopkom linearne diskriminantne analize v nadaljevanju pretvori v kompaktne vektorje značilk. Uspešnost razpoznavanja s predlaganim sistemom smo preizkusili na obsežni podatkovni zbirki, kjer smo dosegli zadovoljive rezultate.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Vitomir Štruc; Nikola Pavešić
A palmprint verification system based on phase congruency features Proceedings Article
In: Schouten, Ben; Juul, Niels Christian; Drygajlo, Andrzej; Tistarelli, Massimo (Ed.): Biometrics and Identity Management, pp. 110-119, Springer-Verlag, Berlin, Heidelberg, 2008.
@inproceedings{BioID2008,
title = {A palmprint verification system based on phase congruency features},
author = {Vitomir Štruc and Nikola Pavešić},
editor = {Ben Schouten and Niels Christian Juul and Andrzej Drygajlo and Massimo Tistarelli},
url = {https://lmi.fe.uni-lj.si/en/apalmprintverificationsystembasedonphasecongruencyfeatures/},
doi = {10.1007/978-3-540-89991-4_12},
year = {2008},
date = {2008-01-01},
urldate = {2008-01-01},
booktitle = {Biometrics and Identity Management},
volume = {5372},
pages = {110--119},
publisher = {Springer-Verlag},
address = {Berlin, Heidelberg},
series = {Lecture Notes in Computer Science},
abstract = {The paper presents a fully automatic palmprint verification system which uses 2D phase congruency to extract line features from a palmprint image and subsequently performs linear discriminant analysis on the computed line features to represent them in a more compact manner. The system was trained and tested on a database of 200 people (2000 hand images) and achieved a false acceptance rate (FAR) of 0.26\% and a false rejection rate (FRR) of 1.39\% in the best performing verification experiment. In a comparison, where in addition to the proposed system, three popular palmprint recognition techniques were tested for their verification accuracy, the proposed system performed the best.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Rok Gajšek; Anja Podlesek; Luka Komidar; Gregor Sočan; Boštjan Bajec; Vitomir Štruc; Valentin Bucik; France Mihelič
AvID: audio-video emotional database Proceedings Article
In: Proceedings of the 11th International Multi-conference Information Society (IS'08), pp. 70-74, Ljubljana, Slovenia, 2008.
@inproceedings{JJ2008,
title = {AvID: audio-video emotional database},
author = {Rok Gajšek and Anja Podlesek and Luka Komidar and Gregor Sočan and Boštjan Bajec and Vitomir Štruc and Valentin Bucik and France Mihelič},
year = {2008},
date = {2008-01-01},
booktitle = {Proceedings of the 11th International Multi-conference Information Society (IS'08)},
volume = {C},
pages = {70--74},
address = {Ljubljana, Slovenia},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
2007
Journal Articles
Vitomir Štruc; Nikola Pavešić
Impact of image degradations on the face recognition accuracy Journal Article
In: Electrotechnical Review, vol. 74, no. 3, pp. 145-150, 2007.
@article{EV-Struc_2007,
title = {Impact of image degradations on the face recognition accuracy},
author = {Vitomir Štruc and Nikola Pavešić},
url = {https://lmi.fe.uni-lj.si/en/impactofimagedegradationsonthefacerecognitionaccuracy/},
year = {2007},
date = {2007-01-01},
urldate = {2007-01-01},
journal = {Electrotechnical Review},
volume = {74},
number = {3},
pages = {145--150},
abstract = {The accuracy of automatic face recognition systems depends on various factors among which robustness and accuracy of the face localization procedure, choice of an appropriate face-feature extraction procedure, as well as use of a suitable matching algorithm are the most important. Current systems perform relatively well whenever test images to be recognized are captured under conditions similar to those of the training images. However, they are not robust enough if there is a difference between test and training images. Changes in image characteristics such as noise, colour depth, background and compression all cause a drop in performance of even the best systems of today. At this point the main question is which image characteristics are the most important in terms of face recognition performance and how they affect the recognition accuracy. This paper addresses these issues and presents performance evaluation (Table 2.) of three popular subspace methods (PCA, LDA and ICA) using ten degraded versions of the XM2VTS face image database [10]. The presented experimental results show the effects of different changes in image characteristics on four score level fusion rules, namely, the maximum, minimum, sum and product rule. All of the feature extraction procedures as well as the fusion strategies are rather insensitive to the presence of noise, JPEG compression, colour depth reduction, and so forth, while on the other hand they all exhibit great sensitivity to degradations such as face occlusion and packet loss simulation.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Proceedings Articles
Nikola Pavešić; Vitomir Štruc; France Mihelič
Color spaces for face recognition Proceedings Article
In: Proceedings of the International Electrotechnical and Computer Science Conference (ERK'07), pp. 171-174, Portorož, Slovenia, 2007.
@inproceedings{ERK2007,
title = {Color spaces for face recognition},
author = {Nikola Pavešić and Vitomir Štruc and France Mihelič},
url = {https://lmi.fe.uni-lj.si/en/colorspacesforfacerecognition/},
year = {2007},
date = {2007-01-01},
urldate = {2007-01-01},
booktitle = {Proceedings of the International Electrotechnical and Computer Science Conference (ERK'07)},
pages = {171--174},
address = {Portorož, Slovenia},
abstract = {The paper investigates the impact that the face-image color space has on the verification performance of two popular face recognition procedures, i.e., the Fisherface approach and the Gabor-Fisher classifier - GFC. Experimental results on the XM2VTS database show that the Fisherface technique performs best when features are extracted from the Cr component of the YCbCr color space, while the performance of the Gabor-Fisher classifier is optimized when grey-scale intensity face-images are used for feature extraction. Based on these findings, a novel face recognition framework that combines the Fisherface and the GFC method is introduced in this paper and its feasibility demonstrated in a comparative study where, in addition to the proposed method, six widely used feature extraction techniques were tested for their face verification performance.},
key = {ERK2007},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
2004
Journal Articles
Boštjan Murovec; Peter Šuhel
A repairing technique for the local search of the job-shop problem Journal Article
In: European Journal of Operational Research, vol. 153, no. 1, pp. 220 - 238, 2004, ISSN: 0377-2217, (Timetabling and Rostering).
@article{MUROVEC2004220,
title = {A repairing technique for the local search of the job-shop problem},
author = {Boštjan Murovec and Peter Šuhel},
url = {http://www.sciencedirect.com/science/article/pii/S0377221702007336},
doi = {10.1016/S0377-2217(02)00733-6},
issn = {0377-2217},
year = {2004},
date = {2004-01-01},
journal = {European Journal of Operational Research},
volume = {153},
number = {1},
pages = {220--238},
abstract = {The local search technique has become a widely used tool for solving many combinatorial optimization problems. In the case of the job-shop the implementation of such a technique is not straightforward at all due to the existence of the technological constraints among the operations that belong to the same job. Their presence renders a certain set of schedules infeasible. Consequently, special attention is required when defining optimization algorithms to prevent the possibility of reaching an infeasible schedule during execution. Traditionally, the problem is tackled on the neighborhood level by using only a limited set of moves for which feasibility inherently holds. This paper proposes an alternative way to avoid infeasibility by incorporating a repairing technique into the mechanism for applying moves to a schedule. Whenever an infeasible move is being applied, a repairing mechanism rearranges the underlying schedule in such a way that the feasibility of the move is restored. The possibility of reaching infeasible solutions is, therefore, eliminated on the lowest possible conceptual level. Consequently, neighborhood functions need not to be constrained to a limited set of feasible moves any more.},
note = {Timetabling and Rostering},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
0000
Journal Articles
Blaz Meden; Peter Rot; Philipp Terhorst; Naser Damer; Arjan Kuijper; Walter J. Scheirer; Arun Ross; Peter Peer; Vitomir Štruc
Privacy-Enhancing Face Biometrics: A Comprehensive Survey Journal Article
In: IEEE Transactions on Information Forensics and Security, vol. 16, pp. 4147-4183, 2021.
@article{TIFS_PrivacySurvey,
title = {Privacy-Enhancing Face Biometrics: A Comprehensive Survey},
author = {Blaz Meden and Peter Rot and Philipp Terhorst and Naser Damer and Arjan Kuijper and Walter J. Scheirer and Arun Ross and Peter Peer and Vitomir Štruc},
url = {https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=9481149
https://lmi.fe.uni-lj.si/en/visual_privacy_of_faces__a_survey_preprint-compressed/},
doi = {10.1109/TIFS.2021.3096024},
year = {2021},
journal = {IEEE Transactions on Information Forensics and Security},
volume = {16},
pages = {4147--4183},
abstract = {Biometric recognition technology has made significant advances over the last decade and is now used across a number of services and applications. However, this widespread deployment has also resulted in privacy concerns and evolving societal expectations about the appropriate use of the technology. For example, the ability to automatically extract age, gender, race, and health cues from biometric data has heightened concerns about privacy leakage. Face recognition technology, in particular, has been in the spotlight, and is now seen by many as posing a considerable risk to personal privacy. In response to these and similar concerns, researchers have intensified efforts towards developing techniques and computational models capable of ensuring privacy to individuals, while still facilitating the utility of face recognition technology in several application scenarios. These efforts have resulted in a multitude of privacy--enhancing techniques that aim at addressing privacy risks originating from biometric systems and providing technological solutions for legislative requirements set forth in privacy laws and regulations, such as GDPR. The goal of this overview paper is to provide a comprehensive introduction into privacy--related research in the area of biometrics and review existing work on Biometric Privacy--Enhancing Techniques (B--PETs) applied to face biometrics. To make this work useful for as wide of an audience as possible, several key topics are covered as well, including evaluation strategies used with B--PETs, existing datasets, relevant standards, and regulations and critical open issues that will have to be addressed in the future.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Chenquan Gan; Daitao Zhou; Kexin Wang; Qingyi Zhu; Deepak Kumar Jain; Vitomir Štruc
Optimizing ambiguous speech emotion recognition through spatial–temporal parallel network with label correction strategy Journal Article
In: Computer Vision and Image Understanding, vol. 260, no. 104483, pp. 1–14, 2025.
@article{CVIU_2025,
title = {Optimizing ambiguous speech emotion recognition through spatial–temporal parallel network with label correction strategy},
author = {Chenquan Gan and Daitao Zhou and Kexin Wang and Qingyi Zhu and Deepak Kumar Jain and Vitomir Štruc},
url = {https://www.sciencedirect.com/science/article/pii/S1077314225002061?dgcid=coauthor
https://lmi.fe.uni-lj.si/wp-content/uploads/2025/09/CVIU.pdf},
doi = {10.1016/j.cviu.2025.104483},
year = {2025},
journal = {Computer Vision and Image Understanding},
volume = {260},
number = {104483},
pages = {1--14},
abstract = {Speech emotion recognition is of great significance for improving the human–computer interaction experience. However, traditional methods based on hard labels have difficulty dealing with the ambiguity of emotional expression. Existing studies alleviate this problem by redefining labels, but still rely on the subjective emotional expression of annotators and fail to consider the truly ambiguous speech samples without dominant labels fully. To solve the problems of insufficient expression of emotional labels and ignoring ambiguous undominantly labeled speech samples, we propose a label correction strategy that uses a model with exact sample knowledge to modify inappropriate labels for ambiguous speech samples, integrating model training with emotion cognition, and considering the ambiguity without dominant label samples. It is implemented on a spatial–temporal parallel network, which adopts a temporal pyramid pooling (TPP) to process the variable-length features of speech to improve the recognition efficiency of speech emotion. Through experiments, it has been shown that ambiguous speech after label correction has a more promoting effect on the recognition performance of speech emotions.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}