Copyright © 2025. The authors retain the copyright of this article. This article is an open access article distributed under the Creative Commons Attribution License, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.
@comment{
  IJIRT manuscript 182048. The citation key is the manuscript number that
  also appears in the url field; it is kept as-is so existing citations of
  this entry continue to resolve.
  Names are stored in "Last, First" form so BibTeX does not mistake
  lowercase given names for a "von" particle. The title is stored in
  Title Case; the bibliography style decides the final casing.
}
@article{182048,
  author   = {Alam, Md Dilwar and Gupta, Deepti},
  title    = {Investigation of Self-Supervised Speech Models for Stuttered Speech Detection},
  journal  = {International Journal of Innovative Research in Technology},
  year     = {2025},
  month    = jul,
  volume   = {12},
  number   = {2},
  pages    = {520--526},
  issn     = {2349-6002},
  url      = {https://ijirt.org/article?manuscript=182048},
  keywords = {Self-Supervised Learning, Utterance-Level Stuttering Detection, Feature Extraction},
  abstract = {A speech condition called stuttering is typified by irregularities in speech fluency, such as repetitions, blocks, and prolongations. Speech-language pathologists' (SLPs') manual evaluations, which take a lot of time and need specialized knowledge, are a major component of traditional diagnosis. This study explores utterance-level stuttering detection using self-supervised learning (SSL) models to facilitate automated evaluation. We specifically assess how well a number of pretrained SSL speech models perform on utterance-level stuttering categorization tasks: WavLM Base, HuBERT Base, Wav2Vec 2.0 Base, WavLM Large, HuBERT Large, and Wav2Vec 2.0 Large. The Kassel State of Fluency (KSoF) dataset, FluencyBank, and SEP-28K are used for independent testing, and the models are refined using these datasets. F1 scores for various stuttering types are used to gauge performance. All three test sets (SEP-28K, FluencyBank, and KSoF) have the following F1 values: WavLM Base (0.797, 0.800, 0.772), HuBERT Base (0.790, 0.790, 0.766), Wav2Vec 2.0 Base (0.778, 0.782, 0.758), WavLM Large (0.832, 0.832, 0.758), HuBERT Large (0.817, 0.816, 0.788), and Wav2Vec 2.0 Large (0.804, 0.803, 0.779). WavLM Large continuously performs the best on utterance-level benchmarks out of all the models. This comparison study demonstrates how well SSL models identify stuttering and offers information about how they may be used in actual speech pathology and fluency disorder evaluation.},
}
Cite This Article
Submit your research paper and those of your network (friends, colleagues, or peers) through your IPN account, and receive 800 INR for each paper that gets published.
Join Now
National Conference on Sustainable Engineering and Management - 2024. Last Date: 15th March 2024
Submit inquiry