Copyright © 2026. Authors retain the copyright of this article. This article is an open access article distributed under the Creative Commons Attribution License, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.
@comment{Journal article record. The citation key 189525 is the manuscript number that also appears in the url field; keep it stable so existing citations continue to resolve.}
@article{189525,
  author   = {Khan, Ahmed Qudsi Ghouse Ali and Siddiqui, Kamel Alikhan},
  title    = {A Comparative Study of Classical Machine Learning Approaches for Multi-Label Toxic Comment Classification},
  journal  = {International Journal of Innovative Research in Technology},
  year     = {2025},
  month    = dec,
  volume   = {12},
  number   = {7},
  pages    = {5907--5910},
  issn     = {2349-6002},
  url      = {https://ijirt.org/article?manuscript=189525},
  abstract = {The increasing prevalence of toxic and abusive language on online platforms has raised significant concerns regarding user safety and community well-being. Automated toxic comment classification has therefore become an important research area within Natural Language Processing (NLP). While recent studies largely emphasize deep learning and transformer-based models, classical machine learning approaches continue to play an essential role due to their interpretability, reproducibility, and lower computational requirements. This paper presents a systematic comparative study of classical machine learning algorithms for multi-label toxic comment classification. A complete NLP pipeline is developed, including text preprocessing, feature extraction using Term Frequency--Inverse Document Frequency (TF-IDF), and independent binary classification for six toxicity categories. Six widely used machine learning models---Support Vector Machine, Logistic Regression, Naive Bayes, Decision Tree, Random Forest, and K-Nearest Neighbors---are evaluated using accuracy and Hamming Loss metrics. Experimental results show that ensemble and linear classifiers achieve competitive performance when supported by structured preprocessing. Rather than proposing a novel algorithm, this work establishes a transparent and reproducible baseline that highlights the strengths and limitations of classical approaches, providing a solid foundation for future research in explainable and responsible content moderation systems.},
  keywords = {Toxic Comment Classification, Multi-Label Classification, Natural Language Processing, Classical Machine Learning, TF-IDF, Online Content Moderation},
}
Submit your research paper and those of your network (friends, colleagues, or peers) through your IPN account, and receive 800 INR for each paper that gets published.
Join Now. National Conference on Sustainable Engineering and Management - 2024. Last Date: 15th March 2024.
Submit inquiry