Copyright © 2026. Authors retain the copyright of this article. This article is an open access article distributed under the Creative Commons Attribution License, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.
% Journal article. The citation key equals the manuscript number that appears
% in the url field; it is kept unchanged so existing \cite commands still resolve.
% Author given names are only known as initials in the source record.
@article{181036,
  author   = {Vijayalakshmi, V. and Suguna, S.},
  title    = {Optimizing Phishing {URL} Detection with {TF-IDF}, {M-Relief}, and {RoBERTa}: A Deep Learning Approach},
  journal  = {International Journal of Innovative Research in Technology},
  year     = {2025},
  month    = jun,
  volume   = {12},
  number   = {1},
  pages    = {3409--3420},
  issn     = {2349-6002},
  url      = {https://ijirt.org/article?manuscript=181036},
  keywords = {Borderline SMOTE, TF-IDF, XGBoost, RoBERTa, Label encoding},
  abstract = {Malicious URLs are a major cyber security threat, enabling attacks
              like phishing and malware. Traditional detection methods, such as
              blacklists and heuristics, often miss new or disguised threats. To
              improve detection, machine learning and deep learning are
              increasingly used, though they depend on large, regularly updated
              datasets. This study introduces a novel phishing URL
              classification method that combines TF-IDF for feature extraction,
              Label Encoding for transforming categorical data, Borderline SMOTE
              to address class imbalance, M-Relief for feature selection, and
              RoBERTa, a transformer-based deep learning model, for final
              classification. The dataset includes a diverse mix of phishing and
              legitimate URLs. The effectiveness of the models is assessed by
              measuring their accuracy, analyzing precision, recall, confidence
              score, confusion matrix, histogram and AUC-ROC specifically for
              the classification of malware attacks. The fine-tuned RoBERTa
              model demonstrates superior performance in phishing detection,
              achieving 98.3\% accuracy on the test set. Compared to traditional
              classifiers like Random Forest, SVM, and XGBoost, RoBERTa excels
              in identifying phishing URLs with higher precision and recall. The
              proposed approach proves effective for real-time phishing
              detection, enhancing overall cyber security protection.},
}
Submit your research paper and those of your network (friends, colleagues, or peers) through your IPN account, and receive 800 INR for each paper that gets published.
Join Now. National Conference on Sustainable Engineering and Management - 2024. Last Date: 15th March 2024
Submit inquiry