Copyright © 2026. Authors retain the copyright of this article. This article is an open access article distributed under the Creative Commons Attribution License, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.
@comment{Journal article record. The citation key matches the manuscript id in the url field and is kept unchanged so existing citations still resolve.}
@article{186716,
  author   = {S. Abdul Kabeer},
  title    = {Financial Fraud Detection in Skewed Data Using Machine Learning and Value at Risk},
  journal  = {International Journal of Innovative Research in Technology},
  year     = {2025},
  volume   = {12},
  number   = {6},
  pages    = {3545--3554},
  issn     = {2349-6002},
  url      = {https://ijirt.org/article?manuscript=186716},
  abstract = {The increasing use of online banking services has led to significant financial losses for banks and other financial institutions due to new bank account (NBA) fraud. Machine learning (ML) models face substantial challenges from the inherent skewness and rarity of NBA fraud incidents, where non-fraud instances vastly outnumber fraud cases. This imbalance often results in ML models misclassifying fraudulent transactions as legitimate, potentially undermining customer trust. Previous research addressing dataset skewness has primarily focused on fraud patterns rather than quantifying potential losses associated with NBA fraud risk characteristics. This study proposes the identification of NBA fraud from a value-at-risk perspective, treating fraud incidents as worst-case scenarios. The complexity of fraudulent activities and the rapid growth of online transactions make financial fraud detection particularly challenging in the digital economy. Traditional statistical methods frequently fail to capture the nonlinear patterns of fraud, especially in highly imbalanced datasets where fraudulent transactions constitute a small fraction of all activity. To address these challenges, a hybrid architecture is proposed that integrates Value at Risk (VaR) with machine learning techniques for financial fraud detection. Supervised learning classifiers such as Random Forest, XGBoost, and Neural Networks are combined with the VaR model to enhance prediction accuracy by measuring transaction risk levels.
Resampling methods and cost-sensitive learning are utilized to mitigate class imbalance, facilitating the detection of minority fraudulent cases. Experimental findings on benchmark financial transaction datasets demonstrate that the suggested strategy enhances fraud detection rates while preserving a low false positive rate. The amalgamation of machine learning and risk management frameworks facilitates the creation of a resilient, data-informed fraud detection system that is adaptive to practical financial contexts. The value-at-risk model quantifies the skewed tail distribution and assesses possible losses of risk attributes using historical simulation. Machine learning is utilized on the bank account fraud (BAF) dataset to categorize risk-return attributes derived from value-at-risk. To address skewed NBA fraud scenarios, value-at-risk regulates fraud skewness through a modifiable threshold probability range.
The efficacy of the fraud detection system was assessed utilizing a distinctive detection rate (DT) metric that integrates risk fraud attributes. A superior fraud detection model was created with K-nearest neighbor, attaining a true positive (TP) rate of 0.95 and a detection rate of 0.9406. Value-at-risk offers a methodical framework for developing data-informed criteria for fraud risk management within permissible loss thresholds in the banking industry.},
  keywords = {Django-ORM, Python, HTML, CSS, JavaScript, NBA fraud, ML, BAF, K-nearest neighbor, True positive, risk fraud, MySQL (WAMP Server)},
  month    = nov,
}
Submit your research paper and those of your network (friends, colleagues, or peers) through your IPN account, and receive 800 INR for each paper that gets published.
Join Now. National Conference on Sustainable Engineering and Management - 2024. Last Date: 15th March 2024.
Submit inquiry