Copyright © 2025. The authors retain the copyright of this article. This article is an open access article distributed under the Creative Commons Attribution License, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.
@comment{The citation key is the publisher's manuscript ID (see the url field); it is kept as-is so existing citations of this entry continue to resolve.}
@article{180431,
  author   = {Deb, Anikinee},
  title    = {Unlocking Student Dropout Patterns: A Machine Learning-Based Data Analysis},
  journal  = {International Journal of Innovative Research in Technology},
  year     = {2025},
  month    = jun,
  volume   = {12},
  number   = {1},
  pages    = {1530--1537},
  issn     = {2349-6002},
  url      = {https://ijirt.org/article?manuscript=180431},
  abstract = {In higher education, student dropout is still a major problem that has an impact on both the student outcome as well as institutional performance. This study uses a multi-dimensional dataset that includes academic, demographic, and economic features to examine how supervised machine learning algorithms can be used for predicting student dropout. These features were used to train six classifiers - Random Forest, Support Vector Classifier (SVC), Logistic Regression, K-Nearest Neighbours (KNN), Decision Tree, and Naive Bayes and evaluated using the performance metrics -- Accuracy and ROC-AUC. With ROC-AUC scores of 0.956, 0.951, and 0.948, respectively, and accuracies of up to 0.91, Logistic Regression, Random Forest, and SVM outperformed the other classifiers in terms of predictive performance. To evaluate each feature group's relative contribution, a methodical feature ablation analysis was performed. The Logistic Regression and Random Forest models' ROC-AUC scores dropped from 0.956 to 0.810 and 0.951 to 0.816, respectively, when academic features were removed, resulting in the biggest performance drop of any model. On the other hand, the performance of the model was only marginally affected by the elimination of demographic or economic characteristics. The findings show that the most important predictors of student dropout are academic indicators, which are followed by economic and demographic characteristics.},
  keywords = {Educational Data Mining, Random Forest, KNN, SVC, Logistic Regression, Decision Tree, Na{\"i}ve Bayes, Machine Learning, ROC-AUC, Dropout Prediction, Data Science},
}
Cite This Article
Submit your research paper and those of your network (friends, colleagues, or peers) through your IPN account, and receive 800 INR for each paper that gets published.
Join Now
National Conference on Sustainable Engineering and Management - 2024. Last Date: 15th March 2024
Submit inquiry