Copyright © 2026. The authors retain the copyright of this article. This is an open access article distributed under the Creative Commons Attribution License, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.
@comment{
  Journal article record for IJIRT manuscript 195619.
  Percent signs in the abstract are backslash-escaped so the field stays
  safe if a bibliography style typesets it.
}
@article{195619,
  author   = {Y. Varshini and Y. Prabhu Kumari and A. Pavan Kumar},
  title    = {Enhancing Heart Attack Risk Prediction with Clustering and Regression: Insights into Post-Pandemic Vulnerabilities},
  journal  = {International Journal of Innovative Research in Technology},
  year     = {2026},
  month    = apr,
  volume   = {12},
  number   = {11},
  pages    = {939--947},
  issn     = {2349-6002},
  url      = {https://ijirt.org/article?manuscript=195619},
  abstract = {Heart attack is one of the leading causes of death worldwide, and early identification of high-risk individuals can significantly reduce fatal outcomes. Traditional methods of diagnosis depend on manual analysis of medical reports and clinical experience, which may sometimes delay timely decision-making. The COVID-19 pandemic has further amplified cardiovascular vulnerabilities, increasing post-acute risks of myocarditis, hypertension, and acute myocardial infarction reinforcing the urgent need for automated, data-driven screening tools aligned with UN Sustainable Development Goal 3 (SDG 3: Good Health and Well-being).
This paper presents CardioSense, a Clinical Decision Support System (CDSS) that integrates Gaussian Mixture Model (GMM) and K-Means clustering with a calibrated stacking ensemble classifier for heart attack risk prediction. The system is built on a curated subset of the Kaggle Cardiovascular Disease Dataset (top 5,000 records) featuring medical attributes such as age, gender, blood pressure, cholesterol, glucose, ECG-derived parameters, and exercise-induced variables. A hybrid pipeline applies StandardScaler normalization, silhouette-optimal K-Means clustering, SMOTE class balancing, and trains eight classifiers Logistic Regression, Random Forest, KNN, Naive Bayes, SVM, Decision Tree, XGBoost, and a Stacking Ensemble evaluated via 10-fold stratified cross-validation.
A risk scoring mechanism based on correlation and regression is implemented to provide individual risk scores. The stacking ensemble achieves 73.8\% accuracy, AUC-ROC of 0.815, sensitivity of 73.6\%, specificity of 74.1\%, and MCC of 0.48. Note: the lower accuracy relative to studies on small datasets (503 records) reflects the real-world prediction task on a more heterogeneous dataset. SHAP analysis identifies systolic blood pressure, BMI, and age as dominant predictors. A Flask-based CDSS delivers real-time risk scores with clinical recommendations. This project highlights the potential of machine learning in healthcare by assisting doctors in early diagnosis and improving clinical decision-making through a simple, non-invasive system.},
  keywords = {Heart attack prediction; GMM clustering; Stacking ensemble; SHAP; CDSS; Post-pandemic; SMOTE; XGBoost; Logistic Regression; Random Forest; Flask; Kaggle cardiovascular dataset; SDG 3},
}
Submit your research paper and those of your network (friends, colleagues, or peers) through your IPN account, and receive 800 INR for each paper that gets published.
Join Now. National Conference on Sustainable Engineering and Management - 2024. Last Date: 15th March 2024
Submit inquiry