Copyright © 2025. Authors retain the copyright of this article. This article is an open-access article distributed under the Creative Commons Attribution License, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.
@article{175591,
  author        = {Aashritha, Surabhi and Abhinay, S. and Aakash, J. and Aarthi, S. and Aashritha, D. and Bhavani, P.},
  internal-note = {NOTE(review): name order assumed to be Given Surname / Initial Given as printed on the journal page -- confirm surname vs. given-name split against the published PDF},
  title         = {Data Poison Detection in Distributed Machine Learning Systems},
  journal       = {International Journal of Innovative Research in Technology},
  year          = {2025},
  month         = apr,
  volume        = {11},
  number        = {11},
  pages         = {3780--3787},
  issn          = {2349-6002},
  url           = {https://ijirt.org/article?manuscript=175591},
  abstract      = {The performance quality of machine learning models heavily depends on the quality of their data in training programs. The data manipulation activities of malicious actors cause problems because they produce inadequate model performance and weak decision systems. The Data Poisoning Detection project functions to protect training data through pre-analysis before its utilization in training processes. The system uses preprocessing methods combined with statistical analysis together with anomaly detection algorithms from machine learning to raise both data reliability levels and system integrity. Through distributed processing the solution allows datasets to be both uploaded and processed by three autonomous server systems. The servers apply data cleaning and feature encoding techniques along with normalization steps before executing SVM and Random Forest algorithm training methods. The system uses combination anomaly detection methods that involve Z-score and Interquartile Range (IQR) and Isolation Forest and One-Class SVM techniques to identify potential poisoning threats. The server-wide decision about data integrity arises from merged outcomes between all three servers which creates a strong and scalable detection system. The system improves readability through visual presentation tools together with in-depth performance results. The performance reports present data distribution information alongside model accuracy assessments and reports on detected anomalies. The project achieves secure AI development through multi-layer protection of machine learning pipelines which protects the systems against harmful data poisoning threats.},
  keywords      = {Data Poisoning, Machine Learning Security, Anomaly Detection, Distributed Computing, Data Integrity, Adversarial Attacks},
}
Cite This Article
Submit your research paper and those of your network (friends, colleagues, or peers) through your IPN account, and receive 800 INR for each paper that gets published.
Join Now. National Conference on Sustainable Engineering and Management - 2024. Last Date: 15th March 2024.
Submit inquiry