Copyright © 2025. The authors retain the copyright of this article. This is an open access article distributed under the Creative Commons Attribution License, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.
% Journal article record for IJIRT manuscript 184691.
% Braces around "Spark" in the title keep its capital under sentence-casing styles.
% The month uses the predefined three-letter macro so styles can localise it.
@article{184691,
  author   = {Gupta, Sarvesh Kumar},
  title    = {Machine Learning Integration in {Spark}-Based Pipelines},
  journal  = {International Journal of Innovative Research in Technology},
  year     = {2025},
  month    = sep,
  volume   = {12},
  number   = {4},
  pages    = {3020--3025},
  issn     = {2349-6002},
  url      = {https://ijirt.org/article?manuscript=184691},
  abstract = {The growing demand for scalable and operationalized machine learning (ML) solutions has driven the adoption of Apache Spark as a platform for end-to-end ML workflows. This review explored the current landscape of ML integration within Spark-based pipelines, covering tools, architectures, scalability, and comparative performance with emerging distributed frameworks such as Ray, Dask, and Flink. Despite Spark's strength in unified batch-stream processing and its robust MLlib API, limitations persist in deep learning integration, real-time model updating, and GPU utilization. The review presented a theoretical S.P.A.R.K. framework and shared experimental benchmarks to guide practitioners in optimizing resource usage, tracking lineage, and enhancing modularity. Future progress will rely on tighter ecosystem integrations, automated MLOps workflows, and AI-driven orchestration to sustain Spark's relevance in the era of increasingly dynamic and heterogeneous ML workloads.},
  keywords = {Apache Spark, machine learning pipelines, distributed computing, MLlib, Spark Structured Streaming, MLOps, AutoML, scalable ML, model orchestration, big data infrastructure},
}
Cite This Article
Submit your research paper and those of your network (friends, colleagues, or peers) through your IPN account, and receive 800 INR for each paper that gets published.
Join Now
National Conference on Sustainable Engineering and Management - 2024. Last Date: 15th March 2024
Submit inquiry