Copyright © 2025 Authors retain the copyright of this article. This article is an open access article distributed under the Creative Commons Attribution License which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.
@article{188962,
  author   = {Nyamtulla, Syed and Tripathi, Dhirendra Kumar},
  title    = {Optimizing Cold Start and Resource Scheduling in Serverless {AI} Workloads for High-Performance Cloud Computing},
  journal  = {International Journal of Innovative Research in Technology},
  year     = {2025},
  month    = dec,
  volume   = {12},
  number   = {7},
  pages    = {4021--4042},
  issn     = {2349-6002},
  url      = {https://ijirt.org/article?manuscript=188962},
  abstract = {Serverless computing has gained significant attention as a deployment model for artificial intelligence (AI) applications due to its elasticity, reduced operational overhead, and cost efficiency. However, one of its major limitations remains the cold start latency, which severely impacts real-time performance for latency-sensitive AI workloads such as natural language processing (NLP), image classification, time-series forecasting, and recommendation systems. This study addresses this challenge by conducting extensive benchmarking of serverless platforms and introducing algorithmic improvements through workload-aware scheduling and pre-warming strategies. The research methodology involved systematic comparisons of serverless deployments against containerized and virtual machine (VM)-based models across multiple performance indicators, including latency, throughput, scalability, resource utilization, and cost efficiency. Experimental results show that the proposed warm-pool strategy reduced cold start latency from 250–600 ms to below 100 ms in 83% of test cases, yielding a 30–50% performance improvement, while workload-class scheduling enhanced resource efficiency by 20% and strengthened concurrency handling, achieving up to 1000 requests per second under peak load. These findings confirm that integrating dynamic scheduling with pre-warming can significantly mitigate cold start penalties, establishing a practical framework for optimizing serverless AI deployments and contributing to more reliable, scalable, and cost-effective high-performance cloud computing.},
  keywords = {Serverless Computing, Cold Start Mitigation, Resource Scheduling, AI/ML Workloads, High-Performance Computing, Cloud Optimization, Function Warm Pools, Auto-scaling},
}
Cite This Article
Submit your research paper and those of your network (friends, colleagues, or peers) through your IPN account, and receive 800 INR for each paper that gets published.
Join Now. National Conference on Sustainable Engineering and Management - 2024. Last Date: 15th March 2024.
Submit inquiry