Copyright © 2026. Authors retain the copyright of this article. This article is an open access article distributed under the Creative Commons Attribution License, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.
@comment{Journal article entry. The citation key is the manuscript id that also appears in the url field.}
@article{190112,
  author   = {Patel, Rahul},
  title    = {Synthetic Image Generation for Mitigating Overfitting in Deep Learning under Data-Scarce Conditions},
  journal  = {International Journal of Innovative Research in Technology},
  year     = {2026},
  month    = jan,
  volume   = {12},
  number   = {8},
  pages    = {2018--2135},
  issn     = {2349-6002},
  url      = {https://ijirt.org/article?manuscript=190112},
  abstract = {Deep learning models have demonstrated strong performance in image classification; however, their effectiveness is highly dependent on the availability of large-scale labeled datasets. In data-scarce scenarios, convolutional neural networks are prone to overfitting limited training samples, resulting in poor generalization to unseen data. This challenge is especially relevant in practical applications where data collection is constrained by cost, accessibility, or domain-specific limitations.
This paper investigates the effectiveness of synthetic image generation as a lightweight and systematic approach for mitigating overfitting under extreme low-data conditions. Using the CIFAR-10 benchmark, a controlled limited-data setting is constructed with only 50 real training images per class. To enrich the training distribution without introducing additional real samples, synthetic images are generated through a combination of label-preserving geometric transformations and noise-based perturbations, designed to increase intra-class variability while preserving semantic consistency.
A ResNet-18 architecture is employed to evaluate the impact of synthetic image augmentation on training dynamics, overfitting behavior, and generalization performance. Models trained with synthetic-enhanced datasets are compared against baselines trained solely on limited real data under identical optimization settings. Experimental results demonstrate a substantial relative improvement in test accuracy and a pronounced reduction in the train--test generalization gap when synthetic data is incorporated.
These findings indicate that carefully designed synthetic image generation pipelines, even without complex generative models, can serve as an effective and computationally efficient strategy for improving robustness and generalization in deep learning systems operating under severe data-scarce conditions.},
  keywords = {Synthetic image generation, data augmentation, overfitting mitigation, generalization, low-data learning, convolutional neural networks, CIFAR-10},
}
Submit your research paper and those of your network (friends, colleagues, or peers) through your IPN account, and receive 800 INR for each paper that gets published.
Join Now. National Conference on Sustainable Engineering and Management - 2024. Last Date: 15th March 2024
Submit inquiry