Copyright © 2025 Authors retain the copyright of this article. This article is an open access article distributed under the Creative Commons Attribution License which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.
@article{165489,
  author        = {Parmar, Nandan},
  title         = {Ensemble of Data Augmentation Techniques for Efficient 3 Augmentation in {NLP}},
  journal       = {International Journal of Innovative Research in Technology},
  year          = {2025},
  month         = jun,
  volume        = {11},
  number        = {1},
  pages         = {2706--2734},
  issn          = {2349-6002},
  url           = {https://ijirt.org/article?manuscript=165489},
  keywords      = {Text Data Augmentation, NLP, Class Imbalance, Text Embeddings},
  internal-note = {title fragment "Efficient 3 Augmentation" looks garbled in the source export; verify against the publisher page},
  abstract      = {In the last decade, NLP has made significant advances in machine
                   learning. In so many machine learning scenarios, there isn't enough
                   data available to train a good classifier. Data augmentation can
                   indeed be utilized to solve this problem. It utilizes transformations
                   to artificially increase the amount of available training data. Due
                   of linguistic data's discrete character, this topic is still
                   relatively underexplored, in spite of the huge rise in usage. A major
                   goal of the DA techniques is to increase the diversity of training
                   data, allowing the model to better generalize when faced with novel
                   testing data. This study uses the term "data augmentation" to allude
                   as a broad concept that encompasses techniques for transforming
                   training data. While most text data augmentation research focuses on
                   the long-term aim of developing end-to-end learning solutions, this
                   study focuses on using pragmatic, robust, scalable, and
                   easy-to-implement data augmentation techniques comparable to those
                   used in computer vision. In natural language processing, simple but
                   successful data augmentation procedures have been implemented and
                   inspired by such efforts, we construct and compare ensemble data
                   augmentation for NLP classification. We are proposing an ensembling
                   of simple yet effective data augmentation techniques. Through
                   experiments on various dataset from kaggle, we show that ensembling
                   of augmentation can boost performance with any text embedding
                   technique particularly for small training sets. We conclude by
                   carrying out experiments on a classification datasets. Based on the
                   results, we draw conclusion that Effective DA approach by ensembles
                   of data augmentation can help practitioners choose suitable
                   augmentation technique in different settings.},
}
Cite This Article
Submit your research paper and those of your network (friends, colleagues, or peers) through your IPN account, and receive 800 INR for each paper that gets published.
Join Now. National Conference on Sustainable Engineering and Management - 2024. Last Date: 15th March 2024
Submit inquiry