Copyright © 2026. Authors retain the copyright of this article. This article is an open access article distributed under the Creative Commons Attribution License, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.
@comment{Entry 186559: the citation key is the manuscript id that appears in the url field. Author names are stored in "Last, First" form without honorifics.}
@article{186559,
  author   = {Balaji, P. and Shakunthala, B. S. and Modi, Palak and Vora, Pooja and Bhatia, Jagriti},
  title    = {{AI}-Ready Data Pipelines for Domain-Specific Retrieval-Augmented Generation ({RAG}) Systems in Low-Resource Languages},
  journal  = {International Journal of Innovative Research in Technology},
  year     = {2025},
  month    = dec,
  volume   = {12},
  number   = {6},
  pages    = {7599--7602},
  issn     = {2349-6002},
  url      = {https://ijirt.org/article?manuscript=186559},
  abstract = {The rapid progress of large language models (LLMs) has enabled powerful retrieval-augmented generation (RAG) systems that combine information retrieval with text generation to improve factual accuracy and context awareness. However, such systems rely on high-quality, AI-ready data --- a condition rarely met in low-resource languages where text data is sparse, noisy.
This paper presents a modular, domain-specific data pipeline designed to prepare unstructured text in low-resource languages for RAG applications. The proposed framework includes language detection, adaptive chunking, multilingual embedding generation, and optimized vector storage. Using an educational dataset containing Telugu, Hindi, and English content, we demonstrate that our AI-ready data pipeline improves retrieval precision by 18\% and generation consistency by 12\% compared to baseline systems. The findings emphasize the importance of tailored data readiness in building inclusive and domain-aware AI systems.},
  keywords = {RAG, Low-Resource Languages, AI-Ready Data, Multilingual NLP, Vector Databases, Data Preprocessing},
}
Submit your research paper and those of your network (friends, colleagues, or peers) through your IPN account, and receive 800 INR for each paper that gets published.
Join Now
National Conference on Sustainable Engineering and Management - 2024. Last Date: 15th March 2024.
Submit inquiry