Copyright © 2026. Authors retain the copyright of this article. This article is an open access article distributed under the Creative Commons Attribution License, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.
% Journal article record; the citation key is the manuscript number that also
% appears in the url field, kept unchanged so existing \cite{178404} calls resolve.
% NOTE(review): author names are stored in "First Last" order as published, so
% BibTeX will treat the final token of each name (e.g. "B", "P", "R") as the
% surname. Confirm the intended surnames and switch to "Last, First" form.
@article{178404,
  author   = {Dennis M B and Akshay P and Darshan R and Chethan K S and Pulukuri Aparna},
  title    = {Lightweight {RAG} with Confidence-Aware Dynamic Thresholding},
  journal  = {International Journal of Innovative Research in Technology},
  year     = {2025},
  month    = may,
  volume   = {11},
  number   = {12},
  pages    = {3636--3638},
  issn     = {2349-6002},
  url      = {https://ijirt.org/article?manuscript=178404},
  keywords = {Retrieval-Augmented Generation, Large Language Model, Confidence-Aware Retrieval},
  abstract = {Retrieval-Augmented Generation (RAG) enhances language model quality by infusing external knowledge at the expenses of efficiency concerns with latency and memory. Recent research (2022–2025) responses to these through hybrid architectures, confidence-aware retrieval, and lightweight models. Retrieval can account for 35–45\% of retrieval, and memory consumption is excessive—albeit quantized indices can alleviate it by up to 7×. We explore prominent approaches and introduce Hybrid RAG with Confidence-Aware Thresholding, demonstrating gains in accuracy (EM, F1, BLEU/ROUGE), latency, memory, and energy. Implementation strategies include model pruning, efficient retrievers, and adaptive decoding for a compact, dynamic RAG pipeline.},
}
Submit your research paper and those of your network (friends, colleagues, or peers) through your IPN account, and receive 800 INR for each paper that gets published.
Join Now
National Conference on Sustainable Engineering and Management - 2024. Last Date: 15th March 2024
Submit inquiry