Copyright © 2026. Authors retain the copyright of this article. This article is an open access article distributed under the Creative Commons Attribution License, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.
@comment{
  IJIRT journal article, manuscript id 201405 (see the url field).
  The issue number field is intentionally omitted: the upstream export
  carried the placeholder value "no" instead of a real issue number.
  TODO(review): confirm the actual issue number and add a number field.
  NOTE(review): under First-Last parsing the name "Vaishnavi P C" yields
  the surname "C"; confirm the intended surname and switch the author
  field to the unambiguous "Last, First" form.
}
@article{201405,
  author   = {Vaishnavi P C and Arjun Paramarthalingam},
  title    = {{AI}-Driven Chatbot for Government Scheme Accessibility in {India} Using Hybrid Retrieval-Augmented Generation with Large Language Models},
  journal  = {International Journal of Innovative Research in Technology},
  year     = {2026},
  month    = may,
  volume   = {12},
  pages    = {354--361},
  issn     = {2349-6002},
  url      = {https://ijirt.org/article?manuscript=201405},
  abstract = {Retrieval of relevant government welfare schemes in India are still not an easy task, and accessing the required data is not an easy task because of scattered data sources, the heterogeneity of eligibility criteria, and a lack of custom search support. Current digital solutions are based on keyword-based retrieval, which is not as effective to obtain user intent in natural language queries. This paper introduces an AI-based chatbot that can overcome these constraints by proposing a hybrid Retrieval-Augmented Generation model to discover relevant government schemes. The system proposed is a combination of lexical retrieval with BM25 and semantic similarity search with dense vector embeddings that should be stronger in structured and ambiguous queries. The retrieved candidates will be refined on a cross-encoder re-ranking model and then a rule-based personalization layer will be used to restrict schemes based on user specific characteristics in terms of age, gender, state and eligibility conditions. It works on a maintained database of about 3,500 government schemes, which are broken down into nearly 70,000 structured text segments, in order to provide fine-grained retrieval. A locally deployed language model is employed to guarantee reliability of responses, where the outputs are rigidly based on retrieved context to perform controlled generation. Experimental results have shown that the combination of retrieval and re-ranking pipeline is more effective at increasing the relevance of the recommendations especially in queries that are implicit in purpose or incomplete. The given solution is a useful and privacy-friendly way of increasing access to governmental services, particularly in the situations when connectivity is limited.},
  keywords = {Retrieval-Augmented Generation, Semantic Search, Cross-Encoder Re-ranking, Government Scheme Recommendation, Large Language Models, E-Governance},
}
Submit your research paper and those of your network (friends, colleagues, or peers) through your IPN account, and receive 800 INR for each paper that gets published.
Join Now. National Conference on Sustainable Engineering and Management - 2024. Last Date: 15th March 2024
Submit inquiry