Copyright © 2026. Authors retain the copyright of this article. This article is an open access article distributed under the Creative Commons Attribution License, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.
@comment{
  Entry 175285: journal article in IJIRT, vol. 11, no. 11 (April 2025).
  NOTE(review): the exported record gave the first author in all lowercase
  ("suvetha devi"), which BibTeX would parse with "suvetha" as a von-particle.
  It is capitalised here; confirm against the article which part is the family
  name and whether the second author ("Ganesan") has given names or initials.
}
@article{175285,
  author   = {Suvetha Devi and Ganesan},
  title    = {Automated {PDF} Data Extraction and Retrieval Using {NLP} and {OCR}},
  journal  = {International Journal of Innovative Research in Technology},
  year     = {2025},
  month    = apr,
  volume   = {11},
  number   = {11},
  pages    = {2234--2238},
  issn     = {2349-6002},
  url      = {https://ijirt.org/article?manuscript=175285},
  abstract = {Automated document processing is becoming increasingly vital across industries for efficient information handling. This paper proposes a real-time PDF data extraction and retrieval system powered by Optical Character Recognition (OCR) and Natural Language Processing (NLP). It streamlines the extraction of key information from complex documents, minimizing manual effort and errors. By automating content interpretation and structuring, the system boosts productivity and accuracy. The goal of our research is to simplify document workflows and enhance access to critical information for all users.},
  keywords = {Natural Language Processing, Optical Character Recognition, PDF Extraction, Real-Time Processing, Document Automation, Information Retrieval, Text Analytics, AI-driven Systems},
}
Submit your research paper and those of your network (friends, colleagues, or peers) through your IPN account, and receive 800 INR for each paper that gets published.
Join Now. National Conference on Sustainable Engineering and Management - 2024. Last Date: 15th March 2024.
Submit inquiry