Copyright © 2026. Authors retain the copyright of this article. This article is an open access article distributed under the Creative Commons Attribution License, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.
@comment{Journal article record. The citation key is the manuscript id that also appears in the url field; it is kept unchanged so existing citations still resolve. Author given names are stored as initials because the full names are not available in this record.}
@article{195590,
  author   = {M. Sai Pranav and K. Gnaneswararao and J. Gnaneswar Sai and N. Venkata Puneeth},
  title    = {Optical Character Recognition for Multiple {Indian} Languages: Detection and Translation},
  journal  = {International Journal of Innovative Research in Technology},
  year     = {2026},
  month    = apr,
  volume   = {12},
  number   = {11},
  pages    = {932--938},
  issn     = {2349-6002},
  url      = {https://ijirt.org/article?manuscript=195590},
  abstract = {Optical Character Recognition (OCR) plays a vital role in converting printed and handwritten text into digital form for easy storage and processing. In India, where many languages are used together, most existing OCR systems work only with one or two languages and need manual selection for translation. This creates problems when documents contain text from different Indian languages. This project develops a complete multilingual OCR system that supports six Indian languages- English, Hindi, Telugu, Malayalam, Kannada, and Tamil. The system automatically extracts text from scanned documents or images, detects the language, and translates the text into any of the six chosen languages. All results are saved in CSV format for easy use. Image preprocessing steps such as noise removal, normalization, and segmentation are applied first. Deep learning models then recognize characters from different scripts. The system was tested on printed text images from all six languages. Results show an average text recognition accuracy of 89% and language detection accuracy of 95%. Translation worked smoothly in every target language.
The novelty lies in combining OCR, automatic language detection, and translation into one simple pipeline with CSV output. This makes the system useful for document digitization, education, tourism, and government work in multilingual areas. The project gives a strong base for future language-independent applications.},
  keywords = {Optical character recognition, multilingual text processing, language detection, machine translation, document digitization, computer vision},
}
Submit your research paper and those of your network (friends, colleagues, or peers) through your IPN account, and receive 800 INR for each paper that gets published.
Join Now
National Conference on Sustainable Engineering and Management - 2024. Last Date: 15th March 2024
Submit inquiry