Copyright © 2026. Authors retain the copyright of this article. This article is an open access article distributed under the Creative Commons Attribution License, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.
% Journal article record. The citation key is the publisher's manuscript
% number (the same value appears in the url field's manuscript parameter).
% Author names are stored in "First Last" order exactly as published; only the
% spacing of the initials in the last name was normalised so that it parses
% as given-name initials plus surname instead of one fused surname token.
% The month uses the predefined macro and pages uses the "--" range form so
% that the bibliography style controls their presentation.
@article{178915,
  author   = {Vancha Sumshaya and Pulipati Rakshitha and Shaik Mazhar and A. V. Siddhartha},
  title    = {Movie Genre Classification Using Machine Learning},
  journal  = {International Journal of Innovative Research in Technology},
  year     = {2025},
  month    = may,
  volume   = {11},
  number   = {12},
  pages    = {4489--4499},
  issn     = {2349-6002},
  url      = {https://ijirt.org/article?manuscript=178915},
  abstract = {The film industry relies on genre classification of films to create recommendation systems and content organization with specific target audiences. Conventional classification methods rely on manual tagging that reflects subjective bias and inefficient operation. Automated and scalable movie classification by machine learning relies on text metadata like plot descriptions, reviews, and cast information. The system preprocesses movie descriptions via TF IDF vectorization before model training such as Logistic Regression, Naïve Bayes, Random Forest, SVM. The system identifies the most precise model for prediction uses. The accuracy of 0.5836 was the highest for Logistic Regression when it classified 54,215 pre-specified IMDb datasets drawn from Kaggle- the proposed approach results in more accurate genre classification outcomes. Future studies should concentrate on applying deep learning techniques and larger datasets to enhance prediction accuracy. The system applies NLP methods such as Term Frequency Inverse Document Frequency (TF IDF), Word Embeddings and Transformer-based models like BERT to extract meaningful textual features. The suggested system obtains its data from publicly available movie descriptions sourced from IMDb and TMDb. This project applies automated genre classification using machine learning that provides increased accuracy and enhanced efficiency. The Proposed System employs machine learning to avoid manual tagging and uses a Multi-Model Approach to compare several classifiers rather than relying upon a single model and applying TF IDF vectorization for better text feature representation. The system contains Model Persistence functionality where trained models are saved for future application use. The system is more accurate since Logistic Regression and SVM work better than standard keyword-based classification.},
  keywords = {Logistic Regression, Naïve Bayes, Random Forest, SVM (Support Vector Machine), TF IDF vectorization},
}
Submit your research paper and those of your network (friends, colleagues, or peers) through your IPN account, and receive 800 INR for each paper that gets published.
Join Now
National Conference on Sustainable Engineering and Management - 2024. Last Date: 15th March 2024
Submit inquiry