Copyright © 2026. Authors retain the copyright of this article. This article is an open access article distributed under the Creative Commons Attribution License, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.
% Journal article record for IJIRT manuscript 190755 (volume 12, issue 8, January 2026).
% NOTE(review): author names are kept exactly as exported. In "First Last" form the
% final token of each name is parsed as the surname (e.g. "H" or "V"), which is probably
% an initial -- confirm the intended family names and switch to "Last, First" form.
@article{190755,
  author   = {APEKSHA H H and PRAJWAL R KOLEKAR and VINUTH A D and Ranjan V},
  title    = {An Intelligent Video-Based Question Answering Framework Using Deep Multimodal Learning},
  journal  = {International Journal of Innovative Research in Technology},
  year     = {2026},
  month    = jan,
  volume   = {12},
  number   = {8},
  pages    = {4889--4896},
  issn     = {2349-6002},
  url      = {https://ijirt.org/article?manuscript=190755},
  abstract = {The rapid growth of video-based learning platforms has created a need for automated tools that can transform unstructured multimedia content into assessable educational resources. This paper presents an Artificial Intelligence--driven Video-Based Multiple Choice Question Answering (MCQ-QA) system that automatically generates quizzes from educational videos. The proposed framework accepts a YouTube video link as input and extracts the corresponding audio stream, which is transcribed into textual content using the Whisper speech-to-text model. The generated transcript is then processed by a generative AI model to create contextually relevant multiple-choice questions, along with answer options, correct responses, and explanatory feedback.
The complete workflow is implemented within a web-based application developed using Streamlit, offering an interactive interface for quiz participation and real-time feedback. Visualization of learner performance is enabled through graphical analysis, while automated report generation produces downloadable PDF summaries containing questions, answers, and assessment results. The proposed system significantly reduces the manual effort required for question paper preparation, enhances scalability of learning assessments, and supports personalized self-evaluation. Experimental validation demonstrates the system's effectiveness in converting video lectures into structured, high-quality assessment material. This work highlights the potential of AI-driven automation in enriching video-based education and intelligent learning analytics.},
  keywords = {Video-Based Question Answering; Automated Quiz Generation; Speech-to-Text; Generative Artificial Intelligence; Educational Technology; Learning Assessment; Natural Language Processing; AI in Education},
}
Submit your research paper and those of your network (friends, colleagues, or peers) through your IPN account, and receive 800 INR for each paper that gets published.
Join Now
National Conference on Sustainable Engineering and Management - 2024. Last Date: 15th March 2024
Submit inquiry