Copyright © 2026 Authors retain the copyright of this article. This article is an open access article distributed under the Creative Commons Attribution License which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.
@article{177916,
  author        = {Purba, Mikhail Janli and Birhanu, Samrawit Guangul and Zadok, Nyam Kim and Asnake, Benyas Getachew and Raghuranath, Karthick K. M.},
  title         = {From {Text-to-Motion}: A {GAN} Based Journey into {Text-to-Video} Generation},
  journal       = {International Journal of Innovative Research in Technology},
  year          = {2025},
  month         = may,
  volume        = {11},
  number        = {12},
  pages         = {2557--2562},
  issn          = {2349-6002},
  url           = {https://ijirt.org/article?manuscript=177916},
  abstract      = {The generation of video content from textual descriptions represents a significant frontier in generative artificial intelligence. This paper presents an analysis of a Python implementation that leverages Generative Adversarial Networks (GANs) combined with pre-trained text encoders for text-to-video generation. We dissect the architectural design, including the use of 3D convolutional layers, conditional text embeddings, and simplified adaptations of the MoCoGAN framework. Key challenges such as semantic alignment, temporal consistency, and motion realism are discussed. Potential avenues for enhancement, including attention mechanisms, motion-content disentanglement, and diffusion models, are proposed. The insights provided highlight the opportunities and limitations of adversarial learning in video synthesis.},
  keywords      = {3D Convolutions, Generative Adversarial Networks, MoCoGAN, Text-to-Video Generation, Spatio-Temporal Modeling, Semantic Alignment},
  internal-note = {Names normalized to "Last, First" form; honorific "Dr." removed. Last author was exported as "Dr. Karthick Raghuranath.K.M" -- surname/initials split assumed to be "Raghuranath, Karthick K. M.", verify against the published paper.},
}
Submit your research paper and those of your network (friends, colleagues, or peers) through your IPN account, and receive 800 INR for each paper that gets published.
Join Now. National Conference on Sustainable Engineering and Management - 2024. Last Date: 15th March 2024.
Submit inquiry