Copyright © 2026. Authors retain the copyright of this article. This article is an open access article distributed under the Creative Commons Attribution License, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.
% IJIRT journal article; the citation key is the manuscript id that appears in the url field.
@article{176736,
  author   = {Wagh, Smita and Bahl, Hema Sachin and Sutar, Sakshi Annaso and Yerawar, Bhagyashri Prakash},
  title    = {Narrative Alchemy: Integrating Human Imagination and Generative {AI} for Dynamic Story Crafting},
  journal  = {International Journal of Innovative Research in Technology},
  year     = {2025},
  month    = apr,
  volume   = {11},
  number   = {11},
  pages    = {7654--7658},
  issn     = {2349-6002},
  url      = {https://ijirt.org/article?manuscript=176736},
  abstract = {Rapid advancements in AI have enabled the seamless generation of both text and images, paving the way for multimodal storytelling. However, challenges like maintaining coherence, ensuring text-image alignment, and preserving character consistency remain. Story Teller, a Multimodal Large Language Model (MLLM), addresses these issues by predicting both text and visual tokens. It uses a visual de-tokenizer to generate consistent images and a multimodal attention sink mechanism to extend story length beyond training limits. To support this, StoryStream, a high-resolution dataset, is introduced for training and evaluation. This work advances AI-driven storytelling by improving coherence and expanding story length.},
  keywords = {Detokenizer, Diffusion Model, MLLM, Story-Teller, SD-XL, Tokenizer, Vit},
}
Submit your research paper and those of your network (friends, colleagues, or peers) through your IPN account, and receive 800 INR for each paper that gets published.
Join Now
National Conference on Sustainable Engineering and Management - 2024. Last Date: 15th March 2024
Submit inquiry