Copyright © 2026. Authors retain the copyright of this article. This article is an open access article distributed under the Creative Commons Attribution License, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.
@article{191440,
author = {Lavish Tripathi and S. Palanivel and Harish Kumar},
title = {Beyond Next-Token Prediction: An Analysis of Advances in Transformer-Based Generative Models (GPT and Generative BERT Variants) for Efficient, Controllable, and Multimodal Generation},
journal = {International Journal of Innovative Research in Technology},
year = {2026},
volume = {12},
number = {8},
pages = {6207-6218},
issn = {2349-6002},
url = {https://ijirt.org/article?manuscript=191440},
abstract = {Transformer-based generative models, trained with next-token prediction objectives and extensive pretraining, now define the dominant paradigm in natural language generation. Although this approach has produced remarkable fluency and broad generalisation, current research is increasingly moving beyond next-token prediction toward models that offer more efficient, controllable, and multimodal generation. This paper presents a systematic review of secondary data on advances in transformer architectures, focusing on GPT-style autoregressive models and generative variants of BERT such as encoder-decoder transformers. The review synthesises empirical evidence from peer-reviewed articles published between 2015 and 2024 concerning architectural design, computational efficiency, controllability mechanisms, and multimodal integration. The analysis shows that design choices and training objectives are the most influential factors shaping generative behaviour: autoregressive models excel at open-ended generation and prompt-based flexibility, while generative variants of BERT perform better on conditional faithfulness and structural control. Efficiency innovations such as sparse attention and parameter-efficient adaptation are shown to alleviate computational constraints, though they introduce context-specific trade-offs in representational capacity. The results also indicate that controllability and multimodal competence arise from deliberate design and optimisation strategies rather than from scale alone. By combining these dimensions into a single analytical framework, the paper offers a clearer understanding of modern generative modelling and argues that evaluation paradigms must account for broader ethical and practical concerns.},
keywords = {Transformer models; generative language models; controllable text generation; efficient attention mechanisms; multimodal generation},
month = {January},
}
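To make the next-token prediction objective referenced in the abstract concrete, the sketch below shows the standard autoregressive training loss in PyTorch. It is illustrative only and not taken from the paper; the `next_token_loss` helper, the tensor shapes, and the vocabulary size are assumptions chosen for the example.

```python
# A minimal sketch of the next-token prediction objective (assumed
# setup, not the paper's implementation), using PyTorch.
import torch
import torch.nn.functional as F

def next_token_loss(logits: torch.Tensor, tokens: torch.Tensor) -> torch.Tensor:
    """Cross-entropy between predictions at positions 0..T-2 and tokens 1..T-1.

    logits: (batch, seq_len, vocab_size) from an autoregressive model.
    tokens: (batch, seq_len) integer token ids.
    """
    # Shift by one position: the logits at position t are scored
    # against the token that actually appears at position t+1.
    pred = logits[:, :-1, :].reshape(-1, logits.size(-1))
    target = tokens[:, 1:].reshape(-1)
    return F.cross_entropy(pred, target)

# Illustrative usage with random tensors standing in for a real model.
vocab, batch, seq = 100, 2, 16
logits = torch.randn(batch, seq, vocab)
tokens = torch.randint(0, vocab, (batch, seq))
loss = next_token_loss(logits, tokens)
```

The one-position shift is the essence of the objective: every position is trained to predict its successor, which is what the paper's "beyond next-token prediction" framing takes as its point of departure.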