Copyright © 2026. The authors retain the copyright of this article. This article is an open access article distributed under the Creative Commons Attribution License, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.
@article{191959,
author = {Paras Kalariya and Yagnesh Shukla},
title = {Approaches to Aligning Large Language Models: A Comparative Review of Preference Optimization, Evaluation, and Value Measurement},
journal = {International Journal of Innovative Research in Technology},
year = {2026},
volume = {12},
number = {8},
pages = {8575-8583},
issn = {2349-6002},
url = {https://ijirt.org/article?manuscript=191959},
abstract = {Research on aligning large language models (LLMs) with human intentions, preferences, and values has expanded rapidly, spanning algorithmic training methods, evaluative frameworks, and normative analyses. This review synthesizes and comparatively analyzes a corpus of alignment literature that includes preference-based optimization, instruction following, dialogue-level assessment, automated evaluation, and value-oriented measurement frameworks. While preference-centered approaches assume that human judgments or rankings provide sufficient proxies for alignment objectives, value-focused studies argue that preferences may diverge from underlying normative commitments and require explicit conceptualization (Floridi & Sanders, 2020; Gabriel et al., 2023). Similarly, while reinforcement learning from human feedback and direct preference optimization emphasize empirical performance and scalability, evaluative research highlights persistent challenges related to reliability, generalization, and bias in both human and automated assessment (Belz et al., 2011; Bender et al., 2021). Through a thematic and comparative discussion, this review identifies recurring conceptual disagreements, methodological tensions, and incompatible assumptions across the literature, as well as unresolved questions concerning the definition, measurement, and scope of alignment. Rather than proposing new frameworks, the paper clarifies how existing approaches diverge in their assumptions about alignment targets, evaluative validity, and normative grounding, thereby delineating critical gaps that continue to shape the trajectory of alignment research.},
keywords = {Large language models, AI alignment, human preferences, value alignment, reinforcement learning from human feedback, preference optimization, instruction following, alignment evaluation, normative assumptions, ethical AI},
month = {January},
}
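For orientation on the abstract's contrast between reinforcement learning from human feedback and direct preference optimization: a minimal sketch of the standard DPO objective, stated here only as background and not drawn from the reviewed article, is

\[
\mathcal{L}_{\mathrm{DPO}}(\pi_\theta; \pi_{\mathrm{ref}}) = -\,\mathbb{E}_{(x,\, y_w,\, y_l) \sim \mathcal{D}} \left[ \log \sigma\!\left( \beta \log \frac{\pi_\theta(y_w \mid x)}{\pi_{\mathrm{ref}}(y_w \mid x)} - \beta \log \frac{\pi_\theta(y_l \mid x)}{\pi_{\mathrm{ref}}(y_l \mid x)} \right) \right]
\]

where $\pi_\theta$ is the policy being trained, $\pi_{\mathrm{ref}}$ is a frozen reference model, $(y_w, y_l)$ is a preferred/dispreferred response pair for prompt $x$, $\beta$ is a scaling parameter, and $\sigma$ is the logistic function. Unlike RLHF, this formulation optimizes preferences directly without fitting a separate reward model.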