Copyright © 2025. Authors retain the copyright of this article. This article is an open-access article distributed under the Creative Commons Attribution License, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.
Cite This Article

@article{185669,
  author   = {Husain Mistry},
  title    = {Proximal Policy Optimization under Partial Observability},
  journal  = {International Journal of Innovative Research in Technology},
  year     = {2025},
  month    = {October},
  volume   = {12},
  number   = {5},
  pages    = {2258--2259},
  issn     = {2349-6002},
  url      = {https://ijirt.org/article?manuscript=185669},
  abstract = {We study a deterministic 2D grid navigation task with “hot–cold” sign feedback. The agent observes whether its last move decreased or increased the Manhattan distance to a hidden goal. The observation is non-Markov. A worst-case optimal strategy reaches the goal in at most D_0 + 6 steps, where D_0 is the start–goal Manhattan distance. We evaluate Proximal Policy Optimization with an LSTM backbone (Recurrent PPO). The learned policy approaches the theoretical bound on many episodes but shows gaps due to axis misidentification and turn dithering. The task provides a minimal, interpretable benchmark for reinforcement learning under partial observability.},
}
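The feedback rule in the abstract is easy to misread, so the following is a minimal Python sketch of the observation model only. Every name and parameter here (the class GridHotCold, the grid size, the terminal reward) is an illustrative assumption rather than the paper's implementation; the one element taken from the abstract is the observation itself, the sign of the change in Manhattan distance after each move.

    import random

    MOVES = [(0, 1), (0, -1), (1, 0), (-1, 0)]  # up, down, right, left

    class GridHotCold:
        """Hypothetical sketch of the hot-cold navigation task."""

        def __init__(self, size=9, seed=None):
            self.size = size
            self.rng = random.Random(seed)

        def reset(self):
            self.agent = (self.rng.randrange(self.size), self.rng.randrange(self.size))
            self.goal = self.agent
            while self.goal == self.agent:  # ensure goal differs from start
                self.goal = (self.rng.randrange(self.size), self.rng.randrange(self.size))
            self.dist = self._manhattan(self.agent, self.goal)
            return 0  # no feedback exists before the first move

        def step(self, action):
            dx, dy = MOVES[action]
            # Moves are clamped at the grid boundary.
            x = min(max(self.agent[0] + dx, 0), self.size - 1)
            y = min(max(self.agent[1] + dy, 0), self.size - 1)
            self.agent = (x, y)
            new_dist = self._manhattan(self.agent, self.goal)
            # Sign feedback only: +1 if the move got warmer (closer),
            # -1 if colder (farther), 0 if unchanged. The agent never
            # observes coordinates or the goal position.
            obs = 1 if new_dist < self.dist else (-1 if new_dist > self.dist else 0)
            self.dist = new_dist
            done = new_dist == 0
            reward = 1.0 if done else 0.0  # assumed sparse terminal reward
            return obs, reward, done

        def _manhattan(self, a, b):
            return abs(a[0] - b[0]) + abs(a[1] - b[1])

Because the observation reports only the effect of the previous action, it is non-Markov: a memoryless policy cannot tell which axis a warm or cold signal refers to, which is what motivates the recurrent (LSTM) policy evaluated in the paper.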