Copyright © 2026. Authors retain the copyright of this article. This article is an open access article distributed under the Creative Commons Attribution License, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.
@article{186345,
author = {Aarya Patil and Nitin Alzende and Anushka Deshmukh and Diptimayee Panda and Siddhika Bishnoi},
title = {SOUL-SYNC: Real-Time Facial Emotion Detection and Personalized Playlist Generation},
journal = {International Journal of Innovative Research in Technology},
year = {2025},
volume = {12},
number = {6},
pages = {1017-1020},
issn = {2349-6002},
url = {https://ijirt.org/article?manuscript=186345},
abstract = {This paper presents SOUL-SYNC, a real-time emotion-aware music application that detects a user's emotional state through facial expression analysis. Unlike existing systems that rely on multimodal inputs such as text or speech, SOUL-SYNC focuses exclusively on vision-based emotion recognition to ensure simplicity, reliability, and faster real-time processing. Using facial emotion recognition models, the system identifies emotions such as happiness, sadness, anger, and neutrality, and generates personalized playlists through the Spotify or YouTube APIs. Users can choose between two interaction modes: mood-matching, where the playlist aligns with their current emotional state, and mood-shifting, where the system recommends tracks to alter the mood progressively. The proposed system integrates convolutional neural networks (CNNs) for emotion detection with a hybrid music recommendation engine that combines content-based filtering, collaborative filtering, and rule-based emotional mapping. Experimental design and implementation strategies are discussed to demonstrate how SOUL-SYNC creates an emotionally adaptive, real-time music experience using facial cues alone.
This study addresses three critical gaps in emotion-aware music systems:
Latency & Robustness: Establishing a reliable, low-latency, real-time emotion detection system focused purely on vision to bypass the complexity and latency of multimodal inputs.
Affective State Control: Developing a mechanism to predictably and progressively shift the user's emotional state using music features (mood-shifting mode), moving beyond static mood-matching.
Monolithic Sensory Input: Proving that a deeply personalized and adaptive music experience can be achieved effectively by relying exclusively on facial cues, thus simplifying deployment and safeguarding user privacy. To explore these questions, we integrate specialized CNN-based Facial Emotion Recognition (FER) models with a hybrid recommendation engine, utilizing the comprehensive audio feature data from streaming service APIs to test the efficacy of both matching and transitional playlist generation.},
keywords = {affective computing, computer vision, facial emotion recognition, music recommendation, real-time personalization, Spotify API},
month = {November},
}
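The abstract describes a vision-only, real-time pipeline in which a CNN-based FER model classifies each frame into basic emotion categories. As a rough, hypothetical illustration of that pipeline (not the authors' implementation; the model file fer_cnn.h5, the 48x48 grayscale input, and the seven FER-2013-style classes are all assumptions), a Python sketch pairing OpenCV's stock Haar-cascade face detector with a generic Keras classifier might look like this:

```python
import cv2
import numpy as np
from tensorflow.keras.models import load_model

# Assumed 7-class FER CNN on 48x48 grayscale crops (FER-2013-style labels).
EMOTIONS = ["angry", "disgust", "fear", "happy", "sad", "surprise", "neutral"]
model = load_model("fer_cnn.h5")  # placeholder path, not from the paper
face_cascade = cv2.CascadeClassifier(
    cv2.data.haarcascades + "haarcascade_frontalface_default.xml")

cap = cv2.VideoCapture(0)  # default webcam
while True:
    ok, frame = cap.read()
    if not ok:
        break
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    for (x, y, w, h) in face_cascade.detectMultiScale(gray, 1.3, 5):
        # Normalize the face crop to the network's expected input shape.
        crop = cv2.resize(gray[y:y + h, x:x + w], (48, 48)) / 255.0
        probs = model.predict(crop.reshape(1, 48, 48, 1), verbose=0)[0]
        label = EMOTIONS[int(np.argmax(probs))]
        cv2.putText(frame, label, (x, y - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
    cv2.imshow("FER preview", frame)
    if cv2.waitKey(1) & 0xFF == ord("q"):  # press q to quit
        break
cap.release()
cv2.destroyAllWindows()
```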
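The mood-shifting mode is described as nudging the listener's state progressively rather than matching it. Below is a minimal sketch of one plausible rule-based emotional mapping, assuming Spotify-style valence/energy audio features; the target values and the linear interpolation schedule are illustrative assumptions, not figures from the paper:

```python
# Assumed mapping from detected emotion to audio-feature targets
# (valence = musical positivity, energy = intensity); values illustrative.
EMOTION_TARGETS = {
    "happy":   {"valence": 0.85, "energy": 0.75},
    "neutral": {"valence": 0.50, "energy": 0.50},
    "sad":     {"valence": 0.20, "energy": 0.30},
    "angry":   {"valence": 0.25, "energy": 0.85},
}

def playlist_targets(emotion, mode="match", steps=5, goal="happy"):
    """Return per-track (valence, energy) targets for one playlist.

    "match" keeps every track near the detected mood; "shift"
    interpolates from the current mood toward the goal mood, so
    consecutive tracks nudge the listener rather than jumping.
    """
    start, end = EMOTION_TARGETS[emotion], EMOTION_TARGETS[goal]
    if mode == "match":
        return [dict(start) for _ in range(steps)]
    return [
        {k: start[k] + (end[k] - start[k]) * i / (steps - 1) for k in start}
        for i in range(steps)
    ]

# Example: shift from sadness toward happiness over five tracks.
for target in playlist_targets("sad", mode="shift"):
    print(target)
```

Linear interpolation is only one scheduling choice; per the abstract, the hybrid engine additionally combines content-based and collaborative filtering when selecting the actual tracks.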