Copyright © 2025. The authors retain the copyright of this article. This article is an open access article distributed under the Creative Commons Attribution License, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.
% Journal article record; the citation key is the manuscript number from the url field.
% NOTE(review): the exported record has an empty `year` field. It is kept empty
% here because the value cannot be determined from this file; fill it in from the
% publisher page before citing (an empty year triggers a BibTeX warning).
@article{159872,
  author   = {Valavandan, Ramamurthy and Gothandapani, Balakrishnan and Ramamurthy, Savitha and Subramanian, Jagathambal and Subramaian, Kanagalakshmi and Valavandan, Valavandan and Bharani and Dharani},
  title    = {Designing a metadata framework for bigdata models in {Cloudera} Data Lakes across {AWS}, {Azure}, and {GCP}},
  journal  = {International Journal of Innovative Research in Technology},
  year     = {},
  volume   = {9},
  number   = {12},
  pages    = {536--551},
  issn     = {2349-6002},
  url      = {https://ijirt.org/article?manuscript=159872},
  abstract = {This research presents an innovative metadata framework design for big data models in Cloudera Data Lakes across AWS, Azure, and GCP cloud platforms. The study focuses on migrating metadata using Data Vault data models, utilizing PySpark and SparkSQL for analysis. As big data environments grow in complexity, accurate metadata migration becomes crucial. This study explores best practices and automation tools for efficient metadata migration in large-scale environments. The research evaluates unique features of AWS, Azure, and GCP, including data storage, processing, security, and cost-effectiveness. It also assesses scalability and usability for managing big data in Cloudera Data Lakes with Data Vault data models. Findings show that AWS offers extensive services and tools, while Azure and GCP provide cost-effective options. AWS benefits from a large partner and developer network, aiding in managing big data in Cloudera Data Lakes with Data Vault models. This study provides innovative insights into metadata framework design and the capabilities of AWS, Azure, and GCP for big data management in Cloudera Data Lakes, aiding organizations in selecting the appropriate cloud platform.},
  keywords = {Cloud Platforms AWS Azure GCP, Big Data Environments, Cloudera, Metadata Migration, PySpark, SparkSQL},
}
Cite This Article
Submit your research paper and those of your network (friends, colleagues, or peers) through your IPN account, and receive 800 INR for each paper that gets published.
Join Now
National Conference on Sustainable Engineering and Management - 2024. Last Date: 15th March 2024
Submit inquiry