Copyright © 2026 Authors retain the copyright of this article. This article is an open access article distributed under the Creative Commons Attribution License which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.
@article{145772,
author = {UPPALURU KIRAN KUMAR and pothineni swathi},
title = {Where you are is who you are: User identification by matching statistics},
journal = {International Journal of Innovative Research in Technology},
year = {},
volume = {4},
number = {11},
pages = {857-874},
issn = {2349-6002},
url = {https://ijirt.org/article?manuscript=145772},
abstract = {Most users of online services have unique behavioralor usage patterns. These behavioral patterns can be exploited to identify and track users by using only the observed patterns in the behavior. We study the task of identifying users from statistics of their behavioral patterns. Specifically, we focus on the setting in which we are given histograms of users’ data collected during two different experiments. We assume that, in the first dataset, the users’ identities are anonymized or hidden and that, in the second dataset, their identities are known. We study the task of identifying the users by matching the histograms of their data in the first dataset with the histograms from the second dataset. In recent works [1], [2] the optimal algorithm for this user identification task is introduced. In this paper, we evaluate the effectiveness of this method on three different types of datasets with up to 50,000 users, and in multiple scenarios. Using datasets such as call data records, web browsing histories, and GPS trajectories, we demonstrate that a large fraction of users can be easily identified given only histograms of their data; hence these histograms can act as users’ fingerprints. We also verify that simultaneous identification of users achieves better performance compared to one-by-one user identification. Furthermore, we show that using the optimal method for identification indeed gives higher identification accuracy than heuristics-based approaches in practical scenarios. The accuracy obtained under this optimal method can thus be used to quantify the maximum level of user identification that is possible in such settings. We show that the key factors affecting the accuracy of the optimal identification algorithm are the duration of the data collection, the number of users in the anonymized dataset, and the resolution of the dataset. We also analyze the effectiveness of k -anonymization in resisting user identification attacks on these datasets.},
keywords = {},
month = {},
}
Submit your research paper and those of your network (friends, colleagues, or peers) through your IPN account, and receive 800 INR for each paper that gets published.
Join NowNational Conference on Sustainable Engineering and Management - 2024 Last Date: 15th March 2024
Submit inquiry