% Jang and Chang (2015), Journal of KIISE: grid-index-based k-NN join query processing on MapReduce.
% NOTE(review): the exported record carried an empty doi field and no volume, number or pages;
% the empty field was dropped. Add the real DOI and the missing fields once verified.
% NOTE(review): "JOK" is presumably the journal's abbreviation; it is kept in shortjournal. Confirm.
@article{M556D96F3,
  author       = {Jang, Miyoung and Chang, Jae Woo},
  title        = {Grid-based Index Generation and k-nearest-neighbor Join Query-processing Algorithm using {MapReduce}},
  journal      = {Journal of {KIISE}},
  shortjournal = {JOK},
  year         = {2015},
  issn         = {2383-630X},
  keywords     = {distributed-data processing algorithm,MapReduce,k-NN join query-processing algorithm,grid index},
  abstract     = {MapReduce provides high levels of system scalability and fault tolerance for large-size data processing. A MapReduce-based k-nearest-neighbor(k-NN) join algorithm seeks to produce the k nearest-neighbors of each point of a dataset from another dataset. The algorithm has been considered important in bigdata analysis. However, the existing k-NN join query-processing algorithm suffers from a high index-construction cost that makes it unsuitable for the processing of bigdata. To solve the corresponding problems, we propose a new grid-based, k-NN join query-processing algorithm. Our algorithm retrieves only the neighboring data from a query cell and sends them to each MapReduce task, making it possible to improve the overhead data transmission and computation. Our performance analysis shows that our algorithm outperforms the existing scheme by up to seven-fold in terms of the query-processing time, while also achieving high extent of query-result accuracy.},
}