Survey article on image description methods (Journal of KIISE, 2023).
Review notes (text outside an entry is ignored by BibTeX):
  - Authors are stored in "Last, First" form and joined with " and "; the
    exported record joined them with a comma, which BibTeX parses as ONE name.
  - The journal abbreviation "JOK" was removed from the journal name; the
    acronym KIISE is brace-protected so styles cannot recase it.
  - volume / number / pages are not present in the exported record. The DOI
    suffix (2023.50.3.210) presumably encodes volume 50, issue 3, first page
    210 -- TODO confirm against the publisher page before adding those fields.
@article{M85C859C2,
  author   = {Ok, Subin and Lee, Daeho},
  title    = {A Survey on Methods for Image Description},
  journal  = {Journal of {KIISE}},
  year     = {2023},
  issn     = {2383-630X},
  doi      = {10.5626/JOK.2023.50.3.210},
  keywords = {image description, object detection, computer vision, natural language processing, deep learning},
  abstract = {Image description, which has been receiving much attention with the development of deep learning, uses computer vision methods that identify the contents of images and natural language processing methods that represent descriptive sentences. Image description techniques are utilized in many applications including services for visually impaired people. In this paper, we summarize image description methods within three categories; template-based methods, visual/semantic similarity search-based methods, and deep learning-based methods, and compare their performances. Through performance comparison, we try to provide useful information by offering basic architectures, advantages, limitations, and performances of the models. We especially survey the deep learning-based methods in detail because the performances of these methods are significantly improved compared to other methods. Through this process, we aim to organize the overall contents of image description techniques. For the performance of each study, compare the METEOR and BLEU scores for the commonly used Flickr30K and MS COCO datasets, and if the results are not provided, check the test image and the sentences generated for it.},
}