% Journal article. Authors are joined with " and " (BibTeX's only name
% separator); acronyms and proper nouns in the title are brace-protected so
% sentence-casing styles keep their capitals; percent signs in the abstract
% are escaped so the field is safe to typeset.
@article{MBFD7F575,
  author   = {Park, Suhyun and Lee, Hyojin and Choi, Sung-Pil},
  title    = {Prompt Engineering for {Korean} {OCR} Error Correction and Text Damage Restoration},
  journal  = {Journal of KIISE, JOK},
  year     = {2025},
  issn     = {2383-630X},
  doi      = {10.5626/JOK.2025.52.11.940},
  keywords = {OCR post-processing, Korean OCR error correction, prompt engineering, LLM},
  abstract = {Optical Character Recognition (OCR) is a technology that converts text within images into machine-readable formats, making it essential in industries where document management is critical. However, the Korean language has a complex structure, featuring combined consonants and vowels, which can lead to low recognition accuracy. Improving this situation requires a vast dataset that encompasses all 11,172 complete Korean characters. Additionally, errors such as spacing and spelling mistakes, along with text distortion and damage, complicate post-processing with conventional spell-check models. To tackle these challenges, this paper proposes the use of a Large Language Model combined with Few-shot Learning and Prompt Engineering. Experimental results indicate that error correction accuracy improved by up to 18.18\% compared to basic prompts, while text restoration and spacing correction achieved performance improvements of 21.6\% and 17.26\%, respectively. These findings demonstrate that even with a limited number of examples, Korean OCR errors can be effectively corrected, and damaged text can be restored.},
}