% Journal article record for "Automatic Word Spacing Using Raw Corpus and a
% Morphological Analyzer" (Journal of KIISE, 2015).
% - The exported record had an empty doi field; it is omitted here until the
%   real DOI is confirmed (an empty field only produces warnings).
% - Percent signs in the abstract are escaped so the text is safe if a style
%   typesets it.
% - NOTE(review): the journal value ends with ", JOK", which looks like an
%   abbreviation appended by the exporter -- confirm before normalising.
@article{MAA9016ED,
  author   = {Shim, Kwangseob},
  title    = {Automatic Word Spacing Using Raw Corpus and a Morphological Analyzer},
  journal  = {Journal of KIISE, JOK},
  year     = {2015},
  issn     = {2383-630X},
  keywords = {automatic word spacing,morphological analysis,eojeol dictionary,sejong corpus},
  abstract = {This paper proposes a method for the automatic word spacing of unsegmented Korean sentences. In our method, eojeol monograms are used for word spacing as opposed to the syllable n-grams that have been used in previous studies. The use of a Korean morphological analyzer is limited to the correction of typical word spacing errors. Our method gives a 98.06\% syllable accuracy and a 94.15\% eojeol recall, when 10-fold cross-validated with the Sejong corpus, after filtering out non-hangul eojeols. The processing rate is 250K eojeols or 1.8 MB per second on a typical personal computer. Syllable accuracy and eojeol recall are related to the size of the eojeol dictionary, better performance is expected with a bigger corpus.},
}