@article{ME8C25C8F,
  title    = {{Sylender}: A Syllable-Enhanced {Transformer} Encoder Model Incorporating {Korean} Characteristics},
  author   = {Heo, Yumin and Heo, Jiwon and Choi, Minjun and Ko, Youngjoong},
  journal  = {Journal of KIISE, JOK},
  year     = {2025},
  issn     = {2383-630X},
  doi      = {10.5626/JOK.2025.52.10.860},
  keywords = {natural language processing, Korean language model, syllable, tokenizer},
  abstract = {While syllable-level tokenization better preserves grammatical and linguistic features, it is often less semantically informative, resulting in lower performance. This paper introduces Sylender, a model that enhances existing pretrained subword-based language models by incorporating syllable-level information. Sylender adds a syllable-level transformer module to each layer of the subword model, utilizing both subword and syllable embeddings. This parallel structure retains the benefits of subword representations while effectively integrating syllable-level information, thereby improving the model's ability to capture Korean linguistic characteristics. Experiments across multiple Korean NLP tasks demonstrate that Sylender outperforms strong baselines and even larger models, validating the effectiveness of combining subword and syllable-level representations tailored to the nuances of the Korean language.},
}