% Journal article entry, citation key M8F839768.
% - Author names are joined with " and " and written as "Last, First";
%   BibTeX does not accept commas as a separator between names.
% - No DOI is recorded for this entry; add a bare doi field (no resolver
%   prefix) once the identifier is known, rather than keeping an empty field.
% - NOTE(review): the journal value ends in ", JOK", which looks like an
%   abbreviation appended to the full journal name -- confirm and drop it
%   if the style should print the full name only.
@article{M8F839768,
  author   = {Lee, Wooin and Song, Gwangho and Shim, Kyuseok},
  title    = {Inverse Document Frequency-Based Word Embedding of Unseen Words for Question Answering Systems},
  journal  = {Journal of KIISE, JOK},
  year     = {2016},
  issn     = {2383-630X},
  keywords = {question answering system,natural language processing,machine learning,answer sentence selection,data mining,information retrieval},
  abstract = {Question answering system (QA system) is a system that finds an actual answer to the question posed by a user, whereas a typical search engine would only find the links to the relevant documents. Recent works related to the open domain QA systems are receiving much attention in the fields of natural language processing, artificial intelligence, and data mining. However, the prior works on QA systems simply replace all words that are not in the training data with a single token, even though such unseen words are likely to play crucial roles in differentiating the candidate answers from the actual answers. In this paper, we propose a method to compute vectors of such unseen words by taking into account the context in which the words have occurred. Next, we also propose a model which utilizes inverse document frequencies (IDF) to efficiently process unseen words by expanding the system's vocabulary. Finally, we validate that the proposed method and model improve the performance of a QA system through experiments.},
}