Journal article on configuration bug report prediction (Journal of KIISE, 2021).
Review notes: all fields are brace-delimited so that quote characters inside
the abstract cannot terminate a field early; authors are joined with " and "
in "Last, First" form; the journal abbreviation is kept in shortjournal
(used by biblatex, ignored by classic BibTeX styles).
@article{MC6FB36CD,
  author       = {Choi, Jeongwhan and Choi, Jiwon and Ryu, Duksan and Kim, Suntae},
  title        = {Improved Prediction for Configuration Bug Report Using Text Mining and Dimensionality Reduction},
  journal      = {Journal of {KIISE}},
  shortjournal = {JOK},
  year         = {2021},
  issn         = {2383-630X},
  doi          = {10.5626/JOK.2021.48.1.35},
  keywords     = {configuration bug report,linear discriminant analysis,dimensionality reduction,class imbalance,sampling},
  abstract     = {Configuration bugs are one of the main causes of software failure. Software organizations collect and manage bug reports using an issue tracking system. The bug assignor can spend excessive amounts of time identifying whether a bug is a configuration bug or not. Configuration bug prediction can help the bug assignor reduce classification efforts and aid decision making. In this paper, we propose an improved classification model using text mining and dimensionality reduction. This paper extracts 4,457 bug reports from six open-source software projects, trains a model to classify configuration bug reports, and evaluates prediction performance. The best performance method is obtained using the k-Nearest Neighbors model with the SMOTEENN sampling technique after extracting the feature with Bag of Words and then reducing the dimension of the feature using Linear Discriminant Analysis. The results show that ROC-AUC is 0.9812 and MCC is 0.942. This indicates better performance than Xia et al.'s method and solves the class imbalance problem of our previous study. By predicting these enhanced configuration bug reports, our proposed approach can provide the bug assignors with information they need to make informed decisions.},
}