@article{M745A62DA,
  title    = "Conditional Knowledge Distillation for Model Specialization",
  journal  = "Journal of KIISE, JOK",
  year     = "2021",
  issn     = "2383-630X",
  doi      = "10.5626/JOK.2021.48.4.369",
  author   = "Hakbin Kim and Dong-Wan Choi",
  keywords = "knowledge distillation, model compression, model specialization, data-free machine learning",
  abstract = "Many recent works on model compression for neural networks are based on knowledge distillation (KD). However, since the basic goal of KD is to transfer the entire knowledge of a teacher model to a student model, standard KD may not make the best use of the model's capacity when a user wishes to classify only a small subset of classes. Moreover, KD requires the original dataset of the teacher model, which may not be fully available for various practical reasons, such as privacy issues. This paper therefore proposes conditional knowledge distillation (CKD), which distills only the specialized knowledge corresponding to a given subset of classes, as well as data-free CKD (DF-CKD), which does not require the original data. As a major extension, we devise Joint-CKD, which jointly performs DF-CKD and CKD with only a small additional dataset collected by the client. Our experimental results show that CKD and DF-CKD are superior to standard KD, and further confirm that the joint use of CKD and DF-CKD is effective at improving the overall accuracy of a specialized model."
}
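
As a quick illustration of the objective described in the abstract, the sketch below restricts a standard softened-logit KD loss to a user-chosen subset of classes, which is the core idea behind conditional distillation. This is a minimal PyTorch sketch under stated assumptions, not the paper's implementation: the name ckd_loss, the class_subset argument, and the temperature value are illustrative choices introduced here.

# Minimal sketch of a class-conditional KD loss (illustrative, not the paper's code).
import torch
import torch.nn.functional as F

def ckd_loss(student_logits, teacher_logits, class_subset, temperature=4.0):
    """KL divergence between teacher and student distributions, computed only
    over the logits of the classes listed in class_subset (1-D LongTensor)."""
    s = student_logits[:, class_subset] / temperature
    t = teacher_logits[:, class_subset] / temperature
    # Softmax is taken over the restricted class set only, so the student is
    # trained to match the teacher's relative preferences among those classes.
    return F.kl_div(F.log_softmax(s, dim=1),
                    F.softmax(t, dim=1),
                    reduction="batchmean") * temperature ** 2

# Usage example: distilling a 3-class specialist from a 100-class teacher.
if __name__ == "__main__":
    subset = torch.tensor([3, 17, 42])      # classes the client cares about
    student_logits = torch.randn(8, 100)    # batch of 8, full-class outputs
    teacher_logits = torch.randn(8, 100)
    print(ckd_loss(student_logits, teacher_logits, subset).item())
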