% ICBO 2018 conference papers; both PDFs are hosted on ceur-ws.org under Vol-2285.
% Text outside an entry is ignored by BibTeX, so these notes never reach the output.
%
% Conventions used below:
%   - Citation keys are the submission ids from the original export and are kept
%     verbatim so existing \cite commands continue to resolve.
%   - The export carried the date as 08/06/2018 (read as MM/DD/YYYY) in the month
%     field; only the month survives, as the standard unquoted macro.
%   - The submission-id prefix was dropped from each title: it duplicates the key
%     and its bare underscores are not valid in LaTeX text mode.
%   - Braced words in titles are protected from style-driven lowercasing.

@inproceedings{ICBO_2018_35,
  author       = {Liu, Hao and Zheng, Ling and Perl, Yehoshua and Geller, James and Elhanan, Gai},
  title        = {Can a Convolutional Neural Network Support Auditing of {NCI} {Thesaurus} Neoplasm Concepts?},
  booktitle    = {International Conference on Biomedical Ontology (ICBO 2018)},
  series       = {Proceedings of the International Conference on Biological Ontology (2018)},
  year         = {2018},
  month        = aug,
  publisher    = {International Conference on Biological Ontology},
  organization = {International Conference on Biological Ontology},
  abstract     = {We present a Machine Learning methodology using a Convolutional Neural Network to perform a specific case of an ontology Quality Assurance, namely discovery of missing IS-A relationships for Neoplasm concepts in the National Cancer Institute Thesaurus (NCIt). The training step checking all ``uncles'' of a concept is computationally intensive. To shorten the time and to improve the accuracy, we define a restricted methodology to check only uncles that are similar to each current concept. The restricted technique yields higher classification recall (compared to the unrestricted one) when testing against known errors found by domain experts who manually reviewed Neoplasm concepts in a prior study. The results are encouraging and provide impetus for further improvements to our technique.},
  keywords     = {Abstraction Network, CNN, deep learning, machine learning, National Cancer Institute Thesaurus, Neoplasm Hierarchy, quality assurance},
  url          = {http://ceur-ws.org/Vol-2285/ICBO_2018_paper_35.pdf},
}

@inproceedings{ICBO_2018_7,
  author       = {Liu, Hao and Chen, Ling and Zheng, Ling and Perl, Yehoshua and Geller, James},
  title        = {A Quality Assurance Methodology for {ChEBI} Ontology Focusing on Uncommonly Modeled Concepts},
  booktitle    = {International Conference on Biomedical Ontology (ICBO 2018)},
  series       = {Proceedings of the International Conference on Biological Ontology (2018)},
  year         = {2018},
  month        = aug,
  publisher    = {International Conference on Biological Ontology},
  organization = {International Conference on Biological Ontology},
  abstract     = {The Chemical Entities of Biological Interest (ChEBI) ontology is an important knowledge source of chemical entities in a biological context. ChEBI is large and complex, making it almost impossible to be error-free, given the scarce resources for quality assurance (QA). We present a methodology to locate concepts in ChEBI with a high probability of being erroneous. An Abstraction Network, which provides a compact summarization of an ontology, supports the methodology. By investigating a sample of ChEBI concepts, we show that uncommonly modeled concepts residing in small units of the Abstraction Network of ChEBI are statistically significantly more likely to have errors than other concepts. The finding may guide ChEBI ontology curators to focus their limited QA resources on such concepts to achieve a better QA yield. Furthermore, this study, combined with previous work, contributes to progress in showing that this methodology can be applied to a whole family of similar ontologies.},
  keywords     = {ChEBI, chemical concept, chemical ontology, modeling error, quality assurance},
  url          = {http://ceur-ws.org/Vol-2285/ICBO_2018_paper_7.pdf},
}