@conference {ICBO_2018_11, title = {ICBO_2018_11: Planteome \& BisQue: Automating Image Annotation with Ontologies using Deep-Learning Networks}, booktitle = {International Conference on Biomedical Ontology (ICBO 2018)}, series = {Proceedings of the International Conference on Biological Ontology (2018)}, year = {2018}, month = {08/06/2018}, publisher = {International Conference on Biological Ontology}, organization = {International Conference on Biological Ontology}, abstract = {

The field of computer vision has recently experienced tremendous progress due to advances in deep learning. This development holds particular promise for plant research, given the rapid growth in the scale of image data harvesting and strong field-driven interest in automated processing of observable phenotypes and visible traits in agronomically important species. Parallel developments have occurred in semantic computing; for example, new ontologies have been initiated to capture plant traits and disease indicators. Combined with existing segmentation capabilities, these developments make it possible to conceive of software applications that let researchers analyze large quantities of plant phenotype image data and auto-annotate those data with meaningful, computable semantic terminology. We have previously reported on a software application that integrated segmentation and ontologies but lacked the ability to manage very high-resolution images and a database platform able to meet high-volume storage requirements. We have also previously reported our migration of the AISO user-guided segmentation feature to a BisQue (Bio-Image Semantic Query User Environment) module to take advantage of its increased power, ability to scale, secure data-management environment, and collaborative software ecosystem. Neither AISO nor our initial BisQue implementation possessed a machine-learning component for interpreting (parts of) images. Plant researchers could benefit greatly from a trained classification model that predicts image annotations with a high degree of accuracy. We have therefore implemented two deep-learning prototypes: a coarse classification module for plant object identification (e.g., flower, fruit) and a fine-grained classification module that focuses on plant traits (e.g., reticulate vs. parallel venation, tip shape). Both classification models return results mapped to ontology terms as a form of annotation enrichment. The current version of the Planteome Deep Segmenter module combines image classification with optional guided segmentation and ontology annotation. We have most recently run the module on local Planteome BisQue client services, and we are working with CyVerse to install a hosted version on their BisQue client service.

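A minimal sketch of the annotation-enrichment idea described above, mapping a classifier's predicted class index to a Plant Ontology term; the architecture, class list, and term IDs are illustrative assumptions, not the Planteome Deep Segmenter implementation.

# Minimal sketch: a tiny CNN classifier whose predictions are mapped to
# Plant Ontology (PO) terms, illustrating ontology-based annotation enrichment.
# Architecture, class list, and term mapping are illustrative assumptions.
import torch
import torch.nn as nn

# Hypothetical coarse classes and their PO term IDs (for illustration only).
CLASS_TO_PO_TERM = {
    0: ("flower", "PO:0009046"),
    1: ("fruit", "PO:0009001"),
    2: ("leaf", "PO:0025034"),
}

class CoarsePlantClassifier(nn.Module):
    def __init__(self, num_classes=len(CLASS_TO_PO_TERM)):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, padding=1), nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(16, 32, kernel_size=3, padding=1), nn.ReLU(),
            nn.AdaptiveAvgPool2d(1),
        )
        self.classifier = nn.Linear(32, num_classes)

    def forward(self, x):
        return self.classifier(self.features(x).flatten(1))

def annotate(image_batch, model):
    """Return (label, ontology term) annotations for a batch of images."""
    with torch.no_grad():
        predictions = model(image_batch).argmax(dim=1)
    return [CLASS_TO_PO_TERM[int(p)] for p in predictions]

if __name__ == "__main__":
    model = CoarsePlantClassifier().eval()
    fake_images = torch.randn(2, 3, 224, 224)   # stand-in for real image data
    print(annotate(fake_images, model))
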
}, keywords = {annotation, convolutional neural networks, deep learning, image analysis, machine learning, Ontology, segmentation}, url = {http://ceur-ws.org/Vol-2285/ICBO_2018_paper_11.pdf}, author = {Dimitrios Trigkakis and Justin Preece and Austin Meier and Justin Elser and Kris Kvilekval and Dmitry Fedorov and B.S. Manjunath and Pankaj Jaiswal and Sinisa Todorovic} } @conference {ICBO_2018_18, title = {ICBO_2018_18: Taking a Dive: Experiments in Deep Learning for Automatic Ontology-based Annotation of Scientific Literature}, booktitle = {International Conference on Biomedical Ontology (ICBO 2018)}, series = {Proceedings of the International Conference on Biological Ontology (2018)}, year = {2018}, month = {08/06/2018}, publisher = {International Conference on Biological Ontology}, organization = {International Conference on Biological Ontology}, abstract = {

Text mining approaches for automated ontology-based curation of biological and biomedical literature have largely focused on syntactic and lexical analysis along with machine learning. Recent advances in deep learning have shown increased accuracy for textual data annotation. However, the application of deep learning to ontology-based curation is a relatively new area, and prior work has focused on a limited set of models. Here, we introduce a new deep learning architecture that combines multiple Gated Recurrent Units (GRUs) with character+word input. We use data from five ontologies in the CRAFT corpus as a Gold Standard to evaluate our model{\textquoteright}s performance, and we compare our model to seven models from prior work. We use four metrics {\textendash} Precision, Recall, F1 score, and Jaccard similarity (a semantic similarity metric) {\textendash} to compare our model{\textquoteright}s output to the Gold Standard. Our model achieved 84\% Precision, 84\% Recall, 83\% F1, and 84\% Jaccard similarity. Results show that our GRU-based model outperforms prior models across all five ontologies. We also observed that character+word inputs result in higher performance across models than word-only inputs. These findings indicate that deep learning algorithms are a promising avenue for automated ontology-based curation of data. This study also serves as a formal comparison and guideline for building and selecting deep learning models and architectures for ontology-based curation.

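A rough sketch of a character+word GRU tagger of the kind described above, along with the set-based Jaccard similarity used as an evaluation metric; layer sizes, vocabularies, and the tagging head are assumptions for illustration, not the authors' exact architecture.

# Minimal sketch of a character+word GRU tagger for ontology-term annotation.
# Dimensions, vocabularies, and the output layer are illustrative assumptions.
import torch
import torch.nn as nn

class CharWordGRUTagger(nn.Module):
    def __init__(self, word_vocab, char_vocab, num_labels,
                 word_dim=100, char_dim=25, char_hidden=25, hidden=100):
        super().__init__()
        self.word_emb = nn.Embedding(word_vocab, word_dim)
        self.char_emb = nn.Embedding(char_vocab, char_dim)
        # Character-level GRU builds a per-word character representation.
        self.char_gru = nn.GRU(char_dim, char_hidden, batch_first=True,
                               bidirectional=True)
        # Word-level bidirectional GRU over [word embedding ; char features].
        self.word_gru = nn.GRU(word_dim + 2 * char_hidden, hidden,
                               batch_first=True, bidirectional=True)
        self.out = nn.Linear(2 * hidden, num_labels)

    def forward(self, word_ids, char_ids):
        # word_ids: (batch, seq_len); char_ids: (batch, seq_len, max_word_len)
        b, s, c = char_ids.shape
        char_vecs = self.char_emb(char_ids.view(b * s, c))
        _, h = self.char_gru(char_vecs)               # h: (2, b*s, char_hidden)
        char_feats = h.transpose(0, 1).reshape(b, s, -1)
        words = torch.cat([self.word_emb(word_ids), char_feats], dim=-1)
        states, _ = self.word_gru(words)
        return self.out(states)                       # per-token label scores

def jaccard(predicted, gold):
    """Set-based Jaccard similarity between predicted and gold term sets."""
    predicted, gold = set(predicted), set(gold)
    return len(predicted & gold) / len(predicted | gold) if predicted | gold else 1.0
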
}, keywords = {automated curation, deep learning, named entity recognition, natural language processing, Ontology}, url = {http://ceur-ws.org/Vol-2285/ICBO_2018_paper_18.pdf}, author = {Prashanti Manda and Lucas Beasley and Somya Mohanty} } @conference {ICBO_2018_35, title = {ICBO_2018_35: Can a Convolutional Neural Network Support Auditing of NCI Thesaurus Neoplasm Concepts?}, booktitle = {International Conference on Biomedical Ontology (ICBO 2018)}, series = {Proceedings of the International Conference on Biological Ontology (2018)}, year = {2018}, month = {08/06/2018}, publisher = {International Conference on Biological Ontology}, organization = {International Conference on Biological Ontology}, abstract = {

We present a Machine Learning methodology that uses a Convolutional Neural Network to perform a specific case of ontology Quality Assurance, namely the discovery of missing IS-A relationships for Neoplasm concepts in the National Cancer Institute Thesaurus (NCIt). The training step, which checks all {\textquotedblleft}uncles{\textquotedblright} of a concept, is computationally intensive. To shorten the run time and improve accuracy, we define a restricted methodology that checks only those uncles that are similar to the concept under review. The restricted technique yields higher classification recall than the unrestricted one when tested against known errors found by domain experts who manually reviewed Neoplasm concepts in a prior study. The results are encouraging and provide impetus for further improvements to our technique.

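A minimal sketch of what the restricted candidate-generation step described above might look like: for each concept, only uncles (siblings of its parents) that are lexically similar to it are forwarded to the downstream classifier. The similarity measure and threshold are assumptions, and the CNN classification step itself is omitted.

# Minimal sketch of restricted candidate generation for missing IS-A auditing:
# for each concept, only "uncles" (siblings of its parents) that are lexically
# similar to the concept are passed to the downstream classifier.
# The similarity measure and threshold are illustrative assumptions.
def uncles(concept, parents_of, children_of):
    """Siblings of a concept's parents, excluding the parents themselves."""
    result = set()
    for parent in parents_of.get(concept, set()):
        for grandparent in parents_of.get(parent, set()):
            result |= children_of.get(grandparent, set())
    return result - parents_of.get(concept, set()) - {concept}

def name_similarity(a, b):
    """Word-level Jaccard similarity of two concept names (an assumption)."""
    wa, wb = set(a.lower().split()), set(b.lower().split())
    return len(wa & wb) / len(wa | wb) if wa | wb else 0.0

def restricted_candidates(concept, names, parents_of, children_of, threshold=0.5):
    """(concept, uncle) pairs to check for a possibly missing IS-A link."""
    return [(concept, u) for u in uncles(concept, parents_of, children_of)
            if name_similarity(names[concept], names[u]) >= threshold]
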
}, keywords = {Abstraction Network, CNN, deep learning, machine learning, National Cancer Institute Thesaurus, Neoplasm Hierarchy, quality assurance}, url = {http://ceur-ws.org/Vol-2285/ICBO_2018_paper_35.pdf}, author = {Hao Liu and Ling Zheng and Yehoshua Perl and James Geller and Gai Elhanan} } @conference {ICBO_2018_70, title = {ICBO_2018_70: Improving Convergence Rates of Deep Learning for Very Small Image Training Sets}, booktitle = {International Conference on Biomedical Ontology (ICBO 2018)}, series = {Proceedings of the International Conference on Biological Ontology (2018)}, year = {2018}, month = {08/06/2018}, publisher = {International Conference on Biological Ontology}, organization = {International Conference on Biological Ontology}, abstract = {Typical visualization tasks in the domain of big image datasets include image retrieval, clustering, and segmentation. Recently, there has been tremendous progress in solving these tasks using deep convolutional neural networks (CNNs). CNNs typically require large training sets of manually annotated images for deep learning, which, however, are often impossible to provide in many applications, including image-based biological and medical research. This talk addresses one way to tackle this critical issue {\textendash} a new CNN learning approach, based on second-order methods, aimed at improving: a) convergence rates of existing gradient-based methods, and b) robustness to the choice of learning hyper-parameters (e.g., learning rate). Our approach simultaneously computes both gradients and second derivatives of the CNN{\textquoteright}s learning objective, and performs second-order back-propagation. In comparison with standard gradient-based deep learning, our evaluation demonstrates that we achieve faster convergence rates and converge to better optima, leading to better performance under a budgeted time for learning.}, keywords = {computer vision, deep learning, species classification}, url = {http://icbo2018.cgrb.oregonstate.edu/}, author = {Sinisa Todorovic} }
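As a toy illustration of the first-order versus second-order contrast in the final abstract above, the following compares plain gradient descent with a damped Newton step on a small logistic-regression problem. It is a generic example, not the talk's CNN-specific method; the data, step size, and damping are arbitrary.

# Toy illustration of first- vs second-order optimization on logistic regression:
# the second-order update uses the Hessian in addition to the gradient.
# This is a generic example, not the CNN-specific method described in the talk.
import numpy as np

rng = np.random.default_rng(0)
X = rng.normal(size=(200, 5))
true_w = rng.normal(size=5)
y = (X @ true_w + 0.1 * rng.normal(size=200) > 0).astype(float)

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def loss(w):
    p = sigmoid(X @ w)
    return -np.mean(y * np.log(p + 1e-12) + (1 - y) * np.log(1 - p + 1e-12))

def gradient(w):
    return X.T @ (sigmoid(X @ w) - y) / len(y)

def hessian(w):
    p = sigmoid(X @ w)
    return (X.T * (p * (1 - p))) @ X / len(y)

w_gd = np.zeros(5)
w_newton = np.zeros(5)
for step in range(10):
    w_gd -= 0.5 * gradient(w_gd)                              # first-order step
    w_newton -= np.linalg.solve(hessian(w_newton) + 1e-6 * np.eye(5),
                                gradient(w_newton))           # second-order step
print(f"gradient descent loss: {loss(w_gd):.4f}")
print(f"Newton loss:           {loss(w_newton):.4f}")

Solving a linear system with the full Hessian is feasible here only because the parameter vector is tiny; scaling second-order information to CNN-sized objectives is the challenge the talk describes.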