% ICBO 2018 conference papers (two entries). Citation keys are unchanged.
%
% Cleanup applied to the exported records:
%   - the "conference" entry type was replaced by its standard equivalent,
%     inproceedings;
%   - the citation key that the export had prepended to each title
%     ("ICBO_2018_4: ...", "ICBO_2018_58: ...") was removed from the title;
%   - month originally held the full date "08/06/2018"; it is now the
%     standard macro aug (assumes US-style MM/DD/YYYY, i.e. 6 August 2018
%     -- TODO confirm);
%   - a trailing space inside the second entry's url was removed;
%   - author names use the unambiguous "Last, First" form.
%
% NOTE(review): booktitle says "Biomedical Ontology" while series, publisher
% and organization say "Biological Ontology". Left as exported; verify the
% official name against the proceedings front matter and normalise.

@inproceedings{ICBO_2018_4,
  author       = {Beasley, Lucas and Manda, Prashanti},
  title        = {Comparison of Natural Language Processing Tools for Automatic {Gene Ontology} Annotation of Scientific Literature},
  booktitle    = {International Conference on Biomedical Ontology ({ICBO} 2018)},
  series       = {Proceedings of the International Conference on Biological Ontology (2018)},
  year         = {2018},
  month        = aug,
  publisher    = {International Conference on Biological Ontology},
  organization = {International Conference on Biological Ontology},
  url          = {http://ceur-ws.org/Vol-2285/ICBO_2018_paper_4.pdf},
  keywords     = {curation, gene ontology, natural language processing, semantic similarity, text mining},
  abstract     = {Manual curation of scientific literature for ontology-based knowledge representation has proven infeasible and unscalable to the large and growing volume of scientific literature. Automated annotation solutions that leverage text mining and Natural Language Processing (NLP) have been developed to ameliorate the problem of literature curation. These NLP approaches use parsing, syntactical, and lexical analysis of text to recognize and annotate pieces of text with ontology concepts. Here, we conduct a comparison of four state of the art NLP tools at the task of recognizing Gene Ontology concepts from biomedical literature using the Colorado Richly Annotated Full-Text (CRAFT) corpus as a gold standard reference. We demonstrate the use of semantic similarity metrics to compare NLP tool annotations to the gold standard.},
}

@inproceedings{ICBO_2018_58,
  author       = {Braun, Ian and Lawrence-Dill, Carolyn},
  title        = {Computational Classification of Phenologs Across Biological Diversity},
  booktitle    = {International Conference on Biomedical Ontology ({ICBO} 2018)},
  series       = {Proceedings of the International Conference on Biological Ontology (2018)},
  year         = {2018},
  month        = aug,
  publisher    = {International Conference on Biological Ontology},
  organization = {International Conference on Biological Ontology},
  url          = {http://ceur-ws.org/Vol-2285/ICBO_2018_paper_58.pdf},
  keywords     = {ontologies, phenologs, phenotypes, text mining},
  abstract     = {Phenotypic diversity analyses are the basis for research discoveries ranging from basic biology to applied research. Phenotypic analyses often benefit from the availability of large quantities of high-quality data in a standardized format. Image and spectral analyses have been shown to enable high-throughput, computational classification of a variety of phenotypes and traits. However, equivalent phenotypes expressed across individuals or groups that are not anatomically similar can pose a problem for such classification methods. In these cases, high-throughput, computational classification is still possible if the phenotypes are documented using standardized, language-based descriptions. Conversion of language-based phenotypes to computer-readable {\textquotedblleft}EQ{\textquotedblright} statements enables such large-scale analyses. EQ statements are composed of entities (e.g., leaf) and qualities (e.g., increased length) drawn from terms in ontologies. In this work, we present a method for automatically converting free-text descriptions of plant phenotypes to EQ statements using a machine learning approach. Random forest classifiers identify potential matches between phenotype descriptions and terms from a set of ontologies including GO (gene ontology), PO (plant ontology), and PATO (phenotype and trait ontology), among others. These candidate ontology terms are combined into candidate EQ statements, which are probabilistically evaluated with respect to a natural language parse of the phenotype description. Models and parameters in this method are trained using a dataset of plant phenotypes and curator-converted EQ statements from the Plant PhenomeNET project (Oellrich, Walls et al., 2015). Preliminary results comparing predicted and curated EQ statements are presented. Potential use across datasets to enable automated phenolog discovery are discussed.},
}