% ICBO 2018 conference-paper entries (citation keys kept as exported).
% The exporter wrote a full date into the month field; the month is now the
% standard macro and the raw exported value is preserved in internal-note.

@inproceedings{ICBO_2018_58,
  author        = {Braun, Ian and Lawrence-Dill, Carolyn},
  title         = {{ICBO\_2018\_58}: Computational Classification of Phenologs Across Biological Diversity},
  booktitle     = {International Conference on Biomedical Ontology (ICBO 2018)},
  series        = {Proceedings of the International Conference on Biological Ontology (2018)},
  year          = {2018},
  month         = aug,
  publisher     = {International Conference on Biological Ontology},
  organization  = {International Conference on Biological Ontology},
  keywords      = {ontologies, phenologs, phenotypes, text mining},
  url           = {http://ceur-ws.org/Vol-2285/ICBO_2018_paper_58.pdf},
  abstract      = {Phenotypic diversity analyses are the basis for research discoveries ranging from basic biology to applied research. Phenotypic analyses often benefit from the availability of large quantities of high-quality data in a standardized format. Image and spectral analyses have been shown to enable high-throughput, computational classification of a variety of phenotypes and traits. However, equivalent phenotypes expressed across individuals or groups that are not anatomically similar can pose a problem for such classification methods. In these cases, high-throughput, computational classification is still possible if the phenotypes are documented using standardized, language-based descriptions. Conversion of language-based phenotypes to computer-readable {\textquotedblleft}EQ{\textquotedblright} statements enables such large-scale analyses. EQ statements are composed of entities (e.g., leaf) and qualities (e.g., increased length) drawn from terms in ontologies. In this work, we present a method for automatically converting free-text descriptions of plant phenotypes to EQ statements using a machine learning approach. Random forest classifiers identify potential matches between phenotype descriptions and terms from a set of ontologies including GO (gene ontology), PO (plant ontology), and PATO (phenotype and trait ontology), among others. These candidate ontology terms are combined into candidate EQ statements, which are probabilistically evaluated with respect to a natural language parse of the phenotype description. Models and parameters in this method are trained using a dataset of plant phenotypes and curator-converted EQ statements from the Plant PhenomeNET project (Oellrich, Walls et al., 2015). Preliminary results comparing predicted and curated EQ statements are presented. Potential use across datasets to enable automated phenolog discovery are discussed.},
  internal-note = {exported record had month = 08/06/2018},
}

@inproceedings{ICBO_2018_63,
  author        = {Wimalanathan, Kokulapalan and Andorf, Carson and Friedberg, Iddo and Lawrence-Dill, Carolyn},
  title         = {{ICBO\_2018\_63}: {GO-MAP} Implements {CAFA} Tools: Improved Automated Gene Function Annotation for Plants},
  booktitle     = {International Conference on Biomedical Ontology (ICBO 2018)},
  series        = {Proceedings of the International Conference on Biological Ontology (2018)},
  year          = {2018},
  month         = aug,
  publisher     = {International Conference on Biological Ontology},
  organization  = {International Conference on Biological Ontology},
  keywords      = {assessment, CAFA, function, gene ontology},
  url           = {http://icbo2018.cgrb.oregonstate.edu/},
  abstract      = {Maize is both a crop species and a model for genetics and genomics research. As such, maize GO annotations produced by the community data projects Gramene and Phytozome are widely used to derive hypotheses for both crop improvement and basic science. Our maize-GAMER project assessed existing maize GO annotations and to implement and test the performance of some of the most commonly used GO prediction tools (i.e., Reciprocal Best Hits and domain presence) alongside three of the top performing tools submitted for evaluation in the CAFA1 (Critical Assessment of protein Function Annotation) competition. All datasets were compared based on F-score using an independent gold-standard dataset (2002 GO annotations for 1,619 genes) provided by MaizeGDB. In addition to producing and comparing these individual GO annotation sets, we also combined the datasets we generated to produce a maize-GAMER aggregate annotation set. Compared to Gramene and Phytozome, the maize-GAMER aggregate set annotates more genes in the maize genome and assigns more GO terms per gene. In addition, the maize-GAMER dataset{\textquoteright}s functional assignments are comparable to Gramene and Phytozome overall (based on F-score). These findings have been published, and the maize-GAMER GO annotations are available via CyVerse and MaizeGDB.
Here we review the methods and describe GO-MAP, the pipeline used to generate these datasets. GO-MAP has been containerized to facilitate gene function annotation for other plant proteomes and will be released via CyVerse in the very near future.},
  internal-note = {exported record had month = 08/06/2018},
}