% ICBO 2018 paper 43 (DIVE). Conference paper entry; the citation key is kept
% as exported so existing citations continue to resolve. The month field uses
% the standard macro; the export originally carried the date 08/06/2018.
@inproceedings{ICBO_2018_43,
  author       = {Gupta, Amit and Xu, Weijia and Jaiswal, Pankaj and Taylor, Crispin and Regala, Jennifer},
  title        = {{ICBO\_2018\_43}: {Domain Informational Vocabulary Extraction} Experiences with Publication Pipeline Integration and Ontology Curation},
  booktitle    = {International Conference on Biomedical Ontology ({ICBO} 2018)},
  series       = {Proceedings of the International Conference on Biological Ontology (2018)},
  year         = {2018},
  month        = aug,
  publisher    = {International Conference on Biological Ontology},
  organization = {International Conference on Biological Ontology},
  url          = {http://ceur-ws.org/Vol-2285/ICBO_2018_paper_43.pdf},
  keywords     = {big data, Cyberinfrastructure, DIVE, machine learning, Ontology},
  abstract     = {We will present updates on an ongoing project DIVE (Domain Informational Vocabulary Extraction), a system designed for extracting domain information from scientific publications. DIVE implements an ensemble of text mining methods for biological entity extraction from article text. DIVE also attempts use the co-occurrence patterns of these entities to establish probable relationships between them. DIVE also features an improved web interface for expert user curation of extracted information, thereby providing a means for a constantly growing and expert curated body of domain information for an article corpus. We also discuss our experiences from successful integration of DIVE with the publishing pipeline for two prominent Plant Biology Journals (The Plant Cell and Plant Physiology) from ASPB (American Society of Plant Biologists). The extracted results are embedded at the end of the final proof of the published article to enhance its accessibility and discoverability. Furthermore, DIVE tracks expert user curation actions on its web interface for future training and improvement of the entity detection algorithm.},
}