Source code for ontobio.ontol_factory
"""
Factory class for generating ontology objects based on a variety of handle types.
See :ref:`inputs` on readthedocs for more details
"""
import ontobio.obograph_util as obograph_util
from ontobio.ontol import Ontology
from ontobio.sparql.sparql_ontology import EagerRemoteSparqlOntology
import os
import subprocess
import hashlib
import logging
logger = logging.getLogger(__name__)
# TODO
default_ontology_handle = 'cache/ontologies/pato.json'
#if not os.path.isfile(ontology_handle):
# ontology_handle = None
global default_ontology
default_ontology = None
[docs]class OntologyFactory():
"""Implements a factory for generating :class:`Ontology` objects.
You should use a factory object rather than initializing
`Ontology` directly. See :ref:`inputs` for more details.
"""
# class variable - reuse the same object throughout
test = 0
def __init__(self, handle=None):
"""
initializes based on an ontology name
Arguments
---------
handle : str
see `create`
"""
self.handle = handle
[docs] def create(self, handle=None, handle_type=None, **args):
"""
Creates an ontology based on a handle
Handle is one of the following
- `FILENAME.json` : creates an ontology from an obographs json file
- `obo:ONTID` : E.g. obo:pato - creates an ontology from obolibrary PURL (requires owltools)
- `ONTID` : E.g. 'pato' - creates an ontology from a remote SPARQL query
Arguments
---------
handle : str
specifies how to retrieve the ontology info
"""
if handle is None:
self.test = self.test+1
logger.info("T: "+str(self.test))
global default_ontology
if default_ontology is None:
logger.info("Creating new instance of default ontology")
default_ontology = create_ontology(default_ontology_handle, **args)
logger.info("Using default_ontology")
return default_ontology
return create_ontology(handle, **args)
def create_ontology(handle=None, **args):
ont = None
logger.info("Determining strategy to load '{}' into memory...".format(handle))
if handle.find("+") > -1:
handles = handle.split("+")
onts = [create_ontology(ont) for ont in handles]
ont = onts.pop()
ont.merge(onts)
return ont
# TODO: consider replacing with plugin architecture
if handle.find(".") > 0 and os.path.isfile(handle):
logger.info("Fetching obograph-json file from filesystem")
ont = translate_file_to_ontology(handle, **args)
elif handle.startswith("obo:"):
logger.info("Fetching from OBO PURL")
if handle.find(".") == -1:
if handle == 'chebi' or handle == 'ncbitaxon' or handle == 'pr':
handle += '.obo'
logger.info("using obo for large ontology: {}".format(handle))
else:
handle += '.owl'
fn = '/tmp/'+handle
if not os.path.isfile(fn):
url = handle.replace("obo:","http://purl.obolibrary.org/obo/")
cmd = ['owltools',url,'-o','-f','json',fn]
cp = subprocess.run(cmd, check=True)
logger.info(cp)
else:
logger.info("using cached file: "+fn)
g = obograph_util.convert_json_file(fn)
ont = Ontology(handle=handle, payload=g)
elif handle.startswith("wdq:"):
from ontobio.sparql.wikidata_ontology import EagerWikidataOntology
logger.info("Fetching from Wikidata")
ont = EagerWikidataOntology(handle=handle)
elif handle.startswith("skos:"):
fn = handle.replace('skos:','')
from ontobio.sparql.skos import Skos
logger.info("Fetching from Skos file")
skos = Skos()
ont = skos.process_file(fn)
elif handle.startswith("scigraph:"):
from ontobio.neo.scigraph_ontology import RemoteScigraphOntology
logger.info("Fetching from SciGraph")
ont = RemoteScigraphOntology(handle=handle)
elif handle.startswith("http:"):
logger.info("Fetching from Web PURL: "+handle)
encoded = hashlib.sha256(handle.encode()).hexdigest()
#encoded = binascii.hexlify(bytes(handle, 'utf-8'))
#base64.b64encode(bytes(handle, 'utf-8'))
logger.info(" encoded: "+str(encoded))
fn = '/tmp/'+encoded
if not os.path.isfile(fn):
cmd = ['owltools',handle,'-o','-f','json',fn]
cp = subprocess.run(cmd, check=True)
logger.info(cp)
else:
logger.info("using cached file: "+fn)
g = obograph_util.convert_json_file(fn)
ont = Ontology(handle=handle, payload=g)
else:
logger.info("Fetching from SPARQL")
ont = EagerRemoteSparqlOntology(handle=handle)
#g = get_digraph(handle, None, True)
return ont
def create_ontology_from_obograph(og):
ont = None
g = obograph_util.convert_json_object(og)
ont = Ontology(handle=None, payload=g)
return ont
def translate_file_to_ontology(handle, **args):
if handle.endswith(".json"):
g = obograph_util.convert_json_file(handle, **args)
return Ontology(handle=handle, payload=g)
elif handle.endswith(".ttl"):
from ontobio.sparql.rdf2nx import RdfMapper
logger.info("RdfMapper: {}".format(args))
m = RdfMapper(**args)
return m.convert(handle,'ttl')
else:
if not (handle.endswith(".obo") or handle.endswith(".owl")):
logger.info("Attempting to parse non obo or owl file with owltools: "+handle)
encoded = get_checksum(handle)
logger.info(" encoded: "+str(encoded))
fn = '/tmp/'+encoded
if not os.path.isfile(fn):
cmd = ['owltools',handle,'-o','-f','json',fn]
cp = subprocess.run(cmd, check=True)
logger.info(cp)
else:
logger.info("using cached file: "+fn)
g = obograph_util.convert_json_file(fn, **args)
return Ontology(handle=handle, payload=g)
def get_checksum(file):
"""
Get SHA256 hash from the contents of a given file
"""
with open(file, 'rb') as FH:
contents = FH.read()
return hashlib.sha256(contents).hexdigest()