"""
Factory class for generating association sets based on a variety of handle types.
Currently only supports golr query
"""
import networkx as nx
import pathlib
import logging
import os
import subprocess
import hashlib
from ontobio.golr.golr_associations import bulk_fetch
from ontobio.assocmodel import AssociationSet, AssociationSetMetadata
from ontobio.io.hpoaparser import HpoaParser
from ontobio.io.gpadparser import GpadParser
from ontobio.io.gafparser import GafParser
from ontobio.util.user_agent import get_user_agent
from collections import defaultdict
import json
logger = logging.getLogger(__name__)
[docs]class AssociationSetFactory():
"""
Factory for creating AssociationSets
Currently support for golr (GO and Monarch) is provided but other stores possible
"""
def __init__(self):
"""
initializes based on an ontology name
"""
[docs] def create(self, ontology=None,subject_category=None,object_category=None,evidence=None,taxon=None,relation=None, file=None, fmt=None, skim=True):
"""
creates an AssociationSet
Currently, this uses an eager binding to a `ontobio.golr` instance. All compact associations for the particular combination
of parameters are fetched.
Arguments
---------
ontology: an `Ontology` object
subject_category: string representing category of subjects (e.g. gene, disease, variant)
object_category: string representing category of objects (e.g. function, phenotype, disease)
taxon: string holding NCBITaxon:nnnn ID
"""
meta = AssociationSetMetadata(subject_category=subject_category,
object_category=object_category,
taxon=taxon)
if file is not None:
return self.create_from_file(file=file,
fmt=fmt,
ontology=ontology,
meta=meta,
skim=skim)
logger.info("Fetching assocs from store")
assocs = bulk_fetch_cached(subject_category=subject_category,
object_category=object_category,
evidence=evidence,
taxon=taxon)
logger.info("Creating map for {} subjects".format(len(assocs)))
amap = {}
subject_label_map = {}
for a in assocs:
rel = a['relation']
subj = a['subject']
subject_label_map[subj] = a['subject_label']
amap[subj] = a['objects']
aset = AssociationSet(ontology=ontology,
meta=meta,
subject_label_map=subject_label_map,
association_map=amap)
return aset
[docs] def create_from_tuples(self, tuples, **args):
"""
Creates from a list of (subj,subj_name,obj) tuples
"""
amap = {}
subject_label_map = {}
for a in tuples:
subj = a[0]
subject_label_map[subj] = a[1]
if subj not in amap:
amap[subj] = []
amap[subj].append(a[2])
aset = AssociationSet(subject_label_map=subject_label_map, association_map=amap, **args)
return aset
[docs] def create_from_assocs(self, assocs, **args):
"""
Creates from a list of association objects
"""
assocs = [a.to_hash_assoc() for a in assocs]
print(json.dumps(assocs[0], indent=4))
amap = defaultdict(list)
subject_label_map = {}
for a in assocs:
subj = a['subject']
subj_id = subj['id']
subj_label = subj['label']
subject_label_map[subj_id] = subj_label
if not a['negated']:
amap[subj_id].append(a['object']['id'])
aset = AssociationSet(subject_label_map=subject_label_map, association_map=amap, **args)
aset.associations_by_subj = defaultdict(list)
aset.associations_by_subj_obj = defaultdict(list)
for a in assocs:
sub_id = a['subject']['id']
obj_id = a['object']['id']
aset.associations_by_subj[sub_id].append(a)
aset.associations_by_subj_obj[(sub_id,obj_id)].append(a)
return aset
[docs] def create_from_file(self, file=None, fmt='gaf', skim=True, **args):
"""
Creates from a file. If fmt is set to None then the file suffixes will
be used to choose a parser.
Arguments
---------
file : str or file
input file or filename
fmt : str
name of format e.g. gaf
"""
if fmt is not None and not fmt.startswith('.'):
fmt = '.{}'.format(fmt)
d = {
'.gaf' : GafParser,
'.gpad' : GpadParser,
'.hpoa' : HpoaParser,
}
if fmt is None:
filename = file if isinstance(file, str) else file.name
suffixes = pathlib.Path(filename).suffixes
iterator = (fn() for ext, fn in d.items() if ext in suffixes)
else:
iterator = (fn() for ext, fn in d.items() if ext == fmt)
try:
parser = next(iterator)
except StopIteration:
logger.error("Format not recognized: {}".format(fmt))
logger.info("Parsing {} with {}/{}".format(file, fmt, parser))
if skim:
results = parser.skim(file)
return self.create_from_tuples(results, **args)
else:
assocs = parser.parse(file, skipheader=True)
return self.create_from_assocs(assocs, **args)
[docs] def create_from_gaf(self, file, **args):
"""
Creates from a GAF file
"""
return self.create_from_file(file, fmt='gaf', **args)
[docs] def create_from_phenopacket(self, file):
"""
Creates from a phenopacket file
"""
pass
[docs] def create_from_simple_json(self, file):
"""
Creates from a simple json rendering
"""
pass
[docs] def create_from_remote_file(self, group, snapshot=True, **args):
"""
Creates from remote GAF
"""
import requests
url = "http://snapshot.geneontology.org/annotations/{}.gaf.gz".format(group)
r = requests.get(url, stream=True, headers={'User-Agent': get_user_agent(modules=[requests], caller_name=__name__)})
p = GafParser()
results = p.skim(r.raw)
return self.create_from_tuples(results, **args)
def bulk_fetch_cached(**args):
logger.info("Fetching assocs from store (will be cached)")
return bulk_fetch(**args)