Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Assaytools speedup #94

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
14 changes: 7 additions & 7 deletions AssayTools/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,9 @@

DG_min = np.log(1e-15) # kT, most favorable (negative) binding free energy possible; 1 fM
DG_max = +0 # kT, least favorable binding free energy possible
niter = 500000 # number of iterations
nburn = 50000 # number of burn-in iterations to discard
nthin = 500 # thinning interval
niter = 10000 # number of iterations
nburn = 0 # number of burn-in iterations to discard
nthin = 1 # thinning interval

volume_unit = Unit(1.0, 'liter')
concentration_unit = Unit(1.0, 'moles/liter')
Expand Down Expand Up @@ -778,16 +778,16 @@ def run_mcmc(self, dbfilename='output'):
"""

# DEBUG: Write model
print('Writing graph...')
pymc.graph.moral_graph(self.model, format='ps')
#print('Writing graph...')
#pymc.graph.moral_graph(self.model, format='ps')

# Sample the model with pymc
# TODO: Allow
mcmc = pymc.MCMC(self.model, db='sqlite', name='output', verbose=True)

nthin = 10
nburn = nthin*100
niter = nthin*100
nburn = nthin*10
niter = nthin*10

# Specify initial parameter standard deviations to apply to specific classes of parameters
keywords = {
Expand Down
102 changes: 101 additions & 1 deletion AssayTools/bindingmodels.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

import numpy as np
import copy
import time

import scipy.optimize
import scipy.integrate
Expand Down Expand Up @@ -38,6 +39,49 @@ class BindingModel(object):
def __init__(self):
pass


#=============================================================================================
# LRU cache that supports mutable args
#=============================================================================================

import typing
from collections import OrderedDict
from functools import lru_cache, wraps

def lru_cache2(size=128):

cache = OrderedDict()

def make_key(x):
if isinstance(x, typing.Mapping):
return (id(type(x)), *((k, make_key(v)) for k, v in sorted(x.items())))
elif isinstance(x, typing.Set):
return (id(type(x)), *map(make_key, sorted(x)))
elif isinstance(x, typing.Sequence):
return (id(type(x)), *map(make_key, x))
else:
try:
hash(x)
except TypeError:
return id(type(x)), str(x)
else:
return id(type(x)), x

def decorator(fn):
@wraps(fn)
def wrapped(*args, **kwargs):
key = make_key((id(fn), args, kwargs))
try:
return cache[key]
except KeyError:
res = fn(*args, **kwargs)
cache[key] = res
while len(cache) >= size:
cache.popitem(last=False)
return res
return wrapped
return decorator

#=============================================================================================
# Two-component binding model
#=============================================================================================
Expand All @@ -47,7 +91,6 @@ class TwoComponentBindingModel(BindingModel):
Simple two-component association.

"""

@classmethod
def equilibrium_concentrations(cls, DeltaG, Ptot, Ltot):
"""
Expand All @@ -72,6 +115,8 @@ def equilibrium_concentrations(cls, DeltaG, Ptot, Ltot):
Bound complex concentration, molarity.

"""
initial_time = time.time()

# Handle only strictly positive elements---all others are set to zero as constants
try:
nonzero_indices = np.where(Ltot > 0)[0]
Expand Down Expand Up @@ -118,6 +163,7 @@ def equilibrium_concentrations(cls, DeltaG, Ptot, Ltot):
# Compute remaining concentrations.
P = Ptot - PL; # free protein concentration in sample cell after n injections (M)
L = Ltot - PL; # free ligand concentration in sample cell after n injections (M)

return [P, L, PL]

#=============================================================================================
Expand All @@ -130,7 +176,11 @@ class GeneralBindingModel(BindingModel):

"""

_time = 0
_ncalls = 0

@classmethod
#@lru_cache2(size=128)
def equilibrium_concentrations(cls, reactions, conservation_equations, tol=1.0e-8):
"""
Compute the equilibrium concentrations of each complex species for a general set of binding reactions.
Expand Down Expand Up @@ -186,6 +236,49 @@ def equilibrium_concentrations(cls, reactions, conservation_equations, tol=1.0e-
all_species = list(all_species) # order is now fixed
nspecies = len(all_species)

# Construct function with appropriate roots.
# TODO: Meta-program efficient form of target function for speed?
# TODO: Form the logK, logQ, S, and C matrices in numpy outside of this function so that operations can be numpy-accelerated
# http://assaytools.readthedocs.io/en/latest/theory.html#general-binding-model
# Rewrite theory docs in these matrices as well.

# Form equations as numpy
logK = np.zeros([nreactions], np.float64)
S = np.zeros([nreactions, nspecies], np.float64)
C = np.zeros([nconservation, nspecies], np.float64)
logQ = np.zeros([nconservation], np.float64)
equation_index = 0
# Reactions
for (reaction_index, (log_equilibrium_constant, reaction)) in enumerate(reactions):
logK[reaction_index] = log_equilibrium_constant
for (species_index, species) in enumerate(all_species):
if species in reaction:
S[reaction_index, species_index] = reaction[species]
# Conservation of mass
for (equation_index, (log_total_concentration, conservation_equation)) in enumerate(conservation_equations):
logQ[equation_index] = log_total_concentration
for (species_index, species) in enumerate(all_species):
if species in conservation_equation:
C[equation_index, species_index] = conservation_equation[species]

# Define target function
from scipy.misc import logsumexp
def ftarget_numpy(logX):
target = np.zeros([nequations], np.float64)
jacobian = np.zeros([nequations, nspecies], np.float64)

# Reactions
target[0:nreactions] = - logK[:] + np.dot(S[:,:], logX[:])
jacobian[0:nreactions,:] = S[:,:]
# Conservation
target[nreactions:] = - logQ[:] + logsumexp(C + logX, axis=1)
for equation_index in range(nconservation):
nonzero_indices = np.where(C[equation_index,:] != 0)[0]
logsum = logsumexp(np.log(C[equation_index, nonzero_indices]) + logX[nonzero_indices])
jacobian[nreactions+equation_index,:] = C[equation_index,:] * np.exp(logX[:] - logsum)

return (target, jacobian)

# Construct function with appropriate roots.
def ftarget(X):
target = np.zeros([nequations], np.float64)
Expand Down Expand Up @@ -236,7 +329,14 @@ def ftarget(X):
# Solve
from scipy.optimize import root
options = {'xtol' : tol}
initial_time = time.time()
sol = root(ftarget, X, method='lm', jac=True, tol=tol, options=options)
final_time = time.time()
cls._time += (final_time - initial_time)
cls._ncalls += 1
if (cls._ncalls % 1000 == 0):
print('%8d calls : %8.3f s total (%8.3f ms/call)' % (cls._ncalls, cls._time, cls._time/cls._ncalls*1000))

if (sol.success == False):
msg = "root-finder failed to converge:\n"
msg += str(sol)
Expand Down
4 changes: 2 additions & 2 deletions examples/autoprotocol/single-wavelength-probe-assay.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,5 +38,5 @@
assay.experiment.show_summary(mcmc)

# Generate plots
plots_filename = 'plots.pdf'
assay.experiment.generate_plots(mcmc, pdf_filename=plots_filename)
#plots_filename = 'plots.pdf'
#assay.experiment.generate_plots(mcmc, pdf_filename=plots_filename)