00001 import math
00002 from ost import mol
00003
def FloatValueExtract(func):
    """
    Decorator to wrap functions that take a list of float values. In addition
    to passing in a list of float values directly, it is possible to extract
    the values from attributes or generic properties.

    The returned wrapper has the signature ``(xs, prop=None, attr=None)``:

    * neither ``prop`` nor ``attr`` given: ``xs`` is handed to ``func`` as is.
    * ``prop`` given: the value of each element of ``xs`` is looked up through
      a ``mol.EntityPropertyMapper`` built for that property. The property
      level is chosen from the type of the first element (atom, residue or
      chain; unspecified otherwise). ``attr`` must not be given as well.
    * ``attr`` given: the value of each element is read with ``getattr``.

    Elements whose value cannot be extracted are silently skipped, so ``func``
    may receive fewer values than ``xs`` has elements.
    """
    # Local import so the decorator does not depend on a file-level import.
    import functools

    # Keep the wrapped function's name and docstring on the wrapper.
    @functools.wraps(func)
    def _dec(xs, prop=None, attr=None):
        if prop is not None:
            # Nothing to inspect for the level detection below.
            if len(xs) == 0:
                return func([])
            assert attr is None, "prop and attr are mutually exclusive"
            level = mol.Prop.Level.UNSPECIFIED
            if isinstance(xs[0], mol.AtomBase):
                level = mol.Prop.Level.ATOM
            elif isinstance(xs[0], mol.ResidueBase):
                level = mol.Prop.Level.RESIDUE
            elif isinstance(xs[0], mol.ChainBase):
                level = mol.Prop.Level.CHAIN
            epm = mol.EntityPropertyMapper(prop, level)
            vals = []
            for x in xs:
                try:
                    vals.append(epm.Get(x))
                except Exception:
                    # Best effort: skip elements without the property, but
                    # let KeyboardInterrupt/SystemExit propagate.
                    pass
            return func(vals)
        if attr is not None:
            vals = []
            for x in xs:
                try:
                    vals.append(getattr(x, attr))
                except Exception:
                    # Best effort: skip elements lacking the attribute, but
                    # let KeyboardInterrupt/SystemExit propagate.
                    pass
            return func(vals)
        return func(xs)
    return _dec
00040
@FloatValueExtract
def Mean(xs):
    """
    Return the arithmetic mean of the values in xs (sum divided by count).

    Raises RuntimeError when xs contains no values.
    """
    count = len(xs)
    if count == 0:
        raise RuntimeError("Can't calculate mean of empty sequence")
    total = sum(xs)
    return total/count
00049
@FloatValueExtract
def StdDev(xs):
    """
    Return the standard deviation of the values in xs, normalised by the
    number of values n:

        sigma = sqrt( sum[(xi-<x>)^2] / n )

    where <x> is the mean of xs as computed by Mean.
    """
    center = Mean(xs)
    squared_deviations = [(value-center)**2 for value in xs]
    variance = sum(squared_deviations)/len(xs)
    return math.sqrt(variance)
00061
@FloatValueExtract
def Min(xs):
    """
    Return the smallest value in xs, as determined by the builtin min.
    """
    smallest = min(xs)
    return smallest
00065
@FloatValueExtract
def Max(xs):
    """
    Return the largest value in xs, as determined by the builtin max.
    """
    largest = max(xs)
    return largest
00069
def Correl(xs, ys):
    """
    Calculates the (Pearson) correlation coefficient between xs and ys as

                  sum[(xi-<x>)*(yi-<y>)]
      r = -----------------------------------------
          sqrt(sum[(xi-<x>)^2])*sqrt(sum[(yi-<y>)^2])

    where <x>, <y> are the mean of dataset xs and ys. The normalisation
    factors of the covariance and of the two standard deviations cancel, so
    the result lies in [-1, 1] (up to rounding).

    Raises RuntimeError if the sequences differ in length, have length 1, or
    are empty (the latter is raised by Mean). A sequence whose values are all
    identical has zero spread and leads to a ZeroDivisionError.
    """
    if len(xs) != len(ys):
        raise RuntimeError("Can't calculate correl. Sequence lengths do not match.")
    if len(xs) == 1:
        raise RuntimeError("Can't calculate correl of sequences with length 1.")
    mean_x = Mean(xs)
    mean_y = Mean(ys)
    sq_dev_x, sq_dev_y = (0.0, 0.0)
    cross_term = 0.0
    for x, y in zip(xs, ys):
        dx = x-mean_x
        dy = y-mean_y
        cross_term += dx*dy
        sq_dev_x += dx**2
        sq_dev_y += dy**2
    # Numerator and denominator use the same (cancelled) normalisation;
    # mixing 1/n deviations with an (n-1) factor would scale r by n/(n-1).
    return cross_term/(math.sqrt(sq_dev_x)*math.sqrt(sq_dev_y))