©2019 Raazesh Sainudiin. Attribution 4.0 International (CC BY 4.0)
This is code accompanying the technical report 'The Transmission Process' by Raazesh Sainudiin and David Welch. The code is for clarity and simplicity of implementation (it is not efficient for large scale simulations).
See https://doi.org/10.1016/j.jtbi.2016.07.038 for the published paper.
def CountsDict(X):
'''convert a list X into a Dictionary of counts or frequencies'''
CD = {}
for x in X:
CD[x] = (CD[x] + 1) if (x in CD) else 1
return CD
def markAsInfected(C,v,m):
'''mark node v as infected with marker m on each of the incoming edges of v in SICN C'''
for e in C.incoming_edge_iterator([v]):
C.set_edge_label(e[0],e[1],m)
def susceptibleOutEdges(C,vs):
'''return the the susceptible outedges of node v in vs in SICN C'''
SOE = [e for e in C.outgoing_edge_iterator(vs) if e[2]==None]
return SOE
def growTransmissionTree(Ttree, pDict, z, infector, infectee):
'''grow the transmission tree Ttree and update pathsDict pDict by adding the
z-th infection event with infector -> infectee '''
LBT = LabelledBinaryTree
newSubTree = LBT([LBT([None,None], label=infector), LBT([None, None], label=infectee)], label=z).clone()
path2Infector = pDict[infector]
if z==1:
Ttree = newSubTree
else:
Ttree[tuple(path2Infector)] =newSubTree
#print ascii_art(Ttree)
pDict[infector]=path2Infector+[0]
pDict[infectee]=path2Infector+[1]
pDict[z]=path2Infector
return Ttree
def forgetLeafLabels(T):
'''return the transmission tree T with all leaf labels set to 0'''
leafLabelSet=set(T.leaf_labels())
leafUnlabelledT=T.map_labels(lambda z:0 if (z in leafLabelSet) else z)
return leafUnlabelledT
def forgetAllLabels(T):
'''return the transmission tree T with all node labels removed'''
return T.shape()
def justTree(T):
'''return the transmission tree T as nonplanar unranked unlabelled tree'''
return Graph(T.shape().to_undirected_graph(),immutable=True)
def transmissionProcessTC(C,initialI):
'''return transmission tree outcome of the DTDS transmission MC on SICN C with initial infection at node initialI'''
#initialisation of SICN
z=0 # infection event count
markAsInfected(C,initialI,'infected')
infectedIs = [initialI]
popSize=C.order()
# initialisation of Transmission Tree
pathsDict={} # dictionary of nodes -> paths from root in tree
LBT = LabelledBinaryTree
# individuals in tree are labelled by "i"+str(integer_label)
T = LBT([None,None],label="i"+str(initialI)).clone()
pathsDict["i"+str(initialI)]=[]
while (len(infectedIs) < popSize):
z=z+1 # increment infection event count
currentSOE = susceptibleOutEdges(C,infectedIs)
numberInfected=len(currentSOE)
nextEdge = currentSOE[randrange(0,numberInfected)]
C.set_edge_label(nextEdge[0],nextEdge[1],z)
infectedIs.append(nextEdge[1])
markAsInfected(C,nextEdge[1],'inf')
T=growTransmissionTree(T, pathsDict, z, "i"+str(nextEdge[0]),"i"+str(nextEdge[1]))
#print "step z = ",z; print ascii_art(T); print "--------------------"
return T.as_ordered_tree(with_leaves=False)
Let the SICN be the complete graph. We get the distribution of transmission trees at various resolutions next.
graphs.CompleteGraph(4).to_directed().show()
# demo
transmissionProcessTC(graphs.CompleteGraph(4).to_directed(),0)
ts=[transmissionProcessTC(graphs.CompleteGraph(4).to_directed(),0) for _ in range(100000)]
d=CountsDict(ts)
print len(d)
d
d=CountsDict([forgetLeafLabels(t) for t in ts])
d
d=CountsDict([forgetAllLabels(t) for t in ts])
d
d=CountsDict([justTree(t) for t in ts])
d
graphs.CompleteGraph(5).to_directed().show()
ts=[transmissionProcessTC(graphs.CompleteGraph(5).to_directed(),0) for _ in range(100000)]
d=CountsDict([forgetLeafLabels(t) for t in ts])
print len(d)
d
d=CountsDict([forgetAllLabels(t) for t in ts])
print len(d)
d
d=CountsDict([justTree(t) for t in ts])
len(d)
d
Let the SICN be the path graph. We get the distribution of transmission trees at various resolutions next.
digraphs.Path(4).show()
# demo
transmissionProcessTC(digraphs.Path(4),0)
ts=[transmissionProcessTC(digraphs.Path(4),0) for _ in range(1000)]
d=CountsDict(ts)
print len(d)
d
d=CountsDict([forgetAllLabels(t) for t in ts])
print len(d)
d
d=CountsDict([justTree(t) for t in ts])
len(d)
d
Let the SICN be the star network. We get the distribution of transmission trees at various resolutions next.
g=graphs.StarGraph(3).to_directed()
g.show()
# demo
transmissionProcessTC(graphs.StarGraph(3).to_directed(),0)
ts=[transmissionProcessTC(graphs.StarGraph(3).to_directed(),0) for _ in range(10000)]
d=CountsDict(ts)
print len(d)
d
d=CountsDict([forgetLeafLabels(t) for t in ts])
print len(d)
d
d=CountsDict([forgetAllLabels(t) for t in ts])
print len(d)
d
d=CountsDict([justTree(t) for t in ts])
len(d)
d
Note one can include branch-lengths into the likelihood easily via the exponential rates for the continuous time Markov processes that the discrete jump chains studied here. This will be necessary to conduct statistical inference for applied epidemiological models.
import numpy as np # import numpy for np.methods
# import optimize from scipy to do numerical optimization
from scipy import optimize
def splitsSequence(T):
'''return a list of tuples (left,right) split sizes at each split node'''
l = []
LabelledBinaryTree(T).post_order_traversal(lambda node:
l.append((node[0].node_number(),node[1].node_number())))
return l
def prob_RPT(T,a,b):
'''probability of ranked planar tree T under beta-splitting model
a,b>-1, where (a+1,b+1) are the parameters of the beta distribution'''
#print type(a),type(b)#type(x[0]),type(x[0])
return prod(map(lambda x: beta(x[0]+RR(a+1),x[1]+RR(b+1))/beta(RR(a+1),RR(b+1)),
splitsSequence(T)))
def negLogLkl_SplitPairCounts(spc,a,b):
'''-log likelihood of multiple independent ranked planar trees
through their sufficient statistics of the frequence of
split-pair counts spc= [(nL_i,nR_i,c_i): i=1,..,K]
under beta-splitting model
a,b>-1, where (a+1,b+1) are the parameters of the beta
distribution -- This implements first Equation in Thm 1 involving beta functions'''
return -RR(sum(map(lambda x:
x[2]*log(1.0*beta(x[0]+RR(a+1),x[1]+RR(b+1))/beta(RR(a+1),RR(b+1))), spc)))
def splitPairsCounts(TS):
'''list of the frequency of all distinct split-pairs, i.e. (# of left splits, # right splits)
beloe each internal node in each transmission tree in the list TS of transmission trees'''
splitPairCounts=sorted(CountsDict(flatten([splitsSequence(t) for t in TS],max_level=1)).items())
return [(x[0][0],x[0][1],x[1]) for x in splitPairCounts]
def splitPairsCountsDict(TS):
'''dictionary of the frequency of all distinct split-pairs, i.e. (# of left splits, # right splits)
beloe each internal node in each transmission tree in the list TS of transmission trees'''
#splitPairCounts=sorted(CountsDict(flatten([splitsSequence(t) for t in TS],max_level=1)))
sD = CountsDict(flatten([splitsSequence(t) for t in TS],max_level=1))
return sD
def logLklOfASplitPair(a,b,nL,nR):
'''beta(nL+a+1,nR+b+1)/beta(a+1,b+1) without beta functions via Eqn 2 in Thm 1'''
A1=sum([log((b+j)/(b+j+a)) for j in range(nR+1)])
A2=sum([log((a+i)/(a+i+b+nR+1)) for i in range(nL+1)])
A3=log(b*a/((a+b)*(a+b+1)))
return A1+A2-A3
def negLogLkl_SplitPairCounts2(spc,a,b):
'''-log likelihood of multiple independent ranked planar trees
through their sufficient statistics of the frequency of
split-pair counts spc= [(nL_i,nR_i,c_i): i=1,..,K]
under beta-splitting model
a,b>-1, where (a+1,b+1) are the parameters of the beta
distribution -- This implements second Equation in Thm 1 without beta functions'''
return -(sum(map(lambda x:
x[2]*logLklOfASplitPair(a,b,x[0],x[1]), spc)))
def LklOfASplitPair(a,b,nL,nR):
'''beta(nL+a+1,nR+b+1)/beta(a+1,b+1) without beta functions via Eqn 2 in Thm 1'''
A1=prod([((b+j)/(b+j+a)) for j in range(nR+1)])
A2=prod([((a+i)/(a+i+b+nR+1)) for i in range(nL+1)])
A3=(b*a/((a+b)*(a+b+1)))
return (A1*A2)/A3
def negLogLkl_SplitPairCounts2Prod(spc,a,b):
'''- log likelihood of multiple independent ranked planar trees
through their sufficient statistics of the frequency of
split-pair counts spc= [(nL_i,nR_i,c_i): i=1,..,K]
under beta-splitting model
a,b>-1, where (a+1,b+1) are the parameters of the beta
distribution -- This implements second Equation in Thm 1 without beta functions'''
return -(sum(map(lambda x:
x[2]*log(LklOfASplitPair(a,b,x[0],x[1])), spc)))
%time
# demo of the mle for a complete graph with 50 nodes and 10 sampled trees
c_1 = lambda p: p[0]+0.9999999 # constraint for alpha > -1
c_2 = lambda p: p[1]+0.9999999 # constraint for beta > -1
# simulation settings
n=50
reps=10
# trial 1: make a list of 10 independent transmission trees on complete graph with 50 nodes
ts1=[transmissionProcessTC(graphs.CompleteGraph(n).to_directed(),0) for _ in range(reps)]
spc1=splitPairsCounts(ts1)
# trial 2: make another list of 10 independent transmission trees on complete graph with 50 nodes
ts2=[transmissionProcessTC(graphs.CompleteGraph(n).to_directed(),0) for _ in range(reps)]
spc2=splitPairsCounts(ts2)
# slower more explicit method of finding MLE
# (don't use this for larger simuations!)
def negLkl1(AB): # negative log likelihood function for trees from trial 1
return sum([-log(1.0*prob_RPT(ts1[j],AB[0],AB[1])) for j in range(reps)])
def negLkl2(AB): # negative log likelihood function for trees from trial 1
return sum([-log(1.0*prob_RPT(ts2[j],AB[0],AB[1])) for j in range(reps)])
%time
mle=minimize_constrained(negLkl1,[c_1,c_2],[0.0,0.0],disp=0) #MLE for trial 1
print [n,reps,mle]
%time mle=minimize_constrained(negLkl2,[c_1,c_2],[0.0,0.0],disp=0) #MLE for trial 2
print [n,reps,mle]
# faster MLE computation using sufficient statistics of split-pair frequencies
# (don't use this for larger simuations!)
def negLkl1(AB):
return negLogLkl_SplitPairCounts(spc1,AB[0],AB[1])
def negLkl2(AB):
return negLogLkl_SplitPairCounts(spc2,AB[0],AB[1])
%time mle=minimize_constrained(negLkl1,[c_1,c_2],[0.0,0.0],disp=0) #MLE for trial 1
print [n,reps,mle]
%time mle=minimize_constrained(negLkl2,[c_1,c_2],[0.0,0.0],disp=0) #MLE for trial 2
print [n,reps,mle]
# fastest and numerically most robust MLE computation using sufficient statistics
# USE this for larger simulations
def negLkl1(AB):
return negLogLkl_SplitPairCounts2(spc1,AB[0],AB[1])
def negLkl2(AB):
return negLogLkl_SplitPairCounts2(spc2,AB[0],AB[1])
%time mle=minimize_constrained(negLkl1,[c_1,c_2],[0.0,0.0],disp=0) #MLE for trial 1
print [n,reps,mle]
%time mle=minimize_constrained(negLkl2,[c_1,c_2],[0.0,0.0],disp=0) #MLE for trial 2
print [n,reps,mle]
len(ts1),len(ts2) # number of trees in each trial
def denMatOfsplitPairsCounts(ts,n,EMF):
'''dense matrix of split pair counts of transmission trees in the list ts
normalise to sum to 1 if EMF = Empirical Mass Function = True '''
spcD=splitPairsCountsDict(ts); spcD[(0,n)]=0; spcD[(n,0)]=0;
ss=matrix(spcD).dense_matrix()
if EMF:
ss=ss/sum(sum(ss))
return ss
Let's save these plots
mp1=matrix_plot(np.log1p(denMatOfsplitPairsCounts(ts1,n,0)) , cmap='summer', colorbar=True)
mp2=matrix_plot(np.log1p(denMatOfsplitPairsCounts(ts2,n,0)) , cmap='summer', colorbar=True)
mp2.show()
%sh
pwd
%sh
#mkdir /projects/58dfa924-55ae-4b6c-9fd4-1cd0ef49eb7c/figures && ls
ls
mp1.save('./figures/SuffStatsCompleteGraph_n50_reps10_mp1.png')
mp1.save('./figures/SuffStatsCompleteGraph_n50_reps10_mp1.eps')
mp1.save('./figures/SuffStatsCompleteGraph_n50_reps10_mp1.pdf')
mp2.save('./figures/SuffStatsCompleteGraph_n50_reps10_mp2.png')
mp2.save('./figures/SuffStatsCompleteGraph_n50_reps10_mp2.eps')
mp2.save('./figures/SuffStatsCompleteGraph_n50_reps10_mp2.pdf')
Let's do a more exhaustive simulation next
%time
# demo of the mle for a complete graph with 50 nodes and 10 sampled trees
c_1 = lambda p: p[0]+0.9999999 # constraint for alpha > -1
c_2 = lambda p: p[1]+0.9999999 # constraint for beta > -1
# simulation settings
n=50
reps=1000
# trial 1: make a list of 10 independent transmission trees on complete graph with 50 nodes
ts3=[transmissionProcessTC(graphs.CompleteGraph(n).to_directed(),0) for _ in range(reps)]
spc3=splitPairsCounts(ts3)
spc3=splitPairsCounts(ts3)
def negLkl3(AB):
return negLogLkl_SplitPairCounts2(spc3,AB[0],AB[1])
%time mle=minimize_constrained(negLkl3,[c_1,c_2],[0.0,0.0],disp=0) #MLE for trial 1
print [n,reps,mle]
#mp3=matrix_plot(np.log1p(denMatOfsplitPairsCounts(ts3,n,1)*np.double(1000)), norm='value', cmap='summer') # a log(1+prob*1000) plot of the
mp3=matrix_plot(np.log1p(denMatOfsplitPairsCounts(ts3,n,0)*np.double(1)) , cmap='summer', colorbar=True)#, norm='value', cmap='summer', colorbar=True) # a log(1+freq) plot of the
mp3.show()
mp3.save('./figures/SuffStatsCompleteGraph_n50_reps1000_mp3.png')
mp3.save('./figures/SuffStatsCompleteGraph_n50_reps1000_mp3.eps')
mp3.save('./figures/SuffStatsCompleteGraph_n50_reps1000_mp3.pdf')
%sh
ls figures/
g=digraphs.Circulant(5,[1,-1])
g.show()
To Make Latex tikz-graph see http://doc.sagemath.org/html/en/reference/graphs/sage/graphs/graph_latex.html.
H = digraphs.Circulant(6,[1,-1])
H.set_latex_options(
graphic_size=(5,5),
vertex_size=0.2,
edge_thickness=0.04,
edge_color='green',
vertex_color='green',
vertex_label_color='red',
format='tkz_graph',
#tkz_style='Art',
#layout='acyclic'
)
latex(H)
ts=[transmissionProcessTC(digraphs.Circulant(6,[1,-1]),0) for _ in range(10)]
T = ts[3]
view(T)
#latex(T)
Just checking that the trees are uniformly distributed over 2^{n-1} ORBOR trees := Only-Right-Branching-subtrees-Of-Root-node trees (for peace of mind :).
ts=[transmissionProcessTC(digraphs.Circulant(3,[1,-1]),0) for _ in range(10000)]
d=CountsDict(ts)
print len(d)
d
ts=[transmissionProcessTC(digraphs.Circulant(4,[1,-1]),0) for _ in range(10000)]
d=CountsDict(ts)
print len(d)
d
for t in d.keys():
print ascii_art(t)
MLE of transmission trees from the bidirectional circular network.
%time
# demo of the mle for a BalancedTree(2,k) graph with k=4,5,6 or n=31,63,127 nodes and 1 or 10 sampled trees
c_1 = lambda p: p[0]+0.9999999 # constraint for alpha > -1
c_2 = lambda p: p[1]+0.9999999 # constraint for beta > -1
mles=[]
n=50
reps=1 # all reps have the same unlabelled transmission tree topology
trials=5
for i in range(trials):
ts=[transmissionProcessTC(digraphs.Circulant(n,[1,-1]),0) for _ in range(reps)]
spc=splitPairsCounts(ts)
def negLkl(AB):
return negLogLkl_SplitPairCounts2(spc,AB[0],AB[1])
mle=minimize_constrained(negLkl,[c_1,c_2],[0.0,0.0],disp=0)
x = [n,reps, i, mle, negLkl(mle)]
print x
mles.append(x)
#mles
print (n, reps, trials, mean([x[3][0] for x in mles]), std([x[3][0] for x in mles]), mean([x[3][1] for x in mles]), std([x[3][1] for x in mles]))
# (50, 1, 5, -0.9880406098481203, 0.0006220653042760461, 1.4583751679644803, 0.15345032109817106)
# (50, 100, 5, -0.9878920259080713, 3.460969702959854e-05, 1.5188788157835482, 0.006721406185256709)
Let us repeat the same computation over the relative frequency of split-pairs as opposed to the frequency over a larger number of replicate transmission trees.
%time
# demo of the mle for a BalancedTree(2,k) graph with k=4,5,6 or n=31,63,127 nodes and 1 or 10 sampled trees
c_1 = lambda p: p[0]+0.9999999 # constraint for alpha > -1
c_2 = lambda p: p[1]+0.9999999 # constraint for beta > -1
mles=[]
n=50
reps=100 # all reps have the same unlabelled transmission tree topology
trials=5
for ti in range(trials):
ts=[transmissionProcessTC(digraphs.Circulant(n,[1,-1]),0) for _ in range(reps)]
spc=splitPairsCounts(ts)
s=sum([x[2] for x in spc])
spcRf=[]
for i in range(len(spc)):
spcRf.append((spc[i][0],spc[i][1],spc[i][2]/s))
def negLkl(AB):
return negLogLkl_SplitPairCounts2(spcRf,AB[0],AB[1])
mle=minimize_constrained(negLkl,[c_1,c_2],[0.0,0.0],disp=0)
x = [n,reps, i, mle, negLkl(mle)]
print x
mles.append(x)
print (n, reps, trials, mean([x[3][0] for x in mles]), std([x[3][0] for x in mles]), mean([x[3][1] for x in mles]), std([x[3][1] for x in mles]))
a=-0.98; b=1.5; BetaD = RealDistribution('beta', [a+1, b+1],seed=0)
show(BetaD.plot(xmin=0,xmax=1),figsize=[7,2]);
print ascii_art(ts[4]) # labelled transmission tree
g=graphs.BalancedTree(3,2).to_directed()
g.show()
To Make Latex tikz-graph see http://doc.sagemath.org/html/en/reference/graphs/sage/graphs/graph_latex.html.
H = graphs.BalancedTree(2,3).to_directed()
#H = graphs.BalancedTree(3,2).to_directed()
H.set_latex_options(
graphic_size=(5,5),
vertex_size=0.2,
edge_thickness=0.04,
edge_color='green',
vertex_color='green',
vertex_label_color='red',
format='tkz_graph',
#tkz_style='Art',
layout='acyclic'
)
#view(H)
#latex(H)
#latex(H)
d,h=3,2
g=graphs.BalancedTree(d,h).to_directed()
g.show()
d,h=3,3
ts=[transmissionProcessTC(graphs.BalancedTree(d,h).to_directed(),0) for _ in range(10000)]
#myDict=CountsDict(ts)
#myDict=CountsDict([forgetLeafLabels(t) for t in ts])
myDict=CountsDict([forgetAllLabels(t) for t in ts])
print len(myDict)
#myDict
for t in myDict.keys(): # d,h=3,3 "left-branching 3-shark" tree
print ascii_art(t)
# so long so far... could pursue the rank placements on balanced trees
graphs.BalancedTree(2,6).to_directed().order()
%time
# demo of the mle for a BalancedTree(2,k) graph with k=4,5,6 or n=31,63,127 nodes and 1 or 10 sampled trees
c_1 = lambda p: p[0]+0.9999999 # constraint for alpha > -1
c_2 = lambda p: p[1]+0.9999999 # constraint for beta > -1
mles=[]
d,h=2,6
n=graphs.BalancedTree(d,h).to_directed().order()
reps=1 # all reps have the same unlabelled transmission tree topology
trials=2
for i in range(trials):
ts=[transmissionProcessTC(graphs.BalancedTree(d,h).to_directed(),0) for _ in range(reps)]
spc=splitPairsCounts(ts)
def negLkl(AB):
return negLogLkl_SplitPairCounts2(spc,AB[0],AB[1])
mle=minimize_constrained(negLkl,[c_1,c_2],[0.0,0.0],disp=0)
x = [d,h, n,reps, i, mle]
print x
mles.append(x)
#mles
#[2, 6, 127, 1, 0, (-0.3259079515975824, -0.05752026912960705)]
#[2, 7, 255, 1, 0, (-0.3694074748854663, -0.10600727506430978)]
#[2, 8, 511, 1, 0, (-0.39276938517987087, -0.1329531458925401)]
#[2, 9, 1023, 1, 0, (-0.4051981012968259, -0.14770363723074506)]
#[2, 10, 2047, 1, 0, (-0.41245772002324016, -0.15622461056846243)]
#ts=[transmissionProcessTC(graphs.BalancedTree(2,3).to_directed(),0) for _ in range(5)]
ts=[transmissionProcessTC(graphs.BalancedTree(3,2).to_directed(),0) for _ in range(5)]
ascii_art(ts[0]) # labelled transmission tree is always the "left-branching 3-shark" ignoring labels
T = ts[4]
#view(T)
#latex(T)
def toroidal2DGrid(n):
G = graphs.GridGraph([n,n])
#G.show() # long time
G.add_edges([((i,0),(i,n-1)) for i in range(n)])
G.add_edges([((0,i),(n-1,i)) for i in range(n)])
G.relabel()
return G
ts=[transmissionProcessTC(toroidal2DGrid(10).to_directed(),0) for _ in range(10)]
# gives several distinct unlabelled trees depending on the sequence of infection events
d=CountsDict([forgetAllLabels(t) for t in ts])
len(d)
d.values()
d=CountsDict([justTree(t) for t in ts])
# gives several distinct trees depending on the sequence of infection events
len(d)
d.values()
%time
# demo of the mle - way too long...
c_1 = lambda p: p[0]+0.9999999 # constraint for alpha > -1
c_2 = lambda p: p[1]+0.9999999 # constraint for beta > -1
# simulation settings
m=10
n=m^2
reps=10
repeatMLE=5
for i in range(repeatMLE):
ts=[transmissionProcessTC(toroidal2DGrid(m).to_directed(),0) for _ in range(reps)]
spc=splitPairsCounts(ts)
def negLkl(AB):
return negLogLkl_SplitPairCounts2(spc,AB[0],AB[1])
mle=minimize_constrained(negLkl,[c_1,c_2],[0.0,0.0],disp=0)
print [i, n,reps,mle]
Let us see graph in latex.
H = toroidal2DGrid(3).to_directed()
H.set_latex_options(
graphic_size=(5,5),
vertex_size=0.2,
edge_thickness=0.04,
edge_color='green',
vertex_color='green',
vertex_label_color='red',
format='tkz_graph',
#tkz_style='Art',
#layout='acyclic'
)
#view(H)
#latex(H)
ts=[transmissionProcessTC(toroidal2DGrid(3).to_directed(),0) for _ in range(10)]
#view(ts[0])
#latex(ts[0])
#view(ts[1])
#latex(ts[1])
#view(ts[3])
#latex(T)
#latex(ts[3])
3D Toroidal Grid
def toroidal3DGrid(n):
G = graphs.GridGraph([n,n,n])
#G.show() # long time
G.add_edges([((0,i,j),(n-1,i,j)) for i in range(n) for j in range(n)])
G.add_edges([((i,0,j),(i,n-1,j)) for i in range(n) for j in range(n)])
G.add_edges([((i,j,0),(i,j,n-1)) for i in range(n) for j in range(n)])
G.relabel()
return G
%time
# demo of the mle
c_1 = lambda p: p[0]+0.9999999 # constraint for alpha > -1
c_2 = lambda p: p[1]+0.9999999 # constraint for beta > -1
m=10
n=m^3
reps=1 # all reps have the same unlabelled transmission tree topology
for i in range(5):
ts=[transmissionProcessTC(toroidal3DGrid(m).to_directed(),0) for _ in range(reps)]
spc=splitPairsCounts(ts)
def negLkl(AB):
return negLogLkl_SplitPairCounts2(spc,AB[0],AB[1])
mle=minimize_constrained(negLkl,[c_1,c_2],[0.0,0.0],disp=0)
print [n,reps, i, mle]
def ErdosReyniConnectedCompOf0(n,p,reps):
'''return reps many transmission trees from the connected component containing initial infection vertex 0'''
ts=[]
i=0; MAXTrials=10000; successfulTrials=0;
while (successfulTrials<reps or i>MAXTrials):
i=i+1
g0=graphs.RandomGNP(n,p).to_directed()
g=g0.subgraph(g0.connected_component_containing_vertex(0))
if g.order()>1: # just making sure we have at least 2 nodes in the connected component containing 0
#print g.order(), g.size()
ts.append(transmissionProcessTC(g,0))
successfulTrials=successfulTrials+1
return ts
%time
# demo of the mle
mleList=[]
c_1 = lambda p: p[0]+0.9999999 # constraint for alpha > -1
c_2 = lambda p: p[1]+0.9999999 # constraint for beta > -1
n=100
#Lambdas=[floor(RR(n/2^i)) for i in range(ceil(log(n,2)))]; Lambdas.reverse(); #Lambdas
Lambdas=sorted(Set([floor(RR(n/(5/4)^i)) for i in range(ceil(log(n,5/4)))]));
for L in Lambdas:
prob=RR(L/n) # edge prob in Erdos-Reyni random graph on n nodes = lambda/n where lambda = expected degree of a node
reps=30 # all reps have the same unlabelled transmission tree topology
for i in range(5):
ts= ErdosReyniConnectedCompOf0(n,prob,reps)
spc=splitPairsCounts(ts)
def negLkl(AB):
return negLogLkl_SplitPairCounts2(spc,AB[0],AB[1])
mle=minimize_constrained(negLkl,[c_1,c_2],[0.0,0.0],disp=0)
# mean number of connected components = pop size = # leaves in transmission tree
meanNumCc=RR(mean([(t.node_number()+1)/2 for t in ts]))
mleL=[n, prob, L, reps, meanNumCc, mle]
print mleL
mleList.append(mleL)
n=100.; print log(n)/n # edge probability threshold for getting a connected component
n=100; print log(100.) # average node degree threshold for getting a connected component
%time
# demo of the mle - way too long...
c_1 = lambda p: p[0]+0.9999999 # constraint for alpha > -1
c_2 = lambda p: p[1]+0.9999999 # constraint for beta > -1
# simulation settings
mles=[]
n,prob,reps,repeatMLE=100,0.05,10,5 #50,5/49.,100,5
for i in range(repeatMLE):
ts=ErdosReyniConnectedCompOf0(n,prob,reps)
spc=splitPairsCounts(ts)
def negLkl(AB):
#return negLogLkl_SplitPairCounts2(spc,AB[0],AB[1]) # our standard method - not ok for p>0.0
return negLogLkl_SplitPairCounts2Prod(spc,AB[0],AB[1]) # good rechecker
mle=minimize_constrained(negLkl,[c_1,c_2],[.0,.0],disp=0)
meanNumCc=RR(mean([(t.node_number()+1)/2 for t in ts]))
x = [i, n, prob, reps, mle, negLkl(mle), meanNumCc]
print x
mles.append(x)
y=mean([x[4][0] for x in mles]),std([x[4][0] for x in mles]), mean([x[4][1] for x in mles]),std([x[4][1] for x in mles]); [x.n(digits=4) for x in y]
# n,prob,reps,repeatMLE=50,0.04,30,5
#[0, 50, 2, 1.00000000000000, 30, (-0.7667591970714925, -0.5672234464838595), 2773.4718576096257, 37.5000000000000]
#[1, 50, 2, 1.00000000000000, 30, (-0.7214867804712249, -0.5415563735009684), 2571.6445754280776, 34.4000000000000]
#[2, 50, 2, 1.00000000000000, 30, (-0.7393765497380875, -0.525764191886687), 2701.583541657241, 36.4000000000000]
#[3, 50, 2, 1.00000000000000, 30, (-0.7482689119705859, -0.5327256368435153), 2883.349138665519, 38.3333333333333]
#[4, 50, 2, 1.00000000000000, 30, (-0.7660644698091582, -0.5746573248509714), 2740.2940119367827, 36.8333333333333]
#[-0.7484, 0.01907, -0.5484, 0.02150]
# n,prob,reps,repeatMLE=50,0.06,30,5
#[0, 50, 2, 1.00000000000000, 30, (-0.5951261508841413, -0.38549653008638246), 3783.127909979816, 45.6666666666667]
#[1, 50, 2, 1.00000000000000, 30, (-0.6303056569753376, -0.43061971591340364), 3900.4206600762795, 47.2000000000000]
#[2, 50, 2, 1.00000000000000, 30, (-0.6098958904200529, -0.396836976185501), 3909.283696080642, 47.1666666666667]
#[3, 50, 2, 1.00000000000000, 30, (-0.5563622923900977, -0.3258917498969631), 3967.752465957892, 47.5000000000000]
#[4, 50, 2, 1.00000000000000, 30, (-0.6139741520197212, -0.4041535631904723), 3891.4915292888013, 47.0333333333333]
#[-0.6011, 0.02799, -0.3886, 0.03879]
# n,prob,reps,repeatMLE=50,3/49.,30,5
[0, 50, 3, 1.00000000000000, 30, (-0.5983777634278724, -0.4291393006309947), 3976.460464836187, 47.6666666666667]
[1, 50, 3, 1.00000000000000, 30, (-0.5707923610262577, -0.3961141455439096), 4005.615823705353, 47.8333333333333]
[2, 50, 3, 1.00000000000000, 30, (-0.6183197359092514, -0.4261981038048322), 3949.032714636033, 47.5666666666667]
[3, 50, 3, 1.00000000000000, 30, (-0.6048013904814242, -0.37676154716542365), 3773.912360800056, 45.6333333333333]
[4, 50, 3, 1.00000000000000, 30, (-0.5593275350868996, -0.290756860450979), 3891.852472272532, 46.9000000000000]
[-0.5903, 0.02450, -0.3838, 0.05637] # [-0.8544, 0.009650, -0.6907, 0.01825] # compare from SmallWorld(n=50,k=3,p=1)
# n,prob,reps,repeatMLE=100,5/99.,30,5 - compare with SmallWorld(n=100,k=5,p=1) [-0.5079, 0.02486, -0.2263, 0.03785]
# [-0.4106, 0.02340, -0.2228, 0.03605]
# n,prob,reps,repeatMLE=50,5/49.,30,5 - compare with SmallWorld(n=50,k=5,p=1) [-0.4674, 0.01723, -0.1759, 0.02064]
[-0.3682, 0.04354, -0.1984, 0.03857]
# n,prob,reps,repeatMLE=50,5/49.=0.102040816326531,100,5
[-0.3798, 0.01348, -0.2068, 0.01602]
d,n=3,10 # n>d>2, and n*d is even
H = graphs.RandomRegular(d,n).to_directed()
H.set_latex_options(
graphic_size=(5,5),
vertex_size=0.2,
edge_thickness=0.04,
edge_color='green',
vertex_color='green',
vertex_label_color='red',
format='tkz_graph',
tkz_style='Art',
#layout='acyclic'
)
view(H)
#graphs.RandomRegular?
%time
# demo of the mle - way too long...
c_1 = lambda p: p[0]+0.9999999 # constraint for alpha > -1
c_2 = lambda p: p[1]+0.9999999 # constraint for beta > -1
# simulation settings
mles=[]
d,n=4,1000 # n>d>2, and n*d is even
reps=1
repeatMLE=5
for i in range(repeatMLE):
ts=[transmissionProcessTC(graphs.RandomRegular(d,n).to_directed(),0) for _ in range(reps)]
spc=splitPairsCounts(ts)
def negLkl(AB):
return negLogLkl_SplitPairCounts2(spc,AB[0],AB[1])
mle=minimize_constrained(negLkl,[c_1,c_2],[0.0,0.0],disp=0)
x = [i, n,reps, mle]
print x
mles.append(x)
#mles
#mles=[[0, 1000, 1, (-0.5694710622607956, 0.07235568882556642)], [1, 1000, 1, (-0.5813499039580335, -0.0008921069166473263)], [2, 1000, 1, (-0.5125503757396102, 0.12009937792771662)], [3, 1000, 1, (-0.5972839322945308, 0.005026801639200447)], [4, 1000, 1, (-0.516945909495096, 0.17203343447026423)]]
y=mean([x[3][0] for x in mles]),std([x[3][0] for x in mles]), mean([x[3][1] for x in mles]),std([x[3][1] for x in mles]); [x.n(digits=4) for x in y]
#
g=graphs.RandomNewmanWattsStrogatz(7, 2, 0.5) # no rewiring is done :(
g.show()
import networkx # so we use the networkx implementation
g = DiGraph(networkx.watts_strogatz_graph(7,2,0.2))
g.show()
networkx.watts_strogatz_graph?
#%%sh
#cat /projects/sage/sage-6.10/local/lib/python2.7/site-packages/networkx-1.10-py2.7.egg/networkx/generators/random_graphs.py #, the man dawg!
g.is_connected()
g.set_pos(g.layout_circular())
view(g)
def findMaxDegAndItsVertex(gr):
'''find the maximum degree in a graph and its vertex with smalelst ID'''
maxD=0; maxDv=0;
for vd in gr.degree_iterator(range(n),True):
#print vd
if vd[1] > maxD:
maxD=vd[1]; maxDv=vd[0];
return maxD,maxDv
import networkx # so we use the networkx implementation
def ConnectedSmallWorldFromMostpopular(n,k,p,reps):
'''return reps many transmission trees from the connected small-world network
initialized from the most popular smallest node'''
ts=[]
i=0; MAXTrials=10000; successfulTrials=0;
while (successfulTrials<reps or i>MAXTrials):
i=i+1
g0 = DiGraph(networkx.watts_strogatz_graph(n,k,p))
if g0.is_connected(): # just making sure we have connected network
#print g0.order(), g0.size()
maxDV=findMaxDegAndItsVertex(g0)
# to initialize from the most popular smallest node
ts.append(transmissionProcessTC(g0,maxDV[1]))
successfulTrials=successfulTrials+1
return ts
def ConnectedSmallWorldFromAnywhere(n,k,p,reps):
'''return reps many transmission trees from the connected connected small-world network
initialize from a random node'''
ts=[]
i=0; MAXTrials=10000; successfulTrials=0;
while (successfulTrials<reps or i>MAXTrials):
i=i+1
g0 = DiGraph(networkx.watts_strogatz_graph(n,k,p))
if g0.is_connected(): # just making sure we have connected network
#print g0.order(), g0.size()
# to initialize at a random node, say 0 - it's too noisy!
ts.append(transmissionProcessTC(g0,0))
successfulTrials=successfulTrials+1
return ts
ts=ConnectedSmallWorldFromAnywhere(10,3,0.5,10)
view(ts[5])
view(ts[1])
def spc2spcRF(spc):
'''to turn the split-pair counts into split-pair relative frequencies to
interrogate local optimization madness... - should be really using rigorous interval global optimization here!'''
s=sum([x[2] for x in spc])
spcRf=[]
for i in range(len(spc)):
spcRf.append((spc[i][0],spc[i][1],spc[i][2]/s))
return spcRf
%time
# demo of the mle - way too long...
c_1 = lambda p: p[0]+0.9999999 # constraint for alpha > -1
c_2 = lambda p: p[1]+0.9999999 # constraint for beta > -1
# simulation settings
mles=[]
n,k,p,reps,repeatMLE=50,5,0.10,30,5
for i in range(repeatMLE):
#ts=ConnectedSmallWorldFromMostpopular(n,k,p,reps)
ts=ConnectedSmallWorldFromAnywhere(n,k,p,reps)
#spc=spc2spcRF(splitPairsCounts(ts)) # only needed if freqs need normalization for numerics...
spc=splitPairsCounts(ts)
def negLkl(AB):
# our standard method - not ok for n > 50 due to SICN-circularity's implications for number screen wrt log...
#return negLogLkl_SplitPairCounts2(spc,AB[0],AB[1]) #our standard method ok for n>=50
return negLogLkl_SplitPairCounts2Prod(spc,AB[0],AB[1]) # ok for n=50 but not n=100 as it goes to boundary...
mle=minimize_constrained(negLkl,[c_1,c_2],[.0,.0],disp=0)
x = [i, n, k, p, reps, mle, negLkl(mle)]
print x
mles.append(x)
y=mean([x[5][0] for x in mles]),std([x[5][0] for x in mles]), mean([x[5][1] for x in mles]),std([x[5][1] for x in mles]); [x.n(digits=4) for x in y]
print 1,\
2
# n,k,p,reps,repeatMLE=50,2,0.10,1,5
#-0.9880, 0.0001708, 1.463, 0.06736 # for ConnectedSmallWorldFromMostpopular
#-0.9613, 0.01639, 0.2630, 1.103 # ConnectedSmallWorldFromAnywhere
# n,k,p,reps,repeatMLE=50,2,0.10,30,5
#-0.9652, 0.002863, -0.3828, 0.1171 # ConnectedSmallWorldFromAnywhere - more std error - matters where you start!
#-0.9618, 0.003047, -0.4147, 0.03203 # ConnectedSmallWorldFromMostpopular
# n,k,p,reps,repeatMLE=50,2,0.20,30,5
#[-0.9395, 0.003329, -0.5707, 0.02880] # ConnectedSmallWorldFromAnywhere
# n,k,p,reps,repeatMLE=50,2,0.50,30,5
#[-0.8807, 0.003303, -0.6570, 0.01039] # ConnectedSmallWorldFromAnywhere
# n,k,p,reps,repeatMLE=50,2,1.0,30,5
#[-0.8568, 0.01075, -0.6965, 0.01943] # ConnectedSmallWorldFromAnywhere
# n,k,p,reps,repeatMLE=50,3,1.0,30,5 - compare with ER(50,3/49) model
#[0, 50, 3, 1.00000000000000, 30, (-0.8635534058029828, -0.6890444879379554), 3723.1785805652808]
#[1, 50, 3, 1.00000000000000, 30, (-0.8408527225144719, -0.6604981618863403), 3830.8419905583555]
#[2, 50, 3, 1.00000000000000, 30, (-0.8637882040562047, -0.7061756425142889), 3726.06304636956]
#[3, 50, 3, 1.00000000000000, 30, (-0.8536921622583518, -0.6940200264032614), 3795.859886477451]
#[4, 50, 3, 1.00000000000000, 30, (-0.8503343394196402, -0.7036196315204895), 3795.7697716850957]
#[-0.8544, 0.009650, -0.6907, 0.01825]
# n,k,p,reps,repeatMLE=50,5,1.0,30,5
#[0, 50, 5, 1.00000000000000, 30, (-0.46872394191150984, -0.1706522325446755), 4272.575792123848]
#[1, 50, 5, 1.00000000000000, 30, (-0.4624673070275521, -0.15427392762755668), 4272.324189044862]
#[2, 50, 5, 1.00000000000000, 30, (-0.46951488058811636, -0.20172631679701244), 4276.462579144995]
#[3, 50, 5, 1.00000000000000, 30, (-0.492187467507606, -0.1929439971887712), 4263.705922020358]
#[4, 50, 5, 1.00000000000000, 30, (-0.4441155982889501, -0.16004700793846094), 4280.36354017808]
#[-0.4674, 0.01723, -0.1759, 0.02064]
# n,k,p,reps,repeatMLE=100,5,1.0,30,5
#[0, 100, 5, 1.00000000000000, 30, (-0.4707601633123625, -0.17234296564126989), 10635.95799150573]
#[1, 100, 5, 1.00000000000000, 30, (-0.5363716388086307, -0.2676407471091624), 10602.98862147951]
#[2, 100, 5, 1.00000000000000, 30, (-0.5047203692358055, -0.2089861380097876), 10615.295023641434]
#[3, 100, 5, 1.00000000000000, 30, (-0.5037854832989932, -0.25475858261388856), 10635.368084525608]
#[4, 100, 5, 1.00000000000000, 30, (-0.5238938785617278, -0.22761135503827465), 10597.336990398386]
#[-0.5079, 0.02486, -0.2263, 0.03785]
# n,k,p,reps,repeatMLE=50,5,1.0,100,5
# compare to ER [-0.3798, 0.01348, -0.2068, 0.01602] - but n too small for this
#[-0.4667, 0.01843, -0.1787, 0.03319]
# n,k,p,reps,repeatMLE=50,5,0.10,30,5
# [-0.7918, 0.01596, -0.5130, 0.03323] ConnectedSmallWorldFromAnywhere
#--------------------------------
# ConnectedSmallWorldFromMostpopular----------------
# n,k,p,reps,repeatMLE=50,2,0.0,1,5
#[-0.9878, 0.0003034, 1.492, 0.08432] # - too much std error
#[-0.9562, 0.01362, 0.07724, 0.7675] # - too much std error
# n,k,p,reps,repeatMLE=50,2,0.0,30,5
#[-0.9878, 0.0001516, 1.514, 0.01222]
# n,k,p,reps,repeatMLE=50,2,0.10,30,5
#[-0.9621, 0.002718, -0.4277, 0.08598]
# n,k,p,reps,repeatMLE=50,2,0.20,30,5
#[-0.9375, 0.004620, -0.5683, 0.01939]
# n,k,p,reps,repeatMLE=50,2,0.50,30,5
#[-0.8632, 0.008181, -0.6471, 0.03722]
# n,k,p,reps,repeatMLE=50,2,1.0,30,5
#[-0.8239, 0.01428, -0.6669, 0.01321]
# n,k,p,reps,repeatMLE=50,5,0.10,30,5
#[-0.7530, 0.01572, -0.4751, 0.04671]
# end of ConnectedSmallWorldFromMostpopular---------
#
# as p approaches 1 the small-world model approached Erdos-Reyni(n, ERProb)
k=10;
n=100
ERProb=k/(n-1.)
ERProb
%time
# demo of the mle - way too long...
c_1 = lambda p: p[0]+0.9999999 # constraint for alpha > -1
c_2 = lambda p: p[1]+0.9999999 # constraint for beta > -1
# simulation settings
mles=[]
n,k,p,reps,repeatMLE=100,10,1.0,30,5
for i in range(repeatMLE):
ts=ConnectedSmallWorldFromAnywhere(n,k,p,reps)
spc=splitPairsCounts(ts)
def negLkl(AB):
return negLogLkl_SplitPairCounts2(spc,AB[0],AB[1])
mle=minimize_constrained(negLkl,[c_1,c_2],[0.0,0.0],disp=0)
x = [i, n, k, p, reps, mle]
print x
mles.append(x)
y=mean([x[5][0] for x in mles]),std([x[5][0] for x in mles]), mean([x[5][1] for x in mles]),std([x[5][1] for x in mles]); [x.n(digits=4) for x in y]
n,m=15,2;
g=graphs.RandomBarabasiAlbert(n,m)
g.show()
def findMaxDegAndItsVertex(gr):
'''find the maximum degree in a graph and its vertex with smalelst ID'''
maxD=0; maxDv=0;
for vd in gr.degree_iterator(range(n),True):
#print vd
if vd[1] > maxD:
maxD=vd[1]; maxDv=vd[0];
return maxD,maxDv
findMaxDegAndItsVertex(g)
n,m=15,10;
g=graphs.RandomBarabasiAlbert(n,m)
print findMaxDegAndItsVertex(g)
g.show()
def PrefAttachmentFromMostPopular(n,m,reps):
'''return reps many transmission trees from the preferential attachment model with initial infection
from the smallest node with the maximum degree'''
ts=[]
for i in range(reps):
g=graphs.RandomBarabasiAlbert(n,m)
maxDV=findMaxDegAndItsVertex(g)
ts.append(transmissionProcessTC(g.to_directed(),maxDV[1]))
return ts
%time
# demo of the mle - way too long...
c_1 = lambda p: p[0]+0.9999999 # constraint for alpha > -1
c_2 = lambda p: p[1]+0.9999999 # constraint for beta > -1
# simulation settings
mles=[]
n,m,reps,repeatMLE=100,1,30,10
for i in range(repeatMLE):
ts=PrefAttachmentFromMostPopular(n,m,reps)
spc=splitPairsCounts(ts)
def negLkl(AB):
return negLogLkl_SplitPairCounts2(spc,AB[0],AB[1])
mle=minimize_constrained(negLkl,[c_1,c_2],[0.0,0.0],disp=0)
x = [i, n, m, reps, mle]
print x
mles.append(x)
mles
#########
#n,m,reps,repeatMLE=100,2,30,10
#mles=[[0, 100, 2, 30, (-0.22665379663514496, -0.6500365441924358)], [1, 100, 2, 30, (-0.25247638037582687, -0.6740522715490318)], [2, 100, 2, 30, (-0.27092350497911694, -0.6818376619669556)], [3, 100, 2, 30, (-0.20801219786250535, -0.6629331739317452)], [4, 100, 2, 30, (-0.20181548669784008, -0.6735625013240737)], [5, 100, 2, 30, (-0.21620078970637727, -0.6622066332612844)], [6, 100, 2, 30, (-0.2897399652564357, -0.667458689497513)], [7, 100, 2, 30, (-0.26265154465133816, -0.6386910379716887)], [8, 100, 2, 30, (-0.28873448085827796, -0.6603197512893558)], [9, 100, 2, 30, (-0.22625577133555644, -0.6756400458385884)]]
#mle mean and std-err
#[-0.2443, 0.03283, -0.6647, 0.01294]
##########
#n,m,reps,repeatMLE=100,1,30,10
#mles=[[0, 100, 1, 30, (-0.37414180696123656, -0.8290932447900436)], [1, 100, 1, 30, (-0.3236397755375402, -0.8314890535433167)], [2, 100, 1, 30, (-0.2468637183101474, -0.8042617935543253)], [3, 100, 1, 30, (-0.33050561419852764, -0.8159645696149106)], [4, 100, 1, 30, (-0.3070428692968694, -0.8171910942641389)], [5, 100, 1, 30, (-0.3930318773221692, -0.8290483487359996)], [6, 100, 1, 30, (-0.28123221418745825, -0.8123073810368434)], [7, 100, 1, 30, (-0.3861042546882455, -0.8353259786808699)], [8, 100, 1, 30, (-0.35248668135875144, -0.8321393837645847)], [9, 100, 1, 30, (-0.27975485700284897, -0.8080369576128273)]]
#mle mean and std-err
#[-0.3275, 0.04932, -0.8215, 0.01121]
y=mean([x[4][0] for x in mles]),std([x[4][0] for x in mles]), mean([x[4][1] for x in mles]),std([x[4][1] for x in mles]); [x.n(digits=4) for x in y]
%time
# demo of the mle - way too long...
c_1 = lambda p: p[0]+0.9999999 # constraint for alpha > -1
c_2 = lambda p: p[1]+0.9999999 # constraint for beta > -1
# simulation settings
mles=[]
n,m=100,2
reps=30
repeatMLE=10
for i in range(repeatMLE):
ts=[transmissionProcessTC(graphs.RandomBarabasiAlbert(n,m).to_directed(),0) for _ in range(reps)]
spc=splitPairsCounts(ts)
def negLkl(AB):
return negLogLkl_SplitPairCounts2(spc,AB[0],AB[1])
mle=minimize_constrained(negLkl,[c_1,c_2],[0.0,0.0],disp=0)
x = [i, n, m, reps, mle]
print x
mles.append(x)
Drawing some figures
n,m=25,1
H = graphs.RandomBarabasiAlbert(n,m)
H.set_latex_options(
graphic_size=(8,8),
vertex_size=0.1,
edge_thickness=0.04,
edge_color='green',
vertex_color='green',
vertex_label_color='red',
format='tkz_graph',
tkz_style='Art',
#layout='acyclic'
)
view(H)
maxDV=findMaxDegAndItsVertex(H)
print maxDV
T=transmissionProcessTC(H.to_directed(),maxDV[1])
print ascii_art(T)
#latex(H)
#latex(T)