Quetzal-CoaTL
The Coalescence Template Library
Loading...
Searching...
No Matches
DiscreteTimeWrightFisher.hpp
1// Copyright 2021 Arnaud Becheler <abechele@umich.edu>
2
3/*********************************************************************** * This program is free software; you can
4 *redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free
5 *Software Foundation; either version 2 of the License, or * (at your option) any later version. *
6 * *
7 ***************************************************************************/
8
9#ifndef __DISCRETE_WRIGHT_FISHER_H_INCLUDED__
10#define __DISCRETE_WRIGHT_FISHER_H_INCLUDED__
11
12#include "../coalescence/container/Forest.hpp"
13#include "../coalescence/merger_policy.hpp"
14#include "../coalescence/occupancy_spectrum/sampling_policy.hpp"
15
16#include <boost/math/special_functions/binomial.hpp>
17
18namespace quetzal::simulator
19{
20
33{
34
35 public:
37 template <typename Space, typename Tree> using forest_type = quetzal::coalescence::container::Forest<Space, Tree>;
38
39 /* @brief Coalesce a spatial forest of trees in a Wrigh-Fisher population until MRCA has been found.
40 *
41 * @tparam Space the coordinate type
42 * @tparam Tree the tree type
43 * @tparam Generator a random number Generator
44 * @tparam Binop a BranchingOperation functor for coalescence
45 * @tparam TimeFun an intialization functor for coalescence
46 *
47 * @param forest the spatial distribution of lineages to coalesce
48 * @param N number of gene copies in the Wright-Fisher population
49 * @param gen a random number Generator
50 * @param branch a branching binary operation (functor usually given by the selected coalescence policy).
51 * @param make_tree a initialization functor (functor usually given by the selected coalescence policy).
52 *
53 * @detail Coalesce lineages by sampling the waiting times directly in their distribution.
54 *
55 * @return One single tree.
56 */
57 template <typename Space, typename Tree, typename Generator, typename Binop, typename TimeFun>
58 static Tree coalesce(forest_type<Space, Tree> const &forest, unsigned int N, Generator &gen, Binop branch,
59 TimeFun make_tree)
60 {
61
62 if (forest.nb_trees() == 0)
63 {
64
65 throw std::logic_error("The forest to coalesce is empty.");
66 }
67 else if (forest.nb_trees() == 1)
68 {
69
70 auto range = forest.trees_at_same_position(*(forest.positions().begin()));
71 assert(std::distance(range.first, range.second) == 1);
72 return range.first->second;
73 }
74 assert(forest.nb_trees() >= 2 && "Trying to coalesce less than 2 nodes.");
75 auto trees = forest.get_all_trees();
76 return coalesce(trees, N, gen, branch, make_tree);
77 }
78
79 /* @brief Simulates the number of generations to wait before the next coalescence event
80 * @tparam Generator a random number Generator
81 * @param k number of lineages
82 * @param N number of gene copies in the Wright-Fisher population
83 * @param gen a random number Generator
84 * @detail Samples the number of generations to wait before the next coalescence
85 * event in a geometric distribution of parameter {k \choose 2} / N
86 * (see Molecular evolution, a statistical approach, Z. Yang, 9.2 p 313).
87 * @return the number of generations to wait before the next coalescence event.
88 */
89 template <typename Generator>
90 static unsigned int sample_waiting_time(unsigned int k, unsigned int N, Generator &gen)
91 {
92 assert(N > k);
93 double mean = static_cast<double>(N) / boost::math::binomial_coefficient<double>(k, 2);
94 double p = 1 / (1 + mean);
95 assert(0 < p && p <= 1);
96 std::geometric_distribution dist(p);
97 return dist(gen);
98 }
99 /*
100 * @brief Coalesce a non-spatial forest of trees in a Wrigh-Fisher population until MRCA has been found.
101 *
102 * @tparam Tree the tree type
103 * @tparam Generator a random number Generator
104 * @tparam Binop a BranchingOperation functor for coalescence
105 * @tparam TimeFun an intialization functor for coalescence
106 *
107 * @param trees the set of lineages to coalesce
108 * @param N number of gene copies in the Wright-Fisher population
109 * @param gen a random number Generator
110 * @param branch a branching binary operation (functor usually given by the selected coalescence policy).
111 * @param make_tree a initialization functor (functor usually given by the selected coalescence policy).
112 *
113 * @detail Coalesce lineages by sampling the waiting times directly in their
114 * distribution. The algorithm will perform one generation of simultaneous
115 * multiple merges if the number of lineages to coalesce k is superior to the
116 * population size N.
117 *
118 * @return One single tree.
119 */
120 template <typename Tree, typename Generator, typename Binop, typename TimeFun>
121 static Tree coalesce(std::vector<Tree> &trees, unsigned int N, Generator &gen, Binop branch, TimeFun make_tree)
122 {
123 assert(trees.size() >= 2 && "Trying to coalesce less than 2 nodes.");
124 if (trees.size() > N)
125 {
126 // Wright Fisher assumptions are clearly violated
127 // Let's do one generation of simultaneous multiple merges to fix it.
128 unsigned int g = 1;
131 auto last = smm_type::merge(trees.begin(), trees.end(), N, make_tree(g), branch, gen);
132 trees.erase(last, trees.end());
133 }
134 unsigned int k = trees.size();
135 assert(k <= N && "number of lineages greater than ancestral Wright-Fisher population size.");
136 auto last = trees.end();
137 while (k > 1)
138 {
139 unsigned int g = sample_waiting_time(k, N, gen);
140 last = quetzal::coalescence::algorithm::binary_merge(trees.begin(), last, make_tree(g), branch, gen);
141 --k;
142 }
143 return *(trees.begin());
144 }
145 /*
146 * @brief Coalesce a non-spatial forest of trees in a single Wrigh-Fisher population
147 * during a finite number of generations.
148 *
149 * @tparam Tree the tree type
150 * @tparam Generator a random number Generator
151 * @tparam Binop a BranchingOperation functor for coalescence
152 * @tparam TimeFun an intialization functor for coalescence
153 *
154 * @param trees the set of lineages to coalesce
155 * @param N number of gene copies in the Wright-Fisher population
156 * @param gen a random number Generator
157 * g the number of generations that lasts the coalescence process
158 * @param branch a branching binary operation (functor usually given by the selected coalescence policy).
159 * @param make_tree a initialization functor (functor usually given by the selected coalescence policy).
160 *
161 * @detail Coalesce lineages by sampling the waiting times directly in their
162 * distribution. The algorithm will perform one generation of simultaneous
163 * multiple merges if the number of lineages to coalesce k is superior to the
164 * population size N.
165 *
166 * @return A set of trees, possibly of length 1 if MRCA was found during the g generations.
167 */
168 template <typename MergerType = coalescence::merger_policy::SimultaneousMultipleMerger<
170 typename Tree, typename Generator, typename Binop, typename TimeFun>
171 static std::vector<Tree> coalesce(std::vector<Tree> &trees, unsigned int N, unsigned int g, Generator &gen,
172 Binop branch, TimeFun make_tree)
173 {
174 assert(trees.size() >= 2 && "Trying to coalesce less than 2 nodes.");
175 assert(g > 0 && "Number of generations for the coalescence process should be at least 1");
176 assert(N >= 1 && "Population size should be positive for evaluating coalescence probability");
177 using merger_type = MergerType;
178 unsigned int t = 0;
179 auto last = trees.end();
180 while (t < g && std::distance(trees.begin(), last) > 1)
181 {
182 last = merger_type::merge(trees.begin(), last, N, make_tree(t), branch, gen);
183 ++t;
184 }
185 trees.erase(last, trees.end());
186 return trees;
187 }
188}; // End DiscreteTimeWrightFisher
189
190} // namespace quetzal::simulator
191
192#endif
Collection of geo-localized coalescing trees.
Definition Forest.hpp:32
std::vector< Tree > get_all_trees() const
copies all trees in the forest
Definition Forest.hpp:255
std::set< Position > positions() const
positions in the forest
Definition Forest.hpp:241
unsigned int nb_trees() const
number of trees in the forest
Definition Forest.hpp:138
std::pair< const_iterator, const_iterator > trees_at_same_position(const Position &position) const
non-modifying access to trees in the forest at a given position
Definition Forest.hpp:155
Discrete time simulation in a Wright-Fisher Population.
Definition DiscreteTimeWrightFisher.hpp:33
BidirectionalIterator binary_merge(BidirectionalIterator first, BidirectionalIterator last, T init, BinaryOperation op, Generator &g)
merges 2 randomly selected elements in a range.
Definition merge.hpp:50
Standard simulators of genetic diversity.
Definition simulator.hpp:16
Discrete generation simultaneous multiple merger.
Definition merger_policy.hpp:123
Sample occupancy spectrums by simulating a ball to urn assignment random experiment.
Definition sampling_policy.hpp:57