Quetzal-CoaTL
The Coalescence Template Library
Loading...
Searching...
No Matches
distance_to_parent.hpp
1// Copyright 2020 Arnaud Becheler <abechele@umich.edu>
2
3/*********************************************************************** * This program is free software; you can
4 *redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free
5 *Software Foundation; either version 2 of the License, or * (at your option) any later version. *
6 * *
7 ***************************************************************************/
8
9#ifndef __DISTANCE_TO_PARENT_H_INCLUDED__
10#define __DISTANCE_TO_PARENT_H_INCLUDED__
11
12#include "../../simulator/DiscreteTimeWrightFisher.hpp"
13#include "../container/Forest.hpp"
14#include "../container/Tree.hpp"
15
16#include <map>
17
18namespace quetzal
19{
20namespace coalescence
21{
33template <typename Space, typename Time> class newick_with_distance_to_parent
34{
35 private:
36 unsigned int m_ancestral_Wright_Fisher_N;
37
38 public:
42 using coord_type = Space;
46 using time_type = Time;
51 {
52 private:
53 time_type m_t;
54 unsigned int m_distance_to_parent = 0;
55
56 public:
62 cell_type(time_type t) : m_t(t)
63 {
64 }
68 cell_type() = default;
73 {
74 return m_t;
75 }
79 void time(time_type const &t)
80 {
81 m_t = t;
82 }
86 unsigned int distance_to_parent() const
87 {
88 return m_distance_to_parent;
89 }
93 void distance_to_parent(unsigned int l)
94 {
95 m_distance_to_parent = l;
96 }
97 }; // end inner class cell_type
111 {
112 auto operator()(tree_type &node) const
113 {
114 if (node.has_parent())
115 {
116 int d = node.cell().time() - node.parent().cell().time();
117 int l = std::abs(d);
118 node.cell().distance_to_parent(l);
119 }
120 else
121 {
122 node.cell().distance_to_parent(0);
123 }
124 }
125 }; // end struct treatment
135 static auto make_forest(std::map<coord_type, unsigned int> const &sample_counts, time_type const &sampling_time)
136 {
137 forest_type forest;
138 for (auto const &it : sample_counts)
139 {
140 forest.insert(it.first, std::vector<tree_type>(it.second, tree_type(sampling_time)));
141 }
142 return forest;
143 }
150 void ancestral_Wright_Fisher_N(unsigned int value)
151 {
152 m_ancestral_Wright_Fisher_N = value;
153 }
160 unsigned int ancestral_Wright_Fisher_N() const
161 {
162 return m_ancestral_Wright_Fisher_N;
163 }
169 static auto branch()
170 {
171 return [](auto &parent, auto const &child) { return parent.add_child(child); };
172 }
178 static auto init()
179 {
180 return [](coord_type, time_type const &t) { return tree_type(t); };
181 }
195 template <typename Generator>
196 tree_type find_mrca(forest_type const &forest, time_type const &first_time, Generator &gen) const
197 {
198 time_type t_curr = first_time;
199 auto WF_init = [&t_curr](time_type const &t) {
200 t_curr -= t;
201 return tree_type(t_curr);
202 };
204 auto tree = WF_model::coalesce(forest, m_ancestral_Wright_Fisher_N, gen, branch(), WF_init);
205 treatment computer;
206 tree.visit_subtrees_by_pre_order_DFS(computer);
207 return tree;
208 }
216 static std::string treat(tree_type &tree)
217 {
218 std::string newick;
219 auto preorder = [&newick](auto const &node) {
220 if (node.has_children())
221 {
222 newick += "(";
223 }
224 };
225 auto inorder = [&newick](auto const &) { newick += ","; };
226 auto postorder = [&newick](auto const &node) {
227 if (node.has_children())
228 {
229 newick.pop_back();
230 newick += ")";
231 }
232 if (node.has_parent())
233 {
234 newick += ":";
235 newick += std::to_string(node.cell().distance_to_parent());
236 }
237 };
238 tree.visit_subtrees_by_generic_DFS(preorder, inorder, postorder);
239 newick.push_back(';');
240 return newick;
241 }
242};
243/*
244 * @brief Policy class for coalescing gene copies into a Newick formula.
245 *
246 * Policy class for coalescing gene copies into a Newick formula, computing
247 * distances (in generations) to parent nodes and naming each tips according to a specified functor.
248 *
249 * @tparam Space coordinate type
250 * @tparam Time time type
251 *
252 * @ingroup API
253 */
254template <typename Space, typename Time> class newick_with_distance_to_parent_and_leaf_name
255{
256 private:
257 unsigned int m_ancestral_Wright_Fisher_N;
258
259 public:
260 /*
261 * @brief Set the size of the putative ancestral Wright-Fisher population preceding
262 * the spatial history.
263 *
264 * @param value The Wright-Fisher population size
265 */
266 void ancestral_Wright_Fisher_N(unsigned int value)
267 {
268 m_ancestral_Wright_Fisher_N = value;
269 }
271 using coord_type = Space;
273 using time_type = Time;
276 {
277 private:
278 std::string m_name;
279 time_type m_t;
280 unsigned int m_distance_to_parent = 0;
281
282 public:
288 cell_type(std::string const &name, time_type const &t) : m_name(name), m_t(t)
289 {
290 }
295 cell_type(time_type const &t) : m_name(""), m_t(t)
296 {
297 }
301 cell_type() = default;
306 {
307 return m_t;
308 }
312 void time(time_type const &t)
313 {
314 m_t = t;
315 }
319 std::string name() const
320 {
321 return m_name;
322 }
326 void name(std::string const &name)
327 {
328 m_name = name;
329 }
333 unsigned int distance_to_parent() const
334 {
335 return m_distance_to_parent;
336 }
340 void distance_to_parent(unsigned int l)
341 {
342 m_distance_to_parent = l;
343 }
344 };
349 /*
350 * @brief Treatment to operate on a DFS on the tree to compute branches length.
351 */
353 {
354 auto operator()(tree_type &node) const
355 {
356 if (node.has_parent())
357 {
358 int d = node.cell().time() - node.parent().cell().time();
359 int l = std::abs(d);
360 node.cell().distance_to_parent(l);
361 }
362 else
363 {
364 node.cell().distance_to_parent(0);
365 }
366 }
367 };
378 template <typename F>
379 static auto make_forest(std::map<coord_type, unsigned int> const &sample_counts, time_type const &sampling_time,
380 F get_name)
381 {
382 forest_type forest;
383 for (auto const &it : sample_counts)
384 {
385 cell_type c(get_name(it.first, sampling_time), sampling_time);
386 forest.insert(it.first, std::vector<tree_type>(it.second, tree_type(c)));
387 }
388 return forest;
389 }
390 /*
391 * @brief Create a forest from a vector of individuals, extracting their location, name and sampling time.
392 *
393 * @tparam T type of an individual
394 * @tparam F1 a unary operation with signature equivalent to 'coord_type fun(const T& i)'
395 * @tparam F2 a unary operation with signature equivalent to 'std::string fun(const T& i)'
396 *
397 * @param sample a collection of individuals (gene copies)
398 * @param sampling_time the time of sampling
399 * @param get_location a unary operation taking a sampled individual of type T as an argument and returning its
400 * geographic location
401 * @param get_name a functor taking a sampled individual of type T as an argument and returning a std::string
402 */
403 template <typename T, typename F1, typename F2>
404 static auto make_forest(std::vector<T> sample, time_type const &sampling_time, F1 get_location, F2 get_name)
405 {
406 forest_type forest;
407 for (auto const &it : sample)
408 {
409 cell_type c(get_name(it, sampling_time), sampling_time);
410 forest.insert(get_location(it, sampling_time), tree_type(c));
411 }
412 return forest;
413 }
418 static auto branch()
419 {
420 return [](auto &parent, auto const &child) { return parent.add_child(child); };
421 }
426 static auto init()
427 {
428 return [](coord_type, time_type const &t) { return tree_type(t); };
429 }
438 template <typename Generator>
439 tree_type find_mrca(forest_type const &forest, time_type const &first_time, Generator &gen) const
440 {
441 time_type t_curr = first_time;
442 auto WF_init = [&t_curr](time_type const &t) {
443 t_curr -= t;
444 return tree_type(t_curr);
445 };
447 auto tree = WF_model::coalesce(forest, m_ancestral_Wright_Fisher_N, gen, branch(), WF_init);
448 treatment computer;
449 tree.visit_subtrees_by_pre_order_DFS(computer);
450 return tree;
451 }
457 std::string treat(tree_type &tree) const
458 {
459 std::string newick;
460 auto preorder = [&newick](auto const &node) {
461 if (node.has_children())
462 {
463 newick += "(";
464 }
465 };
466 auto inorder = [&newick](auto const &) { newick += ","; };
467 auto postorder = [&newick](auto const &node) {
468 if (node.has_children())
469 {
470 newick.pop_back();
471 newick += ")";
472 }
473 if (node.has_parent())
474 {
475 newick += node.cell().name();
476 newick += ":";
477 newick += std::to_string(node.cell().distance_to_parent());
478 }
479 };
480 tree.visit_subtrees_by_generic_DFS(preorder, inorder, postorder);
481 newick.push_back(';');
482 return newick;
483 }
484};
485} // end namespace coalescence
486} // end namespace quetzal
487
488#endif
Collection of geo-localized coalescing trees.
Definition Forest.hpp:32
iterator insert(Position const &position, Tree const &tree)
insert a new tree at a given position
Definition Forest.hpp:177
Data structure for representing tree topologies where each node contains a data field.
Definition Tree.hpp:29
const CellT & cell() const
Cell accessor.
Definition Tree.hpp:286
bool has_parent() const
Checks whether the tree has a parent.
Definition Tree.hpp:140
time_type time() const
Get time of node creation.
Definition distance_to_parent.hpp:72
void time(time_type const &t)
Set time of node creation.
Definition distance_to_parent.hpp:79
void distance_to_parent(unsigned int l)
Set distance to parent node, in number of generations.
Definition distance_to_parent.hpp:93
cell_type(time_type t)
Constructor.
Definition distance_to_parent.hpp:62
unsigned int distance_to_parent() const
Get distance to parent node, in number of generations.
Definition distance_to_parent.hpp:86
Inner class defining what data type the tree stores.
Definition distance_to_parent.hpp:276
time_type time() const
Get time of node creation.
Definition distance_to_parent.hpp:305
void name(std::string const &name)
Set name of the node.
Definition distance_to_parent.hpp:326
void time(time_type const &t)
Set time of node creation.
Definition distance_to_parent.hpp:312
cell_type(time_type const &t)
Constructor for inner nodes (no name for them!)
Definition distance_to_parent.hpp:295
void distance_to_parent(unsigned int l)
Set distance to parent node, in number of generations.
Definition distance_to_parent.hpp:340
cell_type(std::string const &name, time_type const &t)
Constructor for tip nodes (leaves)
Definition distance_to_parent.hpp:288
std::string name() const
Get name of the node.
Definition distance_to_parent.hpp:319
unsigned int distance_to_parent() const
Get distance to parent node, in number of generations.
Definition distance_to_parent.hpp:333
static auto make_forest(std::map< coord_type, unsigned int > const &sample_counts, time_type const &sampling_time, F get_name)
Make a forest respecting the policy from a geographic sample.
Definition distance_to_parent.hpp:379
tree_type find_mrca(forest_type const &forest, time_type const &first_time, Generator &gen) const
Coalesce the given forest into a discrete time Wright-Fisher model.
Definition distance_to_parent.hpp:439
std::string treat(tree_type &tree) const
Visit the whole tree and builds its Newick formula.
Definition distance_to_parent.hpp:457
quetzal::coalescence::container::Tree< cell_type > tree_type
The type used to represent a genealogy.
Definition distance_to_parent.hpp:346
Space coord_type
Geographic coordinate type.
Definition distance_to_parent.hpp:271
static auto init()
Get the functor interface required to initialize a node.
Definition distance_to_parent.hpp:426
quetzal::coalescence::container::Forest< coord_type, tree_type > forest_type
The type used to represent a spatial forest of genealogies.
Definition distance_to_parent.hpp:348
static auto branch()
Get the functor interface required to branch a parent node to a child node.
Definition distance_to_parent.hpp:418
Time time_type
Time type.
Definition distance_to_parent.hpp:273
Policy class for coalescing gene copies into a Newick formula.
Definition distance_to_parent.hpp:34
Time time_type
Definition distance_to_parent.hpp:46
static auto branch()
Get the functor interface required to branch a parent node to a child node.
Definition distance_to_parent.hpp:169
tree_type find_mrca(forest_type const &forest, time_type const &first_time, Generator &gen) const
Coalesce the given forest into a discrete time Wright-Fisher model.
Definition distance_to_parent.hpp:196
static auto init()
Get the functor interface required to initialize a node.
Definition distance_to_parent.hpp:178
unsigned int ancestral_Wright_Fisher_N() const
Get the size of the putative ancestral Wright-Fisher population preceding the spatial history.
Definition distance_to_parent.hpp:160
static auto make_forest(std::map< coord_type, unsigned int > const &sample_counts, time_type const &sampling_time)
Make a forest respecting the policy from a geographic sample.
Definition distance_to_parent.hpp:135
quetzal::coalescence::container::Tree< cell_type > tree_type
Definition distance_to_parent.hpp:102
Space coord_type
Definition distance_to_parent.hpp:42
void ancestral_Wright_Fisher_N(unsigned int value)
Set the size of the putative ancestral Wright-Fisher population preceding the spatial history.
Definition distance_to_parent.hpp:150
static std::string treat(tree_type &tree)
Visit the whole tree and builds its Newick formula.
Definition distance_to_parent.hpp:216
Discrete time simulation in a Wright-Fisher Population.
Definition DiscreteTimeWrightFisher.hpp:33
Generic components for parsing or generating Newick strings.
Definition io.hpp:28
Simulation of coalescence-based models of molecular evolution.
Definition coalescence.hpp:21
Treatment to operate on a DFS on the tree to compute branches length.
Definition distance_to_parent.hpp:111