Quetzal-CoaTL
The Coalescence Template Library
Loading...
Searching...
No Matches
SpatialGeneticSample.hpp
1// Copyright 2021 Arnaud Becheler <abechele@umich.edu> Florence Jornod <florence@jornod.com>
2
3/*********************************************************************** * This program is free software; you can
4 *redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free
5 *Software Foundation; either version 2 of the License, or * (at your option) any later version. *
6 * *
7 ************************************************************************/
8
9#ifndef __SPATIALGENETICSAMPLE_H_INCLUDED__
10#define __SPATIALGENETICSAMPLE_H_INCLUDED__
11
12#include <assert.h>
13#include <map>
14#include <numeric> // std::accumulate
15#include <set>
16#include <string>
17#include <vector>
18
19namespace quetzal::format
20{
21namespace genetics
22{
23
24template <typename Space, typename Individual> class SpatialGeneticSample
25{
26
27 public:
28 using coord_type = Space;
29 using individual_type = Individual;
30 using locus_ID_type = typename individual_type::locus_ID_type;
31 using value_type = typename individual_type::allele_type::value_type;
32
33 SpatialGeneticSample(std::map<coord_type, std::vector<individual_type>> const &data)
34 : m_loci(extract_loci(data)), m_locations(localize(data)), m_dictionnary(data)
35 {
36 }
37
38 SpatialGeneticSample() = default;
39
40 void add(coord_type const &x, Individual const &ind)
41 {
42 if (m_loci.empty())
43 {
44 m_loci = ind.loci();
45 }
46 else
47 {
48 assert(ind.loci() == m_loci);
49 }
50 m_dictionnary[x].push_back(ind);
51 m_locations.insert(x);
52 }
53
54 void add(Individual const &ind, coord_type const &x)
55 {
56 add(x, ind);
57 }
58
59 auto size(coord_type const &x) const
60 {
61 assert(m_dictionnary.count(x) > 0);
62 return m_dictionnary.at(x).size();
63 }
64
65 auto size() const
66 {
67 unsigned int size = 0;
68 for (auto const &it : m_dictionnary)
69 size += it.second.size();
70
71 return size;
72 }
73
74 template <typename T> SpatialGeneticSample<coord_type, individual_type> &reproject(T const &projection)
75 {
76
77 std::map<coord_type, std::vector<individual_type>> new_dico;
78 std::set<coord_type> new_sites;
79
80 for (auto const &it : m_dictionnary)
81 {
82 auto x = projection.reproject_to_centroid(it.first);
83 new_dico.insert(std::make_pair(x, it.second));
84 new_sites.insert(x);
85 }
86
87 new_dico.swap(m_dictionnary);
88 new_sites.swap(m_locations);
89 return *this;
90 }
91
92 std::set<coord_type> const &get_sampling_points() const
93 {
94 return m_locations;
95 }
96
97 std::set<typename individual_type::locus_ID_type> const &loci() const
98 {
99 return m_loci;
100 }
101
102 std::vector<individual_type> const &individuals_at(coord_type const &x) const
103 {
104 return m_dictionnary.at(x);
105 }
106
107 // TODO take ploidy into account
108 auto nb_gene_copies_discarding_NA(locus_ID_type const &locus) const
109 {
110 std::map<coord_type, std::map<value_type, unsigned int>> counts;
111 for (auto const &x : get_sampling_points())
112 {
113 for (auto const &individual : individuals_at(x))
114 {
115 auto alleles = individual.alleles(locus);
116
117 if (alleles.first.get_allelic_state() > 0)
118 {
119 counts[x][alleles.first.get_allelic_state()] += 1;
120 }
121
122 if (alleles.second.get_allelic_state() > 0)
123 {
124 counts[x][alleles.second.get_allelic_state()] += 1;
125 }
126 }
127 }
128 return counts;
129 }
130
131 unsigned int allelic_richness(locus_ID_type const &locus) const
132 {
133 std::set<value_type> set;
134 auto freq = frequencies_discarding_NA(locus);
135 for (auto const &it1 : freq)
136 {
137 for (auto const &it2 : it1.second)
138 {
139 set.insert(it2.first);
140 }
141 }
142 return set.size();
143 }
144
145 auto frequencies_discarding_NA(locus_ID_type const &locus) const
146 {
147
148 std::map<coord_type, std::map<value_type, double>> freq;
149
150 auto counts = nb_gene_copies_discarding_NA(locus);
151
152 auto get = [](auto const &a, auto const &b) { return b.second; };
153
154 for (auto const &it1 : counts)
155 {
156 double sum = std::accumulate(it1.second.begin(), it1.second.end(), 0.0, get);
157 assert(sum > 0.0);
158
159 for (auto const &it2 : it1.second)
160 {
161 freq[it1.first][it2.first] = static_cast<double>(it2.second) / sum;
162 }
163 }
164 return freq;
165 }
166
167 // TODO UPDATE
168 friend std::ostream &operator<<(std::ostream &os, const SpatialGeneticSample &dt)
169 {
170 for (auto const &it : dt.m_dictionnary)
171 {
172 os << it.first << "\t" << it.second.size() << "\n";
173 }
174 return os;
175 }
176
177 private:
178 std::set<typename individual_type::locus_ID_type> m_loci;
179 std::set<coord_type> m_locations;
180 std::map<coord_type, std::vector<individual_type>> m_dictionnary;
181
182 template <typename T> std::set<coord_type> localize(T const &data) const
183 {
184 std::set<coord_type> locations;
185 for (auto const &it : data)
186 {
187 locations.insert(it.first);
188 }
189 return (locations);
190 }
191
192 std::set<typename individual_type::locus_ID_type> extract_loci(
193 std::map<coord_type, std::vector<individual_type>> const &data) const
194 {
195 std::set<typename individual_type::locus_ID_type> loci = data.cbegin()->second.front().loci();
196 assert(!loci.empty());
197 for (auto const &it1 : data)
198 {
199 assert(!it1.second.empty());
200 for (auto const &it2 : it1.second)
201 {
202 assert(it2.loci() == loci);
203 }
204 }
205 return loci;
206 }
207};
208} // namespace genetics
209} // namespace quetzal::format
210
211#endif
Definition SpatialGeneticSample.hpp:25
Parsers and generators for input/output.
Definition io.hpp:23