Quetzal-CoaTL
The Coalescence Template Library
Loading...
Searching...
No Matches
PopulationSizeOnDiskImplementation.hpp
1// Copyright 2021 Arnaud Becheler <abechele@umich.edu>
2
3/*********************************************************************** * This program is free software; you can
4 *redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free
5 *Software Foundation; either version 2 of the License, or * (at your option) any later version. *
6 * *
7 ***************************************************************************/
8
9#ifndef __POPULATIONS_ON_DISK_H_INCLUDED__
10#define __POPULATIONS_ON_DISK_H_INCLUDED__
11
12#include <map>
13#include <sstream>
14
15#include <boost/serialization/serialization.hpp>
16#include <boost/serialization/unordered_map.hpp>
17
18#include <boost/archive/binary_iarchive.hpp>
19#include <boost/archive/binary_oarchive.hpp>
20
21#include "assert.h"
22#include <fstream>
23#include <iostream>
24#include <unordered_map>
25#include <unordered_set>
26#include <vector>
27
28namespace quetzal
29{
30namespace demography
31{
40template <typename Space, typename Time, typename Value> class PopulationSizeOnDiskImplementation
41{
42 private:
43 // Data in this windows are in RAM and allow for read and write access
44 mutable std::pair<Time, Time> m_RAM_window = {0, 1};
45 // only stores 2 time keys at the time, other are serialized
46 mutable std::unordered_map<Time, std::unordered_map<Space, Value>> m_populations;
47
48 public:
50 using time_type = Time;
52 using coord_type = Space;
54 using value_type = Value;
62 void set(coord_type const &x, time_type const &t, value_type N)
63 {
64 assert(N >= 0);
65 slide_RAM_window_to(t);
66 m_populations[t][x] = N;
67 }
73 value_type &operator()(coord_type const &x, time_type const &t)
74 {
75 slide_RAM_window_to(t);
76 return m_populations[t][x];
77 }
81 value_type get(coord_type const &x, time_type const &t) const
82 {
83 slide_RAM_window_to(t);
84 assert(is_defined(x, t));
85 return m_populations.at(t).at(x);
86 }
91 value_type operator()(coord_type const &x, time_type const &t) const
92 {
93 return get(x, t);
94 }
95
99 std::vector<coord_type> definition_space(time_type const &t) const
100 {
101 slide_RAM_window_to(t);
102 std::vector<coord_type> v;
103 for (auto const &it : m_populations.at(t))
104 {
105 if (it.second > 0)
106 {
107 v.push_back(it.first);
108 }
109 }
110 return v;
111 }
112
117 bool is_defined(coord_type const &x, time_type const &t) const
118 {
119 return (!m_populations.empty()) && (m_populations.find(t) != m_populations.end()) &&
120 (m_populations.at(t).find(x) != m_populations.at(t).end());
121 }
122
123 private:
124 std::string get_archive_name(time_type t) const
125 {
126 const std::string prefix = "N-";
127 const std::string extension = ".archive";
128 return prefix + std::to_string(t) + extension;
129 }
130
131 void serialize_layer(time_type t) const
132 {
133 const std::string filename = get_archive_name(t);
134 // create and open a binary archive for output
135 std::ofstream ofs(filename);
136 // save data to archive
137 boost::archive::binary_oarchive oa(ofs);
138 // write class instance to archive
139 oa << m_populations.at(t);
140 // archive and stream closed when destructors are called, clear map
141 m_populations.erase(t);
142 }
143
144 void deserialize_layer(time_type t) const
145 {
146 std::string filename = get_archive_name(t);
147 // create and open an archive for input
148 std::ifstream ifs(filename, std::ios::binary);
149 boost::archive::binary_iarchive ia(ifs);
150 // read class state from archive
151 std::unordered_map<Space, Value> layer;
152 ia >> layer;
153 // archive and stream closed when destructors are called
154 m_populations[t] = layer;
155 }
156
157 void slide_RAM_window_to(time_type t) const
158 {
159 // window is well positioned
160 if (t == m_RAM_window.first || t == m_RAM_window.second)
161 {
162 // do nothing, just read the data
163 return;
164 }
165 // windows has to go forward
166 else if (t == m_RAM_window.second + 1)
167 {
168 // we know for sure we can serialize this
169 serialize_layer(m_RAM_window.first);
170 // slide the window
171 m_RAM_window.first += 1;
172 m_RAM_window.second += 1;
173 // we don't know if we are in forward history simulation or forward reading
174 try
175 {
176 deserialize_layer(m_RAM_window.second);
177 }
178 catch (const std::exception &e)
179 {
180 // std::cout << "layer did not exist on disk, I assume it's forward simulation time" << std::endl;
181 }
182 }
183 // windows has to go backward
184 else if (t == m_RAM_window.first - 1)
185 {
186 // we know for sure we can serialize this
187 serialize_layer(m_RAM_window.second);
188 // we know for sure we should be able to deserialize this
189 deserialize_layer(m_RAM_window.first - 1);
190 // slide back the window
191 m_RAM_window.first -= 1;
192 m_RAM_window.second -= 1;
193 }
194 // current windows is totally not aligned: assume it's (for now) only random reading access
195 else
196 {
197 // erase both layers from RAM
198 serialize_layer(m_RAM_window.first);
199 serialize_layer(m_RAM_window.second);
200 // slide the windows
201 if (t == 0)
202 {
203 m_RAM_window.first = 0;
204 m_RAM_window.second = 1;
205 }
206 else
207 {
208 m_RAM_window.first = t - 1;
209 m_RAM_window.second = t;
210 }
211 // read both layers from disk
212 deserialize_layer(m_RAM_window.first);
213 deserialize_layer(m_RAM_window.second);
214 }
215 }
216}; // PopulationSizeOnDiskImplementation
217} // namespace demography
218} // namespace quetzal
219
220#endif
Population size distribution in time and space, serialized in a sliding windows to limit RAM usage.
Definition PopulationSizeOnDiskImplementation.hpp:41
value_type & operator()(coord_type const &x, time_type const &t)
Population size at deme x at time t.
Definition PopulationSizeOnDiskImplementation.hpp:73
PopulationSizeOnDiskImplementation()=default
Default constructor.
value_type operator()(coord_type const &x, time_type const &t) const
Get population size at deme x at time t.
Definition PopulationSizeOnDiskImplementation.hpp:91
std::vector< coord_type > definition_space(time_type const &t) const
Return the demes at which the population size was defined (>0) at time t.
Definition PopulationSizeOnDiskImplementation.hpp:99
bool is_defined(coord_type const &x, time_type const &t) const
Checks if population size is defined at deme x at time t.
Definition PopulationSizeOnDiskImplementation.hpp:117
void set(coord_type const &x, time_type const &t, value_type N)
Set population size value at deme x at time t.
Definition PopulationSizeOnDiskImplementation.hpp:62
value_type get(coord_type const &x, time_type const &t) const
Get population size value at deme x at time t.
Definition PopulationSizeOnDiskImplementation.hpp:81
Simulation of coalescence-based models of molecular evolution.
Definition coalescence.hpp:21