Quetzal-CoaTL
The Coalescence Template Library
Loading...
Searching...
No Matches
FuzzyPartition.hpp
1// Copyright 2021 Arnaud Becheler <abechele@umich.edu>
2
3/*********************************************************************** * This program is free software; you can
4 *redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free
5 *Software Foundation; either version 2 of the License, or * (at your option) any later version. *
6 * *
7 ***************************************************************************/
8
9#ifndef FUZZY_PARTITION_H
10#define FUZZY_PARTITION_H
11
12#include "assert.h"
13#include <map>
14#include <numeric> // accumulate
15#include <set>
16
17#include "zip_range.hpp"
18#include <boost/numeric/ublas/io.hpp>
19#include <boost/numeric/ublas/matrix.hpp>
20#include <boost/numeric/ublas/matrix_proxy.hpp>
21#include <boost/numeric/ublas/vector.hpp>
22
23#include "RestrictedGrowthString.hpp"
24#include "matrix.hpp"
25#include "munkres.hpp"
26
27template <typename T>
28bool operator==(const boost::numeric::ublas::matrix<T> &m, const boost::numeric::ublas::matrix<T> &n);
29
31{
32template <typename E> class FuzzyPartition;
33
34template <typename E> std::ostream &operator<<(std::ostream &os, const FuzzyPartition<E> &fuzzy_partition);
35
37
52template <typename Element> class FuzzyPartition
53{
54 using matrix_type = boost::numeric::ublas::matrix<double>;
55 using vector_type = boost::numeric::ublas::vector<double>;
56
57 public:
58 using element_type = Element;
59 using cluster_type = unsigned int;
60
66 FuzzyPartition(std::map<element_type, std::vector<double>> const &coefficients)
67 : m_elements(retrieve_elements(coefficients)), m_clusters(retrieve_clusters(coefficients)),
68 m_membership_coefficients(build_coefficients_matrix(coefficients))
69 {
70 assert(m_membership_coefficients.size2() == m_elements.size());
71 assert(m_membership_coefficients.size1() == coefficients.cbegin()->second.size());
72 check_coefficients_validty();
73 }
74
76 const std::set<element_type> &elements() const
77 {
78 return m_elements;
79 }
80
82 const std::set<cluster_type> &clusters() const
83 {
84 return m_clusters;
85 }
86
88 size_t nElements() const
89 {
90 return m_elements.size();
91 }
92
94 size_t nClusters() const
95 {
96 return m_clusters.size();
97 }
98
102 double fuzzy_transfer_distance(FuzzyPartition const &other) const
103 {
104 assert(this->nElements() == other.nElements());
105 auto costs = this->weighted_bipartite_graph(other);
106 auto best_assignment = hungarian_algorithm_resolution(costs);
107 return (compute_solution_cost(best_assignment, costs));
108 }
109
112 {
113 assert(rgs.size() == this->nClusters());
114 matrix_type new_coefficients(rgs.nBlocks(), this->nElements(), 0);
116 for (unsigned int i = 0; i < m_membership_coefficients.size1(); ++i)
117 {
118 for (unsigned int j = 0; j < m_membership_coefficients.size2(); ++j)
119 {
120 new_coefficients(rgs.at(i), j) += m_membership_coefficients(i, j);
121 }
122 }
123 m_membership_coefficients.swap(new_coefficients);
124 std::set<cluster_type> c;
125 for (unsigned int i = 0; i < rgs.nBlocks(); ++i)
126 {
127 c.insert(i);
128 }
129 m_clusters = c;
130 check_coefficients_validty();
131 return *this;
132 }
133
137 {
138 bool is_equal = this->m_elements == other.m_elements &&
139 this->m_membership_coefficients == other.m_membership_coefficients &&
140 this->m_clusters == other.m_clusters;
141
142 return is_equal;
143 }
144
145 private:
146 std::set<element_type> m_elements;
147 std::set<cluster_type> m_clusters;
148 matrix_type m_membership_coefficients;
149
150 friend std::ostream &operator<< <>(std::ostream &, FuzzyPartition<element_type> const &);
151
152 Matrix<double> weighted_bipartite_graph(FuzzyPartition const &other) const
153 {
154 using namespace boost::numeric::ublas;
155 Matrix<double> costs(this->nClusters(), other.nClusters());
156
157 for (unsigned int i = 0; i < this->nClusters(); ++i)
158 {
159 matrix_row<const matrix<double>> v(m_membership_coefficients, i);
160
161 for (unsigned int j = 0; j < other.nClusters(); ++j)
162 {
163 matrix_row<const matrix<double>> w(other.m_membership_coefficients, j);
164 costs(i, j) = compute_cost(v, w);
165 }
166 }
167 return costs;
168 }
169
170 template <typename V, typename W> double compute_cost(V const &v, W const &w) const
171 {
172 assert(v.size() == w.size());
173
174 std::vector<double> distances;
175
176 for (auto &&t : zip_range(v, w))
177 {
178 distances.push_back(std::abs(boost::get<0>(t) - boost::get<1>(t)));
179 }
180
181 return std::accumulate(distances.begin(), distances.end(), 0.0);
182 }
183
184 template <typename T> Matrix<T> hungarian_algorithm_resolution(Matrix<T> costs) const
185 {
186 Munkres<T> solver;
187 solver.solve(costs);
188 return (costs);
189 }
190
191 template <typename T> double compute_solution_cost(Matrix<T> const &solution, Matrix<T> const &costs) const
192 {
193 double total_cost = 0.0;
194 for (unsigned int i = 0; i < solution.rows(); ++i)
195 {
196 int row_matches = 0;
197 for (unsigned int j = 0; j < solution.columns(); ++j)
198 {
199 if (solution(i, j) == 0)
200 {
201 ++row_matches;
202 total_cost += costs(i, j);
203 }
204 }
205 assert(row_matches == 0 || row_matches == 1);
206 }
207 return total_cost;
208 }
209
210 std::set<element_type> retrieve_elements(std::map<element_type, std::vector<double>> const &coefficients) const
211 {
212 std::set<element_type> elems;
213 for (auto const &it : coefficients)
214 {
215 elems.insert(it.first);
216 }
217 return elems;
218 }
219
220 std::set<cluster_type> retrieve_clusters(std::map<element_type, std::vector<double>> const &coefficients) const
221 {
222 std::set<cluster_type> clusters;
223 unsigned int s = coefficients.begin()->second.size();
224 for (unsigned int i = 0; i < s; ++i)
225 {
226 clusters.insert(i);
227 }
228 return clusters;
229 }
230
231 matrix_type build_coefficients_matrix(std::map<element_type, std::vector<double>> const &coefficients)
232 {
233
234 unsigned int n_rows = coefficients.begin()->second.size();
235 unsigned int n_cols = coefficients.size();
236
237 for (auto const &it : coefficients)
238 {
239 assert(it.second.size() == n_rows);
240 }
241
242 matrix_type m(n_rows, n_cols, 0);
243
244 unsigned int i = 0;
245 unsigned int j = 0;
246
247 for (auto const &it1 : coefficients)
248 {
249 for (auto const &it2 : it1.second)
250 {
251 m(i, j) = it2;
252 ++i;
253 }
254 i = 0;
255 ++j;
256 }
257 return m;
258 }
259
260 void check_coefficients_validty()
261 {
262 double sum = 0.;
263 for (unsigned int j = 0; j < m_membership_coefficients.size2(); ++j)
264 {
265 for (unsigned int i = 0; i < m_membership_coefficients.size1(); ++i)
266 {
267 sum += m_membership_coefficients(i, j);
268 }
269 // assert(std::fabs(sum - 1) < 1E-3);
270 sum = 0;
271 }
272 }
273};
274} // namespace quetzal::polymorphism::fuzzy_transfer_distance
275
276template <typename E>
277std::ostream &operator<<(std::ostream &os,
279{
280 os << fuzzy_partition.m_membership_coefficients;
281 return os;
282}
283
284template <typename T>
285bool operator==(const boost::numeric::ublas::matrix<T> &m, const boost::numeric::ublas::matrix<T> &n)
286{
287 bool returnValue = (m.size1() == n.size1()) && (m.size2() == n.size2());
288
289 if (returnValue)
290 {
291 for (unsigned int i = 0; returnValue && i < m.size1(); ++i)
292 {
293 for (unsigned int j = 0; returnValue && j < m.size2(); ++j)
294 {
295 returnValue &= m(i, j) == n(i, j);
296 }
297 }
298 }
299 return returnValue;
300}
301
302#endif
Clusters are implicitly defined because of cluster merging (what would make unique IDs complicated to...
Definition FuzzyPartition.hpp:53
size_t nElements() const
Returns the number of elements (columns) of the fuzzy partition (matrix)
Definition FuzzyPartition.hpp:88
FuzzyPartition(std::map< element_type, std::vector< double > > const &coefficients)
Construct the fuzz partition with given coefficients.
Definition FuzzyPartition.hpp:66
bool operator==(FuzzyPartition< element_type > const &other) const
Comparison operator, returning true if two partitions have equal elements set and membership coeffice...
Definition FuzzyPartition.hpp:136
const std::set< element_type > & elements() const
Return the elements of the fuzzy partition.
Definition FuzzyPartition.hpp:76
size_t nClusters() const
Returns the number of clusters (lines) of the fuzzy partition (matrix)
Definition FuzzyPartition.hpp:94
FuzzyPartition & merge_clusters(RestrictedGrowthString const &rgs)
Modifies the object, merging clusters (summing lines) according to a restricted growth string object.
Definition FuzzyPartition.hpp:111
const std::set< cluster_type > & clusters() const
Return the clusters indices.
Definition FuzzyPartition.hpp:82
double fuzzy_transfer_distance(FuzzyPartition const &other) const
Compute the fuzzy transfer distance between two fuzzy partition.
Definition FuzzyPartition.hpp:102
auto operator==(const C1 &c1, const C2 &c2)
GridCoordinates are equality comparable.
Definition coordinates.hpp:39
Summary statistics based on spatial partition of coalescent trees.
Definition polymorphism.hpp:28