Quetzal-CoaTL
The Coalescence Template Library
Loading...
Searching...
No Matches
from_network.hpp
1// Copyright 2020 Arnaud Becheler <abechele@umich.edu>
2
3/*********************************************************************** * This program is free software; you can
4 *redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free
5 *Software Foundation; either version 2 of the License, or * (at your option) any later version. *
6 * *
7 ***************************************************************************/
8
9#ifndef __EXTENDED_NEWICK_FORMATTER_H_INCLUDED__
10#define __EXTENDED_NEWICK_FORMATTER_H_INCLUDED__
11
12#include <algorithm>
13#include <concepts>
14#include <regex>
15#include <stdexcept>
16#include <string>
17
18namespace quetzal
19{
20namespace format
21{
26{
27};
32{
33};
41bool check_if_balanced(const std::string &input, const char &open = '(', const char &close = ')')
42{
43 int count = 0;
44 for (const auto &ch : input)
45 {
46 if (ch == open)
47 count++;
48 if (ch == close)
49 count--;
50 // if a parenthesis is closed without being opened return false
51 if (count < 0)
52 return false;
53 }
54 // in the end the test is passed only if count is zero
55 return count == 0;
56}
61{
62 static std::string edit(const std::string &s)
63 {
64 return s;
65 }
66};
70template <class tag> struct is_balanced
71{
72};
76template <> struct is_balanced<parenthesis>
77{
78 static bool check(const std::string &s)
79 {
80 return check_if_balanced(s, '(', ')');
81 }
82};
86template <> struct is_balanced<square_bracket>
87{
88 static bool check(const std::string &s)
89 {
90 return check_if_balanced(s, '[', ']');
91 }
92};
93
94namespace newick
95{
96using namespace std::string_literals;
100static inline std::vector<std::string> forbidden_labels = {" "s, ","s, ";"s, "("s, ")"s, "["s, "]"s};
108static inline constexpr char blank = '_';
112template <unsigned int N> struct remove_comments_of_depth
113{
114};
118template <> struct remove_comments_of_depth<0> : identity
119{
120};
126template <> struct remove_comments_of_depth<1>
127{
128 static std::string edit(const std::string &s)
129 {
130 return std::regex_replace(s, std::regex(R"(\[[^()]*\])"), "");
131 }
132};
138template <> struct remove_comments_of_depth<2>
139{
140 static std::string edit(const std::string &s)
141 {
142 std::string buffer;
143 int counter = 0;
144 for (const auto &ch : s)
145 {
146 if (ch == '[')
147 counter++;
148 if (ch == ']')
149 counter--;
150 if (ch == '[' && counter == 2)
151 continue; // do nothing, that was the opening
152 if (ch == ']' && counter == 1)
153 continue; // do nothing, that was the closing
154 if (!(counter >= 2 || (counter == 1 && ch == ']')))
155 buffer.append(std::string(1, ch));
156 }
157 return buffer;
158 }
159};
163struct PAUP
164{
165 // return empty string
166 static inline std::string root_branch_length()
167 {
168 return "";
169 }
170 // do nothing
171 static inline std::string treat_comments(std::string &s)
172 {
173 return s;
174 }
175};
180{
181 // Set explicit null branch length for root node
182 static inline std::string root_branch_length()
183 {
184 return ":0.0";
185 }
186 // Remove comments that are nested, keep comments of depth 1
187 static inline std::string treat_comments(const std::string &s)
188 {
190 }
191};
195struct PHYLIP
196{
197 // Branch length for root node is not explicit.
198 static inline std::string root_branch_length()
199 {
200 return "";
201 }
202 // Remove comments that are nested, keep comments of depth 1
203 static inline std::string treat_comments(std::string &s)
204 {
205 // Allow comments of depth 1, but does not allow nested comments.
207 }
208};
212template <class F, class... Args>
213concept Formattable = std::invocable<F, Args...> && std::convertible_to<std::invoke_result_t<F, Args...>, std::string>;
214
218template <class T, std::predicate<T> P1, std::predicate<T> P2, Formattable<T> F1, Formattable<T> F2,
219 class Policy = PAUP>
220class Formatter : public Policy
221{
222 public:
226 using node_type = T;
230 using formula_type = std::string;
234 using policy_type = Policy;
235
236 private:
240 static inline constexpr char _end = ';';
244 mutable formula_type _formula;
248 P1 _has_parent;
249
253 P2 _has_children;
254
263 F1 _label;
264
274 F2 _branch_length;
275
281 bool has_forbidden_characters(const std::string &s) const
282 {
283 std::string joined;
284 for (const std::string &piece : forbidden_labels)
285 joined += piece;
286 bool is_forbidden = std::regex_search(s, std::regex("^[^"s + joined + "]"s));
287 return is_forbidden;
288 }
289
295 void _pre_order(const node_type &node) const
296 {
297 if (std::invoke(_has_children, node))
298 {
299 _formula += "(";
300 }
301 }
302
306 void _in_order(const node_type &) const
307 {
308 _formula += ",";
309 }
310
316 void _post_order(const node_type &node) const
317 {
318
319 if (std::invoke(_has_children, node))
320 {
321 _formula.pop_back(); // Remove comma
322 _formula += ")";
323 }
324
325 if (std::invoke(_has_parent, node))
326 {
327 auto label = std::invoke(_label, node);
328
329 if (has_forbidden_characters(remove_comments_of_depth<1>::edit(label)))
330 {
331 throw std::invalid_argument(std::string("Label with forbidden characters:") + std::string(label));
332 }
333
334 _formula += label;
335
336 auto branch = std::invoke(_branch_length, node);
337 if (branch != "")
338 {
339 _formula += ":";
340 _formula += branch;
341 }
342 }
343 else
344 {
345 _formula += std::invoke(_label, node);
346 _formula += policy_type::root_branch_length();
347 }
348 }
349
350 public:
354 Formatter(P1 &&has_parent, P2 &&has_children, F1 &&label, F2 &&branch_length)
355 : _has_parent(std::forward<P1>(has_parent)), _has_children(std::forward<P2>(has_children)),
356 _label(std::forward<F1>(label)), _branch_length(std::forward<F2>(branch_length))
357 {
358 }
359
366 {
367 return [this](const node_type &node) { this->_pre_order(node); };
368 }
372 auto in_order()
373 {
374 return [this](const node_type &node) { this->_in_order(node); };
375 }
383 {
384 return [this](const node_type &node) { this->_post_order(node); };
385 }
389 void clear()
390 {
391 _formula.clear();
392 }
397 {
398 // that or expose orders
399 // root.visit_by_generic_DFS(preorder, inorder, postorder);
400
401 _formula.push_back(this->_end);
402
403 _formula = policy_type::treat_comments(_formula);
404
405 if (is_balanced<parenthesis>::check(_formula) == false)
406 {
407 throw std::runtime_error(std::string("Failed: formula parenthesis are not balanced:") + _formula);
408 }
409
410 if (is_balanced<square_bracket>::check(_formula) == false)
411 {
412 throw std::runtime_error(std::string("Failed: formula square brackets are not balanced:") + _formula);
413 }
414
415 return _formula;
416 }
417}; // end structrure Newick
418
419// Replacement for `std::function<T(U)>::argument_type`
420template <typename T> struct single_function_argument;
421template <typename Ret, typename Arg> struct single_function_argument<std::function<Ret(Arg)>>
422{
423 using type = Arg;
424};
425
426template <typename P1> struct single_function_argument_impl
427{
428 using type = typename single_function_argument<decltype(std::function{std::declval<P1>()})>::type;
429};
430
431template <typename P1> using single_function_argument_t = typename single_function_argument_impl<P1>::type;
432
433// Deduction guide: type T is deduced from P1
434template <class P1, class P2, class F1, class F2, class Policy = PAUP>
435Formatter(P1 &&, P2 &&, F1 &&, F2 &&) -> Formatter<single_function_argument_t<P1>, P1, P2, F1, F2, Policy>;
436
440template <class P1, class P2, class F1, class F2, class Policy = PAUP>
441auto make_formatter(P1 &&has_parent, P2 &&has_children, F1 &&label, F2 &&branch_length, Policy policy = Policy())
442{
443 // Use Class template argument deduction (CTAD)
444 return Formatter<single_function_argument_t<P1>, P1, P2, F1, F2, Policy>(
445 std::forward<P1>(has_parent), std::forward<P2>(has_children), std::forward<F1>(label),
446 std::forward<F2>(branch_length));
447}
448
453template <class T, std::predicate<T> P1, std::predicate<T> P2, Formattable<T> F1, Formattable<T> F2, class Policy>
454auto make_formatter(P1 &&has_parent, P2 &&has_children, F1 &&label, F2 &&branch_length, Policy policy = Policy())
455{
456 // Use Class template argument deduction (CTAD)
457 return Formatter<T, P1, P2, F1, F2, Policy>(std::forward<P1>(has_parent), std::forward<P2>(has_children),
458 std::forward<F1>(label), std::forward<F2>(branch_length));
459}
460} // end namespace newick
461} // end namespace format
462} // end namespace quetzal
463
464#endif
Generic algorithm to generate the Newick formula of a tree.
Definition from_network.hpp:221
auto in_order()
Operation called in the general DFS algorithm to add a comma between visited nodes.
Definition from_network.hpp:372
void clear()
Clear the formula buffer.
Definition from_network.hpp:389
T node_type
Type of node being formatted.
Definition from_network.hpp:226
formula_type get() const
Retrieve the formatted string of the given node in the specified format.
Definition from_network.hpp:396
std::string formula_type
Type of formula being generated.
Definition from_network.hpp:230
auto post_order()
Operation to be passed to a generic DFS algorithm to open a parenthesis if node has children to be vi...
Definition from_network.hpp:382
Formatter(P1 &&has_parent, P2 &&has_children, F1 &&label, F2 &&branch_length)
Constructor.
Definition from_network.hpp:354
auto pre_order()
Operation called in the general DFS algorithm to open a parenthesis if node has children to be visite...
Definition from_network.hpp:365
Policy policy_type
Type of formula being generated.
Definition from_network.hpp:234
Concept for label name.
Definition from_network.hpp:213
Generic components for parsing or generating Newick strings.
Definition io.hpp:28
auto make_formatter(P1 &&has_parent, P2 &&has_children, F1 &&label, F2 &&branch_length, Policy policy=Policy())
to use for template lambda [](const auto& s) {} e.g. make_formatter<Node>`
Definition from_network.hpp:441
bool check_if_balanced(const std::string &input, const char &open='(', const char &close=')')
Check if the string is balanced for open/close symbols (parenthesis,brackets)
Definition from_network.hpp:41
Simulation of coalescence-based models of molecular evolution.
Definition coalescence.hpp:21
Do not change anything.
Definition from_network.hpp:61
Class template.
Definition from_network.hpp:71
Allow nested comments.
Definition from_network.hpp:164
Requires that an unrooted tree begin with a trifurcation; it will not "uproot" a rooted tree.
Definition from_network.hpp:196
Set a root node branch length to zero. Remove all nested comments.
Definition from_network.hpp:180
Template class.
Definition from_network.hpp:113
Tag.
Definition from_network.hpp:26
Tag.
Definition from_network.hpp:32