Quetzal-CoaTL
The Coalescence Template Library
Loading...
Searching...
No Matches
generator.hpp
1// Copyright 2020 Arnaud Becheler <abechele@umich.edu>
2
3/*********************************************************************** * This program is free software; you can
4 *redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free
5 *Software Foundation; either version 2 of the License, or * (at your option) any later version. *
6 * *
7 ***************************************************************************/
8
9#pragma once
10
11#include <algorithm>
12#include <concepts>
13#include <regex>
14#include <stdexcept>
15#include <string>
16#include <string_view>
17
18#include "quetzal/coalescence/graph/k_ary_tree.hpp"
19#include <boost/graph/depth_first_search.hpp>
20
22{
23
24namespace detail
25{
26// Replacement for `std::function<T(U)>::argument_type`
27template <typename T> struct single_function_argument;
28
29template <typename Ret, typename Arg> struct single_function_argument<std::function<Ret(Arg)>>
30{
31 using type = Arg;
32};
33
34template <typename P1> struct single_function_argument_impl
35{
36 using type = typename single_function_argument<decltype(std::function{std::declval<P1>()})>::type;
37};
38
39template <typename P1> using single_function_argument_t = typename single_function_argument_impl<P1>::type;
40
43{
44};
45
48{
49};
50
58bool check_if_balanced(std::string_view input, const char &open = '(', const char &close = ')')
59{
60 int count = 0;
61 for (const auto &ch : input)
62 {
63 if (ch == open)
64 count++;
65 if (ch == close)
66 count--;
67 // if a parenthesis is closed without being opened return false
68 if (count < 0)
69 return false;
70 }
71 // in the end the test is passed only if count is zero
72 return count == 0;
73}
74
79{
80 static std::string edit(const std::string s)
81 {
82 return s;
83 }
84};
85
89template <class tag> struct is_balanced
90{
91};
92
96template <> struct is_balanced<detail::parenthesis>
97{
98 static bool check(std::string_view s)
99 {
100 return check_if_balanced(s, '(', ')');
101 }
102};
103
107template <> struct is_balanced<detail::square_bracket>
108{
109 static bool check(std::string_view s)
110 {
111 return check_if_balanced(s, '[', ']');
112 }
113};
114} // namespace detail
115
116using namespace std::string_literals;
117
121static inline std::vector<std::string> forbidden_labels = {" "s, ","s, ";"s, "("s, ")"s, "["s, "]"s};
122
130static inline constexpr char blank = '_';
131
135template <unsigned int N> struct remove_comments_of_depth
136{
137};
138
142template <> struct remove_comments_of_depth<0> : detail::identity
143{
144};
145
151template <> struct remove_comments_of_depth<1>
152{
153 static std::string edit(const std::string s)
154 {
155 if (s.empty())
156 return s;
157 return std::regex_replace(s, std::regex(R"(\[[^()]*\])"), "");
158 }
159};
160
166template <> struct remove_comments_of_depth<2>
167{
168 static std::string edit(const std::string s)
169 {
170 std::string buffer;
171 int counter = 0;
172 for (const auto &ch : s)
173 {
174 if (ch == '[')
175 counter++;
176 if (ch == ']')
177 counter--;
178 if (ch == '[' && counter == 2)
179 continue; // do nothing, that was the opening
180 if (ch == ']' && counter == 1)
181 continue; // do nothing, that was the closing
182 if (!(counter >= 2 || (counter == 1 && ch == ']')))
183 buffer.append(std::string(1, ch));
184 }
185 return buffer;
186 }
187};
188
194struct PAUP
195{
196 // return empty string
197 static inline std::string root_branch_length()
198 {
199 return "";
200 }
201 // do nothing
202 static inline std::string treat_comments(std::string &s)
203 {
204 return s;
205 }
206};
207
213struct TreeAlign
214{
215 // Set explicit null branch length for root node
216 static inline std::string root_branch_length()
217 {
218 return ":0.0";
219 }
220 // Remove comments that are nested, keep comments of depth 1
221 static inline std::string treat_comments(const std::string s)
222 {
223 return remove_comments_of_depth<2>::edit(s);
224 }
225};
226
232struct PHYLIP
233{
234 // Branch length for root node is not explicit.
235 static inline std::string root_branch_length()
236 {
237 return "";
238 }
239 // Remove comments that are nested, keep comments of depth 1
240 static inline std::string treat_comments(std::string &s)
241 {
242 // Allow comments of depth 1, but does not allow nested comments.
243 return remove_comments_of_depth<2>::edit(s);
244 }
245};
246
250template <class F, class... Args>
251concept Formattable = std::invocable<F, Args...> && std::convertible_to<std::invoke_result_t<F, Args...>, std::string>;
252
258template <class T, std::predicate<T> P1, std::predicate<T> P2, Formattable<T> F1, Formattable<T> F2,
259 class Policy = PAUP>
260class generator : public Policy
261{
262 public:
266 using node_type = T;
270 using formula_type = std::string;
274 using policy_type = Policy;
275
276 private:
280 static inline constexpr char _end = ';';
281
285 mutable formula_type _formula;
286
290 mutable int _depth = 0;
291
295 P1 _has_parent;
296
300 P2 _has_children;
301
310 F1 _label;
311
321 F2 _branch_length;
322
323 void _pre_order(const node_type &node) const
324 {
325 if (std::invoke(_has_children, node))
326 {
327 _formula += "(";
328 _depth += 1;
329 }
330 }
331
332 void _in_order() const
333 {
334 _formula += ",";
335 }
336
337 void _post_order(node_type const &node)
338 {
339 // Not leaf
340 if (std::invoke(_has_children, node))
341 _formula += ")";
342
343 // Not root
344 if (std::invoke(_has_parent, node))
345 {
346 auto const label = std::invoke(_label, node);
347 _formula += label;
348 std::string const branch(std::invoke(_branch_length, node));
349 if (!branch.empty())
350 _formula.append(":").append(branch);
351 }
352 else
353 {
354 // Is root
355 _formula += std::invoke(_label, node);
356 _formula += policy_type::root_branch_length();
357 }
358
359 _depth -= 1;
360 }
361
362 public:
366 generator(P1 has_parent, P2 has_children, F1 label, F2 branch_length, Policy pol = {})
367 : policy_type(std::move(pol)), _has_parent(std::move(has_parent)), _has_children(std::move(has_children)),
368 _label(std::move(label)), _branch_length(std::move(branch_length))
369 {
370 }
371
378 {
379 return [this](const node_type &node) { this->_pre_order(node); };
380 }
381
385 auto in_order()
386 {
387 return [this]() { this->_in_order(); };
388 }
389
397 {
398 return [this](const node_type &node) { this->_post_order(node); };
399 }
400
404 bool has_forbidden_characters(const std::string &s) const
405 {
406 if (s.empty())
407 return false;
408 std::string forbidden = " ,;()[\\]";
409 bool is_forbidden = std::regex_search(s, std::regex("[" + forbidden + "]"));
410 return is_forbidden;
411 }
412
416 void clear()
417 {
418 _formula.clear();
419 }
420
424 std::string &&take_result() const
425 {
426 _formula.push_back(this->_end);
427
428 _formula = policy_type::treat_comments(_formula);
429
431 {
432 throw std::runtime_error(std::string("Failed: formula parenthesis are not balanced:") + _formula);
433 }
434
436 {
437 throw std::runtime_error(std::string("Failed: formula square brackets are not balanced:") + _formula);
438 }
439
440 return std::move(_formula);
441 }
442}; // end structure generator
443
449template <class P1, class P2, class F1, class F2, class Policy = PAUP>
450generator(P1, P2, F1, F2, Policy pol = {}) -> generator<detail::single_function_argument_t<P1>, P1, P2, F1, F2, Policy>;
451
452} // end namespace quetzal::format::newick
Generate the Newick formula from an external (custom) tree class.
Definition generator.hpp:261
generator(P1 has_parent, P2 has_children, F1 label, F2 branch_length, Policy pol={})
Constructor.
Definition generator.hpp:366
std::string && take_result() const
Retrieve the formatted string of the given node in the specified format.
Definition generator.hpp:424
auto in_order()
Operation called in the general DFS algorithm to add a comma between visited nodes.
Definition generator.hpp:385
std::string formula_type
Type of formula being generated.
Definition generator.hpp:270
T node_type
Type of node being formatted.
Definition generator.hpp:266
void clear()
Clear the formula buffer to refresh the generator.
Definition generator.hpp:416
bool has_forbidden_characters(const std::string &s) const
Check if a string contains characters forbidden by the standard.
Definition generator.hpp:404
auto post_order()
Operation to be passed to a generic DFS algorithm to open a parenthesis if node has children to be vi...
Definition generator.hpp:396
Policy policy_type
Type of formula being generated.
Definition generator.hpp:274
auto pre_order()
Operation called in the general DFS algorithm to open a parenthesis if node has children to be visite...
Definition generator.hpp:377
Generic components for parsing or generating Newick strings.
Definition io.hpp:28
bool check_if_balanced(const std::string &input, const char &open='(', const char &close=')')
Check if the string is balanced for open/close symbols (parenthesis,brackets)
Definition from_network.hpp:41
Default comment removal policy: do not change anything.
Definition generator.hpp:79
Class template, base for further specialization.
Definition generator.hpp:90
Tag.
Definition generator.hpp:43