Quetzal-CoaTL
The Coalescence Template Library
Loading...
Searching...
No Matches
to_network.hpp
1// Copyright 2020 Arnaud Becheler <abechele@umich.edu>
2
3/*********************************************************************** * This program is free software; you can
4 *redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free
5 *Software Foundation; either version 2 of the License, or * (at your option) any later version. *
6 * *
7 ***************************************************************************/
8
9#pragma once
10
11#include "quetzal/coalescence/graph/network.hpp"
12#include "quetzal/io/newick/ast.hpp"
13#include "quetzal/io/newick/generator.hpp"
14#include "quetzal/io/newick/parser.hpp"
15
17{
18namespace detail
19{
20template <class Graph> void regraft_cycles(Graph &graph, auto root)
21{
22 using graph_type = Graph;
23 using vertex_descriptor = typename graph_type::vertex_descriptor;
24 using vertex_property = typename Graph::vertex_property;
25 using dict_type = std::map<std::string, std::map<std::string, std::vector<vertex_descriptor>>>;
26
27 dict_type records;
28 std::regex rgx("[a-zA-Z0-9_]*" // Label: only upper and lowercase letters, numbers, underscores, or empty
29 "#" // Cycle detected
30 "([H|LGT|R])" // Hybrid, Lateral Gene Transfer or Recombination
31 "(0|[1-9][0-9]*)" // Any positive integer, including 0
32 );
33
34 // iterate through vertices/labels
35 for (auto vertex : graph.vertices())
36 {
37 std::smatch match;
38 std::string label;
39 if constexpr (std::is_same_v<vertex_property, std::string>)
40 label = graph[vertex];
41 else
42 label = graph[vertex].label();
43
44 // find if cycle detected
45 if (std::regex_search(label, match, rgx))
46 {
47 // save vertices id to their rightful groups
48 const auto event_type = match.str(1);
49 const auto event_id = match.str(2);
50 records[event_type][event_id].push_back(vertex);
51 }
52 }
53
54 using namespace ranges;
55
56 // iterate through results
57 for (auto const &[event_type, val] : records)
58 {
59 for (auto const &[event_id, duplicated_vertices] : val)
60 {
61 auto new_vertex = graph.add_vertex("#" + event_type + event_id);
62
63 auto new_sources =
64 duplicated_vertices | views::transform([&graph](auto v) { return graph.in_edges(v); }) |
65 views::take(1) |
66 views::transform([&graph](auto &&range_of_e) { return graph.source(*range_of_e.begin()); });
67
68 auto new_targets = duplicated_vertices | views::transform([&graph](auto v) { return graph.out_edges(v); }) |
69 views::join | views::transform([&graph](auto e) { return graph.target(e); });
70
71 ranges::for_each(duplicated_vertices, [&graph](auto v) {
72 graph.clear_vertex(v);
73 graph.remove_vertex(v);
74 });
75
76 auto regraft = [&graph, new_vertex](auto s, auto t) {
77 graph.add_edge(s, new_vertex);
78 graph.add_edge(new_vertex, t);
79 };
80
81 auto todo = ranges::views::zip(new_sources, new_targets);
82 ranges::for_each(todo, [regraft](auto &&z) { std::apply(regraft, z); });
83 }
84 }
85}
86} // end namespace detail
87
89template <class VertexProperty = quetzal::format::newick::ast::node::vertex_property,
91std::tuple<quetzal::coalescence::network<VertexProperty, EdgeProperty>,
94{
96 using vertex_descriptor = typename graph_type::vertex_descriptor;
97
98 graph_type graph;
99 auto root = graph.add_vertex(VertexProperty(ast_root.name));
100
101 // A hacky recursive lambda (C++23 will make this much simpler)
102 static const auto populate = [](graph_type &graph, vertex_descriptor parent, const auto &ast) {
103 static auto recursive = [](graph_type &graph, vertex_descriptor parent, const auto &ast,
104 auto &self) mutable -> void {
105 std::vector<std::tuple<vertex_descriptor, EdgeProperty>> children;
106
107 children.reserve(ast.children.size());
108
109 for (const auto &ast_child : ast.children)
110 {
111 children.push_back(std::make_tuple(graph.add_vertex(VertexProperty(ast_child.name)),
112 EdgeProperty(ast_child.distance_to_parent)));
113 }
114
115 graph.add_edges(parent, children);
116
117 for (int i = 0; i < children.size(); i++)
118 {
119 self(graph, std::get<0>(children[i]), ast.children[i], self);
120 }
121 }; // end recursive
122 recursive(graph, parent, ast, recursive);
123 }; // end populate
124
125 populate(graph, root, ast_root);
126 detail::regraft_cycles(graph, root);
127 return {std::move(graph), root};
128}
129
133template <class VertexProperty = quetzal::format::newick::ast::node::vertex_property,
135std::tuple<quetzal::coalescence::network<VertexProperty, EdgeProperty>,
137to_network(const std::string &extended_newick)
138{
139 // We initialize the root of Abstract Syntax Tree (AST)
141 // We parse the input string into the AST
142 auto ok = quetzal::parse(begin(extended_newick), end(extended_newick), quetzal::format::newick::parser::tree, ast);
143 // todo: handling exceptions in boost::spirit
144 assert(ok);
145 return to_network<VertexProperty, EdgeProperty>(ast);
146}
147
148} // namespace quetzal::format::newick::extended
Definition network.hpp:380
Generic components for parsing or generating extended Newick strings.
Definition io.hpp:33
std::tuple< quetzal::coalescence::network< VertexProperty, EdgeProperty >, typename quetzal::coalescence::network< VertexProperty, EdgeProperty >::vertex_descriptor > to_network(quetzal::format::newick::ast::node ast_root)
Converts an Extended Newick AST to a quetzal network graph.
Definition to_network.hpp:93
Abstract Syntax Tree structure.
Definition ast.hpp:30
double edge_property
The type of edge described by the AST.
Definition ast.hpp:39
vertex_property name
the name of the node
Definition ast.hpp:49
std::string vertex_property
The type of vertex described by the AST.
Definition ast.hpp:34