Quetzal-CoaTL
The Coalescence Template Library
Loading...
Searching...
No Matches
Generating a Newick string

Objectives

In this tutorial section you will learn how to:

  • Use the quetzal::format::newick::generate_from function to generate a Newick string
    • from a Quetzal coalescence binary tree.
    • from a Quetzal coalescence k-ary tree.
  • Generate a Newick string from your own (non-Quetzal) legacy tree class.
  • Customize the behavior of the generator based on the type of information (properties) you wish to format.

From a Quetzal binary tree


No property

When a Newick string is generated from a tree that has no vertex nor edge information/properties attached to it, it is then assumed the only interest is the tree topology: as there is no clear way to populate the labels or branch length data fields in the Newick string, those are left empty.

Input

1#include "quetzal/quetzal.hpp"
2#include <cassert>
3#include <random>
4
5int main()
6{
7 // Declare a random number generator
8 std::mt19937 rng{std::random_device{}()};
9
10 // Generate a random tree with 5 leaves
12 auto [tree, root] = get_random_binary_tree<>(5, rng);
13
14 // Chose a formatting standard
16 using Flavor2 = quetzal::format::newick::PAUP;
17 using Flavor3 = quetzal::format::newick::PHYLIP;
18
19 // Generate the newick string
20 auto s1 = quetzal::format::newick::generate_from(tree, root, Flavor1());
21 auto s2 = quetzal::format::newick::generate_from(tree, root, Flavor2());
22 auto s3 = quetzal::format::newick::generate_from(tree, root, Flavor3());
23
24 std::cout << s1 << "\n" << s2 << "\n" << s3 << std::endl;
25
26 return 0;
27}
std::pair< quetzal::coalescence::binary_tree< Vertex, Edge >, typename quetzal::coalescence::binary_tree< Vertex, Edge >::vertex_descriptor > get_random_binary_tree(int n_leaves, Generator &rng)
Generate a random binary tree.
Definition binary_tree.hpp:734
Allow nested comments.
Definition from_network.hpp:164
Requires that an unrooted tree begin with a trifurcation; it will not "uproot" a rooted tree.
Definition from_network.hpp:196
Set a root node branch length to zero. Remove all nested comments.
Definition from_network.hpp:180

Since there are no comments stored in the labels of the generated random tree, the output of the different flavors are here quite similar:

Output

1((,),((,),)):0.0;
2((,),((,),));
3((,),((,),));

Custom properties

When a Newick string is generated from a tree that has some information attached to its vertices and edges through a property class, the formatter can acccess this information as long as the property class defined a std::string label() const method.

Input

1#include "quetzal/quetzal.hpp"
2#include <cassert>
3#include <random>
4
5// Declare a global random number generator
6std::mt19937 rng{std::random_device{}()};
7
8int main()
9{
10
11 struct vertex_info
12 {
13 std::string data;
14 // constructor that randomly initializes the vertex data with a letter
16 {
17 data = std::uniform_int_distribution<int>(0, 25)(rng) + 'A';
18 }
19 // access point to the label by the formatter
20 std::string label() const
21 {
22 return data;
23 }
24 };
25
26 struct edge_info
27 {
28 int data;
29 // constructor that randomly initializes the edge data with a number
30 edge_info()
31 {
32 data = std::uniform_int_distribution<int>(0, 9)(rng);
33 }
34 // access point to the label by the formatter
35 std::string label() const
36 {
37 return std::to_string(data);
38 }
39 };
40
41 // Generate a random tree with 5 leaves
43 auto [tree, root] = get_random_binary_tree<vertex_info, edge_info>(5, rng);
44
45 // Generate the newick string
47 auto s = quetzal::format::newick::generate_from(tree, root, Flavor());
48
49 std::cout << s << std::endl;
50
51 return 0;
52}
Definition geography_dispersal_kernel_4.cpp:13
Definition coalescence_binary_tree_2.cpp:5

Output

1(V:6,((V:9,A:7)J:6,(A:5,A:8)S:8)K:0)R:0.0;

From a Quetzal k-ary tree

Extending the string generation to a k-ary tree is straightforward: instead of passing a quetzal::coalescence::binary_tree object, just pass a quetzal::coalescence::k_ary_tree to the quetzal::format::newick::generate_from function.


No property

No vertex nor edge properties are embedded: the vertices labels and branch length data fields of the Newick string are left empty.

Input

1#include "quetzal/quetzal.hpp"
2#include <cassert>
3#include <random>
4
5int main()
6{
7 // Declare a random number generator
8 std::mt19937 rng{std::random_device{}()};
9
10 // Generate a random tree with 5 leaves
12 auto [tree, root] = get_random_k_ary_tree<>(5, rng);
13
14 // Chose a formatting standard
16 using Flavor2 = quetzal::format::newick::PAUP;
17 using Flavor3 = quetzal::format::newick::PHYLIP;
18
19 // Generate the newick string
20 auto s1 = quetzal::format::newick::generate_from(tree, root, Flavor1());
21 auto s2 = quetzal::format::newick::generate_from(tree, root, Flavor2());
22 auto s3 = quetzal::format::newick::generate_from(tree, root, Flavor3());
23
24 std::cout << s1 << "\n" << s2 << "\n" << s3 << std::endl;
25
26 return 0;
27}
std::pair< quetzal::coalescence::k_ary_tree< Vertex, Edge >, typename quetzal::coalescence::k_ary_tree< Vertex, Edge >::vertex_descriptor > get_random_k_ary_tree(int n_leaves, Generator &rng)
Generate a random k-ary tree.
Definition k_ary_tree.hpp:761

Since there are no comments stored in the labels of the generated random tree, the output of the different flavors are here quite similar:

Output

1(,(,(,(,)))):0.0;
2(,(,(,(,))));
3(,(,(,(,))));

Custom properties

The Newick string is generated from a tree that embeds some information attached to its vertices and edges through a property class, the formatter can acccess this information as long as the property class defined a std::string label() const method.

Input

1#include "quetzal/quetzal.hpp"
2#include <cassert>
3#include <random>
4
5// Declare a global random number generator
6std::mt19937 rng{std::random_device{}()};
7
8int main()
9{
10
11 struct vertex_info
12 {
13 std::string data;
14 // constructor that randomly initializes the vertex data with a letter
16 {
17 data = std::uniform_int_distribution<int>(0, 25)(rng) + 'A';
18 }
19 // access point to the label by the formatter
20 std::string label() const
21 {
22 return data;
23 }
24 };
25
26 struct edge_info
27 {
28 int data;
29 // constructor that randomly initializes the edge data with a number
30 edge_info()
31 {
32 data = std::uniform_int_distribution<int>(0, 9)(rng);
33 }
34 // access point to the label by the formatter
35 std::string label() const
36 {
37 return std::to_string(data);
38 }
39 };
40
41 // Generate a random tree with 5 leaves
43 auto [tree, root] = get_random_k_ary_tree<vertex_info, edge_info>(5, rng);
44
45 // Generate the newick string
47 auto s = quetzal::format::newick::generate_from(tree, root, Flavor());
48
49 std::cout << s << std::endl;
50
51 return 0;
52}

Output

12
22
32
42
5(W:5,((C:7,W:7)Z:2,(F:9,K:1)Q:7)M:2)I;

Interfacing legacy code

If your existing code heavily dependson your own class to describe a tree graph, then refactoring your whole project to switch to Quetzal trees may be too costly.

In this case, you can connect the quetzal::format::newick::generator class to your own legacy code.

Note
  1. Quetzal does not know anything about your own classes, so you will need a bit of extra effort to make the quetzal::format::newick::generator aware of your own design.
  2. But at least you don't have to worry about the grammar logic anymore: if you can connect the dots the generator internals will handle the Newick string generation.
  3. If the task becomes tedious, don't hesitate to ask for help!

No property

Quetzal implements a quetzal::format::newick::generator that consumes a tree graph and produces a newick string.

This generator is not aware of your class. Instead, it requires you to defined 4 functors that embed all the information there is to know about a vertex \(v\):

  • has_parent(v): returns false if \(v\) is the root, true otherwise.
  • has_children(v): returns false if \(v\) is a leaf, true otherwise.
  • label(v): returns a (possibly empty) std::string used as label for vertex \(v\)
  • branch_length_to_parent(v): returns a (possibly empty) std::string used as label for the edge between \(v\) and its parent.

Input

1#include "quetzal/quetzal.hpp"
2#include <cassert>
3#include <random>
4
5// Let's say you have this legacy class ...
6struct Node
7{
8 Node *parent = nullptr;
9 Node *left = nullptr;
10 Node *right = nullptr;
11
12 // You will need a DFS method that takes arbitrary (templated) pre/in/post order operations
13 template <class Op1, class Op2, class Op3> void depth_first_search(Op1 pre_order, Op2 in_order, Op3 post_order)
14 {
15 pre_order(*this);
16 if (this->left != nullptr && this->right != nullptr)
17 {
18 this->left->depth_first_search(pre_order, in_order, post_order);
19 in_order();
20 this->right->depth_first_search(pre_order, in_order, post_order);
21 }
22 post_order(*this);
23 }
24};
25
26int main()
27{
28 /* Let's assume you built this topology using your legacy Node class :
29 * a
30 * / \
31 * / c
32 * / / \
33 * b d e
34 */
35
36 Node a, b, c, d, e;
37 a.left = &b;
38 b.parent = &a;
39 a.right = &c;
40 c.parent = &a;
41 c.left = &d;
42 d.parent = &c;
43 c.right = &e;
44 e.parent = &c;
45
46 // Now you want to generate its Newick formula ...
48
49 // You need to define 4 lambdas to interface the Quetzal generator with a non-quetzal tree type:
50 std::predicate<Node> auto has_parent = [](const Node &v) { return v.parent != nullptr; };
51 std::predicate<Node> auto has_children = [](const Node &v) { return v.left != nullptr && v.right != nullptr; };
52
53 // If you are not interested into formatting label or branch_length information, just return an empty string:
54 newick::Formattable<Node> auto no_label = [](const Node &v) { return ""; };
55 newick::Formattable<Node> auto no_branch_length = [](const Node &v) { return ""; };
56
57 // We declare a quetzal newick generator and we pass to it the 4 lambdas:
58 auto generator = newick::generator(has_parent, has_children, no_label, no_branch_length);
59
60 // We connect the generator's interface to the legacy class DFS
61 // the generator will handle the formatting events during the traversal
62 a.depth_first_search(generator.pre_order(), generator.in_order(), generator.post_order());
63
64 // We retrieve the formatted string
65 std::cout << generator.take_result() << std::endl;
66 return 0;
67}
Generate the Newick formula from an external (custom) tree class.
Definition generator.hpp:261
Generic components for parsing or generating Newick strings.
Definition io.hpp:28
generator(P1, P2, F1, F2, Policy pol={}) -> generator< detail::single_function_argument_t< P1 >, P1, P2, F1, F2, Policy >
User-defined deduction guide where the node/graph type T is deduced from P1.
Definition newick_generator_5.cpp:7

Output

1(,(,));

Custom properties

Once you understand the previous example with no property, then generating a Newick string from one of your legacy tree class object with added vertices and/or edge labels is pretty straightforward.

You only need to modify three things:

  • Your class somehow embeds a data field
  • the label(v) lambda looks into the vertex to return a std::string used as its label
  • the branch_length_to_parent(v): can also look into the vertex to return a std::string to label the edge between \(v\) and its parent.

Input

1#include "quetzal/quetzal.hpp"
2#include <cassert>
3#include <random>
4#include <string>
5
6// Again your legacy class ...
7struct Node
8{
9 Node *parent = nullptr;
10 Node *left = nullptr;
11 Node *right = nullptr;
12
13 // But this time you have additional data
14 char data;
15
16 // The DFS is unchanged
17 template <class Op1, class Op2, class Op3> void depth_first_search(Op1 pre_order, Op2 in_order, Op3 post_order)
18 {
19 pre_order(*this);
20 if (this->left != nullptr && this->right != nullptr)
21 {
22 this->left->depth_first_search(pre_order, in_order, post_order);
23 in_order();
24 this->right->depth_first_search(pre_order, in_order, post_order);
25 }
26 post_order(*this);
27 }
28};
29
30int main()
31{
32
33 // Same topology
34 Node a, b, c, d, e;
35 a.left = &b;
36 b.parent = &a;
37 a.right = &c;
38 c.parent = &a;
39 c.left = &d;
40 d.parent = &c;
41 c.right = &e;
42 e.parent = &c;
43
44 // But this time you add information to each vertex
45 a.data = 'a';
46 b.data = 'b';
47 c.data = 'c';
48 d.data = 'd';
49 e.data = 'e';
50
51 // Now you want to generate its Newick formula ...
53
54 // You need to define 4 lambdas to interface the Quetzal generator with a non-quetzal tree type:
55
56 // These are unchanged
57 std::predicate<Node> auto has_parent = [](const Node &v) { return v.parent != nullptr; };
58 std::predicate<Node> auto has_children = [](const Node &v) { return v.left != nullptr && v.right != nullptr; };
59
60 // These now return information
61 newick::Formattable<Node> auto no_label = [](const Node &v) { return std::string{v.data}; };
62 newick::Formattable<Node> auto no_branch_length = [](const Node &v) { return "1.0"; };
63
64 // The rest is unchanged:
65
66 // We declare a quetzal newick generator and we pass to it the 4 lambdas:
67 auto generator = newick::generator(has_parent, has_children, no_label, no_branch_length);
68
69 // We connect the generator's interface to the legacy class DFS
70 a.depth_first_search(generator.pre_order(), generator.in_order(), generator.post_order());
71
72 // We retrieve the formatted string
73 std::cout << generator.take_result() << std::endl;
74 return 0;
75}

Output

1(b:1.0,(d:1.0,e:1.0)c:1.0)a;