From bee42aab3569199fac02b0e0c19257df4dea82b1 Mon Sep 17 00:00:00 2001 From: Jordi Date: Wed, 27 Mar 2024 13:16:16 +0100 Subject: [PATCH] newick.pyx: Fix overflow when reading very big trees. When reading a tree from a file with more than 5 GB or so, using the position as an int would overflow. It happened for example with K01181_GTDB_rooted.faa.alg_annotated.nw. Changing int -> long (in cython) for the position fixes the problem, and has no appreciable bad impact on performance. --- ete4/parser/newick.pyx | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ete4/parser/newick.pyx b/ete4/parser/newick.pyx index 30f9b436..fc2aef86 100644 --- a/ete4/parser/newick.pyx +++ b/ete4/parser/newick.pyx @@ -229,7 +229,7 @@ def loads(tree_text, parser=None, tree_class=Tree): return tree_class(props, nodes) -def read_nodes(nodes_text, parser, int pos=0, tree_class=Tree): +def read_nodes(nodes_text, parser, long pos=0, tree_class=Tree): """Return a list of nodes and the position in the text where they end.""" # nodes_text looks like '(a,b,c)', where any element can be a list of nodes if nodes_text[pos] != '(': @@ -256,7 +256,7 @@ def read_nodes(nodes_text, parser, int pos=0, tree_class=Tree): return nodes, pos+1 -def skip_spaces_and_comments(text, int pos): +def skip_spaces_and_comments(text, long pos): """Return position in text after pos and all whitespaces and comments.""" # text = '... [this is a comment] node1...' # ^-- pos ^-- pos (returned) @@ -274,7 +274,7 @@ def skip_spaces_and_comments(text, int pos): return pos -def read_content(str text, int pos, endings=',);'): +def read_content(str text, long pos, endings=',);'): """Return content starting at position pos in text, and where it ends.""" # text = '...(node_1:0.5[&&NHX:p=a],...' -> 'node_1:0.5[&&NHX:p=a]' # ^-- pos ^-- pos (returned) @@ -291,7 +291,7 @@ def read_content(str text, int pos, endings=',);'): return text[start:pos], pos -def skip_quoted_name(str text, int pos): +def skip_quoted_name(str text, long pos): """Return the position where a quoted name ends.""" # text = "... 'node ''2'' in tree' ..." # ^-- pos ^-- pos (returned)