-
Notifications
You must be signed in to change notification settings - Fork 1
/
basecomp.cpp
96 lines (82 loc) · 2.93 KB
/
basecomp.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
/* basecomp: get the base comp
*
* Copyright (C) 2023 Andrew D. Smith
*
* Authors: Andrew D. Smith
*
* This program is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <iostream>
#include <array>
#include <cstdio>
#include <cstdlib>
#include <cstdint>
using std::cout;
using std::cerr;
using std::tolower;
constexpr std::uint8_t nuc_to_idx[] = {
/* 0*/ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
/* 16*/ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
/* 32*/ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
/* 48*/ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
/* 64*/ 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4,
/* 80*/ 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
/* 96*/ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
/*112*/ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
/*128*/ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
};
int
main(int argc, const char **argv) {
static constexpr auto buf_size = 64u * 1024u;
if (argc != 2) {
std::perror("basecomp <fasta-file>");
return EXIT_FAILURE;
}
const auto genome_file = argv[1];
std::array<std::uint_fast64_t, 256> counter;
std::fill_n(begin(counter), 256, 0);
int is_ok = EXIT_FAILURE;
FILE *fp = std::fopen(genome_file, "r");
if (!fp) {
std::perror(genome_file);
return is_ok;
}
std::array<uint8_t, 64 * 1024> buf;
std::int_fast32_t n = 0;
while ((n = std::fread(buf.data(), 1, buf_size, fp)) > 0) {
auto c = buf.data();
// not bother to exclude name lines that start with '>'
while (n-- > 0) counter[*c]++;
}
if (std::ferror(fp)) {
std::perror(genome_file);
return is_ok;
}
std::fclose(fp);
std::uint_fast32_t tot = 0;
for (auto i : {'A', 'C', 'G', 'T'}) tot += counter[i] + counter[tolower(i)];
if (tot == 0u) tot = 1u;
for (auto i : {'A', 'C', 'G', 'T'}) {
const double denom = counter[i] + counter[tolower(i)];
cout << static_cast<char>(i) << '\t' << denom / tot << '\n';
}
return EXIT_SUCCESS;
}