Merge pull request #158 from smithlabcode/fixing-linter-errors
Fixing linter errors
saketkc committed Apr 13, 2024
2 parents 23d6188 + e269b62 commit 071b5cc
Showing 16 changed files with 115 additions and 47 deletions.
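
The changes below are largely mechanical lint fixes: bare `except:` clauses become `except Exception:` (flake8 E722), long statements are wrapped, and missing whitespace around `=` is added (E225). As a minimal hedged sketch of the bare-except pattern, not code taken from the repository:

# Hypothetical snippet (not from ribotricer) illustrating the E722 fix:
# a bare "except:" also swallows SystemExit and KeyboardInterrupt, so the
# linter asks for an explicit exception class.
import sys

def parse_read_lengths(raw):
    try:
        return [int(x.strip()) for x in raw.split(",")]
    except Exception:  # previously a bare "except:"
        sys.exit("Error: cannot convert read_lengths into integers")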
2 changes: 1 addition & 1 deletion .github/workflows/pythonpackage.yml
@@ -8,7 +8,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.7, 3.8]
python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"]

steps:
- uses: actions/checkout@v2
6 changes: 5 additions & 1 deletion HISTORY.md
@@ -1,4 +1,8 @@
# V1.3.3 (2022-02-09)
# Unreleased

- Added `meta_min_reads` parameter to control minimum coverage for metagene plots ([#155](https://github.com/smithlabcode/ribotricer/pull/155))

# v1.3.3 (2022-02-09)

- Print start codons when preparing orfs
- Fix for custom start codons
2 changes: 1 addition & 1 deletion ribotricer/__init__.py
@@ -1,4 +1,4 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__author__ = "Saket Choudhary, Wenzheng Li"
__version__ = "1.3.3"
__version__ = "1.4.0-dev0"
21 changes: 12 additions & 9 deletions ribotricer/cli.py
@@ -25,6 +25,7 @@
from .const import MINIMUM_VALID_CODONS_RATIO
from .const import MINIMUM_READS_PER_CODON
from .const import MINIMUM_DENSITY_OVER_ORF
from .const import META_MIN_READS

from .count_orfs import count_orfs
from .count_orfs import count_orfs_codon
@@ -49,7 +50,7 @@ def cli():
pass


###################### prepare-orfs function #########################################
# prepare-orfs function #########################################
@cli.command(
"prepare-orfs",
context_settings=CONTEXT_SETTINGS,
@@ -110,7 +111,7 @@ def prepare_orfs_cmd(
prepare_orfs(gtf, fasta, prefix, min_orf_length, start_codons, stop_codons, longest)


###################### detect-orfs function #########################################
# detect-orfs function #########################################
@cli.command(
"detect-orfs",
context_settings=CONTEXT_SETTINGS,
@@ -228,7 +229,7 @@ def detect_orfs_cmd(
if read_lengths is not None:
try:
read_lengths = [int(x.strip()) for x in read_lengths.strip().split(",")]
except:
except Exception:
sys.exit("Error: cannot convert read_lengths into integers")
if not all([x > 0 for x in read_lengths]):
sys.exit("Error: read length must be positive")
@@ -237,8 +238,10 @@
sys.exit("Error: psite_offsets only allowed when read_lengths is provided")
if read_lengths is not None and psite_offsets is not None:
try:
psite_offsets = [int(x.strip()) for x in psite_offsets.strip().split(",")]
except:
psite_offsets = [
int(x.strip()) for x in psite_offsets.strip().split(",")
]
except Exception:
sys.exit("Error: cannot convert psite_offsets into integers")
if len(read_lengths) != len(psite_offsets):
sys.exit("Error: psite_offsets must match read_lengths")
@@ -265,7 +268,7 @@
)


###################### count-orfs function #########################################
# count-orfs function #########################################
@cli.command(
"count-orfs",
context_settings=CONTEXT_SETTINGS,
@@ -307,7 +310,7 @@ def count_orfs_cmd(ribotricer_index, detected_orfs, features, out, report_all):
count_orfs(ribotricer_index, detected_orfs, features, out, report_all)


###################### count-orfs-codon function #########################################
# count-orfs-codon function #########################################
@cli.command(
"count-orfs-codon",
context_settings=CONTEXT_SETTINGS,
@@ -367,7 +370,7 @@ def count_orfs_codon_cmd(
)


###################### orfs-seq function #########################################
# orfs-seq function #########################################
@cli.command(
"orfs-seq",
context_settings=CONTEXT_SETTINGS,
@@ -396,7 +399,7 @@ def orf_seq_cmd(ribotricer_index, fasta, saveto, protein):
orf_seq(ribotricer_index, fasta, saveto, protein)


###################### learn-cutoff function #########################################
# learn-cutoff function #########################################
@cli.command(
"learn-cutoff",
context_settings=CONTEXT_SETTINGS,
11 changes: 8 additions & 3 deletions ribotricer/common.py
@@ -50,7 +50,9 @@ def is_read_uniq_mapping(read):
return False
else:
sys.stdout.write(
"WARNING: ribotricer was unable to detect any tags for determining multimapping status. All the reads will be treated as uniquely mapping\n"
"WARNING: ribotricer was unable to detect any tags for "
"determining multimapping status. All the reads will be "
"treated as uniquely mapping\n"
)


@@ -76,7 +78,9 @@ def merge_intervals(intervals):
intervals[i].end,
intervals[i].strand,
)
while i + 1 < len(intervals) and intervals[i + 1].start <= to_merge.end:
while (
i + 1 < len(intervals) and intervals[i + 1].start <= to_merge.end
):
to_merge.end = max(to_merge.end, intervals[i + 1].end)
i += 1
merged_intervals.append(to_merge)
@@ -124,6 +128,7 @@ def collapse_coverage_to_codon(coverage):
Coverage collapsed to codon level
"""
codon_coverage = [
sum(coverage[current : current + 3]) for current in range(0, len(coverage), 3)
sum(coverage[current: current + 3])
for current in range(0, len(coverage), 3)
]
return codon_coverage
2 changes: 1 addition & 1 deletion ribotricer/const.py
@@ -30,4 +30,4 @@
# defined as the number of reads per unit length of the ORF
MINIMUM_DENSITY_OVER_ORF = 0.0
# Minimum number of reads for a read length to be considered
META_MIN_READS=100000
META_MIN_READS = 100000
44 changes: 36 additions & 8 deletions ribotricer/count_orfs.py
@@ -21,7 +21,9 @@
import pandas as pd


def count_orfs(ribotricer_index, detected_orfs, features, outfile, report_all=False):
def count_orfs(
ribotricer_index, detected_orfs, features, outfile, report_all=False
):
"""
Parameters
----------
@@ -57,7 +59,11 @@ def count_orfs(ribotricer_index, detected_orfs, features, outfile, report_all=Fa
# do not output 'nontranslating' events unless report_all is set
if status != "nontranslating" or report_all:
intervals = orf_index[oid].intervals
coor = [x for iv in intervals for x in range(iv.start, iv.end + 1)]
coor = [
x
for iv in intervals
for x in range(iv.start, iv.end + 1)
]
if strand == "-":
coor = coor[::-1]
profile_stripped = profile.strip()[1:-1].split(", ")
@@ -105,7 +111,9 @@ def count_orfs_codon(
if True, all coverages will be exported
"""
orf_index = {}
fasta_df = pd.read_csv(ribotricer_index_fasta, sep="\t").set_index("ORF_ID")
fasta_df = pd.read_csv(ribotricer_index_fasta, sep="\t").set_index(
"ORF_ID"
)
read_counts = defaultdict(dict)
with open(ribotricer_index, "r") as fin:
# Skip header
@@ -126,9 +134,15 @@
# do not output 'nontranslating' events unless report_all is set
if status != "nontranslating" or report_all:
intervals = orf_index[oid].intervals
coor = [x for iv in intervals for x in range(iv.start, iv.end + 1)]
coor = [
x
for iv in intervals
for x in range(iv.start, iv.end + 1)
]
codon_coor = [
x for iv in intervals for x in range(iv.start, iv.end + 1, 3)
x
for iv in intervals
for x in range(iv.start, iv.end + 1, 3)
]
if strand == "-":
coor = coor[::-1]
@@ -158,7 +172,17 @@
# Output count table
with open("{}_genewise.tsv".format(prefix), "w") as fout:
fout.write(
"gene_id\tcodon\tvalues\tmean_codon_coverage\tmedian_codon_coverage\tvar_codon_coverage\tcodon_occurences\ttotal_codon_coverage\n"
"\t".join(
"gene_id",
"codon",
"values",
"mean_codon_coverage",
"median_codon_coverage",
"var_codon_coverage",
"codon_occurences",
"total_codon_coverage",
)
+ "\n"
)
for gene_id, codon_seq in sorted(read_counts):
values = list(read_counts[gene_id, codon_seq].values())
@@ -183,12 +207,16 @@
fout_df["per_codon_enrichment(total/n_occur)"] = (
fout_df["total_codon_coverage"] / fout_df["codon_occurences"]
)
fout_df["-log10_relative_enrichment(per_codon/total_gene_coverage)"] = -np.log10(
fout_df[
"-log10_relative_enrichment(per_codon/total_gene_coverage)"
] = -np.log10(
fout_df["per_codon_enrichment(total/n_occur)"]
/ fout_df.groupby("gene_id")["total_codon_coverage"].transform("sum")
)
# Overwrite
fout_df.to_csv("{}_genewise.tsv".format(prefix), sep="\t", index=False, header=True)
fout_df.to_csv(
"{}_genewise.tsv".format(prefix), sep="\t", index=False, header=True
)
# Remove infs
fout_df = fout_df.replace([np.inf, -np.inf], np.nan)
fout_df = fout_df.dropna()
43 changes: 33 additions & 10 deletions ribotricer/detect_orfs.py
@@ -174,7 +174,10 @@ def orf_coverage(orf, alignments, offset_5p=0, offset_3p=0):
except KeyError:
coverage.append(0)
else:
if strand in alignments and (chrom, pos) in alignments[strand]:
if (
strand in alignments
and (chrom, pos) in alignments[strand]
):
coverage.append(alignments[strand][(chrom, pos)])
else:
coverage.append(0)
@@ -266,7 +269,9 @@
valid_codons_ratio = valid_codons / n_codons
# total reads in the ORF divided by the length
orf_density = np.sum(codon_coverage) / n_codons
codon_coverage_exceeds_min = codon_coverage >= min_reads_per_codon
codon_coverage_exceeds_min = (
codon_coverage >= min_reads_per_codon
)
status = (
"translating"
if (
@@ -322,7 +327,9 @@ def export_wig(merged_alignments, prefix):
if chrom != cur_chrom:
cur_chrom = chrom
to_write += "variableStep chrom={}\n".format(chrom)
to_write += "{}\t{}\n".format(pos, merged_alignments[strand][(chrom, pos)])
to_write += "{}\t{}\n".format(
pos, merged_alignments[strand][(chrom, pos)]
)
if strand == "+":
fname = "{}_pos.wig".format(prefix)
else:
@@ -380,7 +387,11 @@ def detect_orfs(

# parse the index file
now = datetime.datetime.now()
print(now.strftime("%b %d %H:%M:%S ... started parsing ribotricer index file"))
print(
now.strftime(
"%b %d %H:%M:%S ... started parsing ribotricer index file"
)
)
annotated, refseq = parse_ribotricer_index(ribotricer_index)

# create directory
@@ -391,7 +402,8 @@
now = datetime.datetime.now()
print(
"{} ... {}".format(
now.strftime("%b %d %H:%M:%S"), "started inferring experimental design"
now.strftime("%b %d %H:%M:%S"),
"started inferring experimental design",
)
)
protocol = infer_protocol(bam, refseq, prefix)
@@ -400,13 +412,16 @@
# split bam file into strand and read length
now = datetime.datetime.now()
print(now.strftime("%b %d %H:%M:%S ... started reading bam file"))
alignments, read_length_counts = split_bam(bam, protocol, prefix, read_lengths)
alignments, read_length_counts = split_bam(
bam, protocol, prefix, read_lengths
)

# plot read length distribution
now = datetime.datetime.now()
print(
"{} ... {}".format(
now.strftime("%b %d %H:%M:%S"), "started plotting read length distribution"
now.strftime("%b %d %H:%M:%S"),
"started plotting read length distribution",
)
)
plot_read_lengths(read_length_counts, prefix)
@@ -419,13 +434,20 @@
"started calculating metagene profiles. This may take a long time...",
)
)
metagenes = metagene_coverage(annotated, alignments, read_length_counts, prefix, meta_min_reads = meta_min_reads)
metagenes = metagene_coverage(
annotated,
alignments,
read_length_counts,
prefix,
meta_min_reads=meta_min_reads,
)

# plot metagene profiles
now = datetime.datetime.now()
print(
"\n{} ... {}".format(
now.strftime("%b %d %H:%M:%S"), "started plotting metagene profiles"
now.strftime("%b %d %H:%M:%S"),
"started plotting metagene profiles",
)
)
plot_metagene(metagenes, read_length_counts, prefix)
@@ -435,7 +457,8 @@
now = datetime.datetime.now()
print(
"{} ... {}".format(
now.strftime("%b %d %H:%M:%S"), "started inferring P-site offsets"
now.strftime("%b %d %H:%M:%S"),
"started inferring P-site offsets",
)
)
psite_offsets = align_metagenes(
2 changes: 1 addition & 1 deletion ribotricer/metagene.py
@@ -281,7 +281,7 @@ def align_metagenes(
xcorr = np.correlate(reference, cov, "full")
origin = len(xcorr) // 2
bound = min(base, length)
xcorr = xcorr[origin - bound : origin + bound]
xcorr = xcorr[(origin - bound):(origin + bound)]
lag = np.argmax(xcorr) - len(xcorr) // 2
psite_offsets[length] = lag + TYPICAL_OFFSET
to_write += "\tlag of {}: {}\n".format(length, lag)
3 changes: 2 additions & 1 deletion ribotricer/orf.py
@@ -89,7 +89,8 @@ def from_string(cls, line):
)
)
return None
oid = fields[0]
# ADS: oid below is not used
oid = fields[0]  # noqa: F841
category = fields[1]
tid = fields[2]
ttype = fields[3]
10 changes: 5 additions & 5 deletions ribotricer/orf_seq.py
@@ -92,7 +92,7 @@ def translate_nt_to_aa(seq):
protein = ""
if len(seq) % 3 == 0:
for i in range(0, len(seq), 3):
codon = seq[i : i + 3]
codon = seq[i: i + 3]
if "N" in codon:
protein += "X"
elif codon not in codon_table:
@@ -142,10 +142,10 @@ def orf_seq(ribotricer_index, genome_fasta, saveto, translate=False):
if translate:
if len(seq) % 3 != 0:
sys.stderr.write(
"WARNING: Sequence length with ORF ID '{}' is not a multiple of three. Output sequence might be truncated.\n".format(
orf_id
)
"WARNING: Sequence length with ORF ID '{orf_id}' is not "
"a multiple of three. Output sequence might be "
"truncated.\n"
)
seq = seq[0 : (len(seq) // 3) * 3]
seq = seq[0: (len(seq) // 3) * 3]
seq = translate_nt_to_aa(seq)
fh.write("{}\t{}\n".format(orf_id, seq))
