Skip to content

Commit

Permalink
Addressing flake8 issues: Fixing long lines, running black for format…
Browse files Browse the repository at this point in the history
…, adding noqa and a comment when it seems appropriate
  • Loading branch information
andrewdavidsmith committed Apr 7, 2024
1 parent 23d6188 commit 910fbc0
Show file tree
Hide file tree
Showing 11 changed files with 104 additions and 43 deletions.
23 changes: 13 additions & 10 deletions ribotricer/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def cli():
pass


###################### prepare-orfs function #########################################
# prepare-orfs function #########################################
@cli.command(
"prepare-orfs",
context_settings=CONTEXT_SETTINGS,
Expand Down Expand Up @@ -110,7 +110,7 @@ def prepare_orfs_cmd(
prepare_orfs(gtf, fasta, prefix, min_orf_length, start_codons, stop_codons, longest)


###################### detect-orfs function #########################################
# detect-orfs function #########################################
@cli.command(
"detect-orfs",
context_settings=CONTEXT_SETTINGS,
Expand Down Expand Up @@ -200,7 +200,8 @@ def prepare_orfs_cmd(
@click.option(
"--meta-min-reads",
type=int,
default=META_MIN_READS,
# ADS: META_MIN_READS was detected as undefined by flake8
default=META_MIN_READS, # noqa: F821
show_default=True,
help="Minimum number of reads for a read length to be considered",
)
Expand Down Expand Up @@ -228,7 +229,7 @@ def detect_orfs_cmd(
if read_lengths is not None:
try:
read_lengths = [int(x.strip()) for x in read_lengths.strip().split(",")]
except:
except Exception:
sys.exit("Error: cannot convert read_lengths into integers")
if not all([x > 0 for x in read_lengths]):
sys.exit("Error: read length must be positive")
Expand All @@ -237,8 +238,10 @@ def detect_orfs_cmd(
sys.exit("Error: psite_offsets only allowed when read_lengths is provided")
if read_lengths is not None and psite_offsets is not None:
try:
psite_offsets = [int(x.strip()) for x in psite_offsets.strip().split(",")]
except:
psite_offsets = [
int(x.strip()) for x in psite_offsets.strip().split(",")
]
except Exception:
sys.exit("Error: cannot convert psite_offsets into integers")
if len(read_lengths) != len(psite_offsets):
sys.exit("Error: psite_offsets must match read_lengths")
Expand All @@ -265,7 +268,7 @@ def detect_orfs_cmd(
)


###################### count-orfs function #########################################
# count-orfs function #########################################
@cli.command(
"count-orfs",
context_settings=CONTEXT_SETTINGS,
Expand Down Expand Up @@ -307,7 +310,7 @@ def count_orfs_cmd(ribotricer_index, detected_orfs, features, out, report_all):
count_orfs(ribotricer_index, detected_orfs, features, out, report_all)


###################### count-orfs-codon function #########################################
# count-orfs-codon function #########################################
@cli.command(
"count-orfs-codon",
context_settings=CONTEXT_SETTINGS,
Expand Down Expand Up @@ -367,7 +370,7 @@ def count_orfs_codon_cmd(
)


###################### orfs-seq function #########################################
# orfs-seq function #########################################
@cli.command(
"orfs-seq",
context_settings=CONTEXT_SETTINGS,
Expand Down Expand Up @@ -396,7 +399,7 @@ def orf_seq_cmd(ribotricer_index, fasta, saveto, protein):
orf_seq(ribotricer_index, fasta, saveto, protein)


###################### learn-cutoff function #########################################
# learn-cutoff function #########################################
@cli.command(
"learn-cutoff",
context_settings=CONTEXT_SETTINGS,
Expand Down
11 changes: 8 additions & 3 deletions ribotricer/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,9 @@ def is_read_uniq_mapping(read):
return False
else:
sys.stdout.write(
"WARNING: ribotricer was unable to detect any tags for determining multimapping status. All the reads will be treated as uniquely mapping\n"
"WARNING: ribotricer was unable to detect any tags for "
"determining multimapping status. All the reads will be "
"treated as uniquely mapping\n"
)


Expand All @@ -76,7 +78,9 @@ def merge_intervals(intervals):
intervals[i].end,
intervals[i].strand,
)
while i + 1 < len(intervals) and intervals[i + 1].start <= to_merge.end:
while (
i + 1 < len(intervals) and intervals[i + 1].start <= to_merge.end
):
to_merge.end = max(to_merge.end, intervals[i + 1].end)
i += 1
merged_intervals.append(to_merge)
Expand Down Expand Up @@ -124,6 +128,7 @@ def collapse_coverage_to_codon(coverage):
Coverage collapsed to codon level
"""
codon_coverage = [
sum(coverage[current : current + 3]) for current in range(0, len(coverage), 3)
sum(coverage[current: current + 3])
for current in range(0, len(coverage), 3)
]
return codon_coverage
2 changes: 1 addition & 1 deletion ribotricer/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,4 @@
# defined as the number of reads per unit length of the ORF
MINIMUM_DENSITY_OVER_ORF = 0.0
# Minimum number of reads for a read length to be considered
META_MIN_READS=100000
META_MIN_READS = 100000
44 changes: 36 additions & 8 deletions ribotricer/count_orfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@
import pandas as pd


def count_orfs(ribotricer_index, detected_orfs, features, outfile, report_all=False):
def count_orfs(
ribotricer_index, detected_orfs, features, outfile, report_all=False
):
"""
Parameters
----------
Expand Down Expand Up @@ -57,7 +59,11 @@ def count_orfs(ribotricer_index, detected_orfs, features, outfile, report_all=Fa
# do not output 'nontranslating' events unless report_all is set
if status != "nontranslating" or report_all:
intervals = orf_index[oid].intervals
coor = [x for iv in intervals for x in range(iv.start, iv.end + 1)]
coor = [
x
for iv in intervals
for x in range(iv.start, iv.end + 1)
]
if strand == "-":
coor = coor[::-1]
profile_stripped = profile.strip()[1:-1].split(", ")
Expand Down Expand Up @@ -105,7 +111,9 @@ def count_orfs_codon(
if True, all coverages will be exported
"""
orf_index = {}
fasta_df = pd.read_csv(ribotricer_index_fasta, sep="\t").set_index("ORF_ID")
fasta_df = pd.read_csv(ribotricer_index_fasta, sep="\t").set_index(
"ORF_ID"
)
read_counts = defaultdict(dict)
with open(ribotricer_index, "r") as fin:
# Skip header
Expand All @@ -126,9 +134,15 @@ def count_orfs_codon(
# do not output 'nontranslating' events unless report_all is set
if status != "nontranslating" or report_all:
intervals = orf_index[oid].intervals
coor = [x for iv in intervals for x in range(iv.start, iv.end + 1)]
coor = [
x
for iv in intervals
for x in range(iv.start, iv.end + 1)
]
codon_coor = [
x for iv in intervals for x in range(iv.start, iv.end + 1, 3)
x
for iv in intervals
for x in range(iv.start, iv.end + 1, 3)
]
if strand == "-":
coor = coor[::-1]
Expand Down Expand Up @@ -158,7 +172,17 @@ def count_orfs_codon(
# Output count table
with open("{}_genewise.tsv".format(prefix), "w") as fout:
fout.write(
"gene_id\tcodon\tvalues\tmean_codon_coverage\tmedian_codon_coverage\tvar_codon_coverage\tcodon_occurences\ttotal_codon_coverage\n"
"\t".join(
"gene_id",
"codon",
"values",
"mean_codon_coverage",
"median_codon_coverage",
"var_codon_coverage",
"codon_occurences",
"total_codon_coverage",
)
+ "\n"
)
for gene_id, codon_seq in sorted(read_counts):
values = list(read_counts[gene_id, codon_seq].values())
Expand All @@ -183,12 +207,16 @@ def count_orfs_codon(
fout_df["per_codon_enrichment(total/n_occur)"] = (
fout_df["total_codon_coverage"] / fout_df["codon_occurences"]
)
fout_df["-log10_relative_enrichment(per_codon/total_gene_coverage)"] = -np.log10(
fout_df[
"-log10_relative_enrichment(per_codon/total_gene_coverage)"
] = -np.log10(
fout_df["per_codon_enrichment(total/n_occur)"]
/ fout_df.groupby("gene_id")["total_codon_coverage"].transform("sum")
)
# Overwrite
fout_df.to_csv("{}_genewise.tsv".format(prefix), sep="\t", index=False, header=True)
fout_df.to_csv(
"{}_genewise.tsv".format(prefix), sep="\t", index=False, header=True
)
# Remove infs
fout_df = fout_df.replace([np.inf, -np.inf], np.nan)
fout_df = fout_df.dropna()
Expand Down
43 changes: 33 additions & 10 deletions ribotricer/detect_orfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,10 @@ def orf_coverage(orf, alignments, offset_5p=0, offset_3p=0):
except KeyError:
coverage.append(0)
else:
if strand in alignments and (chrom, pos) in alignments[strand]:
if (
strand in alignments
and (chrom, pos) in alignments[strand]
):
coverage.append(alignments[strand][(chrom, pos)])
else:
coverage.append(0)
Expand Down Expand Up @@ -266,7 +269,9 @@ def export_orf_coverages(
valid_codons_ratio = valid_codons / n_codons
# total reads in the ORF divided by the length
orf_density = np.sum(codon_coverage) / n_codons
codon_coverage_exceeds_min = codon_coverage >= min_reads_per_codon
codon_coverage_exceeds_min = (
codon_coverage >= min_reads_per_codon
)
status = (
"translating"
if (
Expand Down Expand Up @@ -322,7 +327,9 @@ def export_wig(merged_alignments, prefix):
if chrom != cur_chrom:
cur_chrom = chrom
to_write += "variableStep chrom={}\n".format(chrom)
to_write += "{}\t{}\n".format(pos, merged_alignments[strand][(chrom, pos)])
to_write += "{}\t{}\n".format(
pos, merged_alignments[strand][(chrom, pos)]
)
if strand == "+":
fname = "{}_pos.wig".format(prefix)
else:
Expand Down Expand Up @@ -380,7 +387,11 @@ def detect_orfs(

# parse the index file
now = datetime.datetime.now()
print(now.strftime("%b %d %H:%M:%S ... started parsing ribotricer index file"))
print(
now.strftime(
"%b %d %H:%M:%S ... started parsing ribotricer index file"
)
)
annotated, refseq = parse_ribotricer_index(ribotricer_index)

# create directory
Expand All @@ -391,7 +402,8 @@ def detect_orfs(
now = datetime.datetime.now()
print(
"{} ... {}".format(
now.strftime("%b %d %H:%M:%S"), "started inferring experimental design"
now.strftime("%b %d %H:%M:%S"),
"started inferring experimental design",
)
)
protocol = infer_protocol(bam, refseq, prefix)
Expand All @@ -400,13 +412,16 @@ def detect_orfs(
# split bam file into strand and read length
now = datetime.datetime.now()
print(now.strftime("%b %d %H:%M:%S ... started reading bam file"))
alignments, read_length_counts = split_bam(bam, protocol, prefix, read_lengths)
alignments, read_length_counts = split_bam(
bam, protocol, prefix, read_lengths
)

# plot read length distribution
now = datetime.datetime.now()
print(
"{} ... {}".format(
now.strftime("%b %d %H:%M:%S"), "started plotting read length distribution"
now.strftime("%b %d %H:%M:%S"),
"started plotting read length distribution",
)
)
plot_read_lengths(read_length_counts, prefix)
Expand All @@ -419,13 +434,20 @@ def detect_orfs(
"started calculating metagene profiles. This may take a long time...",
)
)
metagenes = metagene_coverage(annotated, alignments, read_length_counts, prefix, meta_min_reads = meta_min_reads)
metagenes = metagene_coverage(
annotated,
alignments,
read_length_counts,
prefix,
meta_min_reads=meta_min_reads,
)

# plot metagene profiles
now = datetime.datetime.now()
print(
"\n{} ... {}".format(
now.strftime("%b %d %H:%M:%S"), "started plotting metagene profiles"
now.strftime("%b %d %H:%M:%S"),
"started plotting metagene profiles",
)
)
plot_metagene(metagenes, read_length_counts, prefix)
Expand All @@ -435,7 +457,8 @@ def detect_orfs(
now = datetime.datetime.now()
print(
"{} ... {}".format(
now.strftime("%b %d %H:%M:%S"), "started inferring P-site offsets"
now.strftime("%b %d %H:%M:%S"),
"started inferring P-site offsets",
)
)
psite_offsets = align_metagenes(
Expand Down
2 changes: 1 addition & 1 deletion ribotricer/metagene.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ def align_metagenes(
xcorr = np.correlate(reference, cov, "full")
origin = len(xcorr) // 2
bound = min(base, length)
xcorr = xcorr[origin - bound : origin + bound]
xcorr = xcorr[(origin - bound):(origin + bound)]
lag = np.argmax(xcorr) - len(xcorr) // 2
psite_offsets[length] = lag + TYPICAL_OFFSET
to_write += "\tlag of {}: {}\n".format(length, lag)
Expand Down
3 changes: 2 additions & 1 deletion ribotricer/orf.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,8 @@ def from_string(cls, line):
)
)
return None
oid = fields[0]
# ADS: oid below is not used
oid = fields[0] # noqa F841
category = fields[1]
tid = fields[2]
ttype = fields[3]
Expand Down
10 changes: 5 additions & 5 deletions ribotricer/orf_seq.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def translate_nt_to_aa(seq):
protein = ""
if len(seq) % 3 == 0:
for i in range(0, len(seq), 3):
codon = seq[i : i + 3]
codon = seq[i: i + 3]
if "N" in codon:
protein += "X"
elif codon not in codon_table:
Expand Down Expand Up @@ -142,10 +142,10 @@ def orf_seq(ribotricer_index, genome_fasta, saveto, translate=False):
if translate:
if len(seq) % 3 != 0:
sys.stderr.write(
"WARNING: Sequence length with ORF ID '{}' is not a multiple of three. Output sequence might be truncated.\n".format(
orf_id
)
"WARNING: Sequence length with ORF ID '{orf_id}' is not "
"a multiple of three. Output sequence might be "
"truncated.\n"
)
seq = seq[0 : (len(seq) // 3) * 3]
seq = seq[0: (len(seq) // 3) * 3]
seq = translate_nt_to_aa(seq)
fh.write("{}\t{}\n".format(orf_id, seq))
5 changes: 3 additions & 2 deletions ribotricer/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,9 @@
import matplotlib
matplotlib.use("Agg")

import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
# ADS: verify that matplotlib.use("Agg") must precede imports below
import matplotlib.pyplot as plt # noqa E402
from matplotlib.backends.backend_pdf import PdfPages # noqa E402

matplotlib.rcParams["pdf.fonttype"] = 42
matplotlib.rcParams["ps.fonttype"] = 42
Expand Down
2 changes: 1 addition & 1 deletion ribotricer/prepare_orfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ def search_orfs(fasta, intervals, min_orf_length, start_codons, stop_codons, lon
)
seq = merged_seq[start:idx]
leader = merged_seq[:start]
trailer = merged_seq[idx + 3 :]
trailer = merged_seq[idx + 3:]
if ivs:
orfs.append((ivs, seq, leader, trailer))
if longest:
Expand Down
2 changes: 1 addition & 1 deletion ribotricer/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,7 +400,7 @@ def translate(seq):
protein = ""
if len(seq) % 3 == 0:
for i in range(0, len(seq), 3):
codon = seq[i : i + 3]
codon = seq[i: i + 3]
protein += CODON_TO_AA[codon]
return protein

Expand Down

0 comments on commit 910fbc0

Please sign in to comment.