Skip to content

Commit 9414c19

Browse files
committed
fixes #434, clearly labeling plots that are downsampled
1 parent ebdf07a commit 9414c19

3 files changed

Lines changed: 41 additions & 23 deletions

File tree

nanoplot/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "1.46.2"
1+
__version__ = "1.47.0"

nanoplotter/nanoplotter_main.py

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -105,19 +105,22 @@ def scatter(
105105
return []
106106
plots_made = []
107107
idx = np.random.choice(x.index, min(10000, len(x)), replace=False)
108+
downsampled = len(x) > 10000
108109
maxvalx = xmax or np.amax(x[idx])
109110
maxvaly = ymax or np.amax(y[idx])
110111

111112
if plots["dot"]:
112113
if log:
113114
dot_plot = Plot(
114115
path=path + "_loglength_dot.html",
115-
title=f"{names[0]} vs {names[1]} plot using dots "
116-
"after log transformation of read lengths",
116+
title=f"{names[0]} vs {names[1]} dot plot "
117+
+ ("(log-transformed, downsampled)" if downsampled else "(log-transformed)"),
117118
)
118119
else:
119120
dot_plot = Plot(
120-
path=path + "_dot.html", title=f"{names[0]} vs {names[1]} plot using dots"
121+
path=path + "_dot.html",
122+
title=f"{names[0]} vs {names[1]} dot plot"
123+
+ (" (downsampled)" if downsampled else ""),
121124
)
122125

123126
fig = px.scatter(
@@ -150,7 +153,8 @@ def scatter(
150153
if plots["kde"]:
151154
kde_plot = Plot(
152155
path=path + "_loglength_kde.html" if log else path + "_kde.html",
153-
title=f"{names[0]} vs {names[1]} kde plot",
156+
title=f"{names[0]} vs {names[1]} kde plot"
157+
+ (" (downsampled)" if downsampled else ""),
154158
)
155159

156160
col = hex_to_rgb_scale_0_1(color)
@@ -317,17 +321,19 @@ def scatter_legacy(
317321

318322
if plots["kde"]:
319323
if len(x) > 2:
324+
kde_downsampled = len(x) > 2000
320325
idx = np.random.choice(x.index, min(2000, len(x)), replace=False)
321326
if log:
322327
kde_plot = Plot(
323328
path=path + "_loglength_kde." + figformat[0],
324-
title="{} vs {} plot using a kernel density estimation "
325-
"after log transformation of read lengths".format(names[0], names[1]),
329+
title=f"{names[0]} vs {names[1]} kde plot "
330+
+ ("(log-transformed, downsampled)" if kde_downsampled else "(log-transformed)"),
326331
)
327332
else:
328333
kde_plot = Plot(
329334
path=path + "_kde." + figformat[0],
330-
title=f"{names[0]} vs {names[1]} plot using a kernel density estimation",
335+
title=f"{names[0]} vs {names[1]} kde plot"
336+
+ (" (downsampled)" if kde_downsampled else ""),
331337
)
332338
plot = sns.jointplot(
333339
x=x[idx],
@@ -349,7 +355,10 @@ def scatter_legacy(
349355
plot.ax_marg_x.set_xticks(np.log10(ticks))
350356
plot.ax_joint.set_xticklabels(ticks)
351357
plt.subplots_adjust(top=0.90)
352-
plot.fig.suptitle(title or "{} vs {} plot".format(names[0], names[1]), fontsize=25)
358+
_suptitle = "{} vs {} plot{}".format(
359+
names[0], names[1], " (downsampled)" if kde_downsampled else ""
360+
)
361+
plot.fig.suptitle(title or _suptitle, fontsize=25)
353362
kde_plot.fig = plot
354363
kde_plot.save(settings)
355364
plots_made.append(kde_plot)
@@ -494,11 +503,14 @@ def dynamic_histogram(array, name, path, settings, title=None, color="#4CB391"):
494503
Use plotly to create a histogram
495504
Return html code, but also save as png
496505
"""
506+
is_downsampled = len(array) > 10000
497507
dynhist = Plot(
498508
path=path + f"Dynamic_Histogram_{name[0].lower() + name[1:].replace(' ', '_')}.html",
499-
title="Dynamic histogram of {}".format(name[0].lower() + name[1:]),
509+
title="Dynamic histogram of {}{}".format(
510+
name[0].lower() + name[1:], " (downsampled)" if is_downsampled else ""
511+
),
500512
)
501-
ylabel = "Number of reads" if len(array) <= 10000 else "Downsampled number of reads"
513+
ylabel = "Number of reads" if not is_downsampled else "Downsampled number of reads"
502514
dynhist.html, dynhist.fig = plotly_histogram(
503515
array=array.sample(min(len(array), 10000)),
504516
color=color,

nanoplotter/timeplots.py

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -51,16 +51,18 @@ def time_plots(df, subsampled_df, path, settings, title=None, color="#4CB391", l
5151
title=title,
5252
color=color,
5353
settings=settings)
54+
was_downsampled = len(df) > len(subsampled_df)
5455
violins = violin_plots_over_time(dfs=check_valid_time_and_sort(subsampled_df),
5556
path=path,
5657
title=title,
5758
log_length=log_length,
5859
color=color,
59-
settings=settings)
60+
settings=settings,
61+
downsampled=was_downsampled)
6062
return cumyields + reads_pores_over_time + violins
6163

6264

63-
def violin_plots_over_time(dfs, path, title, settings, log_length=False, color="#4CB391"):
65+
def violin_plots_over_time(dfs, path, title, settings, log_length=False, color="#4CB391", downsampled=False):
6466

6567
dfs['timebin'] = add_time_bins(dfs)
6668
plots = []
@@ -72,29 +74,33 @@ def violin_plots_over_time(dfs, path, title, settings, log_length=False, color="
7274
title=title,
7375
log_length=log_length,
7476
color=color,
75-
settings=settings))
77+
settings=settings,
78+
downsampled=downsampled))
7679
if "quals" in dfs:
7780
plots.append(quality_over_time(dfs=dfs,
7881
path=path,
7982
title=title,
8083
color=color,
81-
settings=settings))
84+
settings=settings,
85+
downsampled=downsampled))
8286
if "duration" in dfs:
8387
plots.append(sequencing_speed_over_time(dfs=dfs,
8488
path=path,
8589
title=title,
8690
color=color,
87-
settings=settings))
91+
settings=settings,
92+
downsampled=downsampled))
8893
return plots
8994

9095

91-
def length_over_time(dfs, path, title, settings, log_length=False, color="#4CB391"):
96+
def length_over_time(dfs, path, title, settings, log_length=False, color="#4CB391", downsampled=False):
97+
suffix = " (downsampled)" if downsampled else ""
9298
if log_length:
9399
time_length = Plot(path=path + "TimeLogLengthViolinPlot.html",
94-
title="Violin plot of log read lengths over time")
100+
title=f"Violin plot of log read lengths over time{suffix}")
95101
else:
96102
time_length = Plot(path=path + "TimeLengthViolinPlot.html",
97-
title="Violin plot of read lengths over time")
103+
title=f"Violin plot of read lengths over time{suffix}")
98104

99105
length_column = "log_lengths" if log_length else "lengths"
100106

@@ -136,9 +142,9 @@ def length_over_time(dfs, path, title, settings, log_length=False, color="#4CB39
136142
return time_length
137143

138144

139-
def quality_over_time(dfs, path, settings, title=None, color="#4CB391"):
145+
def quality_over_time(dfs, path, settings, title=None, color="#4CB391", downsampled=False):
140146
time_qual = Plot(path=path + "TimeQualityViolinPlot.html",
141-
title="Violin plot of quality over time")
147+
title="Violin plot of quality over time" + (" (downsampled)" if downsampled else ""))
142148

143149
fig = go.Figure()
144150

@@ -163,9 +169,9 @@ def quality_over_time(dfs, path, settings, title=None, color="#4CB391"):
163169
return time_qual
164170

165171

166-
def sequencing_speed_over_time(dfs, path, title, settings, color="#4CB391"):
172+
def sequencing_speed_over_time(dfs, path, title, settings, color="#4CB391", downsampled=False):
167173
time_duration = Plot(path=path + "TimeSequencingSpeed_ViolinPlot.html",
168-
title="Violin plot of sequencing speed over time")
174+
title="Violin plot of sequencing speed over time" + (" (downsampled)" if downsampled else ""))
169175

170176
mask = dfs['duration'] != 0
171177

0 commit comments

Comments
 (0)