Skip to content

Commit 64ead93

Browse files
authored
Merge pull request #496 from zivy/updateCharacterizeDataScript
Update the characterize_data script figure formats.
2 parents caf86e8 + 767778f commit 64ead93

File tree

1 file changed

+19
-6
lines changed

1 file changed

+19
-6
lines changed

Python/scripts/characterize_data.py

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -839,9 +839,16 @@ def characterize_data(argv=None):
839839
a date-time prefix and "_characterize_data_settings.json" postfix). This file can then be
840840
used to override the default parameter settings with user defaults in a reproducible
841841
manner via the --configuration_file option.
842-
3. pdf figure with the histogram of image sizes.
843-
4. Possibly a pdf figure with histogram of min-max intensity values for the scalar images, if any.
844-
5. Possibly a csv file listing exact duplicate images, if any. Images are considered duplicates if
842+
3. Two or three scatterplots in pdf/png format. File format is determined based on the number of
843+
images. If there are more than 500,000 images the png format is used, otherwise pdf. This avoids excessively long
844+
rendering times associated with the vector graphics format which renders each individual point in the
845+
scatterplot. Preference is to use a vector graphics format which allows for resizing without loss of
846+
quality. If you require a vector graphics format even for large datasets, you will need to modify the
847+
PDF_FOMAT_THRESHOLD value in the script.
848+
Plots include: image sizes, image spacing, and possibly min-max intensity values for
849+
scalar images. The image size and spacing plots are 2D. When dealing with 3D images, information
850+
along the z axis is encoded using color.
851+
4. Possibly a csv file listing exact duplicate images, if any. Images are considered duplicates if
845852
the intensity values are the same, header and spatial information may be different.
846853
847854
Empty lines in the resulting csv file (file names listed but nothing else in that row)
@@ -897,6 +904,12 @@ def xyz_to_index(x, y, z, thumbnail_size, tile_size):
897904
When this happens you will see a WARNING printed to the terminal output, along the lines of
898905
"ImageSeriesReader : Non uniform sampling or missing slices detected...".
899906
"""
907+
# Maximal number of points for which scatterplots are saved in pdf format,
908+
# otherwise png. Threshold was determined empirically based on rendering
909+
# times longer than 10 seconds on a 2020 MacBook Pro (1.4GHz quad-core Intel i5
910+
# with 16GB RAM).
911+
PDF_FOMAT_THRESHOLD = 500000
912+
900913
# Configure argument parser for commandline arguments and set default
901914
# values.
902915
# We use two parsers, one for the optional parameters and the other for positional and
@@ -1233,14 +1246,14 @@ def xyz_to_index(x, y, z, thumbnail_size, tile_size):
12331246
size_ax.set_ylabel("y size")
12341247
size_fig.tight_layout()
12351248
size_fig.savefig(
1236-
f"{os.path.splitext(args.output_file)[0]}_image_size_scatterplot.pdf",
1249+
f"{os.path.splitext(args.output_file)[0]}_image_size_scatterplot.{'png' if len(df) > PDF_FOMAT_THRESHOLD else 'pdf'}",
12371250
bbox_inches="tight",
12381251
)
12391252
spacing_ax.set_xlabel("x spacing [mm]")
12401253
spacing_ax.set_ylabel("y spacing [mm]")
12411254
spacing_fig.tight_layout()
12421255
spacing_fig.savefig(
1243-
f"{os.path.splitext(args.output_file)[0]}_image_spacing_scatterplot.pdf",
1256+
f"{os.path.splitext(args.output_file)[0]}_image_spacing_scatterplot.{'png' if len(df) > PDF_FOMAT_THRESHOLD else 'pdf'}",
12441257
bbox_inches="tight",
12451258
)
12461259

@@ -1254,7 +1267,7 @@ def xyz_to_index(x, y, z, thumbnail_size, tile_size):
12541267
ax.set_xlabel("min intensity")
12551268
ax.set_ylabel("max intensity")
12561269
fig.savefig(
1257-
f"{os.path.splitext(args.output_file)[0]}_min_max_intensity_scatterplot.pdf",
1270+
f"{os.path.splitext(args.output_file)[0]}_min_max_intensity_scatterplot.{'png' if len(df) > PDF_FOMAT_THRESHOLD else 'pdf'}",
12581271
bbox_inches="tight",
12591272
)
12601273

0 commit comments

Comments
 (0)