Skip to content

Commit 065ba17

Browse files
author
Jeff Whitaker
authored
Merge pull request #1183 from hmaarrfk/add_set_alignment
Add support for nc_set_alignment and nc_get_alignment
2 parents 5d35046 + 3a81994 commit 065ba17

5 files changed

Lines changed: 224 additions & 4 deletions

File tree

Changelog

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
* add Dataset methods has_<name>_filter (where <name>=zstd,blosc,bzip2,szip)
44
to check for availability of extra compression filters.
55
* release GIL for all C-lib calls (issue #1180).
6+
* Add support for nc_set_alignment and nc_get_alignment to control alignment
7+
of data within HDF5 files.
68

79
version 1.6.0 (tag v1.6.0rel)
810
==============================

include/netCDF4.pxi

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -441,6 +441,11 @@ IF HAS_PARALLEL4_SUPPORT or HAS_PNETCDF_SUPPORT:
441441
NC_MPIPOSIX
442442
NC_PNETCDF
443443

444+
IF HAS_SET_ALIGNMENT:
445+
cdef extern from "netcdf.h":
446+
int nc_set_alignment(int threshold, int alignment)
447+
int nc_get_alignment(int *threshold, int *alignment)
448+
444449
# taken from numpy.pxi in numpy 1.0rc2.
445450
cdef extern from "numpy/arrayobject.h":
446451
ctypedef int npy_intp

setup.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ def check_api(inc_dirs,netcdf_lib_version):
7171
has_zstandard = False
7272
has_bzip2 = False
7373
has_blosc = False
74+
has_set_alignment = False
7475

7576
for d in inc_dirs:
7677
try:
@@ -92,6 +93,8 @@ def check_api(inc_dirs,netcdf_lib_version):
9293
has_cdf5_format = True
9394
if line.startswith('nc_def_var_quantize'):
9495
has_quantize = True
96+
if line.startswith('nc_set_alignment'):
97+
has_set_alignment = True
9598

9699
if has_nc_open_mem:
97100
try:
@@ -141,7 +144,7 @@ def check_api(inc_dirs,netcdf_lib_version):
141144
return has_rename_grp, has_nc_inq_path, has_nc_inq_format_extended, \
142145
has_cdf5_format, has_nc_open_mem, has_nc_create_mem, \
143146
has_parallel4_support, has_pnetcdf_support, has_szip_support, has_quantize, \
144-
has_zstandard, has_bzip2, has_blosc
147+
has_zstandard, has_bzip2, has_blosc, has_set_alignment
145148

146149

147150
def getnetcdfvers(libdirs):
@@ -228,7 +231,7 @@ def extract_version(CYTHON_FNAME):
228231

229232
setup_cfg = 'setup.cfg'
230233
# contents of setup.cfg will override env vars, unless
231-
# USE_SETUPCFG evaluates to False.
234+
# USE_SETUPCFG evaluates to False.
232235
ncconfig = None
233236
use_ncconfig = None
234237
if USE_SETUPCFG and os.path.exists(setup_cfg):
@@ -338,7 +341,7 @@ def extract_version(CYTHON_FNAME):
338341
elif USE_NCCONFIG is None:
339342
# if nc-config exists, and USE_NCCONFIG not set, try to use it.
340343
if HAS_NCCONFIG: USE_NCCONFIG=True
341-
#elif USE_NCCONFIG is None:
344+
#elif USE_NCCONFIG is None:
342345
# USE_NCCONFIG = False # don't try to use nc-config if USE_NCCONFIG not set
343346

344347
try:
@@ -555,7 +558,7 @@ def _populate_hdf5_info(dirstosearch, inc_dirs, libs, lib_dirs):
555558
has_rename_grp, has_nc_inq_path, has_nc_inq_format_extended, \
556559
has_cdf5_format, has_nc_open_mem, has_nc_create_mem, \
557560
has_parallel4_support, has_pnetcdf_support, has_szip_support, has_quantize, \
558-
has_zstandard, has_bzip2, has_blosc = \
561+
has_zstandard, has_bzip2, has_blosc, has_set_alignment = \
559562
check_api(inc_dirs,netcdf_lib_version)
560563
# for netcdf 4.4.x CDF5 format is always enabled.
561564
if netcdf_lib_version is not None and\
@@ -662,6 +665,13 @@ def _populate_hdf5_info(dirstosearch, inc_dirs, libs, lib_dirs):
662665
sys.stdout.write('netcdf lib does not have szip compression functions\n')
663666
f.write('DEF HAS_SZIP_SUPPORT = 0\n')
664667

668+
if has_set_alignment:
669+
sys.stdout.write('netcdf lib has nc_set_alignment function\n')
670+
f.write('DEF HAS_SET_ALIGNMENT = 1\n')
671+
else:
672+
sys.stdout.write('netcdf lib does not have nc_set_alignment function\n')
673+
f.write('DEF HAS_SET_ALIGNMENT = 0\n')
674+
665675
f.close()
666676

667677
if has_parallel4_support or has_pnetcdf_support:

src/netCDF4/_netCDF4.pyx

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1324,6 +1324,52 @@ details."""
13241324
ierr = nc_set_chunk_cache(sizep,nelemsp, preemptionp)
13251325
_ensure_nc_success(ierr)
13261326

1327+
IF HAS_SET_ALIGNMENT:
    def get_alignment():
        """
        **`get_alignment()`**

        Return the alignment settings the netCDF library currently applies
        within HDF5 files, as a tuple `(threshold, alignment)`. The values
        are whatever the C library function `nc_get_alignment` reports; see
        the netcdf C library documentation for that function for details.
        Use `set_alignment` to change them.

        This function was added in netcdf 4.9.0."""
        cdef int c_threshold, c_alignment
        cdef int status = nc_get_alignment(&c_threshold, &c_alignment)
        _ensure_nc_success(status)
        # The C ints become Python ints when the tuple is built.
        return (c_threshold, c_alignment)

    def set_alignment(threshold, alignment):
        """
        **`set_alignment(threshold,alignment)`**

        Change the HDF5 file alignment by passing `threshold` and
        `alignment` to the C library function `nc_set_alignment`.
        See the netcdf C library documentation for `nc_set_alignment` for
        details.

        This function was added in netcdf 4.9.0."""
        # Assigning to typed C locals converts the Python arguments to C ints
        # before the library is called.
        cdef int c_threshold = threshold
        cdef int c_alignment = alignment
        cdef int status = nc_set_alignment(c_threshold, c_alignment)
        _ensure_nc_success(status)
ELSE:
    def get_alignment():
        """
        **`get_alignment()`**

        Placeholder used when the extension was compiled without
        `nc_get_alignment`; always raises `RuntimeError`."""
        raise RuntimeError("This function requires netcdf4 4.9.0+ to be used at compile time")

    def set_alignment(threshold, alignment):
        """
        **`set_alignment(threshold,alignment)`**

        Placeholder used when the extension was compiled without
        `nc_set_alignment`; always raises `RuntimeError`."""
        raise RuntimeError("This function requires netcdf4 4.9.0+ to be used at compile time")
1372+
13271373
__netcdf4libversion__ = getlibversion().split()[0]
13281374
__hdf5libversion__ = _gethdf5libversion()
13291375
__has_rename_grp__ = HAS_RENAME_GRP
@@ -1339,6 +1385,7 @@ __has_zstandard_support__ = HAS_ZSTANDARD_SUPPORT
13391385
__has_bzip2_support__ = HAS_BZIP2_SUPPORT
13401386
__has_blosc_support__ = HAS_BLOSC_SUPPORT
13411387
__has_szip_support__ = HAS_SZIP_SUPPORT
1388+
__has_set_alignment__ = HAS_SET_ALIGNMENT
13421389
_needsworkaround_issue485 = __netcdf4libversion__ < "4.4.0" or \
13431390
(__netcdf4libversion__.startswith("4.4.0") and \
13441391
"-development" in __netcdf4libversion__)

test/tst_alignment.py

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
import numpy as np
2+
from netCDF4 import set_alignment, get_alignment, Dataset
3+
import netCDF4
4+
import os
5+
import subprocess
6+
import tempfile
7+
import unittest
8+
9+
# Whether set_alignment/get_alignment work is decided when the extension is
# compiled: the module publishes that decision as __has_set_alignment__, and
# the functions raise RuntimeError when it is false.  Ask the module directly
# rather than inferring support from the library version string, which can
# disagree with what was actually compiled in.
has_alignment = bool(netCDF4.__has_set_alignment__)

# h5ls is used to read back the on-disk address of each variable.  A missing
# executable raises OSError; a non-zero exit status raises CalledProcessError.
try:
    subprocess.check_call(['h5ls', '--version'], stdout=subprocess.DEVNULL)
except (OSError, subprocess.CalledProcessError):
    has_h5ls = False
else:
    has_h5ls = True

# Reserve a unique path for the test file.  The handle is closed straight
# away (delete=False keeps the file on disk) so that nothing holds the file
# open while the tests write to it and later remove it.
with tempfile.NamedTemporaryFile(suffix='.nc', delete=False) as _tmp_file:
    file_name = _tmp_file.name
22+
23+
24+
class AlignmentTestCase(unittest.TestCase):
    """Check that `set_alignment` controls where variable data is placed.

    `setUp` writes a file with several variables while a 4096-byte alignment
    is in effect; `test_setting_alignment` then reads the data addresses back
    with `h5ls` and checks that each one is a multiple of 4096.
    """

    # Several variables are written so that it is unlikely for all of them
    # to end up aligned by chance.
    variable_names = tuple(f'data{i:02d}' for i in range(10))

    @staticmethod
    def _parse_h5ls_addresses(h5ls_output, names):
        """Return `{name: address}` for each of `names` found in `h5ls_output`.

        `names` that have no `Address:` line in the output map to `None`.
        """
        # Output from `h5ls --verbose --address --simple` is expected to look
        # like this (object names start in column 0, their details are
        # indented):
        #
        # Opened "/tmp/tmpqexgozg1.nc" with sec2 driver.
        # data00                   Dataset {4096/4096}
        #     Attribute: DIMENSION_LIST {1}
        #         Type:      variable length of
        #                    object reference
        #     Attribute: _Netcdf4Coordinates {1}
        #         Type:      32-bit little-endian integer
        #     Location:  1:563
        #     Links:     1
        #     Storage:   32768 logical bytes, 32768 allocated bytes, 100.00% utilization
        #     Type:      IEEE 64-bit little-endian float
        #     Address:   8192
        # data01                   Dataset {4096/4096}
        #     Attribute: DIMENSION_LIST {1}
        #         Type:      variable length of
        #                    object reference
        #     Attribute: _Netcdf4Coordinates {1}
        #         Type:      32-bit little-endian integer
        #     Location:  1:1087
        #     Links:     1
        #     Storage:   32768 logical bytes, 32768 allocated bytes, 100.00% utilization
        #     Type:      IEEE 64-bit little-endian float
        #     Address:   40960
        # [...]
        # x                        Dataset {4096/4096}
        #     Attribute: CLASS scalar
        #         Type:      16-byte null-terminated ASCII string
        #     Attribute: NAME scalar
        #         Type:      64-byte null-terminated ASCII string
        #     Attribute: REFERENCE_LIST {10}
        #         Type:      struct {
        #                    "dataset"          +0    object reference
        #                    "dimension"        +8    32-bit little-endian unsigned integer
        #                } 16 bytes
        #     Attribute: _Netcdf4Dimid scalar
        #         Type:      32-bit little-endian integer
        #     Location:  1:239
        #     Links:     1
        #     Storage:   16384 logical bytes, 0 allocated bytes
        #     Type:      IEEE 32-bit big-endian float
        #     Address:   18446744073709551615
        addresses = dict.fromkeys(names)
        current_object = None
        for raw_line in h5ls_output.split('\n'):
            # An unindented line starts the description of a new object.
            if not raw_line.startswith(' '):
                current_object = raw_line.split(' ')[0]
            # only process the data variables we care to inspect
            if current_object not in addresses:
                continue
            field = raw_line.strip()
            if field.startswith('Address:'):
                addresses[current_object] = int(field.split(':')[1].strip())
        return addresses

    def setUp(self):
        """Write the test file with a (1024, 4096) alignment in effect."""
        self.file = file_name

        # This is a global variable in netcdf4, it must be set before File
        # creation
        if has_alignment:
            set_alignment(1024, 4096)
            self.assertEqual(get_alignment(), (1024, 4096))

        try:
            with Dataset(self.file, 'w') as f:
                f.createDimension('x', 4096)
                for name in self.variable_names:
                    v = f.createVariable(name, np.float64, ('x',))
                    v[...] = 0
        finally:
            # Always restore the default alignment, even if writing the file
            # failed, so as not to disrupt other tests.
            if has_alignment:
                set_alignment(1, 1)

        if has_alignment:
            self.assertEqual(get_alignment(), (1, 1))

    def test_version_settings(self):
        """The functions work when supported and raise RuntimeError otherwise."""
        if has_alignment:
            # One should always be able to set the alignment to 1, 1
            set_alignment(1, 1)
            self.assertEqual(get_alignment(), (1, 1))
        else:
            with self.assertRaises(RuntimeError):
                set_alignment(1, 1)
            with self.assertRaises(RuntimeError):
                get_alignment()

    # if we have no support for alignment, we have no guarantees on
    # how the data can be aligned
    @unittest.skipIf(
        not has_h5ls,
        "h5ls not found."
    )
    @unittest.skipIf(
        not has_alignment,
        "No support for set_alignment in libnetcdf."
    )
    def test_setting_alignment(self):
        """Every data variable must start at a multiple of 4096 bytes."""
        # We choose to use h5ls instead of h5py since h5ls is very likely
        # to be installed alongside the rest of the tooling required to build
        # netcdf4-python
        h5ls_results = subprocess.check_output(
            ["h5ls", "--verbose", "--address", "--simple", self.file]
        ).decode()

        addresses = self._parse_h5ls_addresses(h5ls_results, self.variable_names)

        for key, address in addresses.items():
            self.assertIsNotNone(
                address, f"h5ls reported no address for {key}"
            )
            self.assertEqual(
                address % 4096, 0,
                f"{key} is not aligned. Address = 0x{address:x}"
            )

        # Alternative implementation in h5py
        # import h5py
        # with h5py.File(self.file, 'r') as h5file:
        #     for i in range(10):
        #         v = h5file[f'data{i:02d}']
        #         assert (v.id.get_offset() % 4096) == 0

    def tearDown(self):
        """Remove the temporary file written by `setUp`."""
        os.remove(self.file)


if __name__ == '__main__':
    unittest.main()

0 commit comments

Comments
 (0)