forked from microsoft/Qcodes
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdata_export.py
More file actions
356 lines (262 loc) · 10.1 KB
/
data_export.py
File metadata and controls
356 lines (262 loc) · 10.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
from __future__ import annotations
import logging
from typing import TYPE_CHECKING
import numpy as np
import numpy.typing as npt
from typing_extensions import TypedDict
from qcodes.utils import list_of_data_to_maybe_ragged_nd_array
if TYPE_CHECKING:
from qcodes.dataset.data_set_protocol import DataSetProtocol
from qcodes.parameters import ParamSpecBase
log = logging.getLogger(__name__)
class DSPlotData(TypedDict):
    """
    The dictionary used to represent data for use within `plot_dataset`

    Each dictionary describes a single parameter: its identifying
    metadata, its data and, optionally, the shape of that data.
    """

    # Name of the parameter
    name: str
    # Unit of the parameter
    unit: str
    # Label of the parameter
    label: str
    # The data belonging to the parameter
    data: npt.NDArray
    # Shape of the data, or None when no shape is available
    shape: tuple[int, ...] | None
def _get_data_from_ds(ds: DataSetProtocol) -> list[list[DSPlotData]]:
    """
    Collect the cached data of a dataset, grouped per dependent parameter.

    Args:
        ds: The dataset whose cache and description are read

    Returns:
        One inner list per dependent parameter. Each inner list holds one
        ``DSPlotData`` for every dependency of that parameter, followed by
        the ``DSPlotData`` of the dependent parameter itself as the last
        entry. Only that last entry may carry a shape (when the dataset
        description provides shapes); all other entries have shape ``None``.
    """
    top_level_specs: tuple[ParamSpecBase, ...] = tuple(
        ds.description.interdeps.dependencies.keys()
    )

    cached = ds.cache.data()
    data_by_dependent = {spec.name: cached[spec.name] for spec in top_level_specs}

    grouped = []
    for top_level_name, per_param_data in data_by_dependent.items():
        interdeps = ds.description.interdeps
        measured = interdeps[top_level_name]
        setpoint_specs = interdeps.dependencies[measured]

        # dependencies first, the dependent parameter itself goes last
        entries: list[DSPlotData] = [
            {
                "name": spec.name,
                "unit": spec.unit,
                "label": spec.label,
                "data": per_param_data[spec.name],
                "shape": None,
            }
            for spec in (*setpoint_specs, measured)
        ]

        shapes = ds.description.shapes
        if shapes is not None:
            # the shape is looked up under the dependent parameter's name
            # and stored on its (last) entry only
            entries[-1]["shape"] = shapes.get(measured.name)

        grouped.append(entries)

    return grouped
def _all_steps_multiples_of_min_step(rows: npt.NDArray) -> bool:
    """
    Are all steps integer multiples of the smallest step?

    This is used in determining whether the setpoints correspond
    to a regular grid

    Args:
        rows: the output of _rows_from_datapoints

    Returns:
        The answer to the question. When there are no steps at all (no rows,
        or only rows with a single point) the answer is vacuously True.
    """
    steps_list: list = []
    for row in rows:
        # TODO: What is an appropriate precision?
        steps_list.extend(np.unique(np.diff(row).round(decimals=15)))

    steps = np.unique(steps_list)
    if steps.size == 0:
        # nothing to compare; previously this raised IndexError on steps[0]
        return True

    ratios = steps[1:] / steps[0]

    # A zero smallest step yields inf/nan ratios; those are never multiples.
    if not np.all(np.isfinite(ratios)):
        return False

    # Compare each ratio to its nearest integer rather than testing
    # ``ratio % 1 == 0``: floating point division routinely lands just below
    # an integer (0.3 / 0.1 == 2.9999999999999996), whose remainder is ~1
    # and would wrongly be rejected.
    # TODO: What are reasonable tolerances?
    return bool(np.allclose(ratios, np.round(ratios), rtol=0, atol=1e-8))
def _rows_from_datapoints(inputsetpoints: npt.NDArray) -> npt.NDArray:
    """
    Arrange (possibly unordered) setpoints into rows of sorted, unique values.

    The first row holds every distinct setpoint value; one occurrence of
    each of those values is then removed and the procedure is repeated on
    what is left until nothing remains. The rows therefore need not match
    the actual rows of the scan, but they are sufficient to recognise
    certain scan types.

    Args:
        inputsetpoints: The raw setpoints as a one-dimensional array

    Returns:
        A ndarray of the rows
    """
    remaining = inputsetpoints.copy()

    # Shortcut: when every distinct value occurs equally often, all rows are
    # identical and the potentially slow peeling loop below can be skipped.
    unique_vals, first_hits, multiplicity = np.unique(
        remaining, return_index=True, return_counts=True
    )
    distinct_counts = np.unique(multiplicity)
    if len(distinct_counts) == 1 and multiplicity.sum() == len(inputsetpoints):
        return np.tile(unique_vals, (distinct_counts[0], 1))

    collected = [unique_vals]
    remaining = np.delete(remaining, first_hits)

    # Peel off one occurrence of each remaining distinct value per pass
    while len(remaining) > 0:
        unique_vals, first_hits = np.unique(remaining, return_index=True)
        collected.append(unique_vals)
        remaining = np.delete(remaining, first_hits)

    return list_of_data_to_maybe_ragged_nd_array(collected)
def _all_in_group_or_subgroup(rows: npt.NDArray) -> bool:
    """
    Detects whether the setpoints correspond to two groups of
    identical rows, one being contained in the other.

    This is the test for whether the setpoints correspond to a
    rectangular sweep. It allows for a single rectangular hole
    in the setpoint grid, thus allowing for an interrupted sweep.
    Note that each axis needs NOT be equidistantly spaced.

    Args:
        rows: The output from _rows_from_datapoints

    Returns:
        A boolean indicating whether the setpoints meet the
        criterion: True when all rows are identical, or when the rows
        form exactly two consecutive groups of identical rows and every
        value of the second group also occurs in the first row.
    """
    reference = rows[0]
    current = reference
    # Index into ``rows`` of the first row of the second group, if any.
    # A dedicated None sentinel is used so that a switch at the very
    # second row is not mistaken for "no switch happened".
    second_group_start: int | None = None

    for index, row in enumerate(rows[1:], start=1):
        if np.array_equal(row, current):
            continue
        if second_group_start is not None:
            # a third distinct group of rows: not a rectangular sweep
            return False
        second_group_start = index
        current = row

    if second_group_start is None:
        # only one group of identical rows
        return True

    # All rows from the switch onwards are identical (otherwise a third
    # group would have been found), so checking one of them is enough.
    return bool(np.all(np.isin(rows[second_group_start], reference)))
def _strings_as_ints(inputarray: npt.NDArray) -> npt.NDArray:
    """
    Return an integer-valued array version of a string-valued array. Maps, e.g.
    array(['a', 'b', 'c', 'a', 'c']) to array([0, 1, 2, 0, 2]). Useful for
    numerical setpoint analysis

    Every string is replaced by its position among the sorted unique strings
    of the input.

    Args:
        inputarray: A 1D array of strings

    Returns:
        An array of the same length as the input. The values are whole
        numbers, stored with float64 dtype.
    """
    # return_inverse yields, for each element, its index into the sorted
    # unique values, so the mapping is done in a single pass instead of one
    # full-array comparison per unique word.
    _, codes = np.unique(inputarray, return_inverse=True)
    return codes.astype(np.float64)
def get_1D_plottype(xpoints: npt.NDArray, ypoints: npt.NDArray) -> str:
    """
    Decide which kind of 1D plot suits the given data.

    Possible plot types are:

    * '1D_bar' - bar plot
    * '1D_point' - scatter plot
    * '1D_line' - line plot

    String-valued x data with non-string y data gives a bar plot when all
    x values are distinct and a scatter plot otherwise. Any other
    combination involving strings gives a scatter plot. Purely
    non-string data is classified from the x values alone.

    Args:
        xpoints: The x-axis values
        ypoints: The y-axis values

    Returns:
        Determined plot type as a string
    """
    x_is_text = isinstance(xpoints[0], str)
    y_is_text = isinstance(ypoints[0], str)

    if not (x_is_text or y_is_text):
        return datatype_from_setpoints_1d(xpoints)

    # categorical x against numerical y: one bar per distinct category
    if x_is_text and not y_is_text and len(np.unique(xpoints)) == len(xpoints):
        return "1D_bar"

    return "1D_point"
def datatype_from_setpoints_1d(setpoints: npt.NDArray) -> str:
    """
    Choose the visualisation that fits the provided setpoints.

    The type is:

    * '1D_point' (scatter plot) when all setpoints are identical
      (compared to the first setpoint with ``np.allclose``)
    * '1D_line' otherwise

    Args:
        setpoints: The x-axis values

    Returns:
        A string representing the plot type as described above
    """
    all_identical = np.allclose(setpoints, setpoints[0])
    return "1D_point" if all_identical else "1D_line"
def get_2D_plottype(
    xpoints: npt.NDArray, ypoints: npt.NDArray, zpoints: npt.NDArray
) -> str:
    """
    Decide which kind of 2D plot suits the given data.

    The decision is delegated to ``datatype_from_setpoints_2d`` and is based
    on the x and y values only. Plot types are:

    * '2D_point' - all setpoints are the same along at least one direction
    * '2D_grid' - colormap plot for data that is on a grid
    * '2D_equidistant' - colormap plot for data that is on equidistant grid
    * '2D_unknown' - returned in case the data did not match any criteria of the
      other plot types

    Args:
        xpoints: The x-axis values
        ypoints: The y-axis values
        zpoints: The z-axis (colorbar) values; currently not inspected

    Returns:
        Determined plot type as a string
    """
    return datatype_from_setpoints_2d(xpoints, ypoints)
def datatype_from_setpoints_2d(xpoints: npt.NDArray, ypoints: npt.NDArray) -> str:
    """
    Work out which visualisation can be used to display 2D data.

    Plot types are:

    * '2D_point' - all setpoints are the same along at least one direction
    * '2D_grid' - colormap plot for data that is on a grid
    * '2D_equidistant' - colormap plot for data that is on equidistant grid
    * '2D_unknown' - returned in case the data did not match any criteria of the
      other plot types

    Args:
        xpoints: The x-axis values
        ypoints: The y-axis values

    Returns:
        A string with the name of the determined plot type
    """
    # Categorical (string) axes are analysed through an integer encoding
    xpoints, ypoints = (
        _strings_as_ints(points) if isinstance(points[0], str) else points
        for points in (xpoints, ypoints)
    )

    # A direction along which nothing varies leaves only points to show
    x_is_constant = np.allclose(xpoints, xpoints[0])
    y_is_constant = np.allclose(ypoints, ypoints[0])
    if x_is_constant or y_is_constant:
        return "2D_point"

    xrows = _rows_from_datapoints(xpoints)
    yrows = _rows_from_datapoints(ypoints)

    # Simple rectangular sweep, possibly interrupted in the middle of a row:
    # rows must form at most two nested groups and the number of distinct
    # values along one axis must equal the number of rows along the other.
    x_grouped = _all_in_group_or_subgroup(xrows)
    y_grouped = _all_in_group_or_subgroup(yrows)
    x_fits = x_grouped and (len(xrows[0]) == len(yrows))
    y_fits = y_grouped and (len(yrows[0]) == len(xrows))
    if x_fits and y_fits:
        return "2D_grid"

    # Equidistant grid: every step is a multiple of the smallest step
    x_regular = _all_steps_multiples_of_min_step(xrows)
    y_regular = _all_steps_multiples_of_min_step(yrows)
    if x_regular and y_regular:
        return "2D_equidistant"

    return "2D_unknown"
def reshape_2D_data(
    x: npt.NDArray, y: npt.NDArray, z: npt.NDArray
) -> tuple[npt.NDArray, npt.NDArray, npt.NDArray]:
    """
    Sort flat x, y, z data onto a two-dimensional grid.

    The grid axes are the first rows returned by ``_rows_from_datapoints``
    for ``x`` and ``y``. Every z value is placed in the cell addressed by
    the positions of its y and x values on those axes.

    Args:
        x: The x value of each data point
        y: The y value of each data point
        z: The z value of each data point

    Returns:
        A tuple ``(xrow, yrow, z_to_plot)`` where ``z_to_plot`` has shape
        ``(len(yrow), len(xrow))``. Cells that receive no data hold NaN, or
        the empty string when ``z`` contains strings.
    """
    xrow = np.array(_rows_from_datapoints(x)[0])
    yrow = np.array(_rows_from_datapoints(y)[0])
    grid_shape = (len(yrow), len(xrow))

    # potentially slow method of filling in the data, should be optimised
    log.debug("Sorting 2D data onto grid")

    if not isinstance(z[0], str):
        z_to_plot = np.full(grid_shape, np.nan)
    else:
        z_to_plot = np.full(grid_shape, "", dtype=z.dtype)

    # For each data point, find the column (x) and row (y) it belongs to
    column_of_point = np.zeros_like(x, dtype=np.int_)
    row_of_point = np.zeros_like(y, dtype=np.int_)
    for column, xval in enumerate(xrow):
        column_of_point[np.where(x == xval)[0]] = column
    for row, yval in enumerate(yrow):
        row_of_point[np.where(y == yval)[0]] = row

    z_to_plot[row_of_point, column_of_point] = z

    return xrow, yrow, z_to_plot