# Source: Qcodes/src/qcodes/dataset/data_export.py at d93e641dbdb7b997bd7d700a0d5aeae75fb6ade6 (samantha-ho/Qcodes, GitHub)
from __future__ import annotations

import logging
from typing import TYPE_CHECKING

import numpy as np
import numpy.typing as npt
from typing_extensions import TypedDict

from qcodes.utils import list_of_data_to_maybe_ragged_nd_array

if TYPE_CHECKING:
    from qcodes.dataset.data_set_protocol import DataSetProtocol
    from qcodes.parameters import ParamSpecBase

log = logging.getLogger(__name__)


class DSPlotData(TypedDict):
    """
    The dictionary used to represent data for use within `plot_dataset`

    One dictionary describes a single parameter: its identifying metadata
    together with the data values associated with it.
    """

    # name of the parameter this entry describes
    name: str
    # unit of the parameter
    unit: str
    # label of the parameter
    label: str
    # the data values belonging to the parameter
    data: npt.NDArray
    # shape recorded for the parameter, or None when no shape is available
    shape: tuple[int, ...] | None


def _get_data_from_ds(ds: DataSetProtocol) -> list[list[DSPlotData]]:
    """
    Collect the cached data of a dataset, grouped per dependent parameter.

    For every key of ``ds.description.interdeps.dependencies`` one inner
    list is produced. It contains one ``DSPlotData`` entry for each
    dependency of that parameter, followed by a final entry for the
    dependent parameter itself. Only that final entry can carry a shape,
    taken from ``ds.description.shapes`` when shapes are available.

    Args:
        ds: The dataset whose cache is read

    Returns:
        A list with one list of ``DSPlotData`` per dependent parameter

    """
    interdeps = ds.description.interdeps
    dependents: tuple[ParamSpecBase, ...] = tuple(interdeps.dependencies.keys())

    cached = ds.cache.data()

    # Resolve the cached data of every dependent parameter up front, so a
    # missing cache entry is reported before any output is assembled.
    data_by_dependent = {spec.name: cached[spec.name] for spec in dependents}

    result = []

    for dependent_name, per_param_data in data_by_dependent.items():
        dependent = interdeps[dependent_name]

        # dependencies first, the dependent parameter always last
        specs = (*interdeps.dependencies[dependent], dependent)

        entries: list[DSPlotData] = [
            {
                "name": spec.name,
                "unit": spec.unit,
                "label": spec.label,
                "data": per_param_data[spec.name],
                "shape": None,
            }
            for spec in specs
        ]

        shapes = ds.description.shapes
        if shapes is not None:
            # the last entry is the dependent parameter
            entries[-1]["shape"] = shapes.get(dependent.name)

        result.append(entries)

    return result


def _all_steps_multiples_of_min_step(rows: npt.NDArray) -> bool:
    """
    Are all steps integer multiples of the smallest step?
    This is used in determining whether the setpoints correspond
    to a regular grid

    Args:
        rows: the output of _rows_from_datapoints

    Returns:
        The answer to the question

    """

    steps_list: list[npt.NDArray] = []
    for row in rows:
        # TODO: What is an appropriate precision?
        steps_list += list(np.unique(np.diff(row).round(decimals=15)))

    steps = np.unique(steps_list)
    remainders = np.mod(steps[1:] / steps[0], 1)

    # TODO: What are reasonable tolerances for allclose?
    asmoms = bool(np.allclose(remainders, np.zeros_like(remainders)))

    return asmoms


def _rows_from_datapoints(inputsetpoints: npt.NDArray) -> npt.NDArray:
    """
    Cast the (potentially) unordered setpoints into rows
    of sorted, unique setpoint values. Because of the way they are ordered,
    these rows do not necessarily correspond to actual rows of the scan,
    but they can nonetheless be used to identify certain scan types

    Args:
        inputsetpoints: The raw setpoints as a one-dimensional array

    Returns:
        A ndarray of the rows

    """

    rows = []
    setpoints = inputsetpoints.copy()

    # first check if there is only one unique array in which case we can avoid the
    # potentially slow loop below
    temp, inds, count = np.unique(setpoints, return_index=True, return_counts=True)
    num_repeats_array = np.unique(count)
    if len(num_repeats_array) == 1 and count.sum() == len(inputsetpoints):
        return np.tile(temp, (num_repeats_array[0], 1))
    else:
        rows.append(temp)
        setpoints = np.delete(setpoints, inds)

    while len(setpoints) > 0:
        temp, inds = np.unique(setpoints, return_index=True)
        rows.append(temp)
        setpoints = np.delete(setpoints, inds)

    return list_of_data_to_maybe_ragged_nd_array(rows)


def _all_in_group_or_subgroup(rows: npt.NDArray) -> bool:
    """
    Detects whether the setpoints correspond to two groups of
    of identical rows, one being contained in the other.

    This is the test for whether the setpoints correspond to a
    rectangular sweep. It allows for a single rectangular hole
    in the setpoint grid, thus allowing for an interrupted sweep.
    Note that each axis needs NOT be equidistantly spaced.

    Args:
        rows: The output from _rows_from_datapoints

    Returns:
        A boolean indicating whether the setpoints meet the
            criterion

    """

    groups = 1
    comp_to = rows[0]

    aigos = True
    switchindex = 0

    for rowind, row in enumerate(rows[1:]):
        if np.array_equal(row, comp_to):
            continue
        else:
            groups += 1
            comp_to = row
            switchindex = rowind
            if groups > 2:
                aigos = False
                break

    # if there are two groups, check that the rows of one group
    # are all contained in the rows of the other
    if aigos and switchindex > 0:
        for row in rows[1 + switchindex :]:
            if sum(r in rows[0] for r in row) != len(row):
                aigos = False
                break

    return aigos


def _strings_as_ints(inputarray: npt.NDArray) -> npt.NDArray:
    """
    Return an integer-valued array version of a string-valued array. Maps, e.g.
    array(['a', 'b', 'c', 'a', 'c']) to array([0, 1, 2, 0, 2]). Useful for
    numerical setpoint analysis

    Args:
        inputarray: A 1D array of strings

    """
    newdata = np.zeros(len(inputarray))
    for n, word in enumerate(np.unique(inputarray)):
        newdata += (inputarray == word).astype(int) * n
    return newdata


def get_1D_plottype(xpoints: npt.NDArray, ypoints: npt.NDArray) -> str:
    """
    Pick the plot type of a 1D plot from the data to be plotted

    The decision is based on the first element of each axis:

    * neither axis holds strings: the type is derived from the x values by
      `datatype_from_setpoints_1d`
    * only the x-axis holds strings and all x values are distinct:
      '1D_bar' (bar plot)
    * any other combination involving strings: '1D_point' (scatter plot)

    Possible plot types are:
    * '1D_bar' - bar plot
    * '1D_point' - scatter plot
    * '1D_line' - line plot

    Args:
        xpoints: The x-axis values
        ypoints: The y-axis values

    Returns:
        Determined plot type as a string

    """
    x_is_text = isinstance(xpoints[0], str)
    y_is_text = isinstance(ypoints[0], str)

    # purely numerical data: let the setpoints decide
    if not (x_is_text or y_is_text):
        return datatype_from_setpoints_1d(xpoints)

    # categorical x against numerical y, each category occurring once
    if x_is_text and not y_is_text and len(xpoints) == len(np.unique(xpoints)):
        return "1D_bar"

    return "1D_point"


def datatype_from_setpoints_1d(setpoints: npt.NDArray) -> str:
    """
    Decide which kind of visualisation suits the provided setpoints.

    The type is:
        * '1D_point' (scatter plot) when all setpoints are close to the
          first setpoint, as judged by ``np.allclose``
        * '1D_line' otherwise

    Args:
        setpoints: The x-axis values

    Returns:
        A string representing the plot type as described above

    """
    all_identical = np.allclose(setpoints, setpoints[0])
    return "1D_point" if all_identical else "1D_line"


def get_2D_plottype(
    xpoints: npt.NDArray, ypoints: npt.NDArray, zpoints: npt.NDArray
) -> str:
    """
    Pick the plot type of a 2D plot from the data to be plotted

    The decision is delegated to `datatype_from_setpoints_2d` and depends
    on the x and y values only.

    Plot types are:
    * '2D_point' - all setpoints are the same along at least one direction
    * '2D_grid' - colormap plot for data that is on a grid
    * '2D_equidistant' - colormap plot for data that is on equidistant grid
    * '2D_unknown' - returned in case the data did not match any criteria of the
    other plot types

    Args:
        xpoints: The x-axis values
        ypoints: The y-axis values
        zpoints: The z-axis (colorbar) values; accepted but not inspected

    Returns:
        Determined plot type as a string

    """
    return datatype_from_setpoints_2d(xpoints, ypoints)


def datatype_from_setpoints_2d(xpoints: npt.NDArray, ypoints: npt.NDArray) -> str:
    """
    Work out which kind of visualisation can be used to display
    the data of a 2D plot.

    Plot types are:
    * '2D_point' - all setpoints are the same along at least one direction
    * '2D_grid' - colormap plot for data that is on a grid
    * '2D_equidistant' - colormap plot for data that is on equidistant grid
    * '2D_unknown' - returned in case the data did not match any criteria of the
    other plot types

    Args:
        xpoints: The x-axis values
        ypoints: The y-axis values

    Returns:
        A string with the name of the determined plot type

    """
    # Categorical (string) axes are analysed through integer codes
    if isinstance(xpoints[0], str):
        xpoints = _strings_as_ints(xpoints)
    if isinstance(ypoints[0], str):
        ypoints = _strings_as_ints(ypoints)

    # An axis on which every setpoint equals the first one
    x_constant = np.allclose(xpoints, xpoints[0])
    y_constant = np.allclose(ypoints, ypoints[0])
    if x_constant or y_constant:
        return "2D_point"

    x_rows = _rows_from_datapoints(xpoints)
    y_rows = _rows_from_datapoints(ypoints)

    # A simple rectangular sweep, possibly interrupted in the middle of
    # one row: both axes must pass the group test and the length of one
    # axis' first row must equal the number of rows of the other axis.
    x_grouped = _all_in_group_or_subgroup(x_rows)
    y_grouped = _all_in_group_or_subgroup(y_rows)

    x_on_grid = x_grouped and (len(x_rows[0]) == len(y_rows))
    y_on_grid = y_grouped and (len(y_rows[0]) == len(x_rows))

    if x_on_grid and y_on_grid:
        return "2D_grid"

    # An equidistant grid: on both axes every step is a multiple of the
    # smallest step.
    x_regular = _all_steps_multiples_of_min_step(x_rows)
    y_regular = _all_steps_multiples_of_min_step(y_rows)

    return "2D_equidistant" if (x_regular and y_regular) else "2D_unknown"


def reshape_2D_data(
    x: npt.NDArray, y: npt.NDArray, z: npt.NDArray
) -> tuple[npt.NDArray, npt.NDArray, npt.NDArray]:
    """
    Arrange flat x, y, z data on a two-dimensional grid.

    The grid axes are the first rows that `_rows_from_datapoints` returns
    for ``x`` and ``y``. Every z value is written to the grid cell whose
    axis values equal its x and y value. Cells that receive no value hold
    NaN, or the empty string when ``z`` contains strings.

    Args:
        x: The x value of every data point
        y: The y value of every data point
        z: The z value of every data point

    Returns:
        A tuple of the x-axis values, the y-axis values and the gridded z
        data; the grid has one row per y-axis value and one column per
        x-axis value

    """
    x_axis = np.array(_rows_from_datapoints(x)[0])
    y_axis = np.array(_rows_from_datapoints(y)[0])
    grid_shape = (len(y_axis), len(x_axis))

    # potentially slow method of filling in the data, should be optimised
    log.debug("Sorting 2D data onto grid")

    # Start from a grid of "missing" markers matching the kind of z data
    grid = (
        np.full(grid_shape, "", dtype=z.dtype)
        if isinstance(z[0], str)
        else np.full(grid_shape, np.nan)
    )

    index_dtype = np.dtype(np.int_)
    column_of_point = np.zeros_like(x, dtype=index_dtype)
    row_of_point = np.zeros_like(y, dtype=index_dtype)

    # For every data point, find the position of its x and y value on the
    # respective axis.
    for position, axis_value in enumerate(x_axis):
        column_of_point[np.nonzero(x == axis_value)[0]] = position
    for position, axis_value in enumerate(y_axis):
        row_of_point[np.nonzero(y == axis_value)[0]] = position

    grid[row_of_point, column_of_point] = z

    return x_axis, y_axis, grid