Skip to content

Commit 77fde13

Browse files
authored
Merge pull request #7240 from microsoft/copilot/fix-7051
Fix inferred parameters missing from dataset when explicitly added
2 parents 5e3e636 + d567d8c commit 77fde13

17 files changed

Lines changed: 750 additions & 139 deletions
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
The QCoDeS dataset now correctly captures data for inferred parameters when added via ``datasaver.add_result``. Previously these were discarded due to an oversight.
2+
Any parameters added in this way are available via ``DataSetProtocol.get_parameter_data`` but not exported to xarray or pandas.
3+
Future improvements to these parameters are planned and tracked `here <https://github.com/microsoft/Qcodes/issues/7060>`_.
4+
5+
The property ``InterDependencies_.non_dependencies`` has been deprecated and replaced by ``InterDependencies_.top_level_parameters``, which correctly includes inferred parameters.
6+
The ``InterDependencies_`` object has gained two additional methods ``find_all_parameters_in_tree`` and ``all_parameters_in_tree_by_group``.
7+
8+
The undocumented but publicly importable functions ``qcodes.dataset.exporters.export_to_pandas.load_to_concatenated_dataframe`` and ``qcodes.dataset.exporters.export_to_pandas.load_to_dataframe_dict``
9+
now require an extra argument in the form of an ``InterDependencies_`` object. Users are discouraged from relying, outside of QCoDeS itself, on anything that is not documented in `the QCoDeS API documentation <https://microsoft.github.io/Qcodes/api/index.html>`_.

docs/examples/DataSet/Accessing-data-in-DataSet.ipynb

Lines changed: 5 additions & 5 deletions
Large diffs are not rendered by default.

src/qcodes/dataset/data_set.py

Lines changed: 43 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from typing import TYPE_CHECKING, Any, Literal
1414

1515
import numpy
16+
import numpy.typing as npt
1617
from tqdm.auto import trange
1718

1819
import qcodes
@@ -851,8 +852,9 @@ def get_parameter_data(
851852
"""
852853
if len(params) == 0:
853854
valid_param_names = [
854-
ps.name for ps in self._rundescriber.interdeps.non_dependencies
855+
ps.name for ps in self._rundescriber.interdeps.top_level_parameters
855856
]
857+
856858
else:
857859
valid_param_names = self._validate_parameters(*params)
858860
return get_parameter_data(
@@ -903,7 +905,7 @@ def to_pandas_dataframe_dict(
903905
904906
"""
905907
datadict = self.get_parameter_data(*params, start=start, end=end)
906-
dfs_dict = load_to_dataframe_dict(datadict)
908+
dfs_dict = load_to_dataframe_dict(datadict, self.description.interdeps)
907909
return dfs_dict
908910

909911
def to_pandas_dataframe(
@@ -951,7 +953,7 @@ def to_pandas_dataframe(
951953
952954
"""
953955
datadict = self.get_parameter_data(*params, start=start, end=end)
954-
return load_to_concatenated_dataframe(datadict)
956+
return load_to_concatenated_dataframe(datadict, self.description.interdeps)
955957

956958
def to_xarray_dataarray_dict(
957959
self,
@@ -1226,7 +1228,7 @@ def __repr__(self) -> str:
12261228
return "\n".join(out)
12271229

12281230
def _enqueue_results(
1229-
self, result_dict: Mapping[ParamSpecBase, numpy.ndarray]
1231+
self, result_dict: Mapping[ParamSpecBase, npt.NDArray]
12301232
) -> None:
12311233
"""
12321234
Enqueue the results into self._results
@@ -1243,14 +1245,25 @@ def _enqueue_results(
12431245
self._raise_if_not_writable()
12441246
interdeps = self._rundescriber.interdeps
12451247

1246-
toplevel_params = set(interdeps.dependencies).intersection(set(result_dict))
1248+
result_parameters = set(result_dict.keys())
1249+
unused_results = result_parameters.copy()
1250+
1251+
toplevel_params = set(interdeps.top_level_parameters).intersection(
1252+
result_parameters
1253+
)
12471254

1248-
new_results: dict[str, dict[str, numpy.ndarray]] = {}
1255+
new_results: dict[str, dict[str, npt.NDArray]] = {}
12491256

12501257
for toplevel_param in toplevel_params:
1251-
inff_params = set(interdeps.inferences.get(toplevel_param, ()))
1252-
deps_params = set(interdeps.dependencies.get(toplevel_param, ()))
1253-
all_params = inff_params.union(deps_params).union({toplevel_param})
1258+
# Transitively collect all parameters that are related to any parameter
1259+
# in the current tree, including parameters that dependencies are inferred from
1260+
all_params = interdeps.find_all_parameters_in_tree(toplevel_param)
1261+
# Only include parameters that are present in result_dict
1262+
# we keep track of results unused in any tree and raise a warning at the end
1263+
# if there are any
1264+
all_params = all_params.intersection(result_parameters)
1265+
1266+
unused_results = unused_results.difference(all_params)
12541267

12551268
if self._in_memory_cache:
12561269
new_results[toplevel_param.name] = {}
@@ -1268,8 +1281,13 @@ def _enqueue_results(
12681281
if toplevel_param.type == "array":
12691282
res_list = self._finalize_res_dict_array(result_dict, all_params)
12701283
elif toplevel_param.type in ("numeric", "text", "complex"):
1284+
collected_params = all_params.copy()
1285+
collected_params.remove(toplevel_param)
1286+
12711287
res_list = self._finalize_res_dict_numeric_text_or_complex(
1272-
result_dict, toplevel_param, inff_params, deps_params
1288+
result_dict,
1289+
toplevel_param,
1290+
collected_params,
12731291
)
12741292
else:
12751293
res_dict: dict[str, VALUE] = {
@@ -1278,18 +1296,12 @@ def _enqueue_results(
12781296
res_list = [res_dict]
12791297
self._results += res_list
12801298

1281-
# Finally, handle standalone parameters
1282-
1283-
standalones = set(interdeps.standalones).intersection(set(result_dict))
1284-
1285-
if standalones:
1286-
stdln_dict = {st: result_dict[st] for st in standalones}
1287-
self._results += self._finalize_res_dict_standalones(stdln_dict)
1288-
if self._in_memory_cache:
1289-
for st in standalones:
1290-
new_results[st.name] = {
1291-
st.name: self._reshape_array_for_cache(st, result_dict[st])
1292-
}
1299+
if len(unused_results) > 0:
1300+
log.warning(
1301+
f"Results for parameters {unused_results} were not added to the "
1302+
"DataSet because they are not part of the interdependencies. "
1303+
"This will be an error in a future version of QCoDeS. "
1304+
)
12931305

12941306
if self._in_memory_cache:
12951307
self.cache.add_data(new_results)
@@ -1328,10 +1340,9 @@ def reshaper(val: Any, ps: ParamSpecBase) -> VALUE:
13281340

13291341
@staticmethod
13301342
def _finalize_res_dict_numeric_text_or_complex(
1331-
result_dict: Mapping[ParamSpecBase, numpy.ndarray],
1343+
result_dict: Mapping[ParamSpecBase, npt.NDArray],
13321344
toplevel_param: ParamSpecBase,
1333-
inff_params: set[ParamSpecBase],
1334-
deps_params: set[ParamSpecBase],
1345+
params: set[ParamSpecBase],
13351346
) -> list[dict[str, VALUE]]:
13361347
"""
13371348
Make a res_dict in the format expected by DataSet.add_results out
@@ -1341,7 +1352,7 @@ def _finalize_res_dict_numeric_text_or_complex(
13411352
"""
13421353

13431354
res_list: list[dict[str, VALUE]] = []
1344-
all_params = inff_params.union(deps_params).union({toplevel_param})
1355+
all_params = params.union({toplevel_param})
13451356

13461357
t_map = {"numeric": float, "text": str, "complex": complex}
13471358

@@ -1352,21 +1363,16 @@ def _finalize_res_dict_numeric_text_or_complex(
13521363
else:
13531364
# We first massage all values into np.arrays of the same
13541365
# shape
1355-
flat_results: dict[str, numpy.ndarray] = {}
1366+
flat_results: dict[str, npt.NDArray] = {}
13561367

13571368
toplevel_val = result_dict[toplevel_param]
13581369
flat_results[toplevel_param.name] = toplevel_val.ravel()
13591370
N = len(flat_results[toplevel_param.name])
1360-
for dep in deps_params:
1361-
if result_dict[dep].shape == ():
1362-
flat_results[dep.name] = numpy.repeat(result_dict[dep], N)
1363-
else:
1364-
flat_results[dep.name] = result_dict[dep].ravel()
1365-
for inff in inff_params:
1366-
if numpy.shape(result_dict[inff]) == ():
1367-
flat_results[inff.name] = numpy.repeat(result_dict[inff], N)
1371+
for param in params:
1372+
if result_dict[param].shape == ():
1373+
flat_results[param.name] = numpy.repeat(result_dict[param], N)
13681374
else:
1369-
flat_results[inff.name] = result_dict[inff].ravel()
1375+
flat_results[param.name] = result_dict[param].ravel()
13701376

13711377
# And then put everything into the list
13721378

@@ -1379,7 +1385,7 @@ def _finalize_res_dict_numeric_text_or_complex(
13791385

13801386
@staticmethod
13811387
def _finalize_res_dict_standalones(
1382-
result_dict: Mapping[ParamSpecBase, numpy.ndarray],
1388+
result_dict: Mapping[ParamSpecBase, npt.NDArray],
13831389
) -> list[dict[str, VALUE]]:
13841390
"""
13851391
Massage all standalone parameters into the correct shape

src/qcodes/dataset/data_set_cache.py

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import pandas as pd
2626
import xarray as xr
2727

28+
from qcodes.dataset.descriptions.dependencies import InterDependencies_
2829
from qcodes.dataset.descriptions.rundescriber import RunDescriber
2930
from qcodes.dataset.sqlite.connection import AtomicConnection
3031

@@ -91,6 +92,30 @@ def data(self) -> ParameterData:
9192

9293
return self._data
9394

95+
@staticmethod
96+
def _empty_data_dict(
97+
interdeps: InterDependencies_,
98+
) -> dict[str, dict[str, npt.NDArray]]:
99+
"""
100+
Create an dictionary with empty numpy arrays as values
101+
matching the expected output of ``DataSet``'s ``get_parameter_data`` /
102+
``cache.data`` so that the order of keys in the returned dictionary
103+
is the same as the order of parameters in the interdependencies
104+
in this class.
105+
"""
106+
107+
output: dict[str, dict[str, npt.NDArray]] = {}
108+
for toplevel_param in interdeps.top_level_parameters:
109+
toplevel_param, deps, infs = interdeps.all_parameters_in_tree_by_group(
110+
toplevel_param
111+
)
112+
113+
output[toplevel_param.name] = {}
114+
params = [toplevel_param, *deps, *infs]
115+
for param in params:
116+
output[toplevel_param.name][param.name] = np.array([])
117+
return output
118+
94119
def prepare(self) -> None:
95120
"""
96121
Set up the internal datastructure of the cache.
@@ -99,7 +124,7 @@ def prepare(self) -> None:
99124
"""
100125

101126
if self._data == {}:
102-
self._data = self.rundescriber.interdeps._empty_data_dict()
127+
self._data = self._empty_data_dict(self.rundescriber.interdeps)
103128
else:
104129
raise RuntimeError("Cannot prepare a cache that is not empty")
105130

@@ -145,7 +170,7 @@ def to_pandas_dataframe_dict(self) -> dict[str, pd.DataFrame]:
145170
146171
"""
147172
data = self.data()
148-
return load_to_dataframe_dict(data)
173+
return load_to_dataframe_dict(data, self.rundescriber.interdeps)
149174

150175
def to_pandas_dataframe(self) -> pd.DataFrame:
151176
"""
@@ -158,7 +183,7 @@ def to_pandas_dataframe(self) -> pd.DataFrame:
158183
159184
"""
160185
data = self.data()
161-
return load_to_concatenated_dataframe(data)
186+
return load_to_concatenated_dataframe(data, self.rundescriber.interdeps)
162187

163188
def to_xarray_dataarray_dict(
164189
self, *, use_multi_index: Literal["auto", "always", "never"] = "auto"
@@ -266,7 +291,7 @@ def append_shaped_parameter_data_to_existing_arrays(
266291
Updated write and read status, and the updated ``data``
267292
268293
"""
269-
parameters = tuple(ps.name for ps in rundescriber.interdeps.non_dependencies)
294+
parameters = tuple(ps.name for ps in rundescriber.interdeps.top_level_parameters)
270295
merged_data = {}
271296

272297
updated_write_status = dict(write_status)

src/qcodes/dataset/data_set_in_memory.py

Lines changed: 22 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -658,12 +658,24 @@ def _enqueue_results(
658658
self._raise_if_not_writable()
659659
interdeps = self._rundescriber.interdeps
660660

661-
toplevel_params = set(interdeps.dependencies).intersection(set(result_dict))
661+
result_parameters = set(result_dict.keys())
662+
toplevel_params = set(interdeps.top_level_parameters).intersection(
663+
result_parameters
664+
)
662665
new_results: dict[str, dict[str, npt.NDArray]] = {}
666+
667+
unused_results = result_parameters.copy()
668+
663669
for toplevel_param in toplevel_params:
664-
inff_params = set(interdeps.inferences.get(toplevel_param, ()))
665-
deps_params = set(interdeps.dependencies.get(toplevel_param, ()))
666-
all_params = inff_params.union(deps_params).union({toplevel_param})
670+
# Transitively collect all parameters that are related to any parameter
671+
# in the current tree, including parameters that dependencies are inferred from
672+
all_params = interdeps.find_all_parameters_in_tree(toplevel_param)
673+
# Only include parameters that are present in result_dict
674+
# we keep track of results unused in any tree and raise a warning at the end
675+
# if there are any
676+
all_params = all_params.intersection(result_dict.keys())
677+
678+
unused_results = unused_results.difference(all_params)
667679

668680
new_results[toplevel_param.name] = {}
669681
new_results[toplevel_param.name][toplevel_param.name] = (
@@ -677,15 +689,12 @@ def _enqueue_results(
677689
self._reshape_array_for_cache(param, result_dict[param])
678690
)
679691

680-
# Finally, handle standalone parameters
681-
682-
standalones = set(interdeps.standalones).intersection(set(result_dict))
683-
684-
if standalones:
685-
for st in standalones:
686-
new_results[st.name] = {
687-
st.name: self._reshape_array_for_cache(st, result_dict[st])
688-
}
692+
if len(unused_results) > 0:
693+
log.warning(
694+
f"Results for parameters {unused_results} were not added to the "
695+
"DataSet because they are not part of the interdependencies. "
696+
"This will be an error in a future version of QCoDeS. "
697+
)
689698

690699
self.cache.add_data(new_results)
691700

0 commit comments

Comments
 (0)