Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
129 changes: 0 additions & 129 deletions pina/collector.py

This file was deleted.

21 changes: 10 additions & 11 deletions pina/data/data_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
from torch.utils.data.distributed import DistributedSampler
from ..label_tensor import LabelTensor
from .dataset import PinaDatasetFactory, PinaTensorDataset
from ..collector import Collector


class DummyDataloader:
Expand Down Expand Up @@ -330,9 +329,7 @@ def __init__(
self.pin_memory = pin_memory

# Collect data
collector = Collector(problem)
collector.store_fixed_data()
collector.store_sample_domains()
problem.collect_data()

# Check if the splits are correct
self._check_slit_sizes(train_size, test_size, val_size)
Expand Down Expand Up @@ -361,7 +358,9 @@ def __init__(
# raises NotImplementedError
self.val_dataloader = super().val_dataloader

self.collector_splits = self._create_splits(collector, splits_dict)
self.data_splits = self._create_splits(
problem.collected_data, splits_dict
)
self.transfer_batch_to_device = self._transfer_batch_to_device

def setup(self, stage=None):
Expand All @@ -376,23 +375,23 @@ def setup(self, stage=None):
"""
if stage == "fit" or stage is None:
self.train_dataset = PinaDatasetFactory(
self.collector_splits["train"],
self.data_splits["train"],
max_conditions_lengths=self.find_max_conditions_lengths(
"train"
),
automatic_batching=self.automatic_batching,
)
if "val" in self.collector_splits.keys():
if "val" in self.data_splits.keys():
self.val_dataset = PinaDatasetFactory(
self.collector_splits["val"],
self.data_splits["val"],
max_conditions_lengths=self.find_max_conditions_lengths(
"val"
),
automatic_batching=self.automatic_batching,
)
elif stage == "test":
self.test_dataset = PinaDatasetFactory(
self.collector_splits["test"],
self.data_splits["test"],
max_conditions_lengths=self.find_max_conditions_lengths("test"),
automatic_batching=self.automatic_batching,
)
Expand Down Expand Up @@ -473,7 +472,7 @@ def _apply_shuffle(condition_dict, len_data):
for (
condition_name,
condition_dict,
) in collector.data_collections.items():
) in collector.items():
len_data = len(condition_dict["input"])
if self.shuffle:
_apply_shuffle(condition_dict, len_data)
Expand Down Expand Up @@ -540,7 +539,7 @@ def find_max_conditions_lengths(self, split):
"""

max_conditions_lengths = {}
for k, v in self.collector_splits[split].items():
for k, v in self.data_splits[split].items():
if self.batch_size is None:
max_conditions_lengths[k] = len(v["input"])
elif self.repeat:
Expand Down
70 changes: 48 additions & 22 deletions pina/problem/abstract_problem.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,11 @@ def __init__(self):
Initialization of the :class:`AbstractProblem` class.
"""
self._discretised_domains = {}
# create collector to manage problem data

# create hook conditions <-> problems
for condition_name in self.conditions:
self.conditions[condition_name].problem = self

self._batching_dimension = 0
Comment thread
dario-coscia marked this conversation as resolved.

# Store in domains dict all the domains object directly passed to
# ConditionInterface. Done for back compatibility with PINA <0.2
if not hasattr(self, "domains"):
Expand All @@ -41,24 +38,23 @@ def __init__(self):
self.domains[cond_name] = cond.domain
cond.domain = cond_name

@property
def batching_dimension(self):
"""
Get batching dimension.

:return: The batching dimension.
:rtype: int
"""
return self._batching_dimension
self._collected_data = {}

@batching_dimension.setter
def batching_dimension(self, value):
@property
def collected_data(self):
"""
Set the batching dimension.
Return the collected data from the problem's conditions.

:param int value: The batching dimension.
:return: The collected data. Keys are condition names, and values are
dictionaries containing the input points and the corresponding
equations or target points.
:rtype: dict
"""
self._batching_dimension = value
if not self._collected_data:
raise RuntimeError(
"You have to call collect_data() before accessing the data."
)
return self._collected_data

# back compatibility 0.1
@property
Expand All @@ -71,11 +67,12 @@ def input_pts(self):
:rtype: dict
"""
to_return = {}
for cond_name, cond in self.conditions.items():
if hasattr(cond, "input"):
to_return[cond_name] = cond.input
elif hasattr(cond, "domain"):
to_return[cond_name] = self._discretised_domains[cond.domain]
if self._collected_data is None:
raise RuntimeError(
"You have to call collect_data() before accessing the data."
)
for cond_name, data in self._collected_data.items():
to_return[cond_name] = data["input"]
return to_return

@property
Expand Down Expand Up @@ -300,3 +297,32 @@ def add_points(self, new_points_dict):
self.discretised_domains[k] = LabelTensor.vstack(
[self.discretised_domains[k], v]
)

def collect_data(self):
"""
Aggregate data from the problem's conditions into a single dictionary.
"""
data = {}
# check if all domains are discretised
if not self.are_all_domains_discretised:
raise RuntimeError(
"All domains must be discretised before aggregating data."
)
# Iterate over the conditions and collect data
for condition_name in self.conditions:
condition = self.conditions[condition_name]
# Check if the condition has an domain attribute
if hasattr(condition, "domain"):
# Store the discretisation points
samples = self.discretised_domains[condition.domain]
data[condition_name] = {
"input": samples,
"equation": condition.equation,
}
else:
# If the condition does not have a domain attribute, store
# the input and target points
keys = condition.__slots__
values = [getattr(condition, name) for name in keys]
data[condition_name] = dict(zip(keys, values))
self._collected_data = data
Loading