hamilton/tests/function_modifiers/test_adapters.py at e4f07b9a035b214a149ac630350f2aad14f9e1bd · apache/hamilton · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
from __future__ import annotations

import dataclasses
from collections import Counter
from typing import Any, Collection, Dict, List, Tuple, Type

import pandas as pd
import pytest

from hamilton import ad_hoc_utils, base, driver, graph, node
from hamilton.function_modifiers import base as fm_base
from hamilton.function_modifiers import extract_fields, save_to, source, value
from hamilton.function_modifiers.adapters import (
    InvalidDecoratorException,
    LoadFromDecorator,
    SaveToDecorator,
    dataloader,
    datasaver,
    load_from,
    resolve_adapter_class,
    resolve_kwargs,
)
from hamilton.function_modifiers.base import DefaultNodeCreator
from hamilton.htypes import custom_subclass_check
from hamilton.io.data_adapters import DataLoader, DataSaver
from hamilton.io.default_data_loaders import JSONDataSaver
from hamilton.registry import LOADER_REGISTRY


def test_default_adapters_are_available():
    """The loader registry must contain at least one entry."""
    registered_count = len(LOADER_REGISTRY)
    assert registered_count > 0


def test_default_adapters_are_registered_once():
    """Every registry key must list each adapter implementation exactly once."""
    assert "json" in LOADER_REGISTRY
    for key, implementations in LOADER_REGISTRY.items():
        # We count str() of each class, as that gives the fully qualified class name.
        registrations = Counter(str(implementation) for implementation in implementations)
        for impl, count in registrations.items():
            assert count == 1, (
                f"Adapter for {key} registered multiple times for {impl}. This should not"
                f" happen, as items should just be registered once."
            )


@dataclasses.dataclass
class MockDataLoader(DataLoader):
    """Test loader with three required and three defaulted constructor parameters.

    It applies to ``int`` and registers under the name ``"mock"``. The loaded
    value is a placeholder; only the metadata carries real content.
    """

    required_param: int
    required_param_2: int
    required_param_3: str
    default_param: int = 4
    default_param_2: int = 5
    default_param_3: str = "6"

    @classmethod
    def name(cls) -> str:
        return "mock"

    @classmethod
    def applicable_types(cls) -> Collection[Type]:
        return [int]

    def load_data(self, type_: Type[int]) -> Tuple[int, Dict[str, Any]]:
        # The data itself is an Ellipsis placeholder; the metadata echoes two parameters.
        metadata = {
            "required_param": self.required_param,
            "default_param": self.default_param,
        }
        return ..., metadata


def test_load_from_decorator():
    """Transforming a one-parameter function yields three nodes with loader tags."""

    def fn(data: int) -> int:
        return data

    def expected_tags(has_metadata: bool) -> Dict[str, Any]:
        # Both generated nodes share every tag except the has_metadata flag.
        return {
            "hamilton.data_loader.source": "mock",
            "hamilton.data_loader": True,
            "hamilton.data_loader.has_metadata": has_metadata,
            "hamilton.data_loader.node": "data",
            "hamilton.data_loader.classname": MockDataLoader.__qualname__,
        }

    load_decorator = LoadFromDecorator(
        [MockDataLoader],
        required_param=value("1"),
        required_param_2=value("2"),
        required_param_3=value("3"),
    )
    raw_nodes = DefaultNodeCreator().generate_nodes(fn, {})
    transformed = load_decorator.transform_dag(raw_nodes, {}, fn)
    assert len(transformed) == 3
    by_name = {transformed_node.name: transformed_node for transformed_node in transformed}
    # Names must be unique, so the mapping keeps all three nodes.
    assert len(by_name) == 3
    assert "fn" in by_name
    assert by_name["fn.load_data.data"].tags == expected_tags(has_metadata=True)
    assert by_name["fn.select_data.data"].tags == expected_tags(has_metadata=False)


def test_load_from_decorator_resolve_kwargs():
    """``resolve_kwargs`` separates ``source(...)`` entries from ``value(...)`` entries."""
    raw_kwargs = {
        "required_param": source("1"),
        "required_param_2": value(2),
        "required_param_3": value("3"),
        "default_param": source("4"),
        "default_param_2": value(5),
    }

    dependencies, literals = resolve_kwargs(raw_kwargs)
    assert dependencies == {"required_param": "1", "default_param": "4"}
    assert literals == {"required_param_2": 2, "required_param_3": "3", "default_param_2": 5}


def test_load_from_decorator_resolve_kwargs_with_literals():
    """Bare Python values are resolved as literals alongside ``source(...)`` entries."""
    raw_kwargs = {
        "required_param": source("1"),
        "required_param_2": 2,
        "required_param_3": "3",
        "default_param": source("4"),
        "default_param_2": 5,
    }

    dependencies, literals = resolve_kwargs(raw_kwargs)
    assert dependencies == {"required_param": "1", "default_param": "4"}
    assert literals == {"required_param_2": 2, "required_param_3": "3", "default_param_2": 5}


def test_load_from_decorator_validate_succeeds():
    """Validation passes when all required loader parameters are given."""

    def fn(injected_data: int) -> int:
        return injected_data

    load_decorator = LoadFromDecorator(
        [MockDataLoader],
        required_param=source("1"),
        required_param_2=value(2),
        required_param_3=value("3"),
    )

    # Passing means no exception is raised.
    load_decorator.validate(fn)


def test_load_from_decorator_validate_succeeds_with_inject():
    """Validation passes for a two-parameter function when ``inject_`` names the target."""

    def fn(injected_data: int, dependent_data: int) -> int:
        return injected_data + dependent_data

    load_decorator = LoadFromDecorator(
        [MockDataLoader],
        inject_="injected_data",
        required_param=source("1"),
        required_param_2=value(2),
        required_param_3=value("3"),
    )

    # Passing means no exception is raised.
    load_decorator.validate(fn)


def test_load_from_decorator_validate_fails_dont_know_which_param_to_inject():
    """Validation fails for a two-parameter function when ``inject_`` is not given."""

    def fn(injected_data: int, other_possible_injected_data: int) -> int:
        return injected_data + other_possible_injected_data

    load_decorator = LoadFromDecorator(
        [MockDataLoader],
        required_param=source("1"),
        required_param_2=value(2),
        required_param_3=value("3"),
    )

    with pytest.raises(fm_base.InvalidDecoratorException):
        load_decorator.validate(fn)


def test_load_from_decorator_validate_fails_inject_not_in_fn():
    """Validation fails when ``inject_`` names a parameter the function does not have."""

    def fn(dependent_data: int) -> int:
        return dependent_data

    load_decorator = LoadFromDecorator(
        [MockDataLoader],
        inject_="injected_data",
        required_param=source("1"),
        required_param_2=value(2),
        required_param_3=value("3"),
    )

    with pytest.raises(fm_base.InvalidDecoratorException):
        load_decorator.validate(fn)


def test_load_from_decorator_validate_fails_inject_missing_param():
    """Validation fails when a required loader parameter is not supplied."""

    def fn(data: int) -> int:
        return data

    # required_param_3 is deliberately left out so the loader is missing a required argument.
    load_decorator = LoadFromDecorator(
        [MockDataLoader],
        required_param=source("1"),
        required_param_2=value(2),
    )

    with pytest.raises(fm_base.InvalidDecoratorException):
        load_decorator.validate(fn)


@dataclasses.dataclass
class StringDataLoader(DataLoader):
    """Parameterless loader for ``str`` that always loads ``"foo"``."""

    @classmethod
    def applicable_types(cls) -> Collection[Type]:
        return [str]

    @classmethod
    def name(cls) -> str:
        return "dummy"

    def load_data(self, type_: Type) -> Tuple[str, Dict[str, Any]]:
        metadata = {"loader": "string_data_loader"}
        return "foo", metadata


@dataclasses.dataclass
class AnyDataLoader(DataLoader):
    """Loader declared for ``Any`` that returns the value it was constructed with."""

    value: Any

    @classmethod
    def applicable_types(cls) -> Collection[Type]:
        return [Any]

    @classmethod
    def name(cls) -> str:
        return "dummy"

    def load_data(self, type_: Type) -> Tuple[Any, Dict[str, Any]]:
        metadata = {"loader": "any_data_loader"}
        return self.value, metadata


@dataclasses.dataclass
class IntDataLoader(DataLoader):
    """Parameterless loader for ``int`` that always loads ``1``."""

    @classmethod
    def applicable_types(cls) -> Collection[Type]:
        return [int]

    @classmethod
    def name(cls) -> str:
        return "dummy"

    def load_data(self, type_: Type) -> Tuple[int, Dict[str, Any]]:
        metadata = {"loader": "int_data_loader"}
        return 1, metadata


@dataclasses.dataclass
class IntDataLoader2(DataLoader):
    """Second parameterless loader for ``int``; loads ``2`` so tests can tell it apart."""

    @classmethod
    def name(cls) -> str:
        return "dummy"

    @classmethod
    def applicable_types(cls) -> Collection[Type]:
        return [int]

    def load_data(self, type_: Type) -> Tuple[int, Dict[str, Any]]:
        metadata = {"loader": "int_data_loader_2"}
        return 2, metadata


def test_validate_fails_incorrect_type():
    """A ``str``/``int`` loader set accepts those types and rejects ``bool``."""

    def fn_str_inject(injected_data: str) -> str:
        return injected_data

    def fn_int_inject(injected_data: int) -> int:
        return injected_data

    def fn_bool_inject(injected_data: bool) -> bool:
        return injected_data

    load_decorator = LoadFromDecorator([StringDataLoader, IntDataLoader])

    # Each of these has a single parameter whose type one of the loaders supports.
    for accepted_fn in (fn_str_inject, fn_int_inject):
        load_decorator.validate(accepted_fn)

    # A single bool parameter is not a type any of the loaders supports.
    with pytest.raises(fm_base.InvalidDecoratorException):
        load_decorator.validate(fn_bool_inject)


def test_validate_selects_correct_type():
    """Validation succeeds for ``str`` and ``int`` parameters and fails for ``bool``."""

    def fn_str_inject(injected_data: str) -> str:
        return injected_data

    def fn_int_inject(injected_data: int) -> int:
        return injected_data

    def fn_bool_inject(injected_data: bool) -> bool:
        return injected_data

    load_decorator = LoadFromDecorator([StringDataLoader, IntDataLoader])

    # Each of these has a single parameter whose type one of the loaders supports.
    for accepted_fn in (fn_str_inject, fn_int_inject):
        load_decorator.validate(accepted_fn)

    # A single bool parameter is not a type any of the loaders supports.
    with pytest.raises(fm_base.InvalidDecoratorException):
        load_decorator.validate(fn_bool_inject)


# Note that this tests an internal API, but we would like to test this to ensure
# class selection is correct
@pytest.mark.parametrize(
    "type_,classes,correct_class",
    [
        (str, [StringDataLoader, IntDataLoader, IntDataLoader2], StringDataLoader),
        (int, [StringDataLoader, IntDataLoader, IntDataLoader2], IntDataLoader2),
        (int, [IntDataLoader2, IntDataLoader], IntDataLoader),
        (int, [IntDataLoader, IntDataLoader2], IntDataLoader2),
        (int, [StringDataLoader], None),
        (str, [IntDataLoader], None),
        (dict, [IntDataLoader], None),
        (dict, [IntDataLoader, StringDataLoader], None),
        (str, [AnyDataLoader, StringDataLoader], StringDataLoader),
        (dict, [AnyDataLoader, StringDataLoader], AnyDataLoader),
    ],
)
def test_resolve_correct_loader_class(
    type_: Type[Type], classes: List[Type[DataLoader]], correct_class: Type[DataLoader]
):
    """``resolve_adapter_class`` returns the expected class, or ``None`` when none applies.

    The cases show that when two candidates apply to the same type, the one
    listed later is the expected result.
    """
    resolved = resolve_adapter_class(type_, classes)
    assert resolved == correct_class


def test_decorator_validate():
    """With three candidate loaders, ``str`` and ``int`` validate and ``bool`` does not."""

    def fn_str_inject(injected_data: str) -> str:
        return injected_data

    def fn_int_inject(injected_data: int) -> int:
        return injected_data

    def fn_bool_inject(injected_data: bool) -> bool:
        return injected_data

    load_decorator = LoadFromDecorator([StringDataLoader, IntDataLoader, IntDataLoader2])

    # Each of these has a single parameter whose type one of the loaders supports.
    for accepted_fn in (fn_str_inject, fn_int_inject):
        load_decorator.validate(accepted_fn)

    # A single bool parameter is not a type any of the loaders supports.
    with pytest.raises(fm_base.InvalidDecoratorException):
        load_decorator.validate(fn_bool_inject)


# End-to-end tests are probably cleanest
# We've done a bunch of tests of internal structures for other decorators,
# but that leaves the testing brittle
# We don't test the driver; we just use the function graph to test the nodes
def test_load_from_decorator_end_to_end():
    """Execute a decorated function and check both its output and the raw loader output."""

    @LoadFromDecorator(
        [StringDataLoader, IntDataLoader, IntDataLoader2],
    )
    def fn_str_inject(injected_data: str) -> str:
        return injected_data

    temp_module = ad_hoc_utils.create_temporary_module(fn_str_inject)
    fg = graph.FunctionGraph.from_modules(temp_module, config={})
    # Request every node so the intermediate loader node shows up in the results.
    result = fg.execute(inputs={}, nodes=fg.nodes.values())

    expected_loader_output = ("foo", {"loader": "string_data_loader"})
    assert result["fn_str_inject"] == "foo"
    assert result["fn_str_inject.load_data.injected_data"] == expected_loader_output


# End-to-end tests are probably cleanest
# We've done a bunch of tests of internal structures for other decorators,
# but that leaves the testing brittle
# We don't test the driver; we just use the function graph to test the nodes
def test_load_from_decorator_end_to_end_with_multiple():
    """Stack two loader decorators, each injecting a different parameter, and execute."""

    @LoadFromDecorator(
        [StringDataLoader, IntDataLoader, IntDataLoader2],
        inject_="injected_data_1",
    )
    @LoadFromDecorator(
        [StringDataLoader, IntDataLoader, IntDataLoader2],
        inject_="injected_data_2",
    )
    def fn_str_inject(injected_data_1: str, injected_data_2: int) -> str:
        return "".join([injected_data_1] * injected_data_2)

    temp_module = ad_hoc_utils.create_temporary_module(fn_str_inject)
    fg = graph.FunctionGraph.from_modules(temp_module, config={})
    # Request every node so both intermediate loader nodes show up in the results.
    result = fg.execute(inputs={}, nodes=fg.nodes.values())

    expected_str_load = ("foo", {"loader": "string_data_loader"})
    expected_int_load = (2, {"loader": "int_data_loader_2"})
    assert result["fn_str_inject"] == "foofoo"
    assert result["fn_str_inject.load_data.injected_data_1"] == expected_str_load
    assert result["fn_str_inject.load_data.injected_data_2"] == expected_int_load


@pytest.mark.parametrize(
    "source_",
    [
        value("tests/resources/data/test_load_from_data.json"),
        source("test_data"),
    ],
)
def test_load_from_decorator_json_file(source_):
    """Load a JSON file through ``load_from.json`` and compute a mean over its contents.

    The path is parametrized: once as a literal ``value(...)`` and once as a
    ``source(...)`` referring to the ``test_data`` input passed at execution time.

    :param source_: the ``path`` argument handed to ``load_from.json``.
    """

    @load_from.json(path=source_)
    def raw_json_data(data: Dict[str, Any]) -> Dict[str, Any]:
        return data

    def number_employees(raw_json_data: Dict[str, Any]) -> int:
        return len(raw_json_data["employees"])

    def sum_age(raw_json_data: Dict[str, Any]) -> float:
        return sum([employee["age"] for employee in raw_json_data["employees"]])

    def mean_age(sum_age: float, number_employees: int) -> float:
        return sum_age / number_employees

    config = {}
    dr = driver.Driver(
        config,
        ad_hoc_utils.create_temporary_module(raw_json_data, number_employees, sum_age, mean_age),
        adapter=base.DefaultAdapter(),
    )
    # "test_data" is always supplied; it is only consumed in the source("test_data") case.
    result = dr.execute(
        ["mean_age"], inputs={"test_data": "tests/resources/data/test_load_from_data.json"}
    )
    # Use an absolute difference: without abs(), any value far *below* the expected
    # mean would yield a negative difference and pass the tolerance check.
    assert abs(result["mean_age"] - 32.33333) < 0.0001


def test_loader_fails_for_missing_attribute():
    """Using a loader name that does not exist on ``load_from`` raises ``AttributeError``."""
    loader_argument = value("foo")
    with pytest.raises(AttributeError):
        load_from.not_a_loader(param=loader_argument)


def test_pandas_extensions_end_to_end(tmp_path_factory):
    """Round-trip a CSV: load it, save it to a temp file, then load the saved copy."""
    output_dir = tmp_path_factory.mktemp("test_pandas_extensions_end_to_end")
    output_path = str(output_dir / "output.csv")
    input_path = "tests/resources/data/test_load_from_data.csv"

    @save_to.csv(path=source("output_path"), output_name_="save_df")
    @load_from.csv(path=source("input_path"))
    def df(data: pd.DataFrame) -> pd.DataFrame:
        return data

    temp_module = ad_hoc_utils.create_temporary_module(df)
    dr = driver.Driver({}, temp_module, adapter=base.DefaultAdapter())

    # First run: load the fixture and write it out, checking the loaded frame.
    first_run = dr.execute(
        ["df", "save_df"],
        inputs={"input_path": input_path, "output_path": output_path},
    )
    loaded_frame = first_run["df"]
    assert loaded_frame.shape == (3, 5)
    assert loaded_frame.loc[0, "firstName"] == "John"

    # Second run: point the loader at the file that the first run wrote.
    second_run = dr.execute(
        ["df"],
        inputs={"input_path": output_path},
    )
    # This is just reading the same file we wrote out, so it should be the same
    pd.testing.assert_frame_equal(loaded_frame, second_run["df"])


@dataclasses.dataclass
class MarkingSaver(DataSaver):
    """Test saver that records every saved value in two caller-supplied sets."""

    markers: set
    more_markers: set

    @classmethod
    def name(cls) -> str:
        return "marker"

    @classmethod
    def applicable_types(cls) -> Collection[Type]:
        return [int, dict]

    def save_data(self, data: int) -> Dict[str, Any]:
        # Add the value to both sets so a test can observe that saving ran.
        for marker_set in (self.markers, self.more_markers):
            marker_set.add(data)
        return {}


def test_save_to_decorator():
    """``SaveToDecorator`` adds a saver node that takes both literal and sourced arguments."""

    def fn() -> int:
        return 1

    literal_markers = set()
    injected_markers = set()
    save_decorator = SaveToDecorator(
        [MarkingSaver],
        output_name_="save_fn",
        markers=value(literal_markers),
        more_markers=source("more_markers"),
    )
    produced = save_decorator.transform_node(node.Node.from_fn(fn), {}, fn)
    assert len(produced) == 2
    by_name = {produced_node.name: produced_node for produced_node in produced}
    assert "save_fn" in by_name
    assert "fn" in by_name

    saver_node = by_name["save_fn"]
    # The literal "markers" set is bound already, so only the sourced argument is an input.
    assert sorted(saver_node.input_types.keys()) == ["fn", "more_markers"]
    assert saver_node(fn=1, more_markers=injected_markers) == {}
    assert saver_node.tags == {
        "hamilton.data_saver": True,
        "hamilton.data_saver.sink": "marker",
        "hamilton.data_saver.classname": MarkingSaver.__qualname__,
    }
    # Check that the markers are updated, ensuring that the save_fn is called
    assert injected_markers == {1}
    assert literal_markers == {1}


def test_save_to_decorator_with_target():
    """With ``target_="fn"`` the saver node takes ``fn`` as its only input."""

    @extract_fields({"a": int, "b": int})
    def fn() -> dict:
        return {"a": 1, "b": 2}

    save_decorator = SaveToDecorator(
        [JSONDataSaver],
        output_name_="save_fn",
        path=value("unused"),
        target_="fn",  # save the original
    )
    produced = save_decorator.transform_node(node.Node.from_fn(fn), {}, fn)
    by_name = {produced_node.name: produced_node for produced_node in produced}
    # We just want to make sure it gets the right input
    saver_inputs = list(by_name["save_fn"].input_types)
    assert saver_inputs == ["fn"]


@dataclasses.dataclass
class DefaultFactoryLoader(DataLoader):
    """Loader whose only field gets its default from a ``default_factory``."""

    field_with_factory: int = dataclasses.field(default_factory=int)

    def __post_init__(self):
        # Derived attribute; load_data returns it.
        self.param2 = 1 + self.field_with_factory

    @classmethod
    def name(cls) -> str:
        return "factory"

    @classmethod
    def applicable_types(cls) -> Collection[Type]:
        return [int]

    def load_data(self, type_: Type[int]) -> Tuple[int, Dict[str, Any]]:
        no_metadata = {}
        return self.param2, no_metadata


def test_loader_default_factory_field():
    """A loader with a ``default_factory`` field can be used with no arguments."""

    @LoadFromDecorator([DefaultFactoryLoader])
    def foo(param: int) -> int:
        return param

    temp_module = ad_hoc_utils.create_temporary_module(foo)
    fg = graph.create_function_graph(temp_module, config={})
    assert len(fg) == 3
    assert "foo" in fg


@dataclasses.dataclass
class DefaultFactorySaver(DataSaver):
    """Saver whose only field gets its default from a ``default_factory``."""

    field_with_factory: int = dataclasses.field(default_factory=int)

    def __post_init__(self):
        # Derived attribute computed from the defaulted field.
        self.param2 = 1 + self.field_with_factory

    @classmethod
    def name(cls) -> str:
        return "factory"

    @classmethod
    def applicable_types(cls) -> Collection[Type]:
        return [int]

    def save_data(self, data: int) -> Dict[str, Any]:
        # Nothing is persisted; the saver only returns empty metadata.
        no_metadata = {}
        return no_metadata


def test_saver_default_factory_field():
    """A saver with a ``default_factory`` field can be used with no arguments."""

    @SaveToDecorator([DefaultFactorySaver])
    def foo(param: int) -> int:
        return param

    temp_module = ad_hoc_utils.create_temporary_module(foo)
    fg = graph.create_function_graph(temp_module, config={})
    assert len(fg) == 3
    assert "foo" in fg


@dataclasses.dataclass
class OptionalParamDataLoader(DataLoader):
    """Loader with one defaulted parameter, which is also the value it loads."""

    param: int = 1

    @classmethod
    def name(cls) -> str:
        return "optional"

    @classmethod
    def applicable_types(cls) -> Collection[Type]:
        return [int]

    def load_data(self, type_: Type[int]) -> Tuple[int, Dict[str, Any]]:
        no_metadata = {}
        return self.param, no_metadata


def test_adapters_optional_params():
    """A loader whose only parameter has a default can be used with no arguments."""

    @LoadFromDecorator([OptionalParamDataLoader])
    def foo(param: int) -> int:
        return param

    temp_module = ad_hoc_utils.create_temporary_module(foo)
    fg = graph.create_function_graph(temp_module, config={})
    assert len(fg) == 3
    assert "foo" in fg


def test_save_to_with_input_from_other_fn():
    """``save_to`` can take its path from another node in the same module."""

    def output_path() -> str:
        return "output.json"

    @save_to.json(path=source("output_path"), output_name_="save_fn")
    def fn() -> dict:
        return {"a": 1}

    temp_module = ad_hoc_utils.create_temporary_module(output_path, fn)
    fg = graph.create_function_graph(temp_module, config={})

    assert len(fg) == 3


def test_load_from_with_input_from_other_fn():
    """``load_from`` can take its path from another node in the same module."""

    def input_path() -> str:
        return "input.json"

    @load_from.json(path=source("input_path"))
    def fn(data: dict) -> dict:
        return data

    temp_module = ad_hoc_utils.create_temporary_module(input_path, fn)
    fg = graph.create_function_graph(temp_module, config={})
    assert len(fg) == 4


def test_load_from_with_multiple_inputs():
    """Two stacked ``load_from`` decorators can each inject a different parameter."""

    @load_from.json(
        path=value("input_1.json"),
        inject_="data1",
    )
    @load_from.json(
        path=value("input_2.json"),
        inject_="data2",
    )
    def fn(data1: dict, data2: dict) -> dict:
        return {**data1, **data2}

    temp_module = ad_hoc_utils.create_temporary_module(fn)
    fg = graph.create_function_graph(temp_module, config={})
    # One filter and one loader per injected parameter, plus the transform function
    assert len(fg) == 5


import sys

# Builtin ``dict``/``tuple`` on Python 3.9+, the ``typing`` aliases on older
# versions; the mock functions below use these in their return annotations.
dict_, tuple_ = (dict, tuple) if sys.version_info >= (3, 9) else (Dict, Tuple)


# Mock functions for dataloader & datasaver testing
def correct_dl_function(foo: int) -> tuple_[int, dict_]:
    # Well-formed dataloader: returns a (data, metadata) pair.
    loaded, metadata = 1, {}
    return loaded, metadata


def correct_dl_function_with_subscripts(foo: int) -> tuple_[Dict[str, int], Dict[str, str]]:
    # Well-formed dataloader whose data and metadata are both subscripted dicts.
    loaded = {"a": 1}
    metadata = {"b": "c"}
    return loaded, metadata


def correct_ds_function(data: float) -> dict_:
    # Well-formed datasaver: takes data and returns a (here empty) metadata dict.
    metadata = {}
    return metadata


def no_return_annotation_function():
    # Deliberately unannotated: used as an invalid dataloader.
    loaded, metadata = 1, {}
    return loaded, metadata


def non_tuple_return_function() -> int:
    # Deliberately invalid loader/saver shape: returns a bare int.
    result = 1
    return result


def incorrect_tuple_length_function() -> tuple_[int]:
    # Deliberately invalid dataloader: the tuple has one element instead of two.
    single = (1,)
    return single


def incorrect_second_element_function() -> tuple_[int, list]:
    # Deliberately invalid dataloader: the second element is a list, not a dict.
    loaded, not_metadata = 1, []
    return loaded, not_metadata


def incorrect_dict_subscript() -> tuple_[int, Dict[int, str]]:
    # Deliberately invalid dataloader: the metadata dict is keyed by int.
    loaded = 1
    metadata = {1: "a"}
    return loaded, metadata


# Functions that dataloader validation is expected to reject; used to
# parametrize test_dl_validate_incorrect_functions.
incorrect_funcs = [
    no_return_annotation_function,
    non_tuple_return_function,
    incorrect_tuple_length_function,
    incorrect_second_element_function,
    incorrect_dict_subscript,
]


@pytest.mark.parametrize(
    "func",
    incorrect_funcs,
    ids=[bad_func.__name__ for bad_func in incorrect_funcs],
)
def test_dl_validate_incorrect_functions(func):
    """``dataloader().validate`` rejects every malformed function in ``incorrect_funcs``."""
    loader_decorator = dataloader()
    with pytest.raises(InvalidDecoratorException):
        loader_decorator.validate(func)


@pytest.mark.skipif(
    sys.version_info < (3, 9, 0),
    reason="dataloader not guarenteed to work with subscripted tuples on 3.8",
)
def test_dl_validate_with_correct_function():
    """``dataloader().validate`` accepts a function returning ``tuple[int, dict]``."""
    loader_decorator = dataloader()
    try:
        loader_decorator.validate(correct_dl_function)
    except InvalidDecoratorException:
        # Report a validation error as a test failure rather than an error.
        pytest.fail("validate() raised InvalidDecoratorException unexpectedly!")


def test_dl_validate_with_subscripts():
    """``dataloader().validate`` accepts subscripted ``Dict`` types in the return tuple."""
    loader_decorator = dataloader()
    try:
        loader_decorator.validate(correct_dl_function_with_subscripts)
    except InvalidDecoratorException:
        # Report a validation error as a test failure rather than an error.
        pytest.fail("validate() raised InvalidDecoratorException unexpectedly!")


def test_ds_validate_with_correct_function():
    """``datasaver().validate`` accepts a function annotated as returning a dict."""
    saver_decorator = datasaver()
    try:
        saver_decorator.validate(correct_ds_function)
    except InvalidDecoratorException:
        # Report a validation error as a test failure rather than an error.
        pytest.fail("validate() raised InvalidDecoratorException unexpectedly!")


def test_ds_validate_incorrect_function():
    """``datasaver().validate`` rejects a function whose return annotation is ``int``.

    This test previously built ``dataloader()``, which only repeated a case
    already covered by ``test_dl_validate_incorrect_functions`` and left the
    datasaver's rejection path untested.
    """
    ds = datasaver()
    with pytest.raises(InvalidDecoratorException):
        ds.validate(non_tuple_return_function)


def test_dataloader():
    """``dataloader()`` generates a ``.loader`` node and a node named after the function."""
    loader_decorator = dataloader()
    loader_node, selector_node = loader_decorator.generate_nodes(correct_dl_function, {})

    # Tags that both generated nodes carry.
    shared_tags = {
        "hamilton.data_loader": True,
        "hamilton.data_loader.classname": "correct_dl_function()",
        "hamilton.data_loader.node": "correct_dl_function",
    }

    assert loader_node.name == "correct_dl_function.loader"
    assert loader_node.input_types["foo"][1] == node.DependencyType.REQUIRED
    assert loader_node.callable(foo=0) == (1, {})
    assert loader_node.tags == {
        **shared_tags,
        "hamilton.data_loader.has_metadata": True,
        "hamilton.data_loader.source": "loader",
        "module": "tests.function_modifiers.test_adapters",
    }

    assert selector_node.name == "correct_dl_function"
    # The input name contains a dot, so it has to be passed through a dict.
    selector_inputs = {"correct_dl_function.loader": (1, {})}
    assert selector_node.callable(**selector_inputs) == 1
    assert selector_node.tags == {
        **shared_tags,
        "hamilton.data_loader.has_metadata": False,
        "hamilton.data_loader.source": "correct_dl_function",
    }


def test_dataloader_future_annotations():
    """A dataloader taken from the ``nodes_with_future_annotation`` module gets a list type."""
    from tests.resources import nodes_with_future_annotation

    sample_loader = nodes_with_future_annotation.sample_dataloader
    temp_module = ad_hoc_utils.create_temporary_module(sample_loader)
    fg = graph.create_function_graph(temp_module, config={})
    # the data loaded is a list
    loaded_type = fg["sample_dataloader"].type
    assert custom_subclass_check(loaded_type, list)


def test_datasaver():
    """``datasaver()`` generates a single node named after the function, with saver tags."""
    saver_decorator = datasaver()
    (saver_node,) = saver_decorator.generate_nodes(correct_ds_function, {})

    expected_tags = {
        "hamilton.data_saver": True,
        "hamilton.data_saver.classname": "correct_ds_function()",
        "hamilton.data_saver.sink": "correct_ds_function",
        "module": "tests.function_modifiers.test_adapters",
    }
    assert saver_node.name == "correct_ds_function"
    assert saver_node.input_types["data"][1] == node.DependencyType.REQUIRED
    assert saver_node.callable(data=0.0) == {}
    assert saver_node.tags == expected_tags