GeoscienceAustralia
diff --git a/‎gnssanalysis/gn_utils.py‎
Lines changed: 371 additions & 1 deletion b/‎gnssanalysis/gn_utils.py‎
Lines changed: 371 additions & 1 deletion
diff --git a/‎tests/baseline_dataframe_records/TestDataFrameHashUtils/test_duplicate_df_rejection.pickledlist‎
755 Bytes b/‎tests/baseline_dataframe_records/TestDataFrameHashUtils/test_duplicate_df_rejection.pickledlist‎
755 Bytes
diff --git a/‎tests/baseline_dataframe_records/TestDataFrameHashUtils/test_duplicate_df_rejection.pickledlist_sha256‎
Lines changed: 1 addition & 0 deletions b/‎tests/baseline_dataframe_records/TestDataFrameHashUtils/test_duplicate_df_rejection.pickledlist_sha256‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎tests/baseline_dataframe_records/TestDataFrameHashUtils/test_repeat_caller_rejection.pickledlist‎
595 Bytes b/‎tests/baseline_dataframe_records/TestDataFrameHashUtils/test_repeat_caller_rejection.pickledlist‎
595 Bytes
diff --git a/‎tests/baseline_dataframe_records/TestDataFrameHashUtils/test_repeat_caller_rejection.pickledlist_sha256‎
Lines changed: 1 addition & 0 deletions b/‎tests/baseline_dataframe_records/TestDataFrameHashUtils/test_repeat_caller_rejection.pickledlist_sha256‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎tests/baseline_dataframe_records/TestDataFrameHashUtils/test_verify_refusal_in_wrong_mode.pickledlist‎
595 Bytes b/‎tests/baseline_dataframe_records/TestDataFrameHashUtils/test_verify_refusal_in_wrong_mode.pickledlist‎
595 Bytes
diff --git a/‎tests/baseline_dataframe_records/TestDataFrameHashUtils/test_verify_refusal_in_wrong_mode.pickledlist_sha256‎
Lines changed: 1 addition & 0 deletions b/‎tests/baseline_dataframe_records/TestDataFrameHashUtils/test_verify_refusal_in_wrong_mode.pickledlist_sha256‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎tests/test_utils.py‎
Lines changed: 129 additions & 1 deletion b/‎tests/test_utils.py‎
Lines changed: 129 additions & 1 deletion
@@ -0,0 +1 @@
+6b5020201b08f64a2e7412422e03f94a6e7b0479f3a69a792967cec80b17a08b
@@ -0,0 +1 @@
+1b369c0e1d2ee74b36b233cb0655cc5e0158b334ec0757f546d5e45e6d05d58e
@@ -0,0 +1 @@
+1b369c0e1d2ee74b36b233cb0655cc5e0158b334ec0757f546d5e45e6d05d58e
@@ -1,8 +1,11 @@
 import logging
+import os
+import unittest
+from pandas import DataFrame
 from pyfakefs.fake_filesystem_unittest import TestCase
 from pathlib import Path
 
-from gnssanalysis.gn_utils import delete_entire_directory
+from gnssanalysis.gn_utils import DataFrameHashUtils, delete_entire_directory
 import gnssanalysis.gn_utils as ga_utils
 
 
@@ -64,3 +67,128 @@ def test_configure_logging(self):
 
         # Verify
         self.assertEqual(logger_not_output, None)
+
+
+class TestDataFrameHashUtils(unittest.TestCase):
+
+    def test_verify_refusal_in_wrong_mode(self):
+        mode_backup = DataFrameHashUtils.mode
+        try:
+            df = DataFrame(["a", "b", "c"])
+
+            # Baseline (do not commit uncommented!) Note: every function needs its own baseline, because the
+            # function name determines the filename, unless we override that.
+            # DataFrameHashUtils.mode = "baseline"
+            # DataFrameHashUtils.record_baseline([df])
+
+            # In baseline (write) mode, verify should be refused.
+            DataFrameHashUtils.mode = "baseline"
+
+            with self.assertWarns(Warning) as warning_assessor:
+                self.assertFalse(
+                    DataFrameHashUtils.verify([df]),
+                    "DF list verification should not succeed in 'baseline' mode",
+                )
+            # Ensure the expected warning, and only that warning, was raised
+            captured_warnings = warning_assessor.warnings
+            self.assertEqual(
+                "Refusing to run verify method while not in verify mode. Set DataframeHashUtils.mode = 'verify' first",
+                str(captured_warnings[0].message),
+            )
+            self.assertEqual(
+                len(captured_warnings),
+                1,
+                "Expected exactly 1 warning. Check what other warnings are being raised!",
+            )
+
+            # Should succeed in correct mode.
+            DataFrameHashUtils.mode = "verify"
+            self.assertTrue(
+                DataFrameHashUtils.verify([df]),
+                "DF list verification should succeed in 'verify' mode",
+            )
+        finally:
+            # Ensure flag reset to avoid impacts on other tests (across the whole suite)
+            DataFrameHashUtils.mode = mode_backup
+
+    def test_repeat_caller_rejection(self):
+        # These functions determine what files to write/read baselines from, based on the identity of the (test)
+        # function that called them. Therefore, calling twice from the same function would cause the *same baseline
+        # files* to be read/written for a different part of the unit test.
+        # That would have the effect of:
+        # - in write mode: overwriting the baseline file for a previous part of the test function.
+        # - in read mode: repeating verification of the same file against a different DF list (which would likely fail).
+
+        # We're only testing it with the verify function below, but both verify and baseline functions use the same
+        # caller check logic, and store the caller record statically in a class variable.
+
+        df = DataFrame(["a", "b", "c"])
+
+        # Baseline (every function needs its own baseline, because the function name determines the filename,
+        # unless we override that)
+        # DataFrameHashUtils.mode = "baseline"
+        # DataFrameHashUtils.record_baseline([df])
+
+        self.assertTrue(
+            DataFrameHashUtils.verify([df]),
+            "DF list verification should succeed on *first* call from a function.",
+        )
+        with self.assertRaises(ValueError):
+            DataFrameHashUtils.verify([df])
+            self.fail("DF list verification should fail on *second*/repeated calls from a function.")
+
+    def test_duplicate_df_rejection(self):
+
+        # List to aggregate DFs for hashing
+        dfs_to_hash: list[DataFrame] = []
+
+        df = DataFrame(["a", "b", "c"])  # Let's call this Dataframe 'a'
+        dfs_to_hash.extend([df])
+
+        # Overwrite local variable, as often happens in our unit tests
+        df = DataFrame(["b", "c", "d"])  # Let's call this Dataframe 'b'
+
+        # This might look questionable, but is ok, because we saved a reference to dataframe 'a' to the list,
+        # before overwriting local var 'df' to point at dataframe 'b'.
+        dfs_to_hash.extend([df])
+
+        # Baseline this test (this should only be committed commented out!)
+        # DataFrameHashUtils.mode = "baseline"
+        # DataFrameHashUtils.record_baseline(dfs_to_hash)
+
+        # Will return True if verification succeeded. False if baseline missing or mode != verify
+        self.assertTrue(
+            DataFrameHashUtils.verify(dfs_to_hash),
+            "DF list verification should succeed here (unless baseline files are missing, or baselining has been turned on)",
+        )
+
+        # The local variable df still points to the same DF, so now the list contains [a,b,b]. This should be an error.
+        dfs_to_hash.extend([df])
+        with self.assertRaises(ValueError):
+            DataFrameHashUtils.verify(dfs_to_hash)
+
+    def test_caller_identity_fetch(self):
+        def wrapper_function():
+            class_name, func_name = DataFrameHashUtils.get_grandparent_caller_id()
+            self.assertEqual(class_name, "TestDataFrameHashUtils")
+            self.assertEqual(func_name, "test_caller_identity_fetch")
+
+        # We have to do this (create an extra stack frame) because the function looks for
+        # the *grandparent* caller, not parent caller.
+        wrapper_function()
+
+
+# For use with debugger
+# if __name__ == "__main__":
+
+#     logging.basicConfig(format="%(levelname)s: %(message)s")
+#     logger = logging.getLogger()
+#     logger.setLevel(logging.DEBUG)
+
+#     os.chdir("./tests")
+
+#     df_hash_tests = TestDataFrameHashUtils()
+#     df_hash_tests.test_duplicate_df_rejection()
+#     df_hash_tests.test_verify_refusal_in_wrong_mode()
+#     df_hash_tests.test_repeat_caller_rejection()
+#     df_hash_tests.test_caller_identity_fetch()
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+6b5020201b08f64a2e7412422e03f94a6e7b0479f3a69a792967cec80b17a08b`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+1b369c0e1d2ee74b36b233cb0655cc5e0158b334ec0757f546d5e45e6d05d58e`