From 95146fe45f114ec2f257158e19c44cfc687ca9b6 Mon Sep 17 00:00:00 2001
From: ahmedk <karim.ahmed@xfel.eu>
Date: Thu, 4 May 2023 09:25:33 +0200
Subject: [PATCH] Multiple changes after test with many releases until 3.10.1

- Add new configurations for available tests like blc-strips for AGIPD
- Add configs for epix100 and DSSC after changing default values in main notebooks. To test old data.
- Add new Jungfrau and GH2 tests.
- Fix the numerical validation and fixes to `--find-difference`.
- Add arg to only apply numerical validation with slurm submissions.
---
 tests/test_reference_runs/callab_tests.py     | 101 +++++++++++++
 tests/test_reference_runs/conftest.py         |  14 +-
 .../test_pre_deployment.py                    | 140 ++++++++++--------
 3 files changed, 195 insertions(+), 60 deletions(-)

diff --git a/tests/test_reference_runs/callab_tests.py b/tests/test_reference_runs/callab_tests.py
index ae40282a5..9ab0d807c 100644
--- a/tests/test_reference_runs/callab_tests.py
+++ b/tests/test_reference_runs/callab_tests.py
@@ -3,6 +3,7 @@ automated_test_config = {
         "det_type": "AGIPD",
         "cal_type": "CORRECT",
         "config": {
+            "blc-stripes": True,
             "out-folder": "{}/{}/{}",
             "in-folder": "/gpfs/exfel/exp/SPB/202131/p900215/raw",
             "run": "95",
@@ -10,6 +11,7 @@ automated_test_config = {
             "karabo-id": "SPB_DET_AGIPD1M-1",
             "slurm-mem": "750",
             "sequences": "0,1,2",
+            "rel-gain": True,
             "ctrl-source-template": "{}/MDL/FPGA_COMP",
         },
         "reference-folder": "{}/{}/{}",
@@ -34,6 +36,8 @@ automated_test_config = {
         "det_type": "AGIPD",
         "cal_type": "CORRECT",
         "config": {
+            "blc-stripes": True,
+            "rel-gain": True,
             "out-folder": "{}/{}/{}",
             "in-folder": "/gpfs/exfel/exp/SPB/202131/p900215/raw",
             "run": "262",
@@ -65,6 +69,8 @@ automated_test_config = {
         "det_type": "AGIPD",
         "cal_type": "CORRECT",
         "config": {
+            "blc-stripes": True,
+            "rel-gain": True,
             "out-folder": "{}/{}/{}",
             "in-folder": "/gpfs/exfel/exp/MID/202121/p002929/raw",
             "run": "21",
@@ -95,6 +101,8 @@ automated_test_config = {
         "det_type": "AGIPD",
         "cal_type": "CORRECT",
         "config": {
+            "blc-stripes": True,
+            "rel-gain": True,
             "out-folder": "{}/{}/{}",
             "in-folder": "/gpfs/exfel/exp/HED/202131/p900228/raw",
             "run": "29",
@@ -125,6 +133,8 @@ automated_test_config = {
         "det_type": "AGIPD",
         "cal_type": "CORRECT",
         "config": {
+            "blc-stripes": True,
+            "rel-gain": True,
             "out-folder": "{}/{}/{}",
             "in-folder": "/gpfs/exfel/exp/MID/202121/p002929/raw",
             "run": "21",
@@ -134,6 +144,7 @@ automated_test_config = {
             "karabo-id": "MID_DET_AGIPD1M-1",
             "slurm-mem": "750",
             "ctrl-source-template": "{}/MDL/FPGA_COMP",
+            "sequences-per-node": 1,
         },
         "reference-folder": "{}/{}/{}",
     },
@@ -141,6 +152,8 @@ automated_test_config = {
         "det_type": "AGIPD",
         "cal_type": "CORRECT",
         "config": {
+            "blc-stripes": True,
+            "rel-gain": True,
             "out-folder": "{}/{}/{}",
             "in-folder": "/gpfs/exfel/exp/MID/202121/p002929/raw",
             "run": "21",
@@ -224,6 +237,21 @@ automated_test_config = {
         },
         "reference-folder": "{}/{}/{}",
     },
+    "FXE_XAD_JF1M-DARK-BURST_LONGRUN": {
+        "det_type": "JUNGFRAU",
+        "cal_type": "DARK",
+        "config": {
+            "out-folder": "{}/{}/{}",
+            "in-folder": "/gpfs/exfel/exp/FXE/202321/p004576/raw",
+            "run-high": "112",
+            "run-med": "113",
+            "run-low": "118",
+            "karabo-id-control": "",
+            "karabo-id": "FXE_XAD_JF1M",
+            "karabo-da": ["JNGFR01", "JNGFR02"],
+        },
+        "reference-folder": "{}/{}/{}",
+    },
     "FXE_XAD_JF1M-CORRECT-SINGLE": {
         "det_type": "JUNGFRAU",
         "cal_type": "CORRECT",
@@ -315,6 +343,30 @@ automated_test_config = {
         },
         "reference-folder": "{}/{}/{}",
     },
+    "HED_IA1_JF500K2-CORRECT-ADAPTIVE": {
+        "det_type": "JUNGFRAU",
+        "cal_type": "CORRECT",
+        "config": {
+            "out-folder": "{}/{}/{}",
+            "in-folder": "/gpfs/exfel/exp/HED/202102/p002656/raw",
+            "run": "230",
+            "karabo-id": "HED_IA1_JF500K2",
+            "karabo-da": "JNGFR02",
+        },
+        "reference-folder": "{}/{}/{}",
+    },
+    "HED_IA1_JF500K3-CORRECT-ADAPTIVE": {
+        "det_type": "JUNGFRAU",
+        "cal_type": "CORRECT",
+        "config": {
+            "out-folder": "{}/{}/{}",
+            "in-folder": "/gpfs/exfel/exp/HED/202102/p002656/raw",
+            "run": "230",
+            "karabo-id": "HED_IA1_JF500K3",
+            "karabo-da": "JNGFR03",
+        },
+        "reference-folder": "{}/{}/{}",
+    },
     "HED_IA1_JF500K1-DARK-ADAPTIVE": {
         "det_type": "JUNGFRAU",
         "cal_type": "DARK",
@@ -329,6 +381,34 @@ automated_test_config = {
         },
         "reference-folder": "{}/{}/{}",
     },
+    "HED_IA1_JF500K2-DARK-ADAPTIVE": {
+        "det_type": "JUNGFRAU",
+        "cal_type": "DARK",
+        "config": {
+            "out-folder": "{}/{}/{}",
+            "in-folder": "/gpfs/exfel/exp/HED/202102/p002656/raw",
+            "run-high": "219",
+            "run-med": "220",
+            "run-low": "221",
+            "karabo-id": "HED_IA1_JF500K2",
+            "karabo-da": "JNGFR02",
+        },
+        "reference-folder": "{}/{}/{}",
+    },
+    "HED_IA1_JF500K3-DARK-ADAPTIVE": {
+        "det_type": "JUNGFRAU",
+        "cal_type": "DARK",
+        "config": {
+            "out-folder": "{}/{}/{}",
+            "in-folder": "/gpfs/exfel/exp/HED/202102/p002656/raw",
+            "run-high": "219",
+            "run-med": "220",
+            "run-low": "221",
+            "karabo-id": "HED_IA1_JF500K3",
+            "karabo-da": "JNGFR03",
+        },
+        "reference-folder": "{}/{}/{}",
+    },
    "HED_IA1_JF500K1-DARK-FIXED":
     {
         "det_type": "JUNGFRAU",
@@ -354,6 +434,7 @@ automated_test_config = {
             "run": "230",
             "karabo-id": "HED_IA1_EPX100-1",
             "karabo-da": "EPIX01",
+            "fix-temperature": 290,
         },
         "reference-folder": "{}/{}/{}",
     },
@@ -427,6 +508,11 @@ automated_test_config = {
             "run": "1723",
             "karabo-id": "SCS_DET_DSSC1M-1",
             "slow-data-path": "SCS_CDIDET_DSSC/FPGA/PPT_Q",
+            "slow-data-aggregators": [
+                1,
+                2,
+                3,
+                4],
         },
         "reference-folder": "{}/{}/{}",
     },
@@ -448,6 +534,7 @@ automated_test_config = {
         "config": {
             "out-folder": "{}/{}/{}",
             "in-folder": "/gpfs/exfel/exp/FXE/202231/p900298/raw",
+            "karabo-da": "GH201",
             "run-high": "7",
             "run-med": "8",
             "run-low": "9",
@@ -461,8 +548,22 @@ automated_test_config = {
         "config": {
             "out-folder": "{}/{}/{}",
             "in-folder": "/gpfs/exfel/exp/FXE/202231/p900298/raw",
+            "karabo-da": "GH201",
             "run": "7",
             "karabo-id": "FXE_XAD_G2XES",
+            "no-offset-correction": True,
+        },
+        "reference-folder": "{}/{}/{}",
+    },
+    "SPB_50UM_GH2-CORRECT": {
+        "det_type": "Gotthard2",
+        "cal_type": "CORRECT",
+        "config": {
+            "out-folder": "{}/{}/{}",
+            "in-folder": "/gpfs/exfel/exp/SPB/202321/p004577/raw",
+            "karabo-da": "GH200",
+            "run": "98",
+            "karabo-id": "SPB_50UM_GH2",
         },
         "reference-folder": "{}/{}/{}",
     },
diff --git a/tests/test_reference_runs/conftest.py b/tests/test_reference_runs/conftest.py
index 1e8f290ea..502966f57 100644
--- a/tests/test_reference_runs/conftest.py
+++ b/tests/test_reference_runs/conftest.py
@@ -31,6 +31,13 @@ def pytest_addoption(parser):
         default=False,
         help="Skips tests for numerical validation for produced h5files.",
     )
+
+    parser.addoption(
+        "--validation-only",
+        action="store_true",
+        default=False,
+        help="Skips the calibration step and only runs numerical validation for existing h5files.",
+    )
     parser.addoption(
         "--find-difference",
         action="store_true",
@@ -61,6 +68,8 @@ def release_test_config(request):
     calibration = request.config.getoption("--calibration")
     skip_numerical_validation = request.config.getoption(
         "--no-numerical-validation")
+    validate_only = request.config.getoption(
+        "--validation-only")
     find_difference = request.config.getoption(
         "--find-difference")
     picked_test = request.config.getoption("--picked-test")
@@ -68,8 +77,9 @@ def release_test_config(request):
     out_folder = request.config.getoption("--out-folder")
     return (
         detectors, calibration, picked_test,
-        skip_numerical_validation, find_difference,
-        reference_folder, out_folder,
+        skip_numerical_validation, validate_only,
+        find_difference, reference_folder,
+        out_folder,
     )
 
 
diff --git a/tests/test_reference_runs/test_pre_deployment.py b/tests/test_reference_runs/test_pre_deployment.py
index 30c6c2a58..929ca5897 100644
--- a/tests/test_reference_runs/test_pre_deployment.py
+++ b/tests/test_reference_runs/test_pre_deployment.py
@@ -42,7 +42,7 @@ def file_md5(
     return md5.digest()
 
 
-def find_differences(file1: pathlib.Path, file2: pathlib.Path):
+def find_differences(file1: pathlib.Path, file2: pathlib.Path, files_mapping):
     """Find difference in groups, datasest, and objects
     between two h5files.
     Args:
@@ -62,36 +62,47 @@ def find_differences(file1: pathlib.Path, file2: pathlib.Path):
     groups_f1 = []
     datasets_f1 = []
     objects_f1 = []
+
     groups_f2 = []
     datasets_f2 = []
     objects_f2 = []
 
     # Fill groups, datasets, and objects list to compare both h5files' attrs.
-    _collect_attrs = partial(collect_attrs, groups_f1, datasets_f1, objects_f1)
-    f1 = h5py.File(file1, 'r')
-    f1.visititems(_collect_attrs)
-    _collect_attrs = partial(collect_attrs, groups_f2, datasets_f2, objects_f2)
-    f2 = h5py.File(file2, 'r')
-    f2.visititems(_collect_attrs)
-    try:
+    with h5py.File(file1, 'r') as f1, h5py.File(file2, 'r') as f2:
+        _collect_attrs = partial(collect_attrs, groups_f1, datasets_f1, objects_f1)
+        f1.visititems(_collect_attrs)
+        _collect_attrs = partial(collect_attrs, groups_f2, datasets_f2, objects_f2)
+        f2.visititems(_collect_attrs)
         # Compare groups, datasets, and objects to have the same content.
-        assert groups_f1 == groups_f2, f"{file1} and {file2} consists of different groups."  # noqa
-        assert datasets_f1 == datasets_f2, f"{file1} and {file2} consists of different datasets."  # noqa
-        assert objects_f1 == objects_f2, f"{file1} and {file2} consists of different datasets."  # noqa
+        assert set(groups_f1) == set(groups_f2), f"{files_mapping[file1]} and {files_mapping[file2]} consists of different groups."  # noqa
+        assert set(datasets_f1) == set(datasets_f2), f"{files_mapping[file1]} and {files_mapping[file2]} consists of different datasets."  # noqa
+        assert set(objects_f1) == set(objects_f2), f"{files_mapping[file1]} and {files_mapping[file2]} consists of different datasets."  # noqa
 
         # Compare every dataset value.
         h5_diff = []
         for d in datasets_f1:
-            if not np.allclose(f1[d][:], f2[d][:]):
+            try:
+                if not np.allclose(f1[d][()], f2[d][()], equal_nan=True):
+                    h5_diff.append(d)
+            except ValueError as e:
+                print(f"ValueError: {e}, {d}")
+                h5_diff.append(d)
+            except AttributeError as e:
+                print(f"AttributeError: {e}, {d}")
                 h5_diff.append(d)
         for d in objects_f1:
-            if f1[d][:] != f2[d][:]:
+            try:
+                if isinstance(f1[d][()], bytes):
+                    if f1[d][()].decode('utf-8') != f2[d][()].decode('utf-8'):
+                        h5_diff.append(d)
+                elif f1[d][()] != f2[d][()]:  # pnccd files have only a list of bytes
+                    h5_diff.append(d)
+            except ValueError as e:
+                print(f"ValueError: {e}, {d}")
+            except AttributeError as e:
+                print(f"AttributeError: {e}, {d}, {f1[d][()].decode('utf-8')}")
                 h5_diff.append(d)
-        assert not h5_diff, f"{[d for d in h5_diff]} datasets contain different values."  # noqa
-    finally:
-        f1.close()
-        f2.close()
-
+        assert not h5_diff, f"{[d for d in h5_diff]} datasets contain different values for {files_mapping[file1]} and {files_mapping[file2]}"  # noqa
 
 def validate_files(
     ref_folder: pathlib.PosixPath,
@@ -129,8 +140,12 @@ def validate_files(
         prefix="cal_",
         delete=True,
     )
-    hp1 = h5py.File(out_tf.name, 'w', driver='core', backing_store=True)
-    hp2 = h5py.File(ref_tf.name, 'w', driver='core', backing_store=True)
+    hp1 = h5py.File(out_tf.name, 'w')
+    hp2 = h5py.File(ref_tf.name, 'w')
+    files_mapping = {
+        out_tf.name: test_file,
+        ref_tf.name: ref_folder / test_file.name,
+        }
 
     # Copy h5 files for validation and exclude selected attrs.
     for dest, f in zip([hp1, hp2], [test_file, ref_folder / test_file.name]):
@@ -142,7 +157,9 @@ def validate_files(
     hp2.close()
     result = file_md5(out_tf.name) == file_md5(ref_tf.name)
     if not result and find_difference:
-        find_differences(out_tf.name, ref_tf.name)
+        find_differences(out_tf.name, ref_tf.name, files_mapping)
+        print("No differences found. Most probably there are NANs in the data.")
+        result = True
     out_tf.close()
     ref_tf.close()
 
@@ -203,7 +220,7 @@ def test_xfel_calibrate(
 
     (
         detectors, calibration, picked_test,
-        skip_numerical_validation, find_difference,
+        skip_numerical_validation, only_validate, find_difference,
         reference_dir_base, out_dir_base,
     ) = release_test_config
 
@@ -237,6 +254,45 @@ def test_xfel_calibrate(
     reference_folder = pathlib.Path(val_dict["reference-folder"].format(
         reference_dir_base, cal_conf["karabo-id"], test_key))
 
+    def validate_files_now():
+        # 3rd Check number of produced h5 files.
+        h5files = list(out_folder.glob("*.h5"))
+        expected_h5files = list(reference_folder.glob("*.h5"))
+        assert len(h5files) == len(expected_h5files), f"{test_key} failure, number of files are not as expected."  # noqa
+        print(f"{test_key}'s calibration h5files numbers are as expected.")
+        non_valid_files = []
+        # Hard coded datasets to exclude from numerical validation.
+        # These datasets are known to be updated every time.
+        if cal_type.lower() == "correct":
+            exclude_attrs = ["METADATA/creationDate", "METADATA/updateDate", "INDEX/origin", "CONTROL/", "/RUN", "/INDEX/HED_IA1_EPX100-1/DET/RECEIVER", "/INDEX/HED_IA1_EPX100-1/DET/CONTROL"]
+        else:
+            exclude_attrs = ["report"]
+
+        # 4th check that test and reference h5files are identical.
+        _validate_files = partial(
+            validate_files,
+            reference_folder,
+            out_folder,
+            exclude_attrs,
+            find_difference,
+        )
+        processes = 16
+        if "LPD" in str(h5files[0]):
+            processes = 4
+        with multiprocessing.Pool(processes=processes) as pool:
+            result = pool.map(_validate_files, h5files)
+
+        # Collect non-valid files, if any, to display them in the error message.
+        for valid, file in result:
+            if not valid:
+                non_valid_files.append(file)
+        assert len(non_valid_files) == 0, f"{test_key} failure, while validating metadata for {non_valid_files}"  # noqa
+        print(f"{test_key}'s calibration h5files are validated successfully.")
+
+    if only_validate:
+        validate_files_now()
+        return
+
     cmd += ["--slurm-name", test_key]
     f = io.StringIO()
 
@@ -267,50 +323,18 @@ def test_xfel_calibrate(
         ] for s in states):
             slurm_watcher = False
         else:
-            time.sleep(0.5)
+            time.sleep(2)
 
     # 1st check that all jobs were COMPLETED without errors.
     states = res.stdout.decode().split("\n")[2:-1]
     assert all(s.strip() == "COMPLETED" for s in states), f"{test_key} failure, calibration jobs were not completed. {jobids}: {states}"  # noqa
     print(f"{test_key}'s jobs were COMPLETED")
-    time.sleep(2.0)
+    time.sleep(1.0)
 
     # 2nd check for report availability.
     report_file = out_folder / f"{report_name}.pdf"
     assert report_file.exists(), f"{test_key} failure, report doesn't exists."
 
-    # 3rd Check number of produced h5 files.
-    h5files = list(out_folder.glob("*.h5"))
-    expected_h5files = list(reference_folder.glob("*.h5"))
-    assert len(h5files) == len(expected_h5files), f"{test_key} failure, number of files are not as expected."  # noqa
-    print(f"{test_key}'s calibration h5files numbers are as expected.")
-
     # Stop tests at this point, if desired.
-    if skip_numerical_validation:
-        return
-
-    non_valid_files = []
-    # Hard coded datasets to exclude from numerical validation.
-    # These datasets are know to be updated everytime.
-    if cal_type.lower() == "correct":
-        exclude_attrs = ["METADATA/creationDate", "METADATA/updateDate"]
-    else:
-        exclude_attrs = ["report"]
-
-    # 4th check that test and reference h5files are identical.
-    _validate_files = partial(
-        validate_files,
-        reference_folder,
-        out_folder,
-        exclude_attrs,
-        find_difference,
-    )
-    with multiprocessing.Pool() as pool:
-        result = pool.map(_validate_files, h5files)
-
-    # Collect non-valid files, if any, to display them in the error message.
-    for valid, file in result:
-        if not valid:
-            non_valid_files.append(file)
-    assert len(non_valid_files) == 0, f"{test_key} failure, while validating metadata for {non_valid_files}"  # noqa
-    print(f"{test_key}'s calibration h5files are validated successfully.")
+    if not skip_numerical_validation:
+        validate_files_now()
-- 
GitLab