Thomas Kluyver · a3bf4c68 · 98cea14d · 83ac2288 · b0f1c5f0 · 303cb726
--- a/tests/test_reference_runs/test_pre_deployment.py

+ 49

− 19
+++ b/tests/test_reference_runs/test_pre_deployment.py

+ 49

− 19
 @@ -41,8 +41,8 @@ class ComparisonResult:
            print(f"  + NEW: {ds}")
        for ds in self.missing_dsets:
            print(f"  - MISSING: {ds}")
-        for ds in self.changed_dsets:
-            print(f"  ~ CHANGED: {ds}")
+        for ds, detail in self.changed_dsets:
+            print(f"  ~ CHANGED: {ds} ({detail})")


 def gather_dsets(f: h5py.File):
 @@ -56,6 +56,19 @@ def gather_dsets(f: h5py.File):
    return res


+def iter_sized_chunks(ds: h5py.Dataset, chunk_size: int):
+    """Yield index expressions covering the dataset along its first axis.
+
+    Aims for about ``chunk_size`` bytes per block, but every block spans at
+    least one entry along the first axis, even when a single entry is
+    larger than ``chunk_size``.
+
+    A scalar dataset (``ndim == 0``) yields a single empty tuple; otherwise
+    consecutive ``slice(start, stop)`` objects are yielded. The last slice
+    may extend past the end of the first axis.
+    """
+    if ds.ndim == 0:  # Scalar
+        yield ()
+        return
+
+    # Bytes in one entry along the first axis. int() is needed because
+    # np.prod(()) is the float 1.0 for 1-D data, and range() needs an int.
+    row_bytes = ds.dtype.itemsize * int(np.prod(ds.shape[1:]))
+
+    # max(), not min(): min() capped every block at a single row, and gave a
+    # zero step (ValueError in range()) when one row exceeded chunk_size.
+    # The inner max() avoids dividing by zero for zero-length trailing axes.
+    chunk_l = max(chunk_size // max(row_bytes, 1), 1)
+    for start in range(0, ds.shape[0], chunk_l):
+        yield slice(start, start + chunk_l)
+
+
 def validate_file(
    ref_folder: pathlib.PosixPath,
    out_folder: pathlib.PosixPath,
 @@ -69,18 +82,33 @@ def validate_file(
        out_dsets = gather_dsets(fout)
        changed = []
        for dsname in sorted((ref_dsets & out_dsets) - exclude_dsets):
-            ref_arr = fref[dsname][()]
-            out_arr = fout[dsname][()]
-            if isinstance(ref_arr, np.ndarray) ^ isinstance(out_arr, np.ndarray):
-                eq = False  # One is an array, the other not
-            elif isinstance(ref_arr, np.ndarray):
-                # Both arrays
-                eq = np.array_equal(ref_arr, out_arr, equal_nan=True)
+            ref_ds = fref[dsname]
+            out_ds = fout[dsname]
+            if out_ds.shape != ref_ds.shape:
+                changed.append((dsname, f"Shape: {ref_ds.shape} -> {out_ds.shape}")) # noqa
+            elif out_ds.dtype != ref_ds.dtype:
+                changed.append((dsname, f"Dtype: {ref_ds.dtype} -> {out_ds.dtype}")) # noqa
            else:
-                # Both single values
-                eq = ref_arr == out_arr
-            if not eq:
-                changed.append(dsname)
+                floaty = np.issubdtype(ref_ds.dtype, np.floating) \
+                        or np.issubdtype(ref_ds.dtype, np.complexfloating)
+
+                # Compare data incrementally rather than loading it all at once;
+                # read in blocks of ~64 MB (arbitrary limit) along first axis.
+                for chunk_slice in iter_sized_chunks(ref_ds, 64 * 1024 * 1024):
+                    ref_chunk = ref_ds[chunk_slice]
+                    out_chunk = out_ds[chunk_slice]
+                    if floaty:
+                        eq = np.allclose(ref_chunk, out_chunk, equal_nan=True)
+                    else:
+                        eq = np.array_equal(ref_chunk, out_chunk)
+                    if not eq:
+                        # If just 1 entry, show the values
+                        if ref_ds.size == 1:
+                            r, o = np.squeeze(ref_chunk), np.squeeze(out_chunk)
+                            changed.append((dsname, f"Value: {r} -> {o}"))
+                        else:
+                            changed.append((dsname, "Data changed"))
+                        break

    return ComparisonResult(
        test_file,
 @@ -90,7 +118,9 @@ def validate_file(
    )


-def parse_config(cmd: List[str], config: Dict[str, Any], out_folder: str) -> List[str]:
+def parse_config(
+        cmd: List[str], config: Dict[str, Any], out_folder: str
+) -> List[str]:
    """Convert a dictionary to a list of arguments.

    Values that are not strings will be cast.
 @@ -233,7 +263,7 @@ def slurm_watcher(test_key: str, std_out: str):
    list(automated_test_config.items()),
    ids=list(automated_test_config.keys()),
 )
-def test_xfel_calibrate(test_key: str, val_dict: dict, release_test_config: Tuple):
+def test_xfel_calibrate(test_key: str, val_dict: dict, release_test_config: Tuple):  # noqa
    """Test xfel calibrate detectors and calibrations written
    in the given callab_test YAML file.
    Args:
 @@ -275,16 +305,16 @@ def test_xfel_calibrate(test_key: str, val_dict: dict, release_test_config: Tupl

    cal_conf = val_dict["config"]

-    out_folder = pathlib.Path(
-        cal_conf["out-folder"].format(out_dir_base, cal_conf["karabo-id"], test_key)
-    )
+    out_folder = pathlib.Path(cal_conf["out-folder"].format(
+        out_dir_base, cal_conf["karabo-id"], test_key
+    ))
    reference_folder = pathlib.Path(
        val_dict["reference-folder"].format(
            reference_dir_base, cal_conf["karabo-id"], test_key
        )
    )

-    report_name = out_folder / f"{test_key}_{datetime.now().strftime('%y%m%d_%H%M%S')}"
+    report_name = out_folder / f"{test_key}_{datetime.now().strftime('%y%m%d_%H%M%S')}" # noqa

    cal_conf["report-to"] = str(report_name)

 @@ -301,7 +331,7 @@

    if not use_slurm:  # e.g. for Gitlab CI.
        cmd += ["--no-cluster-job"]

    cmd += [
        "--slurm-name",
        test_key,