Skip to content
Snippets Groups Projects

[Tests] clearer comparison of HDF5 files

Merged Thomas Kluyver requested to merge test/compare-h5-files into master
1 file
+ 22
- 15
Compare changes
  • Side-by-side
  • Inline
@@ -41,8 +41,8 @@ class ComparisonResult:
print(f" + NEW: {ds}")
for ds in self.missing_dsets:
print(f" - MISSING: {ds}")
for ds in self.changed_dsets:
print(f" ~ CHANGED: {ds}")
for ds, detail in self.changed_dsets:
print(f" ~ CHANGED: {ds} ({detail})")
def gather_dsets(f: h5py.File):
@@ -69,20 +69,27 @@ def validate_file(
out_dsets = gather_dsets(fout)
changed = []
for dsname in sorted((ref_dsets & out_dsets) - exclude_dsets):
ref_arr = fref[dsname][()]
out_arr = fout[dsname][()]
if isinstance(ref_arr, np.ndarray) ^ isinstance(out_arr, np.ndarray):
eq = False # One is an array, the other not
elif isinstance(ref_arr, np.ndarray):
# Both arrays
nanable = np.issubdtype(ref_arr.dtype, np.floating) \
or np.issubdtype(ref_arr.dtype, np.complexfloating)
eq = np.array_equal(ref_arr, out_arr, equal_nan=nanable)
ref_ds = fref[dsname]
out_ds = fout[dsname]
if out_ds.shape != ref_ds.shape:
changed.append((dsname, f"Shape: {ref_ds.shape} -> {out_ds.shape}"))
elif out_ds.dtype != ref_ds.dtype:
changed.append((dsname, f"Dtype: {ref_ds.dtype} -> {out_ds.dtype}"))
else:
# Both single values
eq = ref_arr == out_arr
if not eq:
changed.append(dsname)
# Compare chunk by chunk to not use lots of RAM
nanable = np.issubdtype(ref_ds.dtype, np.floating) \
or np.issubdtype(ref_ds.dtype, np.complexfloating)
for chunk_slice in ref_ds.iter_chunks():
ref_chunk = ref_ds[chunk_slice]
out_chunk = out_ds[chunk_slice]
if not np.allclose(ref_chunk, out_chunk, equal_nan=nanable):
# If just 1 entry, show the values
if ref_ds.size == 1:
r, o = np.squeeze(ref_chunk), np.squeeze(out_chunk)
changed.append((dsname, f"Value: {r} -> {o}"))
else:
changed.append((dsname, "Data changed"))
break
return ComparisonResult(
test_file,
Loading