Skip to content
Snippets Groups Projects
Commit b0f1c5f0 authored by Thomas Kluyver
Browse files

Avoid loading large arrays into memory in one go

parent 303cb726
No related branches found
No related tags found
1 merge request: !933 "[Tests] clearer comparison of HDF5 files"
...@@ -41,8 +41,8 @@ class ComparisonResult: ...@@ -41,8 +41,8 @@ class ComparisonResult:
print(f" + NEW: {ds}") print(f" + NEW: {ds}")
for ds in self.missing_dsets: for ds in self.missing_dsets:
print(f" - MISSING: {ds}") print(f" - MISSING: {ds}")
for ds in self.changed_dsets: for ds, detail in self.changed_dsets:
print(f" ~ CHANGED: {ds}") print(f" ~ CHANGED: {ds} ({detail})")
def gather_dsets(f: h5py.File): def gather_dsets(f: h5py.File):
...@@ -69,20 +69,27 @@ def validate_file( ...@@ -69,20 +69,27 @@ def validate_file(
out_dsets = gather_dsets(fout) out_dsets = gather_dsets(fout)
changed = [] changed = []
for dsname in sorted((ref_dsets & out_dsets) - exclude_dsets): for dsname in sorted((ref_dsets & out_dsets) - exclude_dsets):
ref_arr = fref[dsname][()] ref_ds = fref[dsname]
out_arr = fout[dsname][()] out_ds = fout[dsname]
if isinstance(ref_arr, np.ndarray) ^ isinstance(out_arr, np.ndarray): if out_ds.shape != ref_ds.shape:
eq = False # One is an array, the other not changed.append((dsname, f"Shape: {ref_ds.shape} -> {out_ds.shape}"))
elif isinstance(ref_arr, np.ndarray): elif out_ds.dtype != ref_ds.dtype:
# Both arrays changed.append((dsname, f"Dtype: {ref_ds.dtype} -> {out_ds.dtype}"))
nanable = np.issubdtype(ref_arr.dtype, np.floating) \
or np.issubdtype(ref_arr.dtype, np.complexfloating)
eq = np.array_equal(ref_arr, out_arr, equal_nan=nanable)
else: else:
# Both single values # Compare chunk by chunk to not use lots of RAM
eq = ref_arr == out_arr nanable = np.issubdtype(ref_ds.dtype, np.floating) \
if not eq: or np.issubdtype(ref_ds.dtype, np.complexfloating)
changed.append(dsname) for chunk_slice in ref_ds.iter_chunks():
ref_chunk = ref_ds[chunk_slice]
out_chunk = out_ds[chunk_slice]
if not np.allclose(ref_chunk, out_chunk, equal_nan=nanable):
# If just 1 entry, show the values
if ref_ds.size == 1:
r, o = np.squeeze(ref_chunk), np.squeeze(out_chunk)
changed.append((dsname, f"Value: {r} -> {o}"))
else:
changed.append((dsname, "Data changed"))
break
return ComparisonResult( return ComparisonResult(
test_file, test_file,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment