import os

import h5py


def paths_to_tree(paths):
    """Convert slash-separated paths to a nested-dict tree, with True at leaves.

    Each path component becomes a dict key. The final component of a path maps
    to ``True``; intermediate components map to nested dicts. When one path is
    a prefix of another, the shorter path wins and the longer one is dropped.

    Args:
        paths: An iterable of slash-separated path strings. A single string is
            treated as one path rather than being iterated character by
            character.

    Returns:
        A nested dict, e.g. ``['a/b', 'c']`` gives ``{'a': {'b': True}, 'c': True}``.
        Paths with no components (``''`` or ``'/'``) contribute nothing.
    """
    if isinstance(paths, str):
        # Iterating a bare string would yield one "path" per character.
        paths = [paths]

    tree = {}
    for path in paths:
        # Dropping empty components handles leading, trailing and repeated
        # slashes, which would otherwise create '' keys that match nothing.
        path_names = [name for name in path.split('/') if name]
        if not path_names:
            continue  # '' or '/' names no member, so there is nothing to record

        tree_part = tree
        for name in path_names[:-1]:
            tree_part = tree_part.setdefault(name, {})
            if tree_part is True:
                break  # A previous path was a prefix of this one
        else:
            # Overwrites any existing subtree: excluding a parent makes
            # previously recorded exclusions beneath it redundant.
            tree_part[path_names[-1]] = True

    return tree


def copy_except_tree(src_group: h5py.Group, dest_group: h5py.Group, except_tree):
    """Copy the members of src_group into dest_group, skipping excluded paths.

    Members are copied with ``without_attrs=True``. Groups that are only
    partially excluded are obtained in the destination with ``require_group``
    and then filled recursively.

    Args:
        src_group: Group to copy from.
        dest_group: Group to copy into.
        except_tree: Nested dict as built by ``paths_to_tree``. ``True`` marks a
            member to skip entirely; a dict marks a group with exclusions
            somewhere beneath it.

    Raises:
        TypeError: If except_tree has exclusions beneath a member of src_group
            that is not a group.
    """
    for name in src_group:
        except_tree_part = except_tree.get(name)
        if except_tree_part is True:  # Totally excluded
            continue

        if except_tree_part is None:  # Not excluded
            # Copy the whole object in one call rather than descending into it.
            src_group.copy(name, dest_group, name, without_attrs=True)
            continue

        # Partially excluded: descend and filter the children.
        src_subgroup = src_group[name]
        if not isinstance(src_subgroup, h5py.Group):
            # An explicit raise (not assert) so the check survives `python -O`.
            raise TypeError(
                f"Cannot apply exclusions below {name!r}: expected an "
                f"h5py.Group, got {type(src_subgroup).__name__}"
            )
        copy_except_tree(
            src_subgroup, dest_group.require_group(name), except_tree_part
        )


def h5_copy_except_paths(src_group, dest_group, except_paths):
    """Copy an HDF5 file except for a list of paths to ignore

    This tries to copy entire groups where possible, to minimise overhead.

    Args:
        src_group: An open group/file to copy from, or a file path (str, bytes
            or os.PathLike), which is opened with mode 'r'.
        dest_group: An open group/file to copy into, or a file path, which is
            opened with mode 'a'.
        except_paths: Iterable of slash-separated paths, relative to src_group,
            to leave out of the copy. A single string is treated as one path.

    Raises:
        TypeError: If an excluded path passes through a member that is not a
            group.
    """
    # If src_group/dest_group are file paths, open them with h5py. Each branch
    # recurses inside the `with` block so the file stays open for the copy and
    # is closed afterwards, even on error.
    if isinstance(src_group, (str, bytes, os.PathLike)):
        with h5py.File(src_group, 'r') as src_file:
            return h5_copy_except_paths(src_file, dest_group, except_paths)
    if isinstance(dest_group, (str, bytes, os.PathLike)):
        with h5py.File(dest_group, 'a') as dest_file:
            return h5_copy_except_paths(src_group, dest_file, except_paths)

    copy_except_tree(src_group, dest_group, paths_to_tree(except_paths))