import os

import h5py

def paths_to_tree(paths):
    """Convert '/'-separated paths to a nested-dict tree, with True at leaves.

    Each path component becomes a dict key. The final component of every
    path maps to ``True``; intermediate components map to nested dicts.
    When one path is a prefix of another, the shorter path wins: its leaf
    ``True`` replaces (or blocks creation of) any deeper subtree.

    Empty components are ignored, so leading, trailing and repeated slashes
    make no difference ('/a//b/' is handled like 'a/b'). An empty component
    could never match a member name, so keeping it would silently disable
    the entry. Paths with no components at all ('' or '/') are skipped.

    :param paths: iterable of str paths
    :return: nested dict whose leaves are all ``True``
    """
    tree = {}

    for path in paths:
        path_names = [name for name in path.split('/') if name]
        if not path_names:
            continue  # Names nothing below the root

        *parent_names, leaf_name = path_names
        tree_part = tree
        for name in parent_names:
            tree_part = tree_part.setdefault(name, {})
            if tree_part is True:
                break  # A previous path was a prefix of this one
        else:
            # Overwrites any subtree left by an earlier, longer path
            tree_part[leaf_name] = True

    return tree
        

def copy_except_tree(src_group: h5py.Group, dest_group: h5py.Group, except_tree):
    """Recursively copy the members of src_group into dest_group, minus exclusions.

    ``except_tree`` is a nested dict as built by ``paths_to_tree``. For each
    member name in ``src_group``:

    * value ``True``: the member is skipped entirely;
    * name absent: the member is copied in one call (without attributes);
    * nested dict: the member must be a group; a group of the same name is
      obtained in ``dest_group`` and the copy recurses into it with the
      nested dict.
    """
    for key in src_group:
        exclusion = except_tree.get(key)

        if exclusion is True:
            # Whole member is on the exclusion list
            continue

        if exclusion is None:
            # Nothing beneath this member is excluded: copy it wholesale
            src_group.copy(key, dest_group, key, without_attrs=True)
            continue

        # Only some descendants are excluded, so descend one level
        child_src = src_group[key]
        assert isinstance(child_src, h5py.Group)
        child_dest = dest_group.require_group(key)
        copy_except_tree(child_src, child_dest, exclusion)
            

def h5_copy_except_paths(src_group, dest_group, except_paths):
    """Copy the contents of one HDF5 file/group to another, skipping given paths

    ``src_group`` and ``dest_group`` may each be an open h5py group/file or
    a filesystem path (str, bytes or os.PathLike). A source path is opened
    with mode 'r' and a destination path with mode 'a'; files opened here
    are closed again before returning.

    ``except_paths`` is an iterable of '/'-separated paths, relative to
    ``src_group``, which should not be copied.

    Whole groups are copied in one go wherever nothing inside them is
    excluded, to minimise overhead.
    """
    path_types = (str, bytes, os.PathLike)

    if isinstance(src_group, path_types):
        # Open the source file, then retry with the file object
        with h5py.File(src_group, 'r') as opened_src:
            h5_copy_except_paths(opened_src, dest_group, except_paths)
    elif isinstance(dest_group, path_types):
        # Source is already an object; open the destination file
        with h5py.File(dest_group, 'a') as opened_dest:
            h5_copy_except_paths(src_group, opened_dest, except_paths)
    else:
        excluded = paths_to_tree(except_paths)
        copy_except_tree(src_group, dest_group, excluded)