diff --git a/cal_tools/cal_tools/agipdlib.py b/cal_tools/cal_tools/agipdlib.py index 7d16b27d92886e91a1dd97883973bf2a8ec8a92c..77b4f882cd089373c1297f9fc7a4c9ed741d5dbd 100644 --- a/cal_tools/cal_tools/agipdlib.py +++ b/cal_tools/cal_tools/agipdlib.py @@ -235,24 +235,46 @@ class AgipdCorrections: data_path = f'{agipd_base}/image' data_dict = self.shared_dict[i_proc] - with h5py.File(file_name, 'r') as infile: - with h5py.File(ofile_name, 'w') as outfile: + image_fields = [ + 'data', 'gain', 'mask', 'trainId', 'pulseId', 'cellId', 'blShift', + ] + compress_fields = ['gain', 'mask'] - n_img = data_dict['nImg'][0] - if n_img == 0: - return - trains = data_dict['trainId'][:n_img] + n_img = data_dict['nImg'][0] + if n_img == 0: + return + trains = data_dict['trainId'][:n_img] + + with h5py.File(ofile_name, 'w') as outfile: + # Copy any other data from the input file. + # This includes indexes, so it's important that the corrected data + # we write is aligned with the raw data. + with h5py.File(file_name, 'r') as infile: self.copy_and_sanitize_non_cal_data(infile, outfile, agipd_base, idx_base, trains) - outfile[data_path]['data'] = data_dict['data'][:n_img] - outfile[data_path]['gain'] = data_dict['gain'][:n_img] - outfile[data_path]['blShift'] = data_dict['blShift'][:n_img] - outfile[data_path]['mask'] = data_dict['mask'][:n_img] - outfile[data_path]['cellId'] = data_dict['cellId'][:n_img] - outfile[data_path]['pulseId'] = data_dict['pulseId'][:n_img] - outfile[data_path]['trainId'] = data_dict['trainId'][:n_img] + # All corrected data goes in a /INSTRUMENT/.../image group + image_grp = outfile[data_path] + + # Set up all the datasets before filling them. This puts the + # metadata about the datasets together at the start of the file, + # so it's efficient to examine the file structure. + for field in image_fields: + arr = data_dict[field][:n_img] + kw = {} + if field in compress_fields: + kw.update(compression='gzip', compression_opts=1) + if arr.ndim > 1: + kw['chunks'] = (1,) + arr.shape[1:] # 1 chunk = 1 image + + image_grp.create_dataset( + field, shape=arr.shape, dtype=arr.dtype, **kw + ) + + # Write the corrected data + for field in image_fields: + image_grp[field][:] = data_dict[field][:n_img] def cm_correction(self, i_proc, asic): """