diff --git a/cal_tools/cal_tools/agipdlib.py b/cal_tools/cal_tools/agipdlib.py index 7a83a61d959060bbd21d224462aa513a3582ed9c..11d4438584b2db40bea2970558a1066269cc6e2c 100644 --- a/cal_tools/cal_tools/agipdlib.py +++ b/cal_tools/cal_tools/agipdlib.py @@ -235,24 +235,46 @@ class AgipdCorrections: data_path = f'{agipd_base}/image' data_dict = self.shared_dict[i_proc] - with h5py.File(file_name, 'r') as infile: - with h5py.File(ofile_name, 'w') as outfile: + image_fields = [ + 'trainId', 'pulseId', 'cellId', 'data', 'gain', 'mask', 'blShift', + ] + compress_fields = ['gain', 'mask'] - n_img = data_dict['nImg'][0] - if n_img == 0: - return - trains = data_dict['trainId'][:n_img] + n_img = data_dict['nImg'][0] + if n_img == 0: + return + trains = data_dict['trainId'][:n_img] + + with h5py.File(ofile_name, 'w') as outfile: + # Copy any other data from the input file. + # This includes indexes, so it's important that the corrected data + # we write is aligned with the raw data. + with h5py.File(file_name, 'r') as infile: self.copy_and_sanitize_non_cal_data(infile, outfile, agipd_base, idx_base, trains) - outfile[data_path]['data'] = data_dict['data'][:n_img] - outfile[data_path]['gain'] = data_dict['gain'][:n_img] - outfile[data_path]['blShift'] = data_dict['blShift'][:n_img] - outfile[data_path]['mask'] = data_dict['mask'][:n_img] - outfile[data_path]['cellId'] = data_dict['cellId'][:n_img] - outfile[data_path]['pulseId'] = data_dict['pulseId'][:n_img] - outfile[data_path]['trainId'] = data_dict['trainId'][:n_img] + # All corrected data goes in a /INSTRUMENT/.../image group + image_grp = outfile[data_path] + + # Set up all the datasets before filling them. This puts the + # metadata about the datasets together at the start of the file, + # so it's efficient to examine the file structure. + for field in image_fields: + arr = data_dict[field][:n_img] + kw = {'fletcher32': True} + if field in compress_fields: + kw.update(compression='gzip', compression_opts=1, shuffle=True) + if arr.ndim > 1: + kw['chunks'] = (1,) + arr.shape[1:] # 1 chunk = 1 image + + image_grp.create_dataset( + field, shape=arr.shape, dtype=arr.dtype, **kw + ) + + # Write the corrected data + for field in image_fields: + image_grp[field][:] = data_dict[field][:n_img] def cm_correction(self, i_proc, asic): """ @@ -1000,14 +1022,14 @@ class AgipdCorrections: for i in range(n_cores_files): self.shared_dict.append({}) self.shared_dict[i]['cellId'] = sharedmem.empty(shape[0], - dtype='i4') + dtype='u2') self.shared_dict[i]['pulseId'] = sharedmem.empty(shape[0], - dtype='i4') + dtype='u8') self.shared_dict[i]['trainId'] = sharedmem.empty(shape[0], - dtype='i4') + dtype='u8') self.shared_dict[i]['moduleIdx'] = sharedmem.empty(1, dtype='i4') self.shared_dict[i]['nImg'] = sharedmem.empty(1, dtype='i4') - self.shared_dict[i]['mask'] = sharedmem.empty(shape, dtype='i4') + self.shared_dict[i]['mask'] = sharedmem.empty(shape, dtype='u4') self.shared_dict[i]['data'] = sharedmem.empty(shape, dtype='f4') self.shared_dict[i]['rawgain'] = sharedmem.empty(shape, dtype='u2') self.shared_dict[i]['gain'] = sharedmem.empty(shape, dtype='u1')