From fe313267915a2949e7da7476fd954372c279e48c Mon Sep 17 00:00:00 2001 From: Egor Sobolev <egor.sobolev@xfel.eu> Date: Sun, 25 Feb 2024 16:34:21 +0100 Subject: [PATCH] Refactor write_file on AgipdCorrections to create indexes using the DataFile instead of copying them from the raw data. For compatibility, it still copies some xtdf groups --- src/cal_tools/agipdlib.py | 88 ++++++++++++++++++++++++++++++--------- 1 file changed, 68 insertions(+), 20 deletions(-) diff --git a/src/cal_tools/agipdlib.py b/src/cal_tools/agipdlib.py index 348d621c9..4bb13fdbd 100644 --- a/src/cal_tools/agipdlib.py +++ b/src/cal_tools/agipdlib.py @@ -750,11 +750,7 @@ class AgipdCorrections: :param ofile_name: Name of output file including path :param i_proc: Index of shared memory array """ - - module_idx = int(file_name.split('/')[-1].split('-')[2][-2:]) - agipd_base = f'INSTRUMENT/{self.h5_data_path}/'.format(module_idx) - idx_base = self.h5_index_path.format(module_idx) - data_path = f'{agipd_base}/image' + from .files import DataFile # Obtain a shallow copy of the pointer map to allow for local # changes in this method. @@ -767,23 +763,75 @@ class AgipdCorrections: n_img = data_dict['nImg'][0] if n_img == 0: return - trains = data_dict['trainId'][:n_img] - - # Re-cast fields in-place, i.e. using the same memory region. - for field, dtype in self.recast_image_fields.items(): - data_dict[field] = cast_array_inplace(data_dict[field], dtype) - - with h5py.File(ofile_name, "w") as outfile: - # Copy any other data from the input file. - # This includes indexes, so it's important that the corrected data - # we write is aligned with the raw data. 
- with h5py.File(file_name, "r") as infile: + self.copy_and_sanitize_non_cal_data( + infile, outfile, agipd_base, idx_base, trains + ) + + dc = H5File(file_name) + + # make index for corrected images + trains, count = np.unique(data_dict['trainId'][:n_img], + return_counts=True) + + # backward compatibility BEGIN + # make index the same as in raw file + raw_trains = dc.files[0].train_ids # zero trainId are removed + image_count = np.zeros(len(raw_trains), dtype=int) + idx = np.searchsorted(raw_trains, trains, + sorter=np.argsort(raw_trains)) + image_count[idx] = count + # backward compatibility END + + # parse filename and get parameters + out_folder, fname = os.path.split(ofile_name) + tokens = os.path.splitext(fname)[0].split('-') + runno = int(tokens[1][1:]) + modno = int(tokens[2][-2:]) + agg = tokens[2] + seqno = int(tokens[3][1:]) + + agipd_base = self.h5_data_path.format(modno) + karabo_id, _, channel = agipd_base.split('/') + agipd_corr_source = f"{karabo_id}/CORR/{channel}" + + agipd_corr_channels = { + "image": image_count, + } + + instrument_channels = [f"{agipd_corr_source}/{ch}" + for ch in agipd_corr_channels] + + # backward compatibility BEGIN + copy_channels = { + ch: dc.files[0].get_index(agipd_base, ch)[1] + for ch in ["detector", "header", "trailer"] + } + instrument_channels += [f"{agipd_base}/{ch}" for ch in copy_channels] + # backward compatibility END + + with DataFile.from_details(out_folder, agg, runno, seqno) as outfile: + outfile.create_metadata( + like=dc, instrument_channels=instrument_channels) + outfile.create_index(raw_trains, from_file=dc.files[0]) + + agipd_src = outfile.create_instrument_source(agipd_corr_source) + agipd_src.create_index(**agipd_corr_channels) # All corrected data goes in a /INSTRUMENT/.../image group - image_grp = outfile[data_path] + image_grp = agipd_src.require_group("image") + image_index_grp = agipd_src.get_index_group("image") + + # backward compatibility BEGIN + agipd_legacy = 
outfile.create_instrument_source(agipd_base) + # these two lines must go after `agipd_base` instrument source + # creation, otherwise it tries to create an existing h5 group + outfile[f"INDEX/{agipd_base}/image"] = image_index_grp + outfile[f"INSTRUMENT/{agipd_base}/image"] = image_grp + + agipd_legacy.create_index(**copy_channels) + for ch in copy_channels: + dc.files[0].file.copy( + f"INSTRUMENT/{agipd_base}/{ch}", agipd_legacy, ch, + without_attrs=True + ) + # backward compatibility END # Set up all the datasets before filling them. This puts the # metadata about the datasets together at the start of the file, -- GitLab