diff --git a/src/cal_tools/agipdlib.py b/src/cal_tools/agipdlib.py
index 69659257f39e5111d8eb1555f9d3547f2015ac84..05662d4fa126cca01f9f213668739e99d0b9c62c 100644
--- a/src/cal_tools/agipdlib.py
+++ b/src/cal_tools/agipdlib.py
@@ -1,7 +1,6 @@
 import os
-import posixpath
 import zlib
-from dataclasses import dataclass, field
+from dataclasses import dataclass
 from datetime import datetime
 from logging import warning
 from multiprocessing import Manager
@@ -29,9 +28,9 @@ from cal_tools.agipdutils import (
     melt_snowy_pixels,
 )
 from cal_tools.enums import AgipdGainMode, BadPixels, SnowResolution
-from cal_tools.h5_copy_except import h5_copy_except_paths
 from logging import warning
+
 
 @dataclass
 class AgipdCtrl:
     """Access AGIPD control parameters from a single run.
@@ -732,11 +731,7 @@ class AgipdCorrections:
         :param ofile_name: Name of output file including path
         :param i_proc: Index of shared memory array
         """
-
-        module_idx = int(file_name.split('/')[-1].split('-')[2][-2:])
-        agipd_base = f'INSTRUMENT/{self.h5_data_path}/'.format(module_idx)
-        idx_base = self.h5_index_path.format(module_idx)
-        data_path = f'{agipd_base}/image'
+        from .files import DataFile
 
         # Obtain a shallow copy of the pointer map to allow for local
         # changes in this method.
@@ -749,23 +744,52 @@ class AgipdCorrections:
         n_img = data_dict['nImg'][0]
         if n_img == 0:
             return
-        trains = data_dict['trainId'][:n_img]
 
         # Re-cast fields in-place, i.e. using the same memory region.
         for field, dtype in self.recast_image_fields.items():
             data_dict[field] = cast_array_inplace(data_dict[field], dtype)
 
-        with h5py.File(ofile_name, "w") as outfile:
-            # Copy any other data from the input file.
-            # This includes indexes, so it's important that the corrected data
-            # we write is aligned with the raw data.
-            with h5py.File(file_name, "r") as infile:
-                self.copy_and_sanitize_non_cal_data(
-                    infile, outfile, agipd_base, idx_base, trains
-                )
+        dc = H5File(file_name)
+
+        # make index for corrected images
+        trains, count = np.unique(data_dict['trainId'][:n_img],
+                                  return_counts=True)
+
+        # parse filename and get parameters
+        out_folder, fname = os.path.split(ofile_name)
+        tokens = os.path.splitext(fname)[0].split('-')
+        runno = int(tokens[1][1:])
+        modno = int(tokens[2][-2:])
+        agg = tokens[2]
+        seqno = int(tokens[3][1:])
+
+        agipd_base = self.h5_data_path.format(modno)
+        karabo_id, _, channel = agipd_base.split('/')
+        channel = channel.partition(":")[0] + ":output"
+        agipd_corr_source = f"{karabo_id}/CORR/{channel}"
+
+        instrument_channels = [f"{agipd_corr_source}/image"]
+
+        # backward compatibility BEGIN
+        instrument_channels.append(f"{agipd_base}/image")
+        # backward compatibility END
+
+        with DataFile.from_details(out_folder, agg, runno, seqno) as outfile:
+            outfile.create_metadata(
+                like=dc, instrument_channels=instrument_channels)
+            outfile.create_index(trains, from_file=dc.files[0])
 
             # All corrected data goes in a /INSTRUMENT/.../image group
-            image_grp = outfile[data_path]
+            agipd_src = outfile.create_instrument_source(agipd_corr_source)
+            agipd_src.create_index(image=count)
+            image_grp = agipd_src.require_group("image")
+
+            # backward compatibility BEGIN
+            outfile[f"INDEX/{agipd_base}"] = h5py.SoftLink(
+                f"/INDEX/{agipd_corr_source}")
+            outfile[f"INSTRUMENT/{agipd_base}"] = h5py.SoftLink(
+                f"/INSTRUMENT/{agipd_corr_source}")
+            # backward compatibility END
 
             # Set up all the datasets before filling them.  This puts the
             # metadata about the datasets together at the start of the file,
@@ -1229,74 +1253,7 @@ class AgipdCorrections:
 
         return n_img_sel
 
-    def copy_and_sanitize_non_cal_data(self, infile, outfile, agipd_base,
-                                       idx_base, trains):
-        """ Copy and sanitize data in `infile` that is not touched by
-        `correctAGIPD`
-        """
-        # these are touched in the correct function, do not copy them here
-        dont_copy = ["data", "cellId", "trainId", "pulseId", "status",
-                     "length"]
-        dont_copy = [posixpath.join(agipd_base, "image", ds)
-                     for ds in dont_copy]
-
-        # don't copy index as we may need to adjust if we filter trains
-        dont_copy.append(posixpath.join(idx_base, "image"))
-
-        h5_copy_except_paths(infile, outfile, dont_copy)
-
-        # sanitize indices
-        for do in ["image", ]:
-            # uq: INDEX/trainID
-            # fidxv: INDEX/.../image/first idx values
-            # cntsv: INDEX/.../image/counts values
-
-            # Extract parameters through identifying
-            # unique trains, index and numbers.
-            uq, fidxv, cntsv = np.unique(trains, return_index=True, return_counts=True)  # noqa
-
-            # Validate calculated CORR INDEX contents by checking
-            # difference between trainId stored in RAW data and trains from
-            train_diff = np.isin(np.array(infile["/INDEX/trainId"]), uq, invert=True)  # noqa
-
-            # Insert zeros for missing trains.
-            # fidxv and cntsv should have same length as
-            # raw INDEX/.../image/first and INDEX/.../image/count,
-            # respectively
-
-            # first_inc = first incrementation
-            first_inc = True
-            for i, diff in enumerate(train_diff):
-                if diff:
-                    if i < len(cntsv):
-                        cntsv = np.insert(cntsv, i, 0)
-                        fidxv = np.insert(fidxv, i, 0) if i == 0 else np.insert(fidxv, i, fidxv[i])
-                    else:
-                        # append if at the end of the array
-                        cntsv = np.append(cntsv, 0)
-                        # increment fidxv once with the
-                        # no. of processed mem-cells.
-                        if first_inc:
-                            fidxv = np.append(fidxv,
-                                              (2 * fidxv[i-1]) - fidxv[i-2])
-                            first_inc = False
-                        else:
-                            fidxv = np.append(fidxv, fidxv[i-1])
-
-            # save INDEX contents (first, count) in CORR files
-            outfile.create_dataset(idx_base + "{}/first".format(do),
-                                   fidxv.shape,
-                                   dtype=fidxv.dtype,
-                                   data=fidxv,
-                                   fletcher32=True)
-            outfile.create_dataset(idx_base + "{}/count".format(do),
-                                   cntsv.shape,
-                                   dtype=cntsv.dtype,
-                                   data=cntsv,
-                                   fletcher32=True)
-
-    def init_constants(
-            self, cons_data: dict, module_idx: int, variant: dict):
+    def init_constants(self, cons_data: dict, module_idx: int, variant: dict):
         """
         For CI derived gain, a mean multiplication factor of 4.48 compared
         to medium gain is used, as no reliable CI data for all memory cells
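
Note on the filename parsing in the new write path: it assumes EuXFEL's
`<prefix>-R<run>-<aggregator>-S<sequence>.h5` naming convention. A minimal
sketch of what the tokenising yields, using a hypothetical output name:

    import os

    ofile_name = "/out/CORR-R0123-AGIPD04-S00002.h5"  # hypothetical example

    out_folder, fname = os.path.split(ofile_name)
    tokens = os.path.splitext(fname)[0].split('-')
    # tokens == ['CORR', 'R0123', 'AGIPD04', 'S00002']

    runno = int(tokens[1][1:])   # 123 -- run number, 'R' prefix stripped
    modno = int(tokens[2][-2:])  # 4   -- module no., last two aggregator digits
    agg = tokens[2]              # 'AGIPD04' -- data aggregator name
    seqno = int(tokens[3][1:])   # 2   -- sequence number, 'S' prefix stripped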
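
The corrected data is published under a new CORR source name derived from the
raw one; the raw name keeps working through the soft links added in the diff.
A sketch of the renaming, assuming a typical `h5_data_path` template (the
exact value depends on the detector configuration):

    h5_data_path = "SPB_DET_AGIPD1M-1/DET/{}CH0:xtdf"  # assumed example template

    agipd_base = h5_data_path.format(4)  # 'SPB_DET_AGIPD1M-1/DET/4CH0:xtdf'
    karabo_id, _, channel = agipd_base.split('/')
    channel = channel.partition(":")[0] + ":output"    # '4CH0:output'
    agipd_corr_source = f"{karabo_id}/CORR/{channel}"
    # agipd_corr_source == 'SPB_DET_AGIPD1M-1/CORR/4CH0:output'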
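
The deleted `copy_and_sanitize_non_cal_data` built the `INDEX/.../image/first`
and `.../count` datasets by hand; the new path only computes per-train counts
with `np.unique` and delegates writing the index to `DataFile`. For ordered
train IDs the two quantities relate as below (a sketch; it assumes `first` is
the exclusive cumulative sum of `count`, which is how the EuXFEL file format
pairs them):

    import numpy as np

    # Per-image train IDs for the corrected frames, in file order.
    train_ids = np.array([1001, 1001, 1001, 1002, 1003, 1003])

    trains, count = np.unique(train_ids, return_counts=True)
    # trains -> [1001, 1002, 1003]; count -> [3, 1, 2]

    # INDEX/.../image/first: offset of each train's first frame.
    first = np.concatenate(([0], np.cumsum(count)[:-1]))
    # first -> [0, 3, 4]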