From fe313267915a2949e7da7476fd954372c279e48c Mon Sep 17 00:00:00 2001 From: Egor Sobolev <egor.sobolev@xfel.eu> Date: Sun, 25 Feb 2024 16:34:21 +0100 Subject: [PATCH] Refactor write_file on AgipdCorrections to create indexes using the DataFile instead of copying them from the raw data. For compatibility, it still copies some xtdf groups --- src/cal_tools/agipdlib.py | 88 ++++++++++++++++++++++++++++++--------- 1 file changed, 68 insertions(+), 20 deletions(-) diff --git a/src/cal_tools/agipdlib.py b/src/cal_tools/agipdlib.py index 348d621c9..4bb13fdbd 100644 --- a/src/cal_tools/agipdlib.py +++ b/src/cal_tools/agipdlib.py @@ -750,11 +750,7 @@ class AgipdCorrections: :param ofile_name: Name of output file including path :param i_proc: Index of shared memory array """ - - module_idx = int(file_name.split('/')[-1].split('-')[2][-2:]) - agipd_base = f'INSTRUMENT/{self.h5_data_path}/'.format(module_idx) - idx_base = self.h5_index_path.format(module_idx) - data_path = f'{agipd_base}/image' + from .files import DataFile # Obtain a shallow copy of the pointer map to allow for local # changes in this method. @@ -767,23 +763,75 @@ class AgipdCorrections: n_img = data_dict['nImg'][0] if n_img == 0: return - trains = data_dict['trainId'][:n_img] - - # Re-cast fields in-place, i.e. using the same memory region. - for field, dtype in self.recast_image_fields.items(): - data_dict[field] = cast_array_inplace(data_dict[field], dtype) - - with h5py.File(ofile_name, "w") as outfile: - # Copy any other data from the input file. - # This includes indexes, so it's important that the corrected data - # we write is aligned with the raw data. 
- with h5py.File(file_name, "r") as infile: + self.copy_and_sanitize_non_cal_data( + infile, outfile, agipd_base, idx_base, trains + ) + + dc = H5File(file_name) + + # make index for corrected images + trains, count = np.unique(data_dict['trainId'][:n_img], + return_counts=True) + + # backward compatibility BEGIN + # make index the same as in raw file + raw_trains = dc.files[0].train_ids # zero trainId are removed + image_count = np.zeros(len(raw_trains), dtype=int) + idx = np.searchsorted(raw_trains, trains, + sorter=np.argsort(raw_trains)) + image_count[idx] = count + # backward compatibility END + + # parse filename and get parameters + out_folder, fname = os.path.split(ofile_name) + tokens = os.path.splitext(fname)[0].split('-') + runno = int(tokens[1][1:]) + modno = int(tokens[2][-2:]) + agg = tokens[2] + seqno = int(tokens[3][1:]) + + agipd_base = self.h5_data_path.format(modno) + karabo_id, _, channel = agipd_base.split('/') + agipd_corr_source = f"{karabo_id}/CORR/{channel}" + + agipd_corr_channels = { + "image": image_count, + } + + instrument_channels = [f"{agipd_corr_source}/{ch}" + for ch in agipd_corr_channels] + + # backward compatibility BEGIN + copy_channels = { + ch: dc.files[0].get_index(agipd_base, ch)[1] + for ch in ["detector", "header", "trailer"] + } + instrument_channels += [f"{agipd_base}/{ch}" for ch in copy_channels] + # backward compatibility END + + with DataFile.from_details(out_folder, agg, runno, seqno) as outfile: + outfile.create_metadata( + like=dc, instrument_channels=instrument_channels) + outfile.create_index(raw_trains, from_file=dc.files[0]) + + agipd_src = outfile.create_instrument_source(agipd_corr_source) + agipd_src.create_index(**agipd_corr_channels) # All corrected data goes in a /INSTRUMENT/.../image group - image_grp = outfile[data_path] + image_grp = agipd_src.require_group("image") + image_index_grp = agipd_src.get_index_group("image") + + # backward compatibility BEGIN + agipd_legacy = 
outfile.create_instrument_source(agipd_base) + # these two lines must go after `agipd_base` instrument source + # creation, otherwise it tries to create an existing h5 group + outfile[f"INDEX/{agipd_base}/image"] = image_index_grp + outfile[f"INSTRUMENT/{agipd_base}/image"] = image_grp + + agipd_legacy.create_index(**copy_channels) + for ch in copy_channels: + dc.files[0].file.copy( + f"INSTRUMENT/{agipd_base}/{ch}", agipd_legacy, ch, + without_attrs=True + ) + # backward compatibility END # Set up all the datasets before filling them. This puts the # metadata about the datasets together at the start of the file, -- GitLab