From 2049c42562de8a0472626a2d00b6d4c187945649 Mon Sep 17 00:00:00 2001
From: Philipp Schmidt <philipp.schmidt@xfel.eu>
Date: Mon, 16 May 2022 14:45:07 +0200
Subject: [PATCH] Reorder writes in LPD correct for faster access

---
 notebooks/LPD/LPD_Correct_Fast.ipynb |  7 +++----
 src/cal_tools/files.py               | 19 +++++++++++--------
 2 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/notebooks/LPD/LPD_Correct_Fast.ipynb b/notebooks/LPD/LPD_Correct_Fast.ipynb
index c9e80f472..bbd744301 100644
--- a/notebooks/LPD/LPD_Correct_Fast.ipynb
+++ b/notebooks/LPD/LPD_Correct_Fast.ipynb
@@ -423,22 +423,21 @@
     "                train_ids=dc.train_ids,\n",
     "                timestamps=fa.file['INDEX/timestamp'][sel_trains],\n",
     "                flags=fa.validity_flag[sel_trains])\n",
+    "            outp_file.create_metadata(like=dc, instrument_channels=(f'{outp_source_name}/image',))\n",
     "            \n",
     "            outp_source = outp_file.create_instrument_source(outp_source_name)\n",
     "            \n",
     "            outp_source.create_index(image=image_counts)\n",
-    "            outp_source.create_key('image.data', data=out_data,\n",
-    "                                   chunks=(chunks_data, 256, 256))\n",
     "            outp_source.create_key('image.cellId', data=in_cell,\n",
     "                                   chunks=(chunks_ids,))\n",
     "            outp_source.create_key('image.pulseId', data=in_pulse,\n",
     "                                   chunks=(chunks_ids,))\n",
+    "            outp_source.create_key('image.data', data=out_data,\n",
+    "                                   chunks=(chunks_data, 256, 256))\n",
     "            write_compressed_frames(\n",
     "                out_gain, outp_file, f'INSTRUMENT/{outp_source_name}/image/gain', comp_threads=8)\n",
     "            write_compressed_frames(\n",
     "                out_mask, outp_file, f'INSTRUMENT/{outp_source_name}/image/mask', comp_threads=8)\n",
-    "            \n",
-    "            outp_file.create_metadata(like=dc)\n",
     "    write_time = perf_counter() - start\n",
     "    \n",
     "    total_time = open_time + read_time + correct_time + write_time\n",
diff --git a/src/cal_tools/files.py b/src/cal_tools/files.py
index c24a31b45..57beac95a 100644
--- a/src/cal_tools/files.py
+++ b/src/cal_tools/files.py
@@ -242,7 +242,7 @@ class DataFile(h5py.File):
                         creation_date=None, update_date=None, proposal=0,
                         run=None, sequence=None, daq_library='1.x',
                         karabo_framework='2.x', control_sources=(),
-                        instrument_sources=()):
+                        instrument_channels=()):
         """Create METADATA datasets.
 
         Args:
@@ -267,9 +267,10 @@ class DataFile(h5py.File):
             control_sources (Iterable, optional): Control sources in
                 this file, sources created via create_control_source are
                 automatically included.
-            instrument_sources (Iterable, optional): Instrument sources
-                in this file, sources created via
-                create_instrument_source are automatically included.
+            instrument_channels (Iterable, optional): Instrument
+                channels (source and first component of data hash) in
+                this file; channels of sources created via
+                create_instrument_source are automatically included.
 
         Returns:
             None
@@ -307,11 +308,13 @@ class DataFile(h5py.File):
         sources = {name: 'CONTROL'
                    for name in chain(self.__control_sources, control_sources)}
 
-        # Expand known and specified instrument sources with their
-        # channels.
+        # Add in the specified instrument data channels.
+        sources.update({full_channel: 'INSTRUMENT'
+                        for full_channel in instrument_channels})
+
+        # Add in channels already in the file, if not already passed.
         sources.update({f'{name}/{channel}': 'INSTRUMENT'
-                        for name in chain(self.__instrument_sources,
-                                          instrument_sources)
+                        for name in self.__instrument_sources
                         for channel in self[f'INSTRUMENT/{name}']})
 
         source_names = sorted(sources.keys())
-- 
GitLab