From d876e20fa6becf095c38debb40450782860b94f9 Mon Sep 17 00:00:00 2001
From: Philipp Schmidt <philipp.schmidt@xfel.eu>
Date: Wed, 18 May 2022 17:40:58 +0200
Subject: [PATCH] Create virtual CXI files by sequence rather than by job chunk

---
 notebooks/LPD/LPD_Correct_Fast.ipynb | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/notebooks/LPD/LPD_Correct_Fast.ipynb b/notebooks/LPD/LPD_Correct_Fast.ipynb
index 6c6990439..13e756eec 100644
--- a/notebooks/LPD/LPD_Correct_Fast.ipynb
+++ b/notebooks/LPD/LPD_Correct_Fast.ipynb
@@ -57,7 +57,7 @@
     "overwrite = True  # set to True if existing data should be overwritten\n",
     "chunks_data = 1  # HDF chunk size for pixel data in number of frames.\n",
     "chunks_ids = 32  # HDF chunk size for cellId and pulseId datasets.\n",
-    "create_virtual_cxi_in = ''  # Folder to create virtual CXI files in (for each job's chunk)\n",
+    "create_virtual_cxi_in = ''  # Folder to create virtual CXI files in (for each sequence).\n",
     "\n",
     "# Parallelization options\n",
     "sequences_per_node = 1  # Sequence files to process per node\n",
@@ -608,12 +608,17 @@
     "    vcxi_folder = Path(create_virtual_cxi_in)\n",
     "    vcxi_folder.mkdir(parents=True, exist_ok=True)\n",
     "    \n",
-    "    if not sequences or sequences == [-1]:\n",
-    "        seqs = 'all'\n",
-    "    else:\n",
-    "        seqs = '_'.join(str(s) for s in sequences)\n",
+    "    def sort_files_by_seq(by_seq, outp_path):\n",
+    "        by_seq.setdefault(int(outp_path.stem[-5:]), []).append(outp_path)\n",
+    "        return by_seq\n",
     "    \n",
-    "    det.write_virtual_cxi(vcxi_folder / f'r{run}_seqs_{seqs}.cxi')"
+    "    from functools import reduce\n",
+    "    reduce(sort_files_by_seq, output_paths, output_by_seq := {})\n",
+    "        \n",
+    "    for seq_number, seq_output_paths in output_by_seq.items():\n",
+    "        # Create data collection and detector components only for this sequence.\n",
+    "        det = LPD1M(xd.DataCollection.from_paths(seq_output_paths), detector_name=karabo_id)\n",
+    "        det.write_virtual_cxi(vcxi_folder / f'VCXI-LPD-R{run:04d}-S{seq_number:05d}.cxi')"
    ]
   }
  ],
-- 
GitLab