From 50a65338c645685f8b17c5ddb6fe0cd61f104568 Mon Sep 17 00:00:00 2001
From: Laurent Mercadier <laurent.mercadier@xfel.eu>
Date: Mon, 15 Jul 2024 13:21:57 +0200
Subject: [PATCH] Modify load() to look at both raw and proc folders, add parallelize option

---
 src/toolbox_scs/load.py | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/src/toolbox_scs/load.py b/src/toolbox_scs/load.py
index 99ef299..3aa0c0d 100644
--- a/src/toolbox_scs/load.py
+++ b/src/toolbox_scs/load.py
@@ -39,7 +39,6 @@ log = logging.getLogger(__name__)
 
 def load(proposalNB=None, runNB=None,
          fields=None,
-         subFolder='raw',
          display=False,
          validate=False,
          subset=None,
@@ -48,6 +47,7 @@ def load(proposalNB=None, runNB=None,
          extract_xgm=True,
          extract_bam=True,
          bunchPattern='sase3',
+         parallelize=True,
          ):
     """
     Load a run and extract the data. Output is an xarray with aligned
@@ -66,10 +66,6 @@ def load(proposalNB=None, runNB=None,
         {"extra": {'source': 'SCS_CDIFFT_MAG/SUPPLY/CURRENT',
                    'key': 'actual_current.value',
                    'dim': None}}
-    subFolder: str
-        'raw', 'proc' (processed) or 'all' (both 'raw' and 'proc') to access
-        data from either or both of those folders. If 'all' is used, sources
-        present in 'proc' overwrite those in 'raw'. The default is 'raw'.
     display: bool
         whether to show the run.info or not
     validate: bool
@@ -97,10 +93,13 @@ def load(proposalNB=None, runNB=None,
     bunchPattern: str
         bunch pattern used to extract the Fast ADC pulses.
         A string or a dict as in::
-
         {'FFT_PD2': 'sase3', 'ILH_I0': 'scs_ppl'}
 
         Ignored if extract_digitizers=False.
+    parallelize: bool
+        passed to EXtra-Data's open_run(): whether to open the files in
+        parallel. Set to False where creating child processes is not
+        allowed (e.g. in a daemonized multiprocessing.Process).
 
     Returns
     -------
@@ -114,8 +113,8 @@ def load(proposalNB=None, runNB=None,
     >>> run, data = tb.load(2212, 208, ['SCS_SA3', 'MCP2apd', 'nrj'])
 
     """
-    runFolder = find_run_path(proposalNB, runNB, subFolder)
-    run = ed.RunDirectory(runFolder)
+    runFolder = find_run_path(proposalNB, runNB, 'raw')
+    run = ed.open_run(proposalNB, runNB, data='all', parallelize=parallelize)
     if subset is not None:
         run = run.select_trains(subset)
     if fields is None:
-- 
GitLab