From 4ae0f38efa7f773cda5a2f46d2b20805986d2a72 Mon Sep 17 00:00:00 2001
From: Rafael Gort <rafael.gort@xfel.eu>
Date: Thu, 1 Oct 2020 12:44:21 +0200
Subject: [PATCH] Updated documentation and adapted test suites

---
 src/toolbox_scs/detectors/dssc.py            | 32 +++++---
 src/toolbox_scs/detectors/dssc_processing.py | 19 +++--
 src/toolbox_scs/test/test_dssc_cls.py        | 81 ++++++++++++++++----
 3 files changed, 95 insertions(+), 37 deletions(-)

diff --git a/src/toolbox_scs/detectors/dssc.py b/src/toolbox_scs/detectors/dssc.py
index a5aa64b..58cb59b 100644
--- a/src/toolbox_scs/detectors/dssc.py
+++ b/src/toolbox_scs/detectors/dssc.py
@@ -127,6 +127,8 @@ class DSSCBinner:
                         xgm_threshold=(0, np.inf), normevery=1):
         """
         creates a mask for dssc frames according to measured xgm intensity.
+        Once such a mask has been created, it is applied during the data
+        reduction to drop pulses whose xgm intensity is out of bounds.
         """
         fpt = self.info['frames_per_train']
         n_trains = self.info['number_of_trains']
@@ -188,10 +190,20 @@ class DSSCBinner:
                      xgm_normalization=False, normevery=1
                     ):
         """
-        Load and bin dssc data according to self.bins
+        Load and bin dssc data according to self.bins. This method returns
+        nothing; the reduced data is written directly to file by the worker
+        processes.
 
         Parameters
         ----------
+        modules: list of ints
+            a list containing the module numbers that should be processed. If
+            empty, all modules are processed.
+        filepath: str
+            the path where the files containing the reduced data should be
+            stored.
+        chunksize: int
+            number of trains to load and process in one iteration.
         backend: str
             joblib multiprocessing backend to be used. At the moment it can be
             any of joblibs standard backends: 'loky' (default),
@@ -204,20 +216,16 @@ class DSSCBinner:
             cpu's.
             Note that when using the default backend there is no need to adjust 
             this parameter with the current implementation.
-        modules: list of ints
-            a list containing the module numbers that should be processed. If
-            empty, all modules are processed.
-        chunksize: int
-            The number of trains that should be read in one iterative step.
         dark_image: xarray.DataArray
-            DataArray with dimensions compatible with the loaded dssc data.
+            DataArray with dimensions compatible with the loaded dssc data.
+            If given, it is subtracted from the dssc data before binning.
+            The dark image must have the dimensions module, trainId, pulse,
+            x and y.
+        xgm_normalization: boolean
+            if True, the dssc data is normalized by the corresponding xgm
+            values before binning.
         normevery: int
             integer indicating which out of normevery frame will be normalized.
-
-        Returns
-        -------
-        data: xarray.DataArray
-            DataArray containing the processed data.
         """
         log.info("Bin data according to binners")
         log.info(f'Process {chunksize} trains per chunk')
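
For reviewers, a minimal usage sketch of the updated file-based interface,
assembled from the adapted tests below. It assumes the tests'
`import toolbox_scs.detectors as tbdet` alias; proposal/run numbers and
paths are illustrative:

    import numpy as np
    import toolbox_scs.detectors as tbdet  # assumed alias, as in the tests

    proposal_nb, run_nb = 2530, 49  # example values from the test suite

    run_info = tbdet.load_dssc_info(proposal_nb, run_nb)
    fpt = run_info['frames_per_train']
    n_trains = run_info['number_of_trains']
    trainIds = run_info['trainIds']

    # bin all trains into a single bucket, keep pulses resolved
    binner1 = tbdet.create_dssc_bins("trainId", trainIds, np.zeros(n_trains))
    pulses = np.linspace(0, fpt - 1, fpt, dtype=int)
    binner2 = tbdet.create_dssc_bins("pulse", pulses, pulses)

    bin_obj = tbdet.DSSCBinner(proposal_nb, run_nb,
                               binners={'trainId': binner1, 'pulse': binner2})

    # returns nothing; each worker writes ./tmp/run_49_module<N>.h5
    bin_obj.process_data(modules=[2], filepath='./tmp/', chunksize=248)

    # combine the per-module files and reload the reduced data
    run_formatted = tbdet.DSSCFormatter('./tmp/')
    run_formatted.combine_files()
    run_formatted.save_formatted_data(f'./tmp/run_{run_nb}_formatted.h5')
    data = tbdet.load_xarray(f'./tmp/run_{run_nb}_formatted.h5')
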
diff --git a/src/toolbox_scs/detectors/dssc_processing.py b/src/toolbox_scs/detectors/dssc_processing.py
index 256e13f..02fb10c 100644
--- a/src/toolbox_scs/detectors/dssc_processing.py
+++ b/src/toolbox_scs/detectors/dssc_processing.py
@@ -140,8 +140,8 @@ def process_dssc_data(proposal, run_nr, module, chunksize, info, dssc_binners,
                       path='./',
                       pulsemask=None,
                       dark_image=None,
-                      xgm_normalization=False,
                       xgm_mnemonic='SCS_SA3',
+                      xgm_normalization=False,
                       normevery=1
                      ):
     """
@@ -158,18 +158,21 @@ def process_dssc_data(proposal, run_nr, module, chunksize, info, dssc_binners,
     module : int
         DSSC module to process
     chunksize : int
-        number of trains to process simultaneously
+        number of trains to load simultaneously
     info: dictionary
         dictionary containing keys 'dims', 'frames_per_train', 'total_frames', 
-        'trainIds'
+        'trainIds', 'number_of_trains'.
     dssc_binners: dictionary
         a dictionary containing binner objects created by the tbdet member
         function "create_binner()"
+    path : str
+        location where the .h5 files containing the binned data will be
+        stored.
     pulsemask : numpy.ndarray
         array of booleans to be used to mask dssc data according to xgm data.
     dark_image: xarray.DataArray
         an xarray dataarray with matching coordinates with the loaded data. If
-        dark_image is not None it will be substracted from each individual dssc
+        dark_image is not None it will be subtracted from each individual dssc
         frame.
     xgm_normalization: bool
         true if the data should be divided by the corresponding xgm value.
@@ -209,7 +212,7 @@ def process_dssc_data(proposal, run_nr, module, chunksize, info, dssc_binners,
         chunk_hist = xr.full_like(chunk_data[:,:,0,0], fill_value=1)
 
         # ---------------------------------------------------------------------
-        # optional blocks -> ToDo: see merge request !89
+        # optional blocks -> ToDo: see merge request !87
         # ---------------------------------------------------------------------
         # option 1: prefiltering -> xgm pulse masking
         if pulsemask is not None:
@@ -218,9 +221,9 @@ def process_dssc_data(proposal, run_nr, module, chunksize, info, dssc_binners,
             chunk_data = chunk_data.where(pulsemask)
             chunk_hist = chunk_hist.where(pulsemask)
 
-        # option 2: substraction of dark image/s
+        # option 2: subtraction of dark image(s)
         if dark_image is not None:
-            log.debug(f'Module {module}: substract dark')
+            log.debug(f'Module {module}: subtract dark')
             chunk_data.values = chunk_data.values - dark_image.values
             # slower: using xarray directly
             #chunk_data = chunk_data - dark_image
@@ -267,7 +270,7 @@ def process_dssc_data(proposal, run_nr, module, chunksize, info, dssc_binners,
     module_data = module_data.transpose('trainId', 'pulse', 'x', 'y')
     module_data.attrs['module'] = module
 
-    log.info(f'saving module {module}')
+    log.debug(f'saving module {module}')
     if not os.path.isdir(path):
         os.mkdir(path)
     fname = f'run_{run_nr}_module{module}.h5'
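
For reference, a simplified, hypothetical helper mirroring the optional
per-chunk blocks above; `chunk_data` and `chunk_hist` stand in for the
DataArrays used inside process_dssc_data, and the xgm-normalization branch
is omitted because its implementation is not visible in this hunk:

    def apply_chunk_options(chunk_data, chunk_hist,
                            pulsemask=None, dark_image=None):
        """Hypothetical sketch of the optional per-chunk steps.

        chunk_data, chunk_hist : xarray.DataArray
        pulsemask : numpy.ndarray of bool, optional
        dark_image : xarray.DataArray, optional
        """
        # option 1: drop frames whose xgm intensity was out of bounds
        if pulsemask is not None:
            chunk_data = chunk_data.where(pulsemask)
            chunk_hist = chunk_hist.where(pulsemask)
        # option 2: numpy-level dark subtraction; assigning to .values is
        # faster than the equivalent xarray expression, as noted above
        if dark_image is not None:
            chunk_data.values = chunk_data.values - dark_image.values
        return chunk_data, chunk_hist
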
diff --git a/src/toolbox_scs/test/test_dssc_cls.py b/src/toolbox_scs/test/test_dssc_cls.py
index c43b45f..e548a54 100644
--- a/src/toolbox_scs/test/test_dssc_cls.py
+++ b/src/toolbox_scs/test/test_dssc_cls.py
@@ -20,7 +20,8 @@ suites = {"no-processing": (
                 "test_create",
                 ),
           "processing": (
-                "test_normalization_all2",
+                "test_processing_quick",
+                # "test_normalization_all",
                 )
           }
 
@@ -42,13 +43,7 @@ class TestDSSC(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         log_root.info("Start global setup")
-        # ---------------------------------------------------------------------
-        # global test settings
-        # ---------------------------------------------------------------------
-
         setup_tmp_dir()
-
-        # ---------------------------------------------------------------------
         log_root.info("Finished global setup, start tests")
 
     @classmethod
@@ -95,12 +90,12 @@ class TestDSSC(unittest.TestCase):
                   "'/gpfs/exfel/exp/SCS/201901/p002212/raw/r2354'"
         self.assertEqual(str(cm.exception), err_msg)
 
-
-    def test_normalization_all2(self):
+    def test_processing_quick(self):
         proposal_nb = 2530
-
-        # dark
+        module_list = [2]
         run_nb = 49
+
+        # load run info and create binners over trains and pulses
         run_info = tbdet.load_dssc_info(proposal_nb, run_nb)
         fpt = run_info['frames_per_train']
         n_trains = run_info['number_of_trains']
@@ -117,9 +112,58 @@ class TestDSSC(unittest.TestCase):
                                          buckets_pulse)
         binners = {'trainId': binner1, 'pulse': binner2}
         bin_obj = tbdet.DSSCBinner(proposal_nb, run_nb, binners=binners)
-        dark = bin_obj.process_data(modules=[15], chunksize=248)
+        bin_obj.process_data(
+                    modules=module_list, filepath='./tmp/', chunksize=248)
+        filename = f'./tmp/run_{run_nb}_module{module_list[0]}.h5'
+        self.assertTrue(os.path.isfile(filename))
+
+        run_formatted = tbdet.DSSCFormatter('./tmp/')
+        run_formatted.combine_files()
+        attrs = {'run_type': 'useful description',
+                 'comment': 'blabla',
+                 'run_number': run_nb}
+        run_formatted.add_attributes(attrs)
+        run_formatted.save_formatted_data(
+                f'./tmp/run_{run_nb}_formatted.h5')
+        data = tbdet.load_xarray(f'./tmp/run_{run_nb}_formatted.h5')
+        self.assertIsNotNone(data)
+
+    def test_normalization_all(self):
+        proposal_nb = 2530
+        module_list = [2]
+
+        # dark
+        run_nb = 49
+        run_info = tbdet.load_dssc_info(proposal_nb, run_nb)
+        fpt = run_info['frames_per_train']
+        n_trains = run_info['number_of_trains']
+        trainIds = run_info['trainIds']
 
-        # run to normalize
+        buckets_train = np.zeros(n_trains)
+
+        binner1 = tbdet.create_dssc_bins("trainId",
+                                         trainIds,
+                                         buckets_train)
+        binner2 = tbdet.create_dssc_bins("pulse",
+                                         np.linspace(0, fpt-1, fpt, dtype=int),
+                                         np.linspace(0, fpt-1, fpt, dtype=int))
+        binners = {'trainId': binner1, 'pulse': binner2}
+        bin_obj = tbdet.DSSCBinner(proposal_nb, run_nb, binners=binners)
+        bin_obj.process_data(
+                    modules=module_list, filepath='./tmp/', chunksize=248)
+        filename = f'./tmp/run_{run_nb}_module{module_list[0]}.h5'
+        self.assertTrue(os.path.isfile(filename))
+
+        run_formatted = tbdet.DSSCFormatter('./tmp/')
+        run_formatted.combine_files()
+        attrs = {'run_type': 'useful description',
+                 'comment': 'blabla',
+                 'run_number': run_nb}
+        run_formatted.add_attributes(attrs)
+        run_formatted.save_formatted_data(
+                f'./tmp/run_{run_nb}_formatted.h5')
+
+        # main run
         run_nb = 50
         run_info = tbdet.load_dssc_info(proposal_nb, run_nb)
         fpt = run_info['frames_per_train']
@@ -138,15 +182,18 @@ class TestDSSC(unittest.TestCase):
         binners = {'trainId': binner1, 'pulse': binner2}
         bin_obj = tbdet.DSSCBinner(proposal_nb, run_nb, binners=binners)
 
-        bin_params = {'modules':[15],
+        dark = tbdet.load_xarray('./tmp/run_49_formatted.h5')
+        bin_params = {'modules':module_list,
                       'chunksize':248,
+                      'filepath':'./tmp/',
                       'xgm_normalization':True,
                       'normevery':2,
-                      'dark_image':dark['data'][:,0,0,:,:]
+                      'dark_image':dark['data']
                      }
 
-        data = bin_obj.process_data(**bin_params)
-        self.assertIsNotNone(data.data)
+        bin_obj.process_data(**bin_params)
+        filename = f'./tmp/run_{run_nb}_module{module_list[0]}.h5'
+        self.assertTrue(os.path.isfile(filename))
 
 
 
-- 
GitLab