Skip to content
Snippets Groups Projects
Commit d220cc94 authored by Rafael Gort's avatar Rafael Gort
Browse files

Merge branch 'cherry-pick-4ae0f38e' into 'DSSC_methods_AS'

Updated documentation and adapted test suites

See merge request !93
parents 40d8b717 26a4c163
No related branches found
No related tags found
2 merge requests!93Updated documentation and adapted test suites,!91WIP: Dssc methods as
......@@ -132,6 +132,8 @@ class DSSCBinner:
xgm_threshold=(0, np.inf), normevery=1):
"""
creates a mask for dssc frames according to measured xgm intensity.
Once such a mask has been constructed, it will be used in the data
reduction process to drop out-of-bounds pulses.
"""
fpt = self.info['frames_per_train']
n_trains = self.info['number_of_trains']
......@@ -193,10 +195,20 @@ class DSSCBinner:
xgm_normalization=False, normevery=1
):
"""
Load and bin dssc data according to self.bins
Load and bin dssc data according to self.bins. No data is returned by
this method. The condensed data is written to file by the worker
processes directly.
Parameters
----------
modules: list of ints
a list containing the module numbers that should be processed. If
empty, all modules are processed.
filepath: str
the path where the files containing the reduced data should be
stored.
chunksize: int
The number of trains that should be read in one iterative step.
backend: str
joblib multiprocessing backend to be used. At the moment it can be
any of joblib's standard backends: 'loky' (default),
......@@ -209,20 +221,16 @@ class DSSCBinner:
CPUs.
Note that when using the default backend there is no need to adjust
this parameter with the current implementation.
modules: list of ints
a list containing the module numbers that should be processed. If
empty, all modules are processed.
chunksize: int
The number of trains that should be read in one iterative step.
dark_image: xarray.DataArray
DataArray with dimensions compatible with the loaded dssc data.
DataArray with dimensions compatible with the loaded dssc data. If
given, it will be subtracted from the dssc data before the binning.
The dark image needs to be of dimension module, trainId, pulse, x
and y.
xgm_normalization: boolean
if true, the dssc data is normalized by the xgm data before the
binning.
normevery: int
    integer indicating that only every normevery-th frame will be
    normalized.
Returns
-------
data: xarray.DataArray
DataArray containing the processed data.
"""
log.info("Bin data according to binners")
log.info(f'Process {chunksize} trains per chunk')
......
......@@ -140,8 +140,8 @@ def process_dssc_data(proposal, run_nr, module, chunksize, info, dssc_binners,
path='./',
pulsemask=None,
dark_image=None,
xgm_normalization=False,
xgm_mnemonic='SCS_SA3',
xgm_normalization=False,
normevery=1
):
"""
......@@ -158,18 +158,21 @@ def process_dssc_data(proposal, run_nr, module, chunksize, info, dssc_binners,
module : int
DSSC module to process
chunksize : int
number of trains to process simultaneously
number of trains to load simultaneously
info: dictionary
dictionary containing keys 'dims', 'frames_per_train', 'total_frames',
'trainIds'
'trainIds', 'number_of_trains'.
dssc_binners: dictionary
a dictionary containing binner objects created by the tbdet member
function "create_binner()"
path : str
location in which the .h5 files, containing the binned data, should
be stored.
pulsemask : numpy.ndarray
array of booleans to be used to mask dssc data according to xgm data.
dark_image: xarray.DataArray
an xarray dataarray with matching coordinates with the loaded data. If
dark_image is not None it will be substracted from each individual dssc
dark_image is not None it will be subtracted from each individual dssc
frame.
xgm_normalization: bool
true if the data should be divided by the corresponding xgm value.
......@@ -209,7 +212,7 @@ def process_dssc_data(proposal, run_nr, module, chunksize, info, dssc_binners,
chunk_hist = xr.full_like(chunk_data[:,:,0,0], fill_value=1)
# ---------------------------------------------------------------------
# optional blocks -> ToDo: see merge request !89
# optional blocks -> ToDo: see merge request !87
# ---------------------------------------------------------------------
# option 1: prefiltering -> xgm pulse masking
if pulsemask is not None:
......@@ -218,9 +221,9 @@ def process_dssc_data(proposal, run_nr, module, chunksize, info, dssc_binners,
chunk_data = chunk_data.where(pulsemask)
chunk_hist = chunk_hist.where(pulsemask)
# option 2: substraction of dark image/s
# option 2: subtraction of dark image/s
if dark_image is not None:
log.debug(f'Module {module}: substract dark')
log.debug(f'Module {module}: subtract dark')
chunk_data.values = chunk_data.values - dark_image.values
# slower: using xarray directly
#chunk_data = chunk_data - dark_image
......@@ -267,7 +270,7 @@ def process_dssc_data(proposal, run_nr, module, chunksize, info, dssc_binners,
module_data = module_data.transpose('trainId', 'pulse', 'x', 'y')
module_data.attrs['module'] = module
log.info(f'saving module {module}')
log.debug(f'saving module {module}')
if not os.path.isdir(path):
os.mkdir(path)
fname = f'run_{run_nr}_module{module}.h5'
......
......@@ -20,7 +20,8 @@ suites = {"no-processing": (
"test_create",
),
"processing": (
"test_normalization_all2",
"test_processing_quick",
#"test_normalization_all",
)
}
......@@ -42,13 +43,7 @@ class TestDSSC(unittest.TestCase):
@classmethod
def setUpClass(cls):
log_root.info("Start global setup")
# ---------------------------------------------------------------------
# global test settings
# ---------------------------------------------------------------------
setup_tmp_dir()
# ---------------------------------------------------------------------
log_root.info("Finished global setup, start tests")
@classmethod
......@@ -95,12 +90,12 @@ class TestDSSC(unittest.TestCase):
"'/gpfs/exfel/exp/SCS/201901/p002212/raw/r2354'"
self.assertEqual(str(cm.exception), err_msg)
def test_normalization_all2(self):
def test_processing_quick(self):
proposal_nb = 2530
# dark
module_list=[2]
run_nb = 49
run_info = tbdet.load_dssc_info(proposal_nb, run_nb)
fpt = run_info['frames_per_train']
n_trains = run_info['number_of_trains']
......@@ -117,9 +112,58 @@ class TestDSSC(unittest.TestCase):
buckets_pulse)
binners = {'trainId': binner1, 'pulse': binner2}
bin_obj = tbdet.DSSCBinner(proposal_nb, run_nb, binners=binners)
dark = bin_obj.process_data(modules=[15], chunksize=248)
bin_obj.process_data(
modules=module_list, filepath='./tmp/', chunksize=248)
filename = f'./tmp/run_{run_nb}_module{module_list[0]}.h5'
self.assertTrue(os.path.isfile(filename))
run_formatted = tbdet.DSSCFormatter('./tmp/')
run_formatted.combine_files()
attrs = {'run_type':'useful description',
'comment':'blabla',
'run_number':run_nb}
run_formatted.add_attributes(attrs)
run_formatted.save_formatted_data(
f'./tmp/run_{run_nb}_formatted.h5')
data = tbdet.load_xarray(f'./tmp/run_{run_nb}_formatted.h5')
self.assertIsNotNone(data)
def test_normalization_all(self):
proposal_nb = 2530
module_list=[2]
# dark
run_nb = 49
run_info = tbdet.load_dssc_info(proposal_nb, run_nb)
fpt = run_info['frames_per_train']
n_trains = run_info['number_of_trains']
trainIds = run_info['trainIds']
# run to normalize
buckets_train = np.zeros(n_trains)
binner1 = tbdet.create_dssc_bins("trainId",
trainIds,
buckets_train)
binner2 = tbdet.create_dssc_bins("pulse",
np.linspace(0,fpt-1,fpt, dtype=int),
np.linspace(0,fpt-1,fpt, dtype=int))
binners = {'trainId': binner1, 'pulse': binner2}
bin_obj = tbdet.DSSCBinner(proposal_nb, run_nb, binners=binners)
bin_obj.process_data(
modules=module_list, filepath='./tmp/', chunksize=248)
filename = f'./tmp/run_{run_nb}_module{module_list[0]}.h5'
self.assertTrue(os.path.isfile(filename))
run_formatted = tbdet.DSSCFormatter('./tmp/')
run_formatted.combine_files()
attrs = {'run_type':'useful description',
'comment':'blabla',
'run_number':run_nb}
run_formatted.add_attributes(attrs)
run_formatted.save_formatted_data(
f'./tmp/run_{run_nb}_formatted.h5')
# main run
run_nb = 50
run_info = tbdet.load_dssc_info(proposal_nb, run_nb)
fpt = run_info['frames_per_train']
......@@ -138,15 +182,18 @@ class TestDSSC(unittest.TestCase):
binners = {'trainId': binner1, 'pulse': binner2}
bin_obj = tbdet.DSSCBinner(proposal_nb, run_nb, binners=binners)
bin_params = {'modules':[15],
dark = tbdet.load_xarray('./tmp/run_49_formatted.h5')
bin_params = {'modules':module_list,
'chunksize':248,
'filepath':'./tmp/',
'xgm_normalization':True,
'normevery':2,
'dark_image':dark['data'][:,0,0,:,:]
'dark_image':dark['data']
}
data = bin_obj.process_data(**bin_params)
self.assertIsNotNone(data.data)
bin_obj.process_data(**bin_params)
filename = f'./tmp/run_{run_nb}_module{module_list[0]}.h5'
self.assertTrue(os.path.isfile(filename))
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment