From 33896191ee893b783bb81ce4510db9475529d0f4 Mon Sep 17 00:00:00 2001
From: Rafael Gort <rafael.gort@xfel.eu>
Date: Sun, 3 May 2020 12:37:13 +0200
Subject: [PATCH] Added snippets for doc. Cleaned up unnecessary in-package
 use of the ed wrapper. Added logging to the dssc module.

---
 README.rst                                    |  25 +++-
 doc/index.rst                                 |  10 ++
 index.rst                                     |  13 ++
 src/toolbox_scs/__init__.py                   |   4 +-
 src/toolbox_scs/detectors/dssc.py             | 100 ++++++++++++----
 src/toolbox_scs/load.py                       | 113 ++++++++++++------
 src/toolbox_scs/test/test_detectors_common.py |   2 +-
 src/toolbox_scs/test/test_detectors_dssc.py   |  15 ++-
 src/toolbox_scs/test/test_top_level.py        |   4 +-
 src/toolbox_scs/util/data_access.py           |   5 +
 10 files changed, 223 insertions(+), 68 deletions(-)
 create mode 100644 doc/index.rst
 create mode 100644 index.rst

diff --git a/README.rst b/README.rst
index 1db37f5..eeb564d 100644
--- a/README.rst
+++ b/README.rst
@@ -11,4 +11,27 @@ be selected on the online cluster by:
 `module load exfel exfel_anaconda3`
 
 before launching the jupyter-notebook or on max-jhub by selecting the 'xfel'
-kernel instead of the 'Python 3' anaconda environement maintained by DESY.
\ No newline at end of file
+kernel instead of the 'Python 3' anaconda environment maintained by DESY.
+
+Installation
+############
+
+As long as the ToolBox is not yet part of the exfel_anaconda3 environment, it needs to be installed locally.
+
+Activate the environment mentioned above and check whether the scs_toolbox is already installed:
+
+.. code:: bash
+
+    pip show toolbox_scs
+
+If the toolbox has been installed in your home directory previously, everything is set up. Otherwise, it needs to be installed (only once). In that case, enter the following command in the directory where the *setup.py* script is located:
+
+.. code:: bash
+
+    pip install --user .
+
+If you intend to develop code in the toolbox, use the ``-e`` flag for installation. This creates a symbolic link to the source code you are working on:
+
+.. code:: bash
+
+    pip install --user -e .
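+
+To check that the installation works, the toolbox should be importable from
+python. A minimal sketch (proposal and run numbers as used in the test
+suite):
+
+.. code:: python
+
+    import toolbox_scs as tb
+
+    # open a run through the extra_data wrapper (proposal, run number)
+    run = tb.load_run(2212, 235)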
\ No newline at end of file
diff --git a/doc/index.rst b/doc/index.rst
new file mode 100644
index 0000000..50b3c80
--- /dev/null
+++ b/doc/index.rst
@@ -0,0 +1,10 @@
+``Getting started`` 
+~~~~~~~~~~~~~~~~~~~
+
+
+``How to`` 
+~~~~~~~~~~
+
+
+``Contribute`` 
+~~~~~~~~~~~~~~
diff --git a/index.rst b/index.rst
new file mode 100644
index 0000000..0f838a4
--- /dev/null
+++ b/index.rst
@@ -0,0 +1,13 @@
+The SCS Toolbox
+===============
+
+.. toctree::
+   :maxdepth: 2
+
+   doc/index.rst
+
+
+Module index
+============
+
+*to be done*
diff --git a/src/toolbox_scs/__init__.py b/src/toolbox_scs/__init__.py
index 7fe650e..5b2cc72 100644
--- a/src/toolbox_scs/__init__.py
+++ b/src/toolbox_scs/__init__.py
@@ -1,5 +1,5 @@
 from .load import (load, concatenateRuns, load_scan_variable,
-            run_by_proposal, run_by_path)
+            load_run, run_by_path)
 
 from .constants import mnemonics
 
@@ -8,7 +8,7 @@ __all__ = (
     "load",
     "concatenateRuns",
     "load_scan_variable",
-    "run_by_proposal",
+    "load_run",
     "run_by_path",
     # Classes
     # Variables
diff --git a/src/toolbox_scs/detectors/dssc.py b/src/toolbox_scs/detectors/dssc.py
index 601f01c..1bb38f1 100644
--- a/src/toolbox_scs/detectors/dssc.py
+++ b/src/toolbox_scs/detectors/dssc.py
@@ -21,7 +21,6 @@ import xarray as xr
 import pandas as pd
 
 import extra_data as ed
-from ..load import run_by_proposal as _open_run
 
 
 log = logging.getLogger(__name__)
@@ -45,7 +44,7 @@ def load_dssc_info(proposal, run_nr):
         {'dims': tuple, 'frames_per_train': int, 'total_frames': int} 
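+
+    Example
+    -------
+    A minimal sketch (proposal/run numbers as used in the test suite):
+
+    >>> info = load_dssc_info(2212, 235)
+    >>> info['frames_per_train']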
     """
 
-    module = _open_run(proposal, run_nr, include='*DSSC00*')
+    module = ed.open_run(proposal, run_nr, include='*DSSC00*')
     info = module.detector_info('SCS_DET_DSSC1M-1/DET/0CH0:xtdf')
     log.debug("Fetched information for DSSC module nr. 0.")
     return info
@@ -81,6 +80,7 @@ def calc_xgm_frame_indices(nbunches, framepattern):
         if 'dark' not in p:
             frame_indices.append(np.arange(i, frame_max, n_frames))
 
+    log.debug("Constructed coordinate array for XGM data.")
     return np.sort(np.concatenate(frame_indices))
 
 
@@ -118,14 +118,27 @@ def prepare_module_empty(scan_variable, framepattern):
         module_data[name] = empty.copy()
         module_data['sum_count_' + name] = empty_sum_count.copy()
 
+    log.debug("Prepared empty data array for single dssc module")
     return module_data
 
 
 def load_chunk_data(sel, sourcename, maxframes=None):
-    '''Load DSSC data (sel is a DataCollection or a subset of a DataCollection
-    obtained by its select_trains() method). The flattened multi-index
-    (trains+pulses) is unraveled before returning the data.
-    '''
+    """
+    Load selected DSSC data. The flattened multi-index (trains+pulses) is 
+    unraveled before returning the data.
+
+    Parameters
+    ----------
+    sel: extra_data.DataCollection
+        a DataCollection or a subset of a DataCollection obtained by its 
+        select_trains() method
+    sourcename: str
+        full name of the DSSC module source,
+        e.g. 'SCS_DET_DSSC1M-1/DET/0CH0:xtdf'
+    maxframes: int, optional
+        only keep the first maxframes pulses of each train
+
+    Returns
+    -------
+    xarray.DataArray
+        DSSC data with dimensions (trainId, pulse, x, y)
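+
+    Example
+    -------
+    A minimal sketch, with 'run' an extra_data DataCollection and the
+    sourcename as used elsewhere in this module:
+
+    >>> sel = run.select_trains(ed.by_index[:20])
+    >>> data = load_chunk_data(sel, 'SCS_DET_DSSC1M-1/DET/0CH0:xtdf')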
+    """
+    
     info = sel.detector_info(sourcename)
     fpt = info['frames_per_train']
     frames_total = info['total_frames']
@@ -142,21 +155,42 @@ def load_chunk_data(sel, sourcename, maxframes=None):
                         dict(trainId_pulse=midx)
                        ).unstack('trainId_pulse')
     data = data.transpose('trainId', 'pulse', 'x', 'y')
+
     return data.loc[{'pulse': np.s_[:maxframes]}]
 
 
 def merge_chunk_data(module_data, chunk_data, framepattern):
-    '''Merge chunk data with prepared dataset for entire module.
+    """
+    Merge chunk data with prepared dataset for entire module.
     Aligns on "scan_variable" and sums values for variables
     ['pumped', 'unpumped', 'sum_count']
     Concatenates the data along a new dimension ('tmp') and uses
-    the sum() method for automatic dtype conversion'''
+    the sum() method for automatic dtype conversion.
+    
+    Parameters
+    ----------
+    module_data: xarray.Dataset
+        module data array to be filled
+    chunk_data: xarray.Dataset
+        loaded chunk of data to be merged into module_data
+    framepattern: list of strings
+        example: ['pumped', 'unpumped', 'sum_count']
+
+    Returns
+    -------
+    module_data: xarray.Dataset
+        merged module data
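+
+    Example
+    -------
+    A rough sketch of the intended call sequence (cf. process_dssc_module);
+    'data', 'sum_count' and 'scan' as prepared there:
+
+    >>> module_data = prepare_module_empty(scan_variable, framepattern)
+    >>> chunk_data = split_frames(data, framepattern)
+    >>> chunk_data.update(split_frames(sum_count, framepattern,
+    ...                                prefix='sum_count_'))
+    >>> chunk_data['scan_variable'] = scan
+    >>> chunk_data = chunk_data.groupby('scan_variable').sum('trainId')
+    >>> module_data = merge_chunk_data(module_data, chunk_data, framepattern)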
+    """
+
     where = dict(scan_variable=chunk_data.scan_variable)
     for name in framepattern:
         for prefix in ['', 'sum_count_']:
             var = prefix + name
-            summed = xr.concat([module_data[var].loc[where], chunk_data[var]], dim='tmp').sum('tmp')
+            summed = xr.concat([module_data[var].loc[where], chunk_data[var]], 
+                               dim='tmp').sum('tmp')
             module_data[var].loc[where] = summed
+
+    log.debug("Merged chunk data.")
     return module_data
 
 
@@ -164,13 +198,25 @@ def split_frames(data, pattern, prefix=''):
     """
     Split frames according to "pattern" (possibly repeating) and average over 
     resulting splits.
-    "pattern" is a list of frame names (order matters!). Examples:
-        pattern = ['pumped', 'pumped_dark', 'unpumped', 'unpumped_dark']  # 4 
-        DSSC frames, 2 FEL pulses
-        pattern = ['pumped', 'unpumped']  # 2 FEL frames, no intermediate darks
-        pattern = ['image']  # no splitting, average over all frames
-    Returns a dataset with data variables named prefix + framename
+
+    Parameters
+    ----------
+    data: xarray.DataArray
+        the frame data to be split and averaged
+    pattern: list of str
+        A list of frame names (order matters!). Examples:
+            # 4 DSSC frames, 2 FEL pulses
+            pattern = ['pumped', 'pumped_dark', 'unpumped', 'unpumped_dark']
+            # 2 FEL frames, no intermediate darks
+            pattern = ['pumped', 'unpumped']
+            # no splitting, average over all frames
+            pattern = ['image']
+
+    Returns
+    -------
+    dataset: xarray.Dataset
+        a dataset with data variables named prefix + framename
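+
+    Example
+    -------
+    A minimal sketch, assuming alternating pumped/unpumped frames:
+
+    >>> ds = split_frames(data, ['pumped', 'unpumped'])
+    >>> ds['pumped']  # average over all 'pumped' frames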
     """
+
     n = len(pattern)
     dataset = xr.Dataset()
     for i, name in enumerate(pattern):
@@ -203,16 +249,19 @@ def process_intra_train(job):
     dims = ['pulse', 'x', 'y']
     coords = {'pulse': np.arange(fpt, dtype=int)}
     shape = [fpt, 128, 512]
-    module_data = xr.DataArray(np.zeros(shape, dtype=float), dims=dims, coords=coords)
+    module_data = xr.DataArray(np.zeros(shape, dtype=float), dims=dims, 
+                               coords=coords)
     module_data = module_data.to_dataset(name='image')
-    module_data['sum_count'] = xr.DataArray(np.zeros(fpt, dtype=int), dims=['pulse'])
+    module_data['sum_count'] = xr.DataArray(np.zeros(fpt, dtype=int), 
+                                            dims=['pulse'])
     
     ntrains = len(collection.train_ids)
     chunks = np.arange(ntrains, step=chunksize)
     if module == 15:
         pbar = tqdm(total=len(chunks))
     for start_index in chunks:
-        sel = collection.select_trains(kd.by_index[start_index:start_index + chunksize])
+        sel = collection.select_trains(
+                    kd.by_index[start_index:start_index + chunksize])
         data = load_chunk_data(sel, sourcename, maxframes)
         data = data.to_dataset(name='image')
         
@@ -220,8 +269,10 @@ def process_intra_train(job):
         data = data.sum('trainId')
 
         for var in ['image', 'sum_count']:
-            # concatenating and using the sum() method automatically takes care of dtype casting if necessary
-            module_data[var] = xr.concat([module_data[var], data[var]], dim='tmp').sum('tmp')
+            # concatenating and using the sum() method automatically takes care 
+            # of dtype casting if necessary
+            module_data[var] = xr.concat([module_data[var], data[var]], 
+                                         dim='tmp').sum('tmp')
         if module == 15:
             pbar.update(1)
     
@@ -266,6 +317,7 @@ def process_dssc_module(job):
 
     """
 
+    log.info(f"processing dssc module {module}: start")
     proposal = job['proposal']
     run_nr = job['run_nr']
     module = job['module']
@@ -300,6 +352,8 @@ def process_dssc_module(job):
                             ed.by_index[start_index:start_index + chunksize])
         nframes = sel.detector_info(sourcename)['total_frames']
         if nframes > 0:  # some chunks have no DSSC data at all
+            log.debug(f"Module {module}: "
+                      f"load trains {start_index}:{start_index + chunksize}")
             data = load_chunk_data(sel, sourcename)
             sum_count = xr.full_like(data[..., 0, 0], fill_value=1)
             if pulsemask is not None:
@@ -314,10 +368,16 @@ def process_dssc_module(job):
             # aligns on trainId, drops non-matching trains
             data['scan_variable'] = scan
             data = data.groupby('scan_variable').sum('trainId')
+
+            log.debug(f"Module {module}: "
+                      f"merge trains {start_index}:{start_index + chunksize}")
             module_data = merge_chunk_data(module_data, data, framepattern)
+
         if module == 15:
             pbar.update(1)
 
     for name in framepattern:
         module_data[name] = module_data[name] / module_data['sum_count_' + name]
+
+    log.info(f"processing module {module}: done")
     return module_data
\ No newline at end of file
diff --git a/src/toolbox_scs/load.py b/src/toolbox_scs/load.py
index 947dea0..574e106 100644
--- a/src/toolbox_scs/load.py
+++ b/src/toolbox_scs/load.py
@@ -1,17 +1,20 @@
 # -*- coding: utf-8 -*-
-""" Toolbox for SCS.
+""" 
+    Toolbox for SCS.
 
     Various utilities function to quickly process data measured at the SCS 
     instruments.
 
     Copyright (2019) SCS Team.
 """
+
 import os
 import logging
 
 import numpy as np
 import xarray as xr
-from extra_data import by_index, RunDirectory, open_run
+
+import extra_data as ed
 from extra_data.read_machinery import find_proposal
 
 from .misc.bunch_pattern import extractBunchPattern
@@ -21,32 +24,60 @@ from .util.exceptions import *
 log = logging.getLogger(__name__)
 
 
-def load(fields, runNB, proposalNB, subFolder='raw', display=False, validate=False,
-         subset=by_index[:], rois={}, useBPTable=True):
-    """ Load a run and extract the data. Output is an xarray with aligned trainIds
-
-        Inputs:
-            fields: list of mnemonic strings to load specific data such as "fastccd", "SCS_XGM",
-                or dictionnaries defining a custom mnemonic such as
-                {"extra": {'SCS_CDIFFT_MAG/SUPPLY/CURRENT', 'actual_current.value', None}}
-            runNB: (str, int) run number as integer
-            proposalNB: (str, int) of the proposal number e.g. 'p002252' or 2252
-            subFolder: (str) sub-folder from which to load the data. Use 'raw' for raw
-                data or 'proc' for processed data.
-            display: (bool) whether to show the run.info or not
-            validate: (bool) whether to run extra-data-validate or not
-            subset: a subset of train that can be load with by_index[:5] for the
-                first 5 trains
-            rois: a dictionnary of mnemonics with a list of rois definition and the desired
-                names, for example {'fastccd':{'ref':{'roi':by_index[730:890, 535:720],
-                'dim': ['ref_x', 'ref_y']}, 'sam':{'roi':by_index[1050:1210, 535:720],
-                'dim': ['sam_x', 'sam_y']}}}
-            useBPTable: If True, uses the raw bunch pattern table to extract sase pulse
-                number and indices in the trains. If false, load the data from BUNCH_DECODER
-                middle layer device.
-
-        Outputs:
-            res: an xarray DataSet with aligned trainIds
+def load(fields, runNB, proposalNB,
+         subFolder='raw',
+         display=False,
+         validate=False,
+         subset=ed.by_index[:],
+         rois={},
+         useBPTable=True):
+    """ 
+    Load a run and extract the data. Output is an xarray with aligned 
+    trainIds
+
+    Parameters
+    ----------
+    fields: list of str or dict
+        list of mnemonic strings to load specific data such as "fastccd",
+        "SCS_XGM", or dictionaries defining a custom mnemonic such as
+            {"extra":
+                {'SCS_CDIFFT_MAG/SUPPLY/CURRENT',
+                 'actual_current.value', None}}
+    runNB: str or int
+        run number, e.g. 'r0235' or 235
+    proposalNB: str or int
+        proposal number, e.g. 'p002252' or 2252
+    subFolder: (str) 
+        sub-folder from which to load the data. Use 'raw' for raw data 
+        or 'proc' for processed data.
+    display: (bool) 
+        whether to show the run.info or not
+    validate: (bool) 
+        whether to run extra-data-validate or not
+    subset:
+        a subset of trains that can be loaded with by_index[:5] for the
+        first 5 trains
+    rois: dict
+        a dictionary of mnemonics with ROI definitions and the desired
+        names, for example:
+            {'fastccd':
+                {'ref':
+                    {'roi': by_index[730:890, 535:720],
+                     'dim': ['ref_x', 'ref_y']},
+                 'sam':
+                    {'roi':by_index[1050:1210, 535:720],
+                     'dim': ['sam_x', 'sam_y']}}}
+
+    useBPTable: bool
+        If True, uses the raw bunch pattern table to extract sase pulse
+        number and indices in the trains. If False, loads the data from the
+        BUNCH_DECODER middle layer device.
+
+    Returns
+    -------
+    res: xarray.Dataset
+        an xarray Dataset with aligned trainIds
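+
+    Example
+    -------
+    A minimal sketch, using one of the mnemonics mentioned above:
+
+    >>> import toolbox_scs as tb
+    >>> data = tb.load(['SCS_XGM'], 235, 2212)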
     """
     
     if isinstance(runNB, int):
@@ -54,7 +85,7 @@ def load(fields, runNB, proposalNB, subFolder='raw', display=False, validate=Fal
     if isinstance(proposalNB,int):
         proposalNB = 'p{:06d}'.format(proposalNB)
     runFolder = os.path.join(find_proposal(proposalNB), subFolder, runNB)
-    run = RunDirectory(runFolder).select_trains(subset)
+    run = ed.RunDirectory(runFolder).select_trains(subset)
 
     if validate:
         get_ipython().system('extra-data-validate ' + runFolder)
@@ -86,7 +117,8 @@ def load(fields, runNB, proposalNB, subFolder='raw', display=False, validate=Fal
         if type(f) == dict:
             # extracting mnemomic defined on the spot
             if len(f.keys()) > 1:
-                print('Loading only one "on-the-spot" mnemonic at a time, skipping all others !')
+                print('Loading only one "on-the-spot" mnemonic at a time, '
+                      'skipping all others!')
             k = list(f.keys())[0]
             v = f[k]
         else:
@@ -110,12 +142,15 @@ def load(fields, runNB, proposalNB, subFolder='raw', display=False, validate=Fal
 
         if k not in rois:
             # no ROIs selection, we read everything
-            vals.append(run.get_array(v['source'], v['key'], extra_dims=v['dim']))
+            vals.append(run.get_array(v['source'], v['key'],
+                                      extra_dims=v['dim']))
             keys.append(k)
         else:
-            # ROIs selection, for each ROI we select a region of the data and save it with new name and dimensions
+            # ROIs selection, for each ROI we select a region of the data and 
+            # save it with new name and dimensions
             for nk,nv in rois[k].items():
-                vals.append(run.get_array(v['source'], v['key'], extra_dims=nv['dim'], roi=nv['roi']))
+                vals.append(run.get_array(v['source'], v['key'], 
+                                          extra_dims=nv['dim'], roi=nv['roi']))
                 keys.append(nk)
     
     aligned_vals = xr.align(*vals, join='inner')
@@ -126,12 +161,12 @@ def load(fields, runNB, proposalNB, subFolder='raw', display=False, validate=Fal
     return result
 
 
-def run_by_proposal(proposal, run, **kwargs):
+def load_run(proposal, run, **kwargs):
     """
     Get run in given proposal
 
-    Wraps the extra_data open_run routine, to ease its use for the 
-    scs-toolbox user.
+    Wraps the extra_data open_run routine, for the convenience of the
+    toolbox user.
 
     Parameters
     ----------
@@ -153,7 +188,7 @@ def run_by_proposal(proposal, run, **kwargs):
         DataCollection object containing information about the specified
         run. Data can be loaded using built-in class methods.
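+
+    Example
+    -------
+    Proposal and run number as used in the toolbox tests:
+
+    >>> import toolbox_scs as tb
+    >>> run = tb.load_run(2212, 235)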
     """
-    return open_run(proposal, run, **kwargs)
+    return ed.open_run(proposal, run, **kwargs)
 
 
 def run_by_path(path):
@@ -174,7 +209,7 @@ def run_by_path(path):
         DataCollection object containing information about the specified
         run. Data can be loaded using built-in class methods.
     """
-    return RunDirectory(path)
+    return ed.RunDirectory(path)
 
 
 def concatenateRuns(runs):
@@ -228,7 +263,7 @@ def load_scan_variable(run, mnemonic, stepsize=None):
     Example
     -------
     >>> import toolbox_scs as tb
-    >>> run = tb.run_by_proposal(2212, 235)
+    >>> run = tb.load_run(2212, 235)
     >>> mnemonic = 'PP800_PhaseShifter'
     >>> scan_variable = tb.load_scan_variable(
                                     self.ed_run, mnemonic, 0.5)
diff --git a/src/toolbox_scs/test/test_detectors_common.py b/src/toolbox_scs/test/test_detectors_common.py
index 4a13950..7e54d49 100644
--- a/src/toolbox_scs/test/test_detectors_common.py
+++ b/src/toolbox_scs/test/test_detectors_common.py
@@ -34,7 +34,7 @@ class TestDetectors(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         log_root.info("Start global setup.")
-        cls._run = tb.run_by_proposal(2212, 235)
+        cls._run = tb.load_run(2212, 235)
 
         fields = ["sase1", "sase3", "npulses_sase3", 
                        "npulses_sase1", "MCP2apd", "SCS_SA3", "nrj"]
diff --git a/src/toolbox_scs/test/test_detectors_dssc.py b/src/toolbox_scs/test/test_detectors_dssc.py
index bd4a38d..eb54fac 100644
--- a/src/toolbox_scs/test/test_detectors_dssc.py
+++ b/src/toolbox_scs/test/test_detectors_dssc.py
@@ -43,6 +43,7 @@ suites = {"no-processing": (
                 "test_prepareempty",
                 "test_loadchunkdata",
                 "test_splitframes",
+                "test_mergechunks",
                 ),
           "full": (
                 "test_info",
@@ -51,6 +52,7 @@ suites = {"no-processing": (
                 "test_prepareempty",
                 "test_loadchunkdata",
                 "test_splitframes",
+                "test_mergechunks",
                 "test_processmodule",
                 )
           }
@@ -77,7 +79,7 @@ class TestDSSC(unittest.TestCase):
         cls._scanfile = './tmp/scan.h5'
         cls._maskfile = './tmp/mask.h5'
 
-        cls._run = tb.run_by_proposal(proposal, run_nr, include='*DA*')
+        cls._run = tb.load_run(proposal, run_nr, include='*DA*')
         cls._scan_variable = tb.load_scan_variable(cls._run, 
                                                    scan_variable, stepsize)
         cls._scan_variable.to_netcdf(cls._scanfile, group='data', mode='w',
@@ -154,6 +156,15 @@ class TestDSSC(unittest.TestCase):
                             ed.by_index[start_index:start_index + chunksize])
         data = load_chunk_data(sel, sourcename)
         self.assertIsNotNone(data)
+        log_root.debug(f"Loaded {ntrains} trains for {sourcename}")
+
+
+    def test_splitframes(self):
+        pass
+
+
+    def test_mergechunks(self):
+        pass
 
 
     def test_processmodule(self):
@@ -182,8 +193,6 @@ class TestDSSC(unittest.TestCase):
 
         print('finished processing modules:', strftime('%X'))
         
-    def test_splitframes(self):
-        pass
 
 def list_suites():
     print("""\nPossible test suites:\n-------------------------""")
diff --git a/src/toolbox_scs/test/test_top_level.py b/src/toolbox_scs/test/test_top_level.py
index 74f7829..fc133af 100644
--- a/src/toolbox_scs/test/test_top_level.py
+++ b/src/toolbox_scs/test/test_top_level.py
@@ -20,7 +20,7 @@ suites = {"packaging": (
           "load": (
                 "test_load",
                 "test_openrun",
-                #"test_openrunpath",
+                "test_openrunpath",
                 "test_loadscanvariable1",
                 "test_loadscanvariable2",
                 )
@@ -65,7 +65,7 @@ class TestToolbox(unittest.TestCase):
         self.assertEqual(run_tb['npulses_sase3'].values[0], 42)
         
     def test_openrun(self):
-        run = tb.run_by_proposal(2212, 235)
+        run = tb.load_run(2212, 235)
         src = 'SCS_DET_DSSC1M-1/DET/0CH0:xtdf'
         self.assertTrue(src in run.all_sources)
     
diff --git a/src/toolbox_scs/util/data_access.py b/src/toolbox_scs/util/data_access.py
index 66ff996..948f650 100644
--- a/src/toolbox_scs/util/data_access.py
+++ b/src/toolbox_scs/util/data_access.py
@@ -39,6 +39,11 @@ def find_run_dir(proposal, run):
         Error raised if the constructed path does not exist. This may
         happen when entering a non-valid run number, or the folder has
         been renamed/removed.
+
+    Notes
+    -----
+    The rather unspecific Exception raised when entering an invalid proposal
+    number stems from the extra_data (ed) package -> to be fixed externally.
 
     """
     rdir = None
-- 
GitLab