From 8a6a2242419dd67d55a4ceffe256b2f471e841f1 Mon Sep 17 00:00:00 2001 From: Laurent Mercadier <laurent.mercadier@xfel.eu> Date: Mon, 30 Sep 2024 20:05:48 +0200 Subject: [PATCH] Add docstring to new functions --- src/toolbox_scs/detectors/digitizers.py | 147 +++++++++++++++++++++++- 1 file changed, 146 insertions(+), 1 deletion(-) diff --git a/src/toolbox_scs/detectors/digitizers.py b/src/toolbox_scs/detectors/digitizers.py index b5389ba..9dc268d 100644 --- a/src/toolbox_scs/detectors/digitizers.py +++ b/src/toolbox_scs/detectors/digitizers.py @@ -1260,6 +1260,44 @@ def timFactorFromTable(voltage, photonEnergy, mcp=1): def extract_digitizer_peaks(proposal, runNB, mnemonic, bunchPattern=None, integParams=None, autoFind=True, save=True, subdir='usr/processed_runs'): + """ + Extract the peaks from digitizer raw traces and save them into a file. + The calculation is a trapezoidal integration between 'pulseStart' and + 'pulseStop' with subtraction of a baseline defined as the median between + 'baseStart' and 'baseStop'. + The integration parameters can either be provided using integParams, or + determined by a peak finding algorithm using autoFind. If the bunchPattern + argument is provided, the pulse ids are aligned to it. + + Parameters + ---------- + proposal: int + the proposal number + runNB: int + the run number + mnemonic: str + the mnemonic containing raw traces. Example: 'XRD_MCP_BIGraw' + bunchPattern: str + 'sase3' or 'scs_ppl'. If provided, checks the bunch pattern table using + Extra XrayPulses or OpticalPulses and aligns the pulse ids. + If None, the pulse ids are not aligned and indexed between 0 and the + number of pulses per train. + integParams: dict + dictionary with keys ['pulseStart', 'pulseStop', 'baseStart', + 'baseStop', 'period', 'npulses']. If provided, autoFind is set to False. + If bunchPattern is not None, 'period' and 'npulses' are adjusted to match + the bunch pattern and align the pulse ids. 
+ autoFind: bool + If True, a peak-finding algorithm is used to determine 'pulseStart', + 'pulseStop', 'baseStart', 'baseStop', and 'period' and 'npulses' if + bunchPattern is None (otherwise the period and npulses from bunchPattern + are used for pulse id alignment). + save: bool + whether to save the result to a file or not. + subdir: str + subdirectory. The data is stored in + <proposal path>/<subdir>/r{runNB:04d}/f'r{runNB:04d}-digitizers-data.h5' + """ if integParams is None and autoFind is False: log.warning('integParams not provided and autoFind is False. ' 'Cannot compute peak integration parameters.') @@ -1343,9 +1381,9 @@ def extract_digitizer_peaks(proposal, runNB, mnemonic, bunchPattern=None, traces = run[source, key].xarray(name=mnemonic.replace('raw', 'avg'), extra_dims=mnemo['dim']) trace = traces.mean('trainId') + # find peak integration parameters if autoFind == True: - #params = find_integration_params(trace, period, npulses) params = find_integ_params(trace) if (period is not None and params['period'] != period or npulses is not None and params['npulses'] != npulses): @@ -1384,7 +1422,31 @@ def extract_digitizer_peaks(proposal, runNB, mnemonic, bunchPattern=None, save_peaks(proposal, runNB, data, trace, subdir) return data + def save_peaks(proposal, runNB, peaks, avg_trace, subdir): + ''' + Save the peaks extracted with extract_digitizer_peaks() as well as the + average raw trace in a dataset at the proposal + subdir location. + If a dataset already exists, the new data is merged with it. + + Parameters + ---------- + proposal: int + the proposal number + runNB: int + the run number + peaks: xarray DataArray + the 2D-array obtained by extract_digitizer_peaks() + avg_trace: xarray DataArray + the average raw trace over the trains + subdir: str + subdirectory. 
The data is stored in + <proposal path>/<subdir>/r{runNB:04d}/f'r{runNB:04d}-digitizers-data.h5' + + Returns + ------- + None + ''' root = find_proposal(f'p{proposal:06d}') path = os.path.join(root, subdir + f'/r{runNB:04d}/') os.makedirs(path, exist_ok=True) @@ -1409,9 +1471,40 @@ def save_peaks(proposal, runNB, peaks, avg_trace, subdir): ds = ds_peaks.merge(avg_trace.rename({avg_trace.dims[0]: 'sampleId'})) ds.to_netcdf(fname, format='NETCDF4') print(f'saved data into {fname}.') + return None + def load_processed_peaks(proposal, runNB, mnemonic=None, data='usr/processed_runs', merge_with=None): + """ + Load processed digitizer peaks data. + + Parameters + ---------- + proposal: int + the proposal number + runNB: int + the run number + mnemonic: str + the mnemonic containing peaks. Example: 'XRD_MCP_BIGpeaks'. + If None, the entire dataset is loaded + data: str + subdirectory. The data is stored in + <proposal path>/<subdir>/r{runNB:04d}/f'r{runNB:04d}-digitizers-data.h5' + merge_with: xarray Dataset + A dataset to merge the data with. + + Returns + ------- + xarray DataArray if mnemonic is not None and merge_with is None + xarray Dataset if mnemonic is None or merge_with is not None. + + Example + ------- + # load the mono energy and the MCP_BIG peaks + run, ds = tb.load(proposal, runNB, 'nrj') + ds = tb.load_processed_peaks(proposal, runNB,'XRD_MCP_BIGpeaks', merge_with=ds) + """ if mnemonic is None: return load_all_processed_peaks(proposal, runNB, data, merge_with) root = find_proposal(f'p{proposal:06d}') @@ -1434,8 +1527,30 @@ def load_processed_peaks(proposal, runNB, mnemonic=None, print(f'Mnemonic {mnemonic} not found in {fname}') return merge_with + def load_all_processed_peaks(proposal, runNB, data='usr/processed_runs', merge_with=None): + """ + Load processed digitizer peaks dataset. The data contains the peaks, + the average raw trace and the integration parameters (attribute) of + each processed digitizer source. 
+ + Parameters + ---------- + proposal: int + the proposal number + runNB: int + the run number + data: str + subdirectory. The data is stored in + <proposal path>/<subdir>/r{runNB:04d}/f'r{runNB:04d}-digitizers-data.h5' + merge_with: xarray Dataset + A dataset to merge the data with. + + Returns + ------- + xarray Dataset + """ root = find_proposal(f'p{proposal:06d}') path = os.path.join(root, data + f'/r{runNB:04d}/') fname = path + f'r{runNB:04d}-digitizers-data.h5' @@ -1448,8 +1563,38 @@ def load_all_processed_peaks(proposal, runNB, data='usr/processed_runs', print(f'{fname} not found.') return merge_with + def check_processed_peak_params(proposal, runNB, mnemonic, data='usr/processed_runs', plot=True, show_all=False): + """ + Check the integration parameters used to generate the processed peak values. + + Parameters + ---------- + proposal: int + the proposal number + runNB: int + the run number + mnemonic: str + the mnemonic containing peaks. Example: 'XRD_MCP_BIGpeaks'. + If None, the entire dataset is loaded + data: str + subdirectory. The data is stored in + <proposal path>/<subdir>/r{runNB:04d}/f'r{runNB:04d}-digitizers-data.h5' + plot: bool + If True, displays the raw trace and peak integration regions. + show_all: bool + If True, displays the entire raw trace and all peak integration + regions (this can be time-consuming). + If False, shows the first and last pulses. + + Returns + ------- + params: dict + the integration parameters with keys ['pulseStart', 'pulseStop', + 'baseStart', 'baseStop', 'period', 'npulses']. + See `extract_digitizer_peaks()`. + """ root = find_proposal(f'p{proposal:06d}') path = os.path.join(root, data + f'/r{runNB:04d}/') fname = path + f'r{runNB:04d}-digitizers-data.h5' -- GitLab