From 8d294e9f2069f0edce9cb9188e94613a5c4671ea Mon Sep 17 00:00:00 2001
From: Laurent Mercadier <laurent.mercadier@xfel.eu>
Date: Fri, 19 Mar 2021 09:40:27 +0100
Subject: [PATCH] Changed arguments of get_tim_peaks and get_fast_adc_peaks to
 take DataCollection, flake8

---
 src/toolbox_scs/detectors/digitizers.py | 326 +++++++++++++-----------
 1 file changed, 184 insertions(+), 142 deletions(-)

diff --git a/src/toolbox_scs/detectors/digitizers.py b/src/toolbox_scs/detectors/digitizers.py
index 0bb2662..5057429 100644
--- a/src/toolbox_scs/detectors/digitizers.py
+++ b/src/toolbox_scs/detectors/digitizers.py
@@ -1,8 +1,8 @@
 """ Digitizers related sub-routines
     Copyright (2021) SCS Team.
-
-    (contributions preferrably comply with pep8 code structure
+
+    (contributions preferably comply with pep8 code structure
     guidelines.)
 """
@@ -20,15 +20,16 @@ from ..misc.bunch_pattern_external import is_sase_1, is_sase_3, is_ppl

 log = logging.getLogger(__name__)

+
 def peaks_from_raw_trace(traces,
-                        intstart,
-                        intstop,
-                        bkgstart,
-                        bkgstop,
-                        period,
-                        npulses,
-                        extra_dim='pulseId'
-                        ):
+                         intstart,
+                         intstop,
+                         bkgstart,
+                         bkgstop,
+                         period,
+                         npulses,
+                         extra_dim='pulseId'
+                         ):
     """
     Computes peaks from raw digitizer traces by trapezoidal integration.

@@ -38,7 +39,7 @@ def peaks_from_raw_trace(traces,
         numpy array is provided, the second dimension is that of the samples.
     intstart: int or list or 1D-numpy array
         trace index of integration start. If 1d array, each value is the start
-        of one peak. The period and npulses parameters are ignored.
+        of one peak. The period and npulses parameters are ignored.
     intstop: int
         trace index of integration stop
     bkgstart: int
@@ -56,10 +57,10 @@ def peaks_from_raw_trace(traces,
     -------
     xarray DataArray
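+
+    Example
+    -------
+    A minimal sketch on synthetic data (all numbers are illustrative):
+    >>> import numpy as np
+    >>> traces = np.zeros((5, 2000))        # 5 trains, 2000 samples
+    >>> traces[:, 1000:1020] = 1.           # one rectangular pulse
+    >>> peaks = peaks_from_raw_trace(traces, intstart=990, intstop=1030,
+    ...                              bkgstart=900, bkgstop=980,
+    ...                              period=440, npulses=1)
+    >>> area = float(peaks[0, 0])           # background-subtracted area, ~20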
     """
-
-    assert len(traces.shape)==2
-
-    if type(traces) is xr.DataArray:
+
+    assert len(traces.shape) == 2
+
+    if isinstance(traces, xr.DataArray):
         ntid = traces.sizes['trainId']
         coords = traces.coords
         traces = traces.values
@@ -67,7 +68,7 @@ def peaks_from_raw_trace(traces,
         traces = traces.T
     else:
         coords = None
-
+
     if hasattr(intstart, '__len__'):
         intstart = np.array(intstart)
         pulses = intstart - intstart[0]
@@ -78,19 +79,19 @@ def peaks_from_raw_trace(traces,
     results = xr.DataArray(np.empty((traces.shape[0], len(pulses))),
                            coords=coords,
                            dims=['trainId', extra_dim])
-
-    for i,p in enumerate(pulses):
+
+    for i, p in enumerate(pulses):
         a = intstart + p
         b = intstop + p
         bkga = bkgstart + p
         bkgb = bkgstop + p
         if b > traces.shape[1]:
             break
-        bg = np.outer(np.median(traces[:,bkga:bkgb], axis=1),
+        bg = np.outer(np.median(traces[:, bkga:bkgb], axis=1),
                       np.ones(b-a))
-        results[:,i] = np.trapz(traces[:,a:b] - bg, axis=1)
+        results[:, i] = np.trapz(traces[:, a:b] - bg, axis=1)
     return results
-
+

 def get_peaks(run,
               data=None,
@@ -113,7 +114,7 @@ def get_peaks(run,
         DataCollection containing the digitizer data
     data: xarray DataArray or str
         array containing the raw traces or peak-integrated values from the
-        digitizer. If str, must be one of the ToolBox mnemonics. If None,
+        digitizer. If str, must be one of the ToolBox mnemonics. If None,
         the data is loaded via the source and key arguments.
     source: str
         Name of digitizer source, e.g. 'SCS_UTC1_ADQ/ADC/1:network'. Only
@@ -137,7 +138,7 @@ def get_peaks(run,
         bunch pattern is missing.
     bunchPattern: string or dict
         match the peaks to the bunch pattern: 'sase1', 'sase3', 'scs_ppl'.
-        This will dictate the name of the pulse ID coordinates: 'sa1_pId',
-        'sa3_pId' or 'scs_ppl'. Alternatively, a dict with source, key and
+        This will dictate the name of the pulse ID coordinates: 'sa1_pId',
+        'sa3_pId' or 'ol_pId'. Alternatively, a dict with source, key and
        pattern can be provided, e.g. {'source':'SCS_RR_UTC/TSYS/TIMESERVER',
        'key':'bunchPatternTable.value', 'pattern':'sase3'}
@@ -147,7 +148,7 @@ def get_peaks(run,
     indices: array, slice
         indices from the peak-integrated data to retrieve. Only required
         when bunch pattern is missing and useRaw is False.
-
+
     Returns
     -------
     xarray.DataArray containing digitizer peaks with pulse coordinates
@@ -158,114 +159,113 @@ def get_peaks(run,
     # load data
     if data is None:
         arr = run.get_array(source, key)
-    elif type(data) is str:
+    elif isinstance(data, str):
         arr = run.get_array(*_mnemonics[data].values())
         source = _mnemonics[data]['source']
         key = _mnemonics[data]['key']
     else:
         arr = data
-    dim = [d for d in arr.dims if d is not 'trainId'][0]
-    #check if bunch pattern is provided
+    dim = [d for d in arr.dims if d != 'trainId'][0]
+    # check if bunch pattern is provided
     bpt = None
-    if type(bunchPattern) is dict:
-        bpt = run.get_array(bunchPattern['source'],
-                            bunchPattern['key']).rename({'dim_0':'pulse_slot'})
+    if isinstance(bunchPattern, dict):
+        bpt = run.get_array(bunchPattern['source'], bunchPattern['key'])
+        bpt = bpt.rename({'dim_0': 'pulse_slot'})
         pattern = bunchPattern['pattern']
     elif _mnemonics['bunchPatternTable']['source'] in run.all_sources:
         bpt = run.get_array(*_mnemonics['bunchPatternTable'].values())
         pattern = bunchPattern
     if bpt is not None:
-        #load mask and extract pulse Id:
-        if pattern is 'sase3':
+        # load mask and extract pulse Id:
+        if pattern == 'sase3':
             mask = is_sase_3(bpt)
             extra_dim = 'sa3_pId'
-        elif pattern is 'sase1':
+        elif pattern == 'sase1':
             mask = is_sase_1(bpt)
             extra_dim = 'sa1_pId'
-        elif pattern is 'scs_ppl':
+        elif pattern == 'scs_ppl':
             mask = is_ppl(bpt)
             extra_dim = 'ol_pId'
         else:
             extra_dim = 'pulseId'
-        valid_tid = mask.where(mask.sum(dim='pulse_slot')>0,
+        valid_tid = mask.where(mask.sum(dim='pulse_slot') > 0,
                                drop=True).trainId
         mask_on = mask.sel(trainId=valid_tid)
-        if (mask_on == mask_on[0]).all().values == False:
+        if not (mask_on == mask_on[0]).all():
             log.debug('Pattern changed during the run!')
         pid = np.unique(np.where(mask_on)[1])
         npulses = len(pid)
         extra_coords = pid
         mask_final = mask_on.where(mask_on, drop=True).fillna(False)
-        mask_final = mask_final.astype(bool).rename({'pulse_slot':extra_dim})
+        mask_final = mask_final.astype(bool).rename({'pulse_slot': extra_dim})
         log.debug(f'Bunch pattern: {npulses} pulses for {pattern}.')
     if extra_dim is None:
         extra_dim = 'pulseId'
-
+
     # 1. Use peak-integrated data from digitizer
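+    # The APD (peak-integrated) data may have been recorded with a period
+    # larger than the actual pulse separation, in which case only every
+    # n-th value corresponds to a real pulse. Illustrative numbers: with
+    # pulses every 8 bunch slots (pid = [0, 8, 16, ...]) and a digitizer
+    # that integrated every 4th slot (period = 4), the stride computed
+    # below is (pid[1] - pid[0]) / period = 2, i.e. every 2nd value is kept.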
     if useRaw is False:
-        #1.1 No bunch pattern provided
+        # 1.1 No bunch pattern provided
         if bpt is None:
             log.debug('Missing bunch pattern info.')
             if indices is None:
-                raise TypeError(f'indices argument must be provided '+
+                raise TypeError('indices argument must be provided ' +
                                 'when bunch pattern info is missing.')
-            return arr.isel({dim:indices}).rename({dim:'pulseId'})
+            return arr.isel({dim: indices}).rename({dim: 'pulseId'})

-        #1.2 Bunch pattern is provided
+        # 1.2 Bunch pattern is provided
         if npulses == 1:
-            return arr.sel({dim:0}).expand_dims(extra_dim).T.assign_coords(
-                {extra_dim:extra_coords})
+            return arr.sel({dim: 0}).expand_dims(extra_dim).T.assign_coords(
+                {extra_dim: extra_coords})

         # verify period used by APD and match it to pid from bunchPattern
-        if digitizer is 'FastADC':
+        if digitizer == 'FastADC':
             adc_source = source.split(':')[0]
-            enable_key = (source.split(':')[1].split('.')[0]
+            enable_key = (source.split(':')[1].split('.')[0] +
                           '.enablePeakComputation.value')
-            if run.get_array(adc_source, enable_key)[0] is False:
-                raise ValueError('The digitizer did not record '+
+            if not run.get_array(adc_source, enable_key)[0]:
+                raise ValueError('The digitizer did not record ' +
                                  'peak-integrated data.')
-            period_key = (source.split(':')[1].split('.')[0] +
+            period_key = (source.split(':')[1].split('.')[0] +
                           '.pulsePeriod.value')
             period = run.get_array(adc_source, period_key)[0].values/24
-        if digitizer is 'ADQ412':
+        if digitizer == 'ADQ412':
             board_source = source.split(':')[0]
             board = key[19]
             channel = key[21]
-            channel_to_number = {'A':0, 'B':1, 'C':2, 'D':3}
+            channel_to_number = {'A': 0, 'B': 1, 'C': 2, 'D': 3}
             channel = channel_to_number[channel]
-            in_del_key = (f'board{board}.apd.channel_{channel}'+
+            in_del_key = (f'board{board}.apd.channel_{channel}' +
                           '.initialDelay.value')
             initialDelay = run.get_array(board_source, in_del_key)[0].values
-            up_lim_key = (f'board{board}.apd.channel_{channel}'+
+            up_lim_key = (f'board{board}.apd.channel_{channel}' +
                           '.upperLimit.value')
             upperLimit = run.get_array(board_source, up_lim_key)[0].values
             period = (upperLimit - initialDelay)/440
         stride = (pid[1] - pid[0]) / period
         if period < 1:
-            log.warning('Warning: the pulse period in digitizer was '+
-                        'smaller than the separation of two pulses '+
+            log.warning('Warning: the pulse period in digitizer was ' +
+                        'smaller than the separation of two pulses ' +
                         'at 4.5 MHz.')
             stride = 1
         if stride < 1:
-            raise ValueError('Pulse period in digitizer was too large '+
-                             'compared to actual pulse separation. Some pulses '+
-                             'were not recorded.')
+            raise ValueError('Pulse period in digitizer was too large ' +
+                             'compared to actual pulse separation. Some ' +
+                             'pulses were not recorded.')
         stride = int(stride)
         log.debug(f'period {period}, stride {stride}')
         if npulses*stride > arr.sizes[dim]:
-            raise ValueError('The number of pulses recorded by digitizer '+
-                             f'that correspond to actual {pattern} pulses '+
+            raise ValueError('The number of pulses recorded by digitizer ' +
+                             f'that correspond to actual {pattern} pulses ' +
                              'is too small.')
-        peaks = arr.isel({dim:slice(0,npulses*stride,stride)}).rename(
-            {dim:extra_dim})
-        peaks = peaks.where(mask_final, drop=True)
-        return peaks.assign_coords({extra_dim:extra_coords})
+        peaks = arr.isel({dim: slice(0, npulses*stride, stride)})
+        peaks = peaks.rename({dim: extra_dim}).where(mask_final, drop=True)
+        return peaks.assign_coords({extra_dim: extra_coords})

     # 2. Use raw data from digitizer
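+    # The bunch pattern gives pulse ids in 4.5 MHz slots; below they are
+    # mapped to trace sample indices via min_distance (samples per slot).
+    # Illustrative numbers: for FastADC (min_distance = 24), pid = [0, 8, 16]
+    # yields sample_id = [0, 192, 384] relative to the first window.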
-    #minimum samples between two pulses, according to digitizer type
+    # minimum samples between two pulses, according to digitizer type
     min_distance = 1
-    if digitizer is 'FastADC':
+    if digitizer == 'FastADC':
         min_distance = 24
-    if digitizer is 'ADQ412':
+    if digitizer == 'ADQ412':
         min_distance = 440
     if autoFind:
         integParams = find_integ_params(arr.mean(dim='trainId'),
@@ -275,12 +275,12 @@ def get_peaks(run,
                                         min_distance=min_distance)

     # 2.1. No bunch pattern provided
     if bpt is None:
         log.debug('Missing bunch pattern info.')
-        required_keys=['intstart', 'intstop', 'bkgstart',
-                       'bkgstop', 'period', 'npulses']
+        required_keys = ['intstart', 'intstop', 'bkgstart',
+                         'bkgstop', 'period', 'npulses']
         if integParams is None or not all(name in integParams
                                           for name in required_keys):
-            raise TypeError('All keys of integParams argument '+
-                            f'{required_keys} are required when '+
+            raise TypeError('All keys of integParams argument ' +
+                            f'{required_keys} are required when ' +
                             'bunch pattern info is missing.')
         log.debug(f'Retrieving {integParams["npulses"]} pulses.')
         if extra_dim is None:
             extra_dim = 'pulseId'
         return peaks_from_raw_trace(arr, integParams['intstart'],
                                     integParams['intstop'],
                                     integParams['bkgstart'],
                                     integParams['bkgstop'],
@@ -292,10 +292,10 @@ def get_peaks(run,
                                     integParams['period'],
                                     integParams['npulses'],
                                     extra_dim=extra_dim)
-
+
     # 2.2 Bunch pattern is provided
     sample_id = (pid-pid[0])*min_distance
-    #override auto find parameters
+    # override auto find parameters
     if isinstance(integParams['intstart'], (int, np.integer)):
         integParams['intstart'] = sample_id + integParams['intstart']
         integParams['period'] = integParams['npulses'] = None
@@ -308,7 +308,7 @@ def get_peaks(run,
                                 integParams['npulses'],
                                 extra_dim=extra_dim)
     peaks = peaks.where(mask_final, drop=True)
-    return peaks.assign_coords({extra_dim:extra_coords})
+    return peaks.assign_coords({extra_dim: extra_coords})


 def find_integ_params(trace, min_distance=1, height=None, width=1):
     """
     Estimates the integration parameters for peak integration of a raw trace.

     Parameters
     ----------
     trace: numpy array or xarray DataArray
         The digitizer raw trace used to find peaks
     min_distance: int
         minimum number of samples between two peaks
     height: int
         minimum threshold for peak determination
     width: int
         minimum width of peak

     Returns
     -------
     dict with keys 'intstart', 'intstop', 'bkgstart', 'bkgstop',
     'period', 'npulses' and values in number of samples.
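+
+    Example
+    -------
+    A minimal sketch on a synthetic trace (all numbers are illustrative):
+    >>> import numpy as np
+    >>> trace = np.zeros(5000)
+    >>> trace[1000:1010] = trace[1440:1450] = 1.
+    >>> params = find_integ_params(trace, min_distance=440)
+    >>> # params['npulses'] == 2, params['period'] ~ 440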
     """
-    if type(trace) is xr.DataArray:
+    if isinstance(trace, xr.DataArray):
         trace = trace.values
     bl = np.median(trace)
     trace_no_bl = trace - bl
     if np.max(trace_no_bl) < np.abs(np.min(trace_no_bl)):
-        posNeg = 'negative'
         trace_no_bl *= -1
         trace = bl + trace_no_bl
-    noise = trace[:100]
-    noise_ptp = np.max(noise) - np.min(noise)
+    # noise = trace[:100]
+    # noise_ptp = np.max(noise) - np.min(noise)
     if height is None:
         height = trace_no_bl.max()/20
     centers, peaks = find_peaks(trace_no_bl, distance=min_distance,
-                               height=height, width=width)
+                                height=height, width=width)
     npulses = len(centers)
-    if npulses==0:
+    if npulses == 0:
         raise ValueError('Could not automatically find peaks.')
-    elif npulses==1:
+    elif npulses == 1:
         period = 0
     else:
         period = np.median(np.diff(centers)).astype(int)
@@ -359,63 +358,82 @@ def find_integ_params(trace, min_distance=1, height=None, width=1):
                 + 0.5*np.mean(peaks['widths'])).astype(int)
     bkgstop = intstart - int(0.5*np.mean(peaks['widths']))
     bkgstart = np.max([bkgstop - min_distance, 0])
-    result = {'intstart':intstart, 'intstop':intstop,
-              'bkgstart':bkgstart, 'bkgstop':bkgstop,
-              'period':period, 'npulses':npulses}
+    result = {'intstart': intstart, 'intstop': intstop,
+              'bkgstart': bkgstart, 'bkgstop': bkgstop,
+              'period': period, 'npulses': npulses}
     return result

-def get_tim_peaks(data, bunchPattern='sase3',
+
+def get_tim_peaks(data=None, run=None, bunchPattern='sase3',
                   integParams=None, keepAllSase=False):
     """
-    Automatically computes TIM peaks from sources in data. Sources
-    can be raw traces (e.g. "MCP2raw") or peak-integrated data
+    Automatically computes TIM peaks from sources in data. Sources
+    can be raw traces (e.g. "MCP2raw") or peak-integrated data
     (e.g. "MCP2apd"). The bunch pattern table is used to assign
     the pulse id coordinates.
-
+
     Parameters
     ----------
-    data: xarray Dataset containing TIM data
+    data: xarray Dataset, str or list of str
+        xarray Dataset containing TIM data. If str or list of str,
+        must be mnemonics for TIM, e.g. "MCP2apd" or "MCP3raw" and
+        run must be provided.
+    run: extra_data.DataCollection
+        DataCollection containing the digitizer data. If provided,
+        data must be a str or list of str. If None, data must be
+        an xarray Dataset containing actual data.
     bunchPattern: str
         'sase1' or 'sase3' or 'scs_ppl', bunch pattern used to
         extract peaks. The pulse ID dimension will be named
         'sa1_pId', 'sa3_pId' or 'ol_pId', respectively.
     integParams: dict
-        dictionnary for raw trace integration, e.g.
-        {'intstart':100, 'intstop':200, 'bkgstart':50,
-        'bkgstop':99, 'period':24, 'npulses':500}.
+        dictionary for raw trace integration, e.g.
+        {'intstart':100, 'intstop':200, 'bkgstart':50,
+        'bkgstop':99, 'period':440, 'npulses':500}.
         If None, integration parameters are computed
        automatically.
     keepAllSase: bool
         Only relevant in case of sase-dedicated trains. If
-        True, the trains for SASE 1 are kept, else they are
-        dropped.
-
+        True, all trains are kept, else only those of the
+        bunchPattern are kept.
+
     Returns
     -------
-    xarray Dataset with all TIM variables substituted by
-    the peak caclulated values (e.g. "MCP2raw" becomes
+    xarray Dataset with all TIM variables substituted by
+    the calculated peak values (e.g. "MCP2raw" becomes
     "MCP2peaks").
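+
+    Example
+    -------
+    A minimal sketch of the new interface (proposal and run number
+    are illustrative):
+    >>> import extra_data as ed
+    >>> run = ed.open_run(2212, 235)
+    >>> ds = get_tim_peaks(data=['MCP1apd', 'MCP2apd'], run=run)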
     """
-    proposal = data.attrs['runFolder'].split('/')[-3]
-    runNB = data.attrs['runFolder'].split('/')[-1]
-    run = ed.open_run(proposal, runNB)
-    peakDict = {}
-    toRemove = []
-    autoFind=True
+    if run is None:
+        if not data:
+            raise ValueError('At least run or data must be provided.')
+        if not isinstance(data, xr.Dataset):
+            raise TypeError(f'data type is {type(data)}. Expected ' +
+                            'xarray Dataset.')
+    else:
+        if not data or not isinstance(data, (str, list)):
+            raise TypeError(f'data is {type(data)}. Expected str or ' +
+                            'list of str.')
+    if isinstance(data, xr.Dataset):
+        proposal = data.attrs['runFolder'].split('/')[-3]
+        runNB = data.attrs['runFolder'].split('/')[-1]
+        run = ed.open_run(proposal, runNB)
+        keys = [k for k in data if "MCP" in k]
+        is_ds = True
+    else:
+        keys = data if isinstance(data, list) else [data]
+        is_ds = False
+    autoFind = True
     if integParams is not None:
         autoFind = False
-    for c in range(1,5):
-        key = f'MCP{c}raw'
-        useRaw = True
-        if key not in data:
-            key = f'MCP{c}apd'
-            useRaw = False
-        if key not in data:
-            continue
+    peakDict = {}
+    toRemove = []
+    for key in keys:
+        useRaw = 'raw' in key
         log.debug(f'Retrieving TIM peaks from {key}...')
         mnemonic = _mnemonics[key]
-        peaks = get_peaks(run, data[key],
+        d = data[key] if is_ds else key
+        peaks = get_peaks(run, d,
                           source=mnemonic['source'],
                           key=mnemonic['key'],
                           digitizer='ADQ412',
@@ -423,76 +441,100 @@
                           useRaw=useRaw,
                           autoFind=autoFind,
                           integParams=integParams,
                           bunchPattern=bunchPattern)
-        peakDict[f'MCP{c}peaks'] = peaks
+        newKey = key.replace('raw', 'peaks').replace('apd', 'peaks')
+        peakDict[newKey] = peaks
         toRemove.append(key)
-    ds = data.drop(toRemove)
+    ds = xr.Dataset()
+    if is_ds:
+        ds = data.drop(toRemove)
     join = 'outer' if keepAllSase else 'inner'
     ds = ds.merge(peakDict, join=join)
     return ds

-def get_fast_adc_peaks(data, bunchPattern='scs_ppl',
+
+def get_fast_adc_peaks(data=None, run=None, bunchPattern='scs_ppl',
                        integParams=None, keepAllSase=False):
     """
     Automatically computes Fast ADC peaks from sources in data.
     Sources can be raw traces (e.g. "FastADC2raw") or peak-
     integrated data (e.g. "FastADC2peaks"). The bunch pattern
     table is used to assign the pulse id coordinates.
-
+
     Parameters
     ----------
-    data: xarray Dataset containing TIM data
+    data: xarray Dataset, str or list of str
+        xarray Dataset containing Fast ADC data. If str or list
+        of str, list of mnemonics for FastADC, e.g. "FastADC5raw"
+        or "FastADC5peaks" and run must be provided.
+    run: extra_data.DataCollection
+        DataCollection containing the digitizer data. If provided,
+        data must be a str or list of str. If None, data must be
+        an xarray Dataset containing actual data.
     bunchPattern: str
         'sase1' or 'sase3' or 'scs_ppl', bunch pattern used to
         extract peaks.
     integParams: dict
-        dictionnary for raw trace integration, e.g.
-        {'intstart':100, 'intstop':200, 'bkgstart':50,
-        'bkgstop':99, 'period':24, 'npulses':500}.
+        dictionary for raw trace integration, e.g.
+        {'intstart':100, 'intstop':200, 'bkgstart':50,
+        'bkgstop':99, 'period':24, 'npulses':500}.
         If None, integration parameters are computed
         automatically.
     keepAllSase: bool
         Only relevant in case of sase-dedicated trains. If
-        True, the trains for SASE 1 are kept, else they are
-        dropped.
-    display: bool
-        If True, displays information
-
+        True, all trains are kept, else only those of the
+        bunchPattern are kept.
+
     Returns
     -------
-    xarray Dataset with all Fast ADC variables substituted by
-    the peak caclulated values (e.g. "FastADC2raw" becomes
+    xarray Dataset with all Fast ADC variables substituted by
+    the calculated peak values (e.g. "FastADC2raw" becomes
     "FastADC2peaks").
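+
+    Example
+    -------
+    A minimal sketch of the new interface (mnemonic and run number
+    are illustrative):
+    >>> import extra_data as ed
+    >>> run = ed.open_run(2212, 235)
+    >>> ds = get_fast_adc_peaks(data='FastADC5raw', run=run,
+    ...                         bunchPattern='scs_ppl')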
Expected ' + + 'xarray Dataset.') + else: + if bool(data) is False or not ( + isinstance(data, str) or isinstance(data, list)): + raise TypeError(f'data is {type(data)}. Expected str or ' + + 'list of str.') + if isinstance(data, xr.Dataset): + proposal = data.attrs['runFolder'].split('/')[-3] + runNB = data.attrs['runFolder'].split('/')[-1] + run = ed.open_run(proposal, runNB) + keys = [k for k in data if "FastADC" in k] + is_ds = True + else: + keys = data if isinstance(data, list) else [data] + is_ds = False + autoFind = True if integParams is not None: autoFind = False - for c in range(1,10): - key = f'FastADC{c}raw' - useRaw = True - if key not in data: - key = f'FastADC{c}peaks' - useRaw = False - if key not in data: - continue - log.debug('Retrieving Fast ADC peaks from {key}...') + peakDict = {} + toRemove = [] + for key in keys: + useRaw = True if 'raw' in key else False + log.debug(f'Retrieving Fast ADC peaks from {key}...') mnemonic = _mnemonics[key] - peaks = get_peaks(run, data[key], + d = data[key] if is_ds else key + peaks = get_peaks(run, d, source=mnemonic['source'], key=mnemonic['key'], - digitizer='FastADC', + digitizer='ADQ412', useRaw=useRaw, autoFind=autoFind, integParams=integParams, bunchPattern=bunchPattern) - peakDict[f'FastADC{c}peaks'] = peaks + newKey = key.replace('raw', 'peaks') + peakDict[newKey] = peaks toRemove.append(key) - ds = data.drop(toRemove) + ds = xr.Dataset() + if is_ds: + ds = data.drop(toRemove) join = 'outer' if keepAllSase else 'inner' ds = ds.merge(peakDict, join=join) return ds -- GitLab