import numpy as np
from scipy import special
from scipy.optimize import curve_fit
__all__ = ['knife_edge', 'knife_edge_base']
def knife_edge(positions, intensities, axisRange=None, p0=None):
"""
Calculates the beam radius at 1/e^2 from a knife-edge scan by
fitting with the erfc function f(x) = a*erfc(u) + b,
where u = sqrt(2)*(x - x0)/w0, with w0 the beam radius at 1/e^2
and x0 the beam center.
Parameters
----------
positions : np.ndarray
Motor position values, typically 1D
intensities : np.ndarray
Intensity values, either 1D or 2D, with the number of rows
equal to the number of positions
axisRange : sequence of two floats or None
Edges of the scanning axis between which to apply the fit.
p0 : list of floats or 1D numpy array, optional
Initial fit parameters: x0, w0, a, b. If None, a beam
radius of 100 um is assumed.
Returns
-------
width : float
The beam radius at 1/e^2
std : float
The standard deviation of the width
"""
popt, pcov = knife_edge_base(positions, intensities,
axisRange=axisRange, p0=p0)
width, std = 0, 0
if popt is not None and pcov is not None:
width, std = np.abs(popt[1]), pcov[1, 1]**0.5
return width, std
def knife_edge_base(positions, intensities, axisRange=None, p0=None):
"""
The base implementation of the knife-edge scan analysis.
Calculates the beam radius at 1/e^2 from a knife-edge scan by
fitting with the erfc function f(x) = a*erfc(u) + b,
where u = sqrt(2)*(x - x0)/w0, with w0 the beam radius at 1/e^2
and x0 the beam center.
Parameters
----------
positions : np.ndarray
Motor position values, typically 1D
intensities : np.ndarray
Intensity values, either 1D or 2D, with the number of rows
equal to the number of positions
axisRange : sequence of two floats or None
Edges of the scanning axis between which to apply the fit.
p0 : list of floats or 1D numpy array, optional
Initial fit parameters: x0, w0, a, b. If None, a beam
radius of 100 um is assumed.
Returns
-------
popt : sequence of floats or None
The parameters of the resulting fit.
pcov : 2D np.ndarray or None
The covariance matrix of the resulting fit.
"""
# Prepare arrays
positions, intensities = prepare_arrays(positions, intensities,
xRange=axisRange)
# Estimate initial fitting params
if p0 is None:
p0 = [np.mean(positions), 0.1, np.max(intensities) / 2, 0]
# Fit
popt, pcov = function_fit(erfc, positions, intensities, p0=p0)
return popt, pcov
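# --- Usage sketch (illustration only, not part of the module) ---
# Fitting a synthetic knife-edge scan with the functions above. The motor
# range, beam radius and noise level are made-up demonstration values.
#
# x = np.linspace(-1, 1, 200)                    # motor positions, e.g. in mm
# y = erfc(x, x0=0.0, w0=0.1, a=0.5, b=0.0)      # ideal edge scan, w0 = 0.1 mm
# y = y + np.random.normal(0, 0.01, y.shape)     # add measurement noise
# popt, pcov = knife_edge_base(x, y)             # popt = [x0, w0, a, b]
# width, std = knife_edge(x, y)                  # |w0| and its 1-sigma error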
def function_fit(func, x, y, **kwargs):
"""A wrapper for scipy.optimize curve_fit()
"""
# Fit
try:
popt, pcov = curve_fit(func, x, y, **kwargs)
except (TypeError, RuntimeError) as err:
print("Fit did not converge:", err)
popt, pcov = (None, None)
return popt, pcov
def prepare_arrays(arrX: np.ndarray, arrY: np.ndarray,
xRange=None, yRange=None):
"""
Preprocessing of the input x and y arrays.
This involves the following steps:
1. Convert the arrays to 1D of the same size.
2. Select the values within the given x- and y-ranges.
3. Keep only finite values.
"""
# Convert both arrays to 1D of the same size
arrX, arrY = arrays_to1d(arrX, arrY)
# Select ranges
if xRange is not None:
low, high = xRange
if low == high:
raise ValueError('The supplied xRange is not a valid range.')
mask_ = range_mask(arrX, low, high)
arrX = arrX[mask_]
arrY = arrY[mask_]
if yRange is not None:
low, high = yRange
if low == high:
raise ValueError('The supplied yRange is not a valid range.')
mask_ = range_mask(arrY, low, high)
arrX = arrX[mask_]
arrY = arrY[mask_]
# Clean both arrays by only getting finite values
finite_idx = np.isfinite(arrX) & np.isfinite(arrY)
arrX = arrX[finite_idx]
arrY = arrY[finite_idx]
return arrX, arrY
def arrays_to1d(arrX: np.ndarray, arrY: np.ndarray):
"""Flatten two arrays and matches their sizes
"""
assert arrX.shape[0] == arrY.shape[0]
arrX, arrY = arrX.flatten(), arrY.flatten()
if len(arrX) > len(arrY):
arrY = np.repeat(arrY, len(arrX) // len(arrY))
else:
arrX = np.repeat(arrX, len(arrY) // len(arrX))
return arrX, arrY
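# --- Illustration (not part of the module) ---
# arrays_to1d matches a per-train motor array against per-train, per-pulse
# intensities by repeating the shorter array. With a (5,) motor and a (5, 10)
# signal, each motor value is repeated 10 times so both outputs have 50 entries:
#
# motor = np.arange(5)            # one position per train
# signal = np.ones((5, 10))       # 10 pulses per train
# x, y = arrays_to1d(motor, signal)
# assert x.shape == (50,) and y.shape == (50,)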
def range_mask(array, minimum=None, maximum=None):
"""Retrieve the resulting array from the given minimum and maximum
"""
default = np.ones(array.shape, dtype=bool)
min_slice, max_slice = default, default
if minimum is not None:
if minimum > np.nanmax(array):
raise ValueError('The range minimum is too large.')
min_slice = array >= minimum
if maximum is not None:
if maximum < np.nanmin(array):
raise ValueError('The range maximum is too small.')
max_slice = array <= maximum
return min_slice & max_slice
def erfc(x, x0, w0, a, b):
return a * special.erfc(np.sqrt(2) * (x - x0) / w0) + b
import numpy as np
import pytest
from ..knife_edge import erfc, knife_edge_base, prepare_arrays, range_mask
def test_range_mask():
arr = np.array([1, 2, 3, 4, 5])
# Exact
slice_ = range_mask(arr, minimum=2, maximum=4)
np.testing.assert_array_equal(slice_, [False, True, True, True, False])
# Range exceeds the closest values
slice_ = range_mask(arr, minimum=1.75, maximum=4.25)
np.testing.assert_array_equal(slice_, [False, True, True, True, False])
# Range misses the closest values
slice_ = range_mask(arr, minimum=2.25, maximum=3.75)
np.testing.assert_array_equal(slice_, [False, False, True, False, False])
# Equidistant
slice_ = range_mask(arr, minimum=2.5, maximum=4.5)
np.testing.assert_array_equal(slice_, [False, False, True, True, False])
# Out of bounds, valid minimum
slice_ = range_mask(arr, minimum=0)
np.testing.assert_array_equal(slice_, [True, True, True, True, True])
# Out of bounds, invalid minimum
with pytest.raises(ValueError):
range_mask(arr, minimum=6)
# Out of bounds, valid maximum
slice_ = range_mask(arr, maximum=6)
np.testing.assert_array_equal(slice_, [True, True, True, True, True])
# Out of bounds, invalid maximum
with pytest.raises(ValueError):
range_mask(arr, maximum=0)
# with NaNs
arr = np.array([1, np.nan, 3, np.nan, 5])
slice_ = range_mask(arr, minimum=3)
np.testing.assert_array_equal(slice_, [False, False, True, False, True])
def test_prepare_arrays_nans():
# Setup test values
trains, pulses = 5, 10
size = trains * pulses
motor = np.arange(trains)
signal = np.random.randint(100, size=(trains, pulses))
# Test finite motor and signal values
positions, intensities = prepare_arrays(motor, signal)
assert positions.shape == (size,)
assert intensities.shape == (size,)
# Test finite motors and signals with NaNs
signal_nan = _with_values(signal, value=np.nan, num=20)
positions, intensities = prepare_arrays(motor, signal_nan)
assert positions.shape == (size-20,)
assert np.isfinite(positions).all()
assert intensities.shape == (size-20,)
assert np.isfinite(intensities).all()
# Test finite signals and motors with NaNs
motor_nan = _with_values(motor, value=np.nan, num=3)
positions, intensities = prepare_arrays(motor_nan, signal)
assert positions.shape == ((trains-3) * pulses,)
assert np.isfinite(positions).all()
assert intensities.shape == ((trains-3) * pulses,)
assert np.isfinite(intensities).all()
def test_prepare_arrays_size():
trains, pulses = 5, 10
size = trains * pulses
motor = np.arange(trains)
signal = np.random.randint(100, size=(trains, pulses))
# Test finite motor and 2D signals
positions, intensities = prepare_arrays(motor, signal)
assert positions.shape == (size,)
assert intensities.shape == (size,)
# Test finite motor and 1D signals
positions, intensities = prepare_arrays(motor, signal[:, 0])
assert positions.shape == (trains,)
assert intensities.shape == (trains,)
def test_prepare_arrays_range():
trains, pulses = 5, 10
motor = np.arange(trains)
signal = np.random.randint(100, size=(trains, pulses))
# Test valid range, inside bounds
positions, intensities = prepare_arrays(motor, signal, xRange=[2, 4])
assert (positions.min(), positions.max()) == (2, 4)
unique = np.unique(positions)
np.testing.assert_array_equal(unique, [2, 3, 4])
assert intensities.shape == (unique.size * pulses,)
# Test invalid ranges
with pytest.raises(ValueError):
prepare_arrays(motor, signal, xRange=[5, 3])
with pytest.raises(ValueError):
prepare_arrays(motor, signal, xRange=[3, 3])
def test_knife_edge_base():
p0 = [0, -1.5, 1, 0]
x = np.linspace(-3, 3)
y = erfc(x, *p0)
noise = y * np.random.normal(0, .02, y.shape) # 2% error
eff_y = y + noise
# Test noisy data
popt, _ = knife_edge_base(x, eff_y)
np.testing.assert_allclose(p0, popt, atol=1e-1)
# Test flipped data
popt, _ = knife_edge_base(x, eff_y[::-1])
p0[1] = abs(p0[1]) # Absolute value when flipped
np.testing.assert_allclose(p0, popt, atol=1e-1)
def _with_values(array, value, num=5):
copy = array.astype(float)
copy.ravel()[np.random.choice(copy.size, num, replace=False)] = value
return copy
__all__ = [
'mnemonics'
]
mnemonics = {
# Machine
"sase3": ({'source': 'SCS_RR_UTC/MDL/BUNCH_DECODER',
'key': 'sase3.pulseIds.value',
'dim': ['bunchId']},),
"sase2": ({'source': 'SCS_RR_UTC/MDL/BUNCH_DECODER',
'key': 'sase2.pulseIds.value',
'dim': ['bunchId']},),
"sase1": ({'source': 'SCS_RR_UTC/MDL/BUNCH_DECODER',
'key': 'sase1.pulseIds.value',
'dim': ['bunchId']},),
"laser": ({'source': 'SCS_RR_UTC/MDL/BUNCH_DECODER',
'key': 'laser.pulseIds.value',
'dim': ['bunchId']},),
"maindump": ({'source': 'SCS_RR_UTC/MDL/BUNCH_DECODER',
'key': 'maindump.pulseIds.value',
'dim': ['bunchId']},),
"bunchpattern": ({'source': 'SCS_RR_UTC/TSYS/TIMESERVER',
'key': 'readBunchPatternTable.value',
'dim': None},),
"bunchPatternTable": (
{'source': 'SCS_RR_UTC/TSYS/TIMESERVER:outputBunchPattern',
'key': 'data.bunchPatternTable',
'dim': ['pulse_slot']},
{'source': 'SCS_RR_UTC/TSYS/TIMESERVER',
'key': 'bunchPatternTable.value',
'dim': ['pulse_slot']}),
"npulses_sase3": ({'source': 'SCS_RR_UTC/MDL/BUNCH_DECODER',
'key': 'sase3.nPulses.value',
'dim': None},),
"npulses_sase1": ({'source': 'SCS_RR_UTC/MDL/BUNCH_DECODER',
'key': 'sase1.nPulses.value',
'dim': None},),
"npulses_laser": ({'source': 'SCS_RR_UTC/MDL/BUNCH_DECODER',
'key': 'laser.nPulses.value',
'dim': None},),
"bunchPatternTable_SA3": (
{'source': 'SA3_BR_UTC/TSYS/TIMESERVER:outputBunchPattern',
'key': 'data.bunchPatternTable',
'dim': ['pulse_slot']},),
# Bunch Arrival Monitors
"BAM414": ({'source': 'XFEL_SDIAG_BAM/DOOCS/414_B2:output',
'key': 'data.absoluteTD',
'dim': ['BAMbunchId'],
'extract': 'BAM'},
{'source': 'SCS_ILH_LAS/DOOCS/BAM_414_B2:output',
'key': 'data.absoluteTD',
'dim': ['BAMbunchId'],
'extract': 'BAM'},
{'source': 'SCS_ILH_LAS/DOOCS/BAM_414_B2:output',
'key': 'data.lowChargeArrivalTime',
'dim': ['BAMbunchId'],
'extract': 'BAM'},),
"BAM414_avg": ({'source': 'XFEL_SDIAG_BAM/DOOCS/414_B2',
'key': 'meanSA3.value',
'dim': None,
'extract': 'BAM'},
{'source': 'SCS_ILH_LAS/DOOCS/BAM_414_B2',
'key': 'meanSA3.value',
'dim': None,
'extract': 'BAM'},),
"BAM1932M": ({'source': 'XFEL_SDIAG_BAM/DOOCS/1932M_TL:output',
'key': 'data.absoluteTD',
'dim': ['BAMbunchId'],
'extract': 'BAM'},
{'source': 'SCS_ILH_LAS/DOOCS/BAM_1932M_TL:output',
'key': 'data.absoluteTD',
'dim': ['BAMbunchId'],
'extract': 'BAM'},
{'source': 'SCS_ILH_LAS/DOOCS/BAM_1932M_TL:output',
'key': 'data.lowChargeArrivalTime',
'dim': ['BAMbunchId'],
'extract': 'BAM'},),
"BAM1932M_avg": ({'source': 'XFEL_SDIAG_BAM/DOOCS/1932M_TL',
'key': 'meanSA3.value',
'dim': None,
'extract': 'BAM'},
{'source': 'SCS_ILH_LAS/DOOCS/BAM_1932M_TL',
'key': 'meanSA3.value',
'dim': None,
'extract': 'BAM'},),
"BAM1932S": ({'source': 'XFEL_SDIAG_BAM/DOOCS/1932S_TL:output',
'key': 'data.absoluteTD',
'dim': ['BAMbunchId'],
'extract': 'BAM'},
{'source': 'SCS_ILH_LAS/DOOCS/BAM_1932S_TL:output',
'key': 'data.absoluteTD',
'dim': ['BAMbunchId'],
'extract': 'BAM'},
{'source': 'SCS_ILH_LAS/DOOCS/BAM_1932S_TL:output',
'key': 'data.lowChargeArrivalTime',
'dim': ['BAMbunchId'],
'extract': 'BAM'},),
"BAM1932S_avg": ({'source': 'XFEL_SDIAG_BAM/DOOCS/1932S_TL',
'key': 'meanSA3.value',
'dim': None,
'extract': 'BAM'},
{'source': 'SCS_ILH_LAS/DOOCS/BAM_1932S_TL',
'key': 'meanSA3.value',
'dim': None,
'extract': 'BAM'},),
"BAM2955_S3": ({'source': 'XFEL_SDIAG_BAM/DOOCS/2955_S3:output',
'key': 'data.absoluteTD',
'dim': ['BAMbunchId'],
'extract': 'BAM'},),
"BAM2955_S3_avg": ({'source': 'XFEL_SDIAG_BAM/DOOCS/2955_S3',
'key': 'meanSA3.value',
'dim': None,
'extract': 'BAM'},),
"BAM_SLOW_FB": ({'source': 'SCS_BAM_FEEDBACK/MDL/CHECKER',
'key': 'sf_enabled.value',
'dim': None},),
"BAM_FAST_FB": ({'source': 'SCS_BAM_FEEDBACK/MDL/CHECKER',
'key': 'ff_enabled.value',
'dim': None},),
# SA3
"nrj": ({'source': 'SA3_XTD10_MONO/MDL/PHOTON_ENERGY',
'key': 'actualEnergy.value',
'dim': None},),
"nrj_target": ({'source': 'SA3_XTD10_MONO/MDL/PHOTON_ENERGY',
'key': 'targetEnergy.value',
'dim': None},),
"mono_order": ({'source': 'SA3_XTD10_MONO/MDL/PHOTON_ENERGY',
'key': 'M.value',
'dim': None},),
"M2BEND": ({'source': 'SA3_XTD10_MIRR-2/MOTOR/BENDER',
'key': 'actualPosition.value',
'dim': None},),
"tpi": ({'source': 'SCS_XTD10_TPI/DCTRL/SHUTTER',
'key': 'operationModePLC.value',
'dim': None},),
"TPI_STATE": ({'source': 'SCS_XTD10_TPI/DCTRL/SHUTTER',
'key': 'hardwareStatusBitField.value',
'dim': None},),
"VSLIT": ({'source': 'SA3_XTD10_VSLIT/MDL/BLADE',
'key': 'actualGap.value',
'dim': None},),
"ESLIT": ({'source': 'SCS_XTD10_ESLIT/MDL/MAIN',
'key': 'actualGap.value',
'dim': None},),
"HSLIT": ({'source': 'SCS_XTD10_HSLIT/MDL/BLADE',
'key': 'actualGap.value',
'dim': None},),
"H_BLADE_LEFT": ({'source': 'SCS_XTD10_HSLIT/MOTOR/BLADE_LEFT',
'key': 'actualPosition.value',
'dim': None},),
"H_BLADE_RIGHT": ({'source': 'SCS_XTD10_HSLIT/MOTOR/BLADE_RIGHT',
'key': 'actualPosition.value',
'dim': None},),
"transmission": ({'source': 'SA3_XTD10_VAC/MDL/GATT_TRANSMISSION_MONITOR',
'key': 'Estimated_Tr.value',
'dim': None},
{'source': 'SA3_XTD10_GATT/MDL/GATT_TRANSMISSION_MONITOR',
'key': 'Estimated_Tr.value',
'dim': None},),
"transmission_setpoint": ({'source': 'SA3_XTD10_VAC/MDL/GATT_PHYSICS_UNIT',
'key': 'TransmissionFactor.value',
'dim': None},),
"transmission_col2": (
{'source': 'SA3_XTD10_VAC/MDL/GATT_TRANSMISSION_MONITOR',
'key': 'second_color_Estimated_Tr.value',
'dim': None},),
"GATT_pressure": ({'source': 'SA3_XTD10_VAC/MDL/GATT_P_CELL',
'key': 'value.value',
'dim': None},
{'source': 'P_GATT',
'key': 'value.value',
'dim': None},),
"navitar": ({'source': 'SCS_XTD10_IMGES/CAM/BEAMVIEW_NAVITAR:daqOutput',
'key': 'data.image.pixels',
'dim': ['x', 'y']},),
"UND": ({'source': 'SA3_XTD10_UND/DOOCS/PHOTON_ENERGY',
'key': 'actualPosition.value',
'dim': None},),
"UND2": ({'source': 'SA3_XTD10_UND/DOOCS/PHOTON_ENERGY_COLOR2',
'key': 'actualPosition.value',
'dim': None},),
"UND3": ({'source': 'SA3_XTD10_UND/DOOCS/PHOTON_ENERGY_COLOR3',
'key': 'actualPosition.value',
'dim': None},),
"MAG_CHICANE_DELAY": ({'source': 'SA3_XTD4_CHICANE/DOOCS/SXR2CPP',
'key': 'XFEL_MAGNETS_CHICANE_SXR2CPP.dtFs.value',
'dim': None},),
"AppleX": ({'source': 'SA3_XTD10_UND/DOOCS/PHOTON_ENERGY',
'key': 'actualPolarization.value',
'dim': None},),
# PES
"PES_1Araw": ({'source': 'SA3_XTD10_PES/ADC/1:network',
'key': 'digitizers.channel_1_A.raw.samples',
'dim': ['PESsampleId']},),
"PES_1Braw": ({'source': 'SA3_XTD10_PES/ADC/1:network',
'key': 'digitizers.channel_1_B.raw.samples',
'dim': ['PESsampleId']},),
"PES_1Craw": ({'source': 'SA3_XTD10_PES/ADC/1:network',
'key': 'digitizers.channel_1_C.raw.samples',
'dim': ['PESsampleId']},),
"PES_1Draw": ({'source': 'SA3_XTD10_PES/ADC/1:network',
'key': 'digitizers.channel_1_D.raw.samples',
'dim': ['PESsampleId']},),
"PES_2Araw": ({'source': 'SA3_XTD10_PES/ADC/1:network',
'key': 'digitizers.channel_2_A.raw.samples',
'dim': ['PESsampleId']},),
"PES_2Braw": ({'source': 'SA3_XTD10_PES/ADC/1:network',
'key': 'digitizers.channel_2_B.raw.samples',
'dim': ['PESsampleId']},),
"PES_2Craw": ({'source': 'SA3_XTD10_PES/ADC/1:network',
'key': 'digitizers.channel_2_C.raw.samples',
'dim': ['PESsampleId']},),
"PES_2Draw": ({'source': 'SA3_XTD10_PES/ADC/1:network',
'key': 'digitizers.channel_2_D.raw.samples',
'dim': ['PESsampleId']},),
"PES_4Araw": ({'source': 'SA3_XTD10_PES/ADC/1:network',
'key': 'digitizers.channel_4_A.raw.samples',
'dim': ['PESsampleId']},),
"PES_4Braw": ({'source': 'SA3_XTD10_PES/ADC/1:network',
'key': 'digitizers.channel_4_B.raw.samples',
'dim': ['PESsampleId']},),
"PES_4Craw": ({'source': 'SA3_XTD10_PES/ADC/1:network',
'key': 'digitizers.channel_4_C.raw.samples',
'dim': ['PESsampleId']},),
"PES_4Draw": ({'source': 'SA3_XTD10_PES/ADC/1:network',
'key': 'digitizers.channel_4_D.raw.samples',
'dim': ['PESsampleId']},),
"PES_3Aaw": ({'source': 'SA3_XTD10_PES/ADC/1:network',
'key': 'digitizers.channel_3_A.raw.samples',
'dim': ['PESsampleId']},),
"PES_3Braw": ({'source': 'SA3_XTD10_PES/ADC/1:network',
'key': 'digitizers.channel_3_B.raw.samples',
'dim': ['PESsampleId']},),
"PES_3Craw": ({'source': 'SA3_XTD10_PES/ADC/1:network',
'key': 'digitizers.channel_3_C.raw.samples',
'dim': ['PESsampleId']},),
"PES_3Draw": ({'source': 'SA3_XTD10_PES/ADC/1:network',
'key': 'digitizers.channel_3_D.raw.samples',
'dim': ['PESsampleId']},),
"PES_pressure": ({'source': 'SA3_XTD10_PES/GAUGE/G30310F',
'key': 'value.value',
'dim': None},),
"PES_RV": ({'source': 'SA3_XTD10_PES/MDL/DAQ_MPOD',
'key': 'u213.value',
'dim': None},),
"PES_N2": ({'source': 'SA3_XTD10_PES/DCTRL/V30300S_NITROGEN',
'key': 'hardwareState.value',
'dim': None},),
"PES_Ne": ({'source': 'SA3_XTD10_PES/DCTRL/V30310S_NEON',
'key': 'hardwareState.value',
'dim': None},),
"PES_Kr": ({'source': 'SA3_XTD10_PES/DCTRL/V30320S_KRYPTON',
'key': 'hardwareState.value',
'dim': None},),
"PES_Xe": ({'source': 'SA3_XTD10_PES/DCTRL/V30330S_XENON',
'key': 'hardwareState.value',
'dim': None},),
# XTD10 MCP (after GATT)
'XTD10_MCP3raw': ({'source': 'SA3_XTD10_MCP/ADC/1:channel_3.output',
'key': 'data.rawData',
'dim': ['XTD10_MCPsampleId']},),
'XTD10_MCP5raw': ({'source': 'SA3_XTD10_MCP/ADC/1:channel_5.output',
'key': 'data.rawData',
'dim': ['XTD10_MCPsampleId']},),
'XTD10_MCP9raw': ({'source': 'SA3_XTD10_MCP/ADC/1:channel_9.output',
'key': 'data.rawData',
'dim': ['XTD10_MCPsampleId']},),
# DPS imagers
"DPS1CAM2": ({'source': 'SCS_BLU_DPS-1/CAM/IMAGER2CAMERA:daqOutput',
'key': 'data.image.pixels',
'dim': ['dps1cam2_y', 'dps1cam2_x']},),
"DPS2CAM2": ({'source': 'SCS_BLU_DPS-2/CAM/IMAGER2CAMERA:daqOutput',
'key': 'data.image.pixels',
'dim': ['dps2cam2_y', 'dps2cam2_x']},),
# XOX
"XOX_ANALYZER": ({'source': 'SCS_BLU_DPS-2/MOTOR/X2',
'key': 'actualPosition.value',
'dim': None},),
"XOX_HWP": ({'source': 'SCS_ILH_LAS/MOTOR/LT_SPARE1',
'key': 'actualPosition.value',
'dim': None},),
# XTD10 XGM
# keithley
"XTD10_photonFlux": ({'source': 'SA3_XTD10_XGM/XGM/DOOCS',
'key': 'pulseEnergy.photonFlux.value',
'dim': None},),
"XTD10_photonFlux_sigma": ({'source': 'SA3_XTD10_XGM/XGM/DOOCS',
'key': 'pulseEnergy.photonFluxSigma.value',
'dim': None},),
# ADC
"XTD10_XGM": ({'source': 'SA3_XTD10_XGM/XGM/DOOCS:output',
'key': 'data.intensityTD',
'dim': ['XGMbunchId'],
'extract': 'XGM'},),
"XTD10_XGM_sigma": ({'source': 'SA3_XTD10_XGM/XGM/DOOCS:output',
'key': 'data.intensitySigmaTD',
'dim': ['XGMbunchId'],
'extract': 'XGM'},),
"XTD10_SA3": ({'source': 'SA3_XTD10_XGM/XGM/DOOCS:output',
'key': 'data.intensitySa3TD',
'dim': ['XGMbunchId'],
'extract': 'XGM'},),
"XTD10_SA3_sigma": ({'source': 'SA3_XTD10_XGM/XGM/DOOCS:output',
'key': 'data.intensitySa3SigmaTD',
'dim': ['XGMbunchId'],
'extract': 'XGM'},),
"XTD10_SA1": ({'source': 'SA3_XTD10_XGM/XGM/DOOCS:output',
'key': 'data.intensitySa1TD',
'dim': ['XGMbunchId'],
'extract': 'XGM'},),
"XTD10_SA1_sigma": ({'source': 'SA3_XTD10_XGM/XGM/DOOCS:output',
'key': 'data.intensitySa1SigmaTD',
'dim': ['XGMbunchId'],
'extract': 'XGM'},),
# low pass averaged ADC
"XTD10_slowTrain": ({'source': 'SA3_XTD10_XGM/XGM/DOOCS',
'key': 'controlData.slowTrain.value',
'dim': None},),
"XTD10_slowTrain_SA1": ({'source': 'SA3_XTD10_XGM/XGM/DOOCS',
'key': 'controlData.slowTrainSa1.value',
'dim': None},),
"XTD10_slowTrain_SA3": ({'source': 'SA3_XTD10_XGM/XGM/DOOCS',
'key': 'controlData.slowTrainSa3.value',
'dim': None},),
# SCS XGM
# keithley
"SCS_photonFlux": ({'source': 'SCS_BLU_XGM/XGM/DOOCS',
'key': 'pulseEnergy.photonFlux.value',
'dim': None},),
"SCS_photonFlux_sigma": ({'source': 'SCS_BLU_XGM/XGM/DOOCS',
'key': 'pulseEnergy.photonFluxSigma.value',
'dim': None},),
"SCS_HAMP_HV": ({'source': 'SCS_BLU_XGM/XGM/DOOCS',
'key': 'hv.hamph.value',
'dim': None},),
# ADC
"SCS_XGM": ({'source': 'SCS_BLU_XGM/XGM/DOOCS:output',
'key': 'data.intensityTD',
'dim': ['XGMbunchId'],
'extract': 'XGM'},),
"SCS_XGM_sigma": ({'source': 'SCS_BLU_XGM/XGM/DOOCS:output',
'key': 'data.intensitySigmaTD',
'dim': ['XGMbunchId'],
'extract': 'XGM'},),
"SCS_SA1": ({'source': 'SCS_BLU_XGM/XGM/DOOCS:output',
'key': 'data.intensitySa1TD',
'dim': ['XGMbunchId'],
'extract': 'XGM'},),
"SCS_SA1_sigma": ({'source': 'SCS_BLU_XGM/XGM/DOOCS:output',
'key': 'data.intensitySa1SigmaTD',
'dim': ['XGMbunchId'],
'extract': 'XGM'},),
"SCS_SA3": ({'source': 'SCS_BLU_XGM/XGM/DOOCS:output',
'key': 'data.intensitySa3TD',
'dim': ['XGMbunchId'],
'extract': 'XGM'},),
"SCS_SA3_sigma": ({'source': 'SCS_BLU_XGM/XGM/DOOCS:output',
'key': 'data.intensitySa3SigmaTD',
'dim': ['XGMbunchId'],
'extract': 'XGM'},),
# low pass averaged ADC
"SCS_slowTrain": ({'source': 'SCS_BLU_XGM/XGM/DOOCS',
'key': 'controlData.slowTrain.value',
'dim': None},),
"SCS_slowTrain_SA1": ({'source': 'SCS_BLU_XGM/XGM/DOOCS',
'key': 'controlData.slowTrainSa1.value',
'dim': None},),
"SCS_slowTrain_SA3": ({'source': 'SCS_BLU_XGM/XGM/DOOCS',
'key': 'controlData.slowTrainSa3.value',
'dim': None},),
# KBS
"HFM_CAPB": ({'source': 'SCS_KBS_HFM/ASENS/CAPB',
'key': 'value.value',
'dim': None},),
"HFM_CAPF": ({'source': 'SCS_KBS_HFM/ASENS/CAPF',
'key': 'value.value',
'dim': None},),
"HFM_CAPM": ({'source': 'SCS_KBS_HFM/ASENS/CAPM',
'key': 'value.value',
'dim': None},),
"HFM_BENDERB": ({'source': 'SCS_KBS_HFM/MOTOR/BENDERB',
'key': 'encoderPosition.value',
'dim': None},),
"HFM_BENDERF": ({'source': 'SCS_KBS_HFM/MOTOR/BENDERF',
'key': 'encoderPosition.value',
'dim': None},),
"VFM_CAPB": ({'source': 'SCS_KBS_VFM/ASENS/CAPB',
'key': 'value.value',
'dim': None},),
"VFM_CAPF": ({'source': 'SCS_KBS_VFM/ASENS/CAPF',
'key': 'value.value',
'dim': None},),
"VFM_CAPM": ({'source': 'SCS_KBS_VFM/ASENS/CAPM',
'key': 'value.value',
'dim': None},),
"VFM_BENDERB": ({'source': 'SCS_KBS_VFM/MOTOR/BENDERB',
'key': 'encoderPosition.value',
'dim': None},),
"VFM_BENDERF": ({'source': 'SCS_KBS_VFM/MOTOR/BENDERF',
'key': 'encoderPosition.value',
'dim': None},),
"HFM_BENDING": ({'source': 'SCS_KBS_HFM/MDL/AVERAGER',
'key': 'result.value',
'dim': None},),
"VFM_BENDING": ({'source': 'SCS_KBS_VFM/MDL/AVERAGER',
'key': 'result.value',
'dim': None},),
# AFS LASER
"AFS_PhaseShifter": ({'source': 'SCS_ILH_LAS/PHASESHIFTER/DOOCS',
'key': 'actualPosition.value',
'dim': None},),
"AFS_DelayLine": ({'source': 'SCS_ILH_LAS/MOTOR/LT3',
'key': 'actualPosition.value',
'dim': None},
{'source': 'SCS_ILH_LAS/MOTOR/LT3',
'key': 'AActualPosition.value',
'dim': None},),
"AFS_HalfWP": ({'source': 'SCS_ILH_LAS/MOTOR/ROT_OPA_BWP1',
'key': 'actualPosition.value',
'dim': None},),
"AFS_FocusLens": ({'source': 'SCS_ILH_LAS/MOTOR/LT_SPARE1',
'key': 'actualPosition.value',
'dim': None},),
# 2nd lens of telescope
"AFS_TeleLens": ({'source': 'SCS_ILH_LAS/MOTOR/LT2',
'key': 'actualPosition.value',
'dim': None},),
# PP LASER 800 nm path
"PP800_PhaseShifter": ({'source': 'SCS_ILH_LAS/DOOCS/PP800_PHASESHIFTER',
'key': 'actualPosition.value',
'dim': None},),
"PP800_SynchDelayLine": ({'source': 'SCS_ILH_LAS/DOOCS/PPL_OPT_DELAY',
'key': 'actualPosition.value',
'dim': None},),
"PP800_DelayLine": ({'source': 'SCS_ILH_LAS/MOTOR/LT3',
'key': 'actualPosition.value',
'dim': None},
{'source': 'SCS_ILH_LAS/MOTOR/LT3',
'key': 'AActualPosition.value',
'dim': None},),
"PP800_T0_mm": ({'source': 'SCS_ILH_LAS/MDL/OPTICALDELAY_PP800',
'key': 'motorOffset.value',
'dim': None},),
"PP800_HalfWP": ({'source': 'SCS_ILH_LAS/MOTOR/ROT8WP1',
'key': 'actualPosition.value',
'dim': None},),
"PP800_HWP_POWER": ({'source': 'SCS_ILH_LAS/MDL/WAVEPLATE_PP800',
'key': 'actualPosition.value',
'dim': None},),
"PP800_FocusLens": ({'source': 'SCS_ILH_LAS/MOTOR/LT_SPARE1',
'key': 'actualPosition.value',
'dim': None},),
"PP800_HWP_POLARIZATION": ({'source': 'SCS_CDIFFT_FDM/MOTOR/FOCUS_1',
'key': 'actualPosition.value',
'dim': None},),
"LIN_FocusLens": ({'source': 'SCS_CDIFFT_FDM/MOTOR/FOCUS_2',
'key': 'actualPosition.value',
'dim': None},),
"FFT_FocusLens": ({'source': 'SCS_CDIFFT_FDM/MOTOR/BARLOW_1',
'key': 'actualPosition.value',
'dim': None},),
# 1st lens of telescope (setup of August 2019)
"PP800_TeleLens": ({'source': 'SCS_ILH_LAS/MOTOR/LT7',
'key': 'actualPosition.value',
'dim': None},),
"ILH_8CAM1": ({'source': 'SCS_ILH_LAS/CAM/8CAM1:daqOutput',
'key': 'data.image.pixels',
'dim': ['8cam1_y', '8cam1_x']},),
"ILH_8CAM5": ({'source': 'SCS_ILH_LAS/CAM/8CAM5:daqOutput',
'key': 'data.image.pixels',
'dim': ['8cam5_y', '8cam5_x']},),
"ILH_10CAM1": ({'source': 'SCS_ILH_LAS/CAM/10CAM1:daqOutput',
'key': 'data.image.pixels',
'dim': ['10cam1_y', '10cam1_x']},),
"ILH_CAM_SPARE3": ({'source': 'SCS_ILH_LAS/CAM/CAM_SPARE3:daqOutput',
'key': 'data.image.pixels',
'dim': ['cam_spare3_y', 'cam_spare3_x']},),
"ILH_CAM_SPARE4": ({'source': 'SCS_ILH_LAS/CAM/CAM_SPARE4:daqOutput',
'key': 'data.image.pixels',
'dim': ['cam_spare4_y', 'cam_spare4_x']},),
"ILH_CAM_SPARE5": ({'source': 'SCS_ILH_LAS/CAM/CAM_SPARE5:daqOutput',
'key': 'data.image.pixels',
'dim': ['cam_spare5_y', 'cam_spare5_x']},),
"ILH_PIDelay": ({'source': 'SCS_ILH_LAS/MOTOR/PIDELAY',
'key': 'actualPosition.value',
'dim': None},),
"ZABER210_ODL": ({'source': 'SCS_ILH_LAS/MOTOR/ZABER210_ODL',
'key': 'actualPosition.value',
'dim': None},
{'source': 'SCS_ILH_LAS/MOTOR/EOS_DELAY',
'key': 'actualPosition.value',
'dim': None},),
"ZABER110_ODL": ({'source': 'SCS_ILH_LAS/MOTOR/ZABER110_ODL',
'key': 'actualPosition.value',
'dim': None},),
"virtual_sample": ({'source': 'SCS_LIN_SCR/CAM/VIRTUAL_SAMPLE:daqOutput',
'key': 'data.image.pixels',
'dim': ['vs_y', 'vs_x']},),
# SHG SETUP in ILH
## 800 nm delay line
"SHG_DELAY_800": ({'source': 'SCS_GPC_LAS/MOTOR/DELAY',
'key': 'actualPosition.value',
'dim': None},),
## 266 nm attenuator
"SHG_ATT_266": ({'source': 'SCS_GPC_MOV/MOTOR/THETA',
'key': 'actualPosition.value',
'dim': None},),
## finesse delay stage to find temporal overlap between 800 nm and 400 nm beams
"SHG_THG_DELAY": ({'source': 'SCS_LASLAB_LASER/MOTOR/STAGE-4',
'key': 'actualPosition.value',
'dim': None},),
## 266 nm half wave plate rotation stage
"SHG_HWP_266": ({'source': 'SCS_LASLAB_LASER/MOTOR/STAGE-2',
'key': 'actualPosition.value',
'dim': None},),
## 266 nm lens
"SHG_LENS_266": ({'source': 'SCS_GPC_LAS/PMOTOR/PARABOLA_RY',
'key': 'actualPosition.value',
'dim': None},),
## 800 nm attenuator
"SHG_ATT_800": ({'source': 'SCS_GPC_MOV/MOTOR/THETAMAG',
'key': 'actualPosition.value',
'dim': None},),
## 800 nm HWP
"SHG_HWP_800": ({'source': 'SCS_LASLAB_LASER/MOTOR/STAGE-1',
'key': 'actualPosition.value',
'dim': None},),
## 400 nm Glan polarizer
"SHG_GLAN_POL": ({'source': 'SCS_LASLAB_LASER/MOTOR/STAGE-3',
'key': 'actualPosition.value',
'dim': None},),
## 800 nm lens
"SHG_LENS_800": ({'source': 'SCS_GPC_LAS/PMOTOR/PARABOLA_RZ',
'key': 'actualPosition.value',
'dim': None},),
## sample TX
"SHG_SAM_TX": ({'source': 'SCS_GPC_LAS/PMOTOR/STAGE_X',
'key': 'actualPosition.value',
'dim': None},),
## sample TY
"SHG_SAM_TY": ({'source': 'SCS_GPC_LAS/PMOTOR/STAGE_Z',
'key': 'actualPosition.value',
'dim': None},),
## sample RY
"SHG_SAM_RY": ({'source': 'SCS_GPC_MOV/MOTOR/X',
'key': 'actualPosition.value',
'dim': None},),
## cameras
"SHG_CAM1": ({'source': 'SCS_GPC_SHG/CAM/CAMERA1:daqOutput',
'key': 'data.image.pixels',
'dim': ['cam1_y', 'cam1_x']},),
"SHG_CAM2": ({'source': 'SCS_GPC_SHG/CAM/CAMERA2:daqOutput',
'key': 'data.image.pixels',
'dim': ['cam2_y', 'cam2_x']},),
"SHG_CAM3": ({'source': 'SCS_GPC_SHG/CAM/CAMERA3:daqOutput',
'key': 'data.image.pixels',
'dim': ['cam3_y', 'cam3_x']},),
"SHG_CAM4": ({'source': 'SCS_GPC_SHG/CAM/CAMERA4:daqOutput',
'key': 'data.image.pixels',
'dim': ['cam4_y', 'cam4_x']},),
"SHG_CAM5": ({'source': 'SCS_GPC_SHG/CAM/CAMERA5:daqOutput',
'key': 'data.image.pixels',
'dim': ['cam5_y', 'cam5_x']},),
"SHG_CAM6": ({'source': 'SCS_GPC_SHG/CAM/CAMERA6:daqOutput',
'key': 'data.image.pixels',
'dim': ['cam6_y', 'cam6_x']},),
"SHG_CAM7": ({'source': 'SCS_GPC_SHG/CAM/CAMERA7:daqOutput',
'key': 'data.image.pixels',
'dim': ['cam7_y', 'cam7_x']},),
# GPC
"GPC_EOS_DelayLine": ({'source': 'SCS_CDIDET_GRD/MOTOR/IMAGER',
'key': 'actualPosition.value',
'dim': None},),
"GPC_X": ({'source': 'SCS_GPC_MOV/MOTOR/X',
'key': 'actualPosition.value',
'dim': None},),
"GPC_Y": ({'source': 'SCS_GPC_MOV/MOTOR/Y',
'key': 'actualPosition.value',
'dim': None},),
"GPC_THETA": ({'source': 'SCS_GPC_MOV/MOTOR/THETA',
'key': 'actualPosition.value',
'dim': None},),
"GPC_THETAMAG": ({'source': 'SCS_GPC_MOV/MOTOR/THETAMAG',
'key': 'actualPosition.value',
'dim': None},),
# FFT
"scannerX": ({'source': 'SCS_CDIFFT_SAM/LMOTOR/SCANNERX',
'key': 'actualPosition.value',
'dim': None},),
"scannerY": ({'source': 'SCS_CDIFFT_SAM/MOTOR/SCANNERY2',
'key': 'actualPosition.value',
'dim': None},
{'source': 'SCS_CDIFFT_SAM/MOTOR/SCANNERY',
'key': 'actualPosition.value',
'dim': None},),
"scannerY_enc": ({'source': 'SCS_CDIFFT_SAM/ENC/SCANNERY',
'key': 'value.value',
'dim': None},),
"sampleH": ({'source': 'SCS_CDIFFT_SAM/MDL/GRIDSAMPLE',
'key': 'actualH.value',
'dim': None},),
"sampleK": ({'source': 'SCS_CDIFFT_SAM/MDL/GRIDSAMPLE',
'key': 'actualK.value',
'dim': None},),
"FFT_SAM_Z": ({'source': 'SCS_CDIFFT_MOV/MOTOR/SAM_Z',
'key': 'actualPosition.value',
'dim': None},),
"FFT_SAM_Z_ENC": ({'source': 'SCS_CDIFFT_MOV/ENC/SAM_Z',
'key': 'value.value',
'dim': None},),
"magnet": ({'source': 'SCS_CDIFFT_MAG/MDL/FIELD',
'key': 'actualPosition.value',
'dim': None},),
"magnet_current": ({'source': 'SCS_CDIFFT_MAG/ASENS/PSU_CMON',
'key': 'value.value',
'dim': None},
{'source': 'SCS_CDIFFT_MAG/ASENS/CURRENT',
'key': 'value.value',
'dim': None},
{'source': 'SCS_CDIFFT_MAG/SUPPLY/CURRENT',
'key': 'actualCurrent.value',
'dim': None}),
"Vertical_FDM": ({'source': 'SCS_CDIFFT_LDM/CAM/CAMERA1A:daqOutput',
'key': 'data.image.pixels',
'dim': ['vfdm_y', 'vfdm_x']},),
"Horizontal_FDM": ({'source': 'SCS_CDIFFT_LDM/CAM/CAMERA2A:daqOutput',
'key': 'data.image.pixels',
'dim': ['hfdm_y', 'hfdm_x']},),
"LLC_webcam1": ({'source': 'SCS_CDILLC_VAC/CAM/WEBCAMERA1:daqOutput',
'key': 'data.image.pixels',
'dim': ['llc1_y', 'llc1_x']},),
"LLC_webcam2": ({'source': 'SCS_CDILLC_VAC/CAM/WEBCAMERA2:daqOutput',
'key': 'data.image.pixels',
'dim': ['llc2_y', 'llc2_x']},),
"LLC_webcam3": ({'source': 'SCS_CDILLC_VAC/CAM/WEBCAMERA3:daqOutput',
'key': 'data.image.pixels',
'dim': ['llc3_y', 'llc3_x']},),
# DSSC
"DSSC_delay": ({'source': 'SCS_RR_SYS/TSYS/UTC-1-S2',
'key': 'backTrg3.delay.value',
'dim': None},),
# CHEM
"chem_X": ({'source': 'SCS_CHEM_JET/MOTOR/MANA_X',
'key': 'actualPosition.value',
'dim': None},),
"chem_Y": ({'source': 'SCS_CHEM_JET/MOTOR/MANA_Y',
'key': 'actualPosition.value',
'dim': None},),
"chem_Z": ({'source': 'SCS_CHEM_JET/MOTOR/MANA_Z',
'key': 'actualPosition.value',
'dim': None},),
"chem_ppl_x": ({'source': 'SCS_CHEM_LIN/PMOTOR/X',
'key': 'actualPosition.value',
'dim': None},),
"chem_ppl_y": ({'source': 'SCS_CHEM_LIN/PMOTOR/Y',
'key': 'actualPosition.value',
'dim': None},),
"chem_ppl_pol": ({'source': 'SCS_CDIFFT_FDM/MOTOR/FOCUS_1',
'key': 'actualPosition.value',
'dim': None},),
# hRIXS
"hRIXS_det": ({'source': 'SCS_HRIXS_DET/CAM/CAMERA:daqOutput',
'key': 'data.image.pixels',
'dim': ['x', 'y']},),
"hRIXS_exposure": ({'source': 'SCS_HRIXS_DET/CAM/CAMERA',
'key': 'ShutterTiming.exposureTime.value',
'dim': None},),
"hRIXS_delay": ({'source': 'SCS_HRIXS_DET/MDL/CAMERA_SHUTTER',
'key': 'delay.value',
'dim': None},),
"hRIXS_index": ({'source': 'SCS_HRIXS_DET/MDL/CAMERA_SHUTTER',
'key': 'currentIndex.value',
'dim': None},),
"hRIXS_norm": ({'source': 'SCS_HRIXS_DET/MDL/CAMERA_SHUTTER',
'key': 'xgmSum.value',
'dim': None},),
"hRIXS_ABB": ({'source': 'SCS_HRIXS_MOV/MOTOR/ABB',
'key': 'actualPosition.value',
'dim': None},),
"hRIXS_ABL": ({'source': 'SCS_HRIXS_MOV/MOTOR/ABL',
'key': 'actualPosition.value',
'dim': None},),
"hRIXS_ABR": ({'source': 'SCS_HRIXS_MOV/MOTOR/ABR',
'key': 'actualPosition.value',
'dim': None},),
"hRIXS_ABT": ({'source': 'SCS_HRIXS_MOV/MOTOR/ABT',
'key': 'actualPosition.value',
'dim': None},),
"hRIXS_DRX": ({'source': 'SCS_HRIXS_MOV/MOTOR/DRX',
'key': 'actualPosition.value',
'dim': None},),
"hRIXS_DTY1":({'source': 'SCS_HRIXS_MOV/MOTOR/DTY1',
'key': 'actualPosition.value',
'dim': None},),
"hRIXS_DTZ": ({'source': 'SCS_HRIXS_MOV/MOTOR/DTZ',
'key': 'actualPosition.value',
'dim': None},),
"hRIXS_GMX": ({'source': 'SCS_HRIXS_MOV/MOTOR/GMX',
'key': 'actualPosition.value',
'dim': None},),
"hRIXS_GRX": ({'source': 'SCS_HRIXS_MOV/MOTOR/GRX',
'key': 'actualPosition.value',
'dim': None},),
"hRIXS_GTLY": ({'source': 'SCS_HRIXS_MOV/MOTOR/GTLY',
'key': 'actualPosition.value',
'dim': None},),
"hRIXS_GTRY": ({'source': 'SCS_HRIXS_MOV/MOTOR/GTRY',
'key': 'actualPosition.value',
'dim': None},),
"hRIXS_GTX": ({'source': 'SCS_HRIXS_MOV/MOTOR/GTX',
'key': 'actualPosition.value',
'dim': None},),
"hRIXS_GTZ": ({'source': 'SCS_HRIXS_MOV/MOTOR/GTZ',
'key': 'actualPosition.value',
'dim': None},),
"hRIXS_RRY": ({'source': 'SCS_HRIXS_MOV/MOTOR/RRY',
'key': 'actualPosition.value',
'dim': None},),
# XRD
"XRD_DRY": ({'source': 'SCS_XRD_MOV/MOTOR/DRY',
'key': 'actualPosition.value',
'dim': None},),
"XRD_SRX": ({'source': 'SCS_XRD_MOV/MOTOR/SRX',
'key': 'actualPosition.value',
'dim': None},),
"XRD_SRY": ({'source': 'SCS_XRD_MOV/MOTOR/SRY',
'key': 'actualPosition.value',
'dim': None},),
"XRD_SRZ": ({'source': 'SCS_XRD_MOV/MOTOR/SRZ',
'key': 'actualPosition.value',
'dim': None},),
"XRD_STX": ({'source': 'SCS_XRD_MOV/MOTOR/STX',
'key': 'actualPosition.value',
'dim': None},),
"XRD_STY": ({'source': 'SCS_XRD_MOV/MOTOR/STY',
'key': 'actualPosition.value',
'dim': None},),
"XRD_STZ": ({'source': 'SCS_XRD_MOV/MOTOR/STZ',
'key': 'actualPosition.value',
'dim': None},),
"XRD_SXT1Y": ({'source': 'SCS_XRD_MOV/MOTOR/SXT1Y',
'key': 'actualPosition.value',
'dim': None},),
"XRD_SXT2Y": ({'source': 'SCS_XRD_MOV/MOTOR/SXT2Y',
'key': 'actualPosition.value',
'dim': None},),
"XRD_SXTX": ({'source': 'SCS_XRD_MOV/MOTOR/SXTX',
'key': 'actualPosition.value',
'dim': None},),
"XRD_SXTZ": ({'source': 'SCS_XRD_MOV/MOTOR/SXTZ',
'key': 'actualPosition.value',
'dim': None},),
"XRD_CRY": ({'source': 'SCS_XRD_MOV/MOTOR/CRY',
'key': 'actualPosition.value',
'dim': None},),
"XRD_CTX": ({'source': 'SCS_XRD_MOV/MOTOR/CTX',
'key': 'actualPosition.value',
'dim': None},),
"XRD_CTY": ({'source': 'SCS_XRD_MOV/MOTOR/CTY',
'key': 'actualPosition.value',
'dim': None},),
"XRD_CTZ": ({'source': 'SCS_XRD_MOV/MOTOR/CTZ',
'key': 'actualPosition.value',
'dim': None},),
# Viking
"VIKING_FILTER": ({'source': 'SCS_EXP_VIKING/MDL/FILTER_SELECT',
'key': 'actualIncrement.value',
'dim': None},),
"VIKING_AX": ({'source': 'SCS_EXP_VIKING/MOTOR/AX',
'key': 'actualPosition.value',
'dim': None},),
"VIKING_Y": ({'source': 'SCS_EXP_VIKING/MOTOR/Y',
'key': 'actualPosition.value',
'dim': None},),
"VIKING_Z": ({'source': 'SCS_EXP_VIKING/MOTOR/Z',
'key': 'actualPosition.value',
'dim': None},),
# PI-MTE3 CCD camera
"MTE3": ({'source': 'SCS_CDIDET_MTE3/CAM/CAMERA:daqOutput',
'key': 'data.image.pixels',
'dim': ['x', 'y']},),
"GRID_H": ({'source': 'SCS_CDIFFT_SAM/MDL/GRIDSAMPLE',
'key': 'actualH.value',
'dim': None},),
"GRID_K": ({'source': 'SCS_CDIFFT_SAM/MDL/GRIDSAMPLE',
'key': 'actualK.value',
'dim': None},),
# Andor Newton CCD camera
"newton": ({'source': 'SCS_EXP_NEWTON/CAM/CAMERA:daqOutput',
'key': 'data.image.pixels',
'dim': ['newt_y', 'newt_x']},),
# Andor Marana camera
"MaranaX": ({'source': 'SCS_HRIXS_DET/CAM/MARANAX:daqOutput',
'key': 'data.image.pixels',
'dim': ['mara_y', 'mara_x']},),
# FastCCD: raw images if in the raw folder;
# dark-subtracted and relative-gain corrected if in the proc folder
"fastccd": ({'source': 'SCS_CDIDET_FCCD2M/DAQ/FCCD:daqOutput',
'key': 'data.image.pixels',
'dim': ['x', 'y']},),
# FastCCD with common mode correction
"fastccd_cm": ({'source': 'SCS_CDIDET_FCCD2M/DAQ/FCCD:daqOutput',
'key': 'data.image.pixels_cm',
'dim': ['x', 'y']},),
# FastCCD charge split correction in very low photon count regime
"fastccd_classified": ({'source': 'SCS_CDIDET_FCCD2M/DAQ/FCCD:daqOutput',
'key': 'data.image.pixels_classified',
'dim': ['x', 'y']},),
# FastCCD event multiplicity from the charge split correction:
# 0: no events
# 100, 101: single events
# 200-203: charge split into two pixels in four different orientations
# 300-303: charge split into three pixels in four different orientations
# 400-403: charge split into four pixels in four different orientations
# 1000: charge in more than four neighboring pixels. Cannot be produced
# by a single photon alone.
"fastccd_patterns": ({'source': 'SCS_CDIDET_FCCD2M/DAQ/FCCD:daqOutput',
'key': 'data.image.patterns',
'dim': ['x', 'y']},),
# FastCCD gain map, 0 high gain, 1 medium gain, 2 low gain
"fastccd_gain": ({'source': 'SCS_CDIDET_FCCD2M/DAQ/FCCD:daqOutput',
'key': 'data.image.gain',
'dim': ['x', 'y']},),
# FastCCD mask, bad pixel map to be ignored if > 0
"fastccd_mask": ({'source': 'SCS_CDIDET_FCCD2M/DAQ/FCCD:daqOutput',
'key': 'data.image.mask',
'dim': ['x', 'y']},),
# TIM
"MCP1apd": ({'source': 'SCS_UTC1_ADQ/ADC/1:network',
'key': 'digitizers.channel_1_D.apd.pulseIntegral',
'dim': ['apdId'],
'extract': 'peaks'},),
"MCP1raw": ({'source': 'SCS_UTC1_ADQ/ADC/1:network',
'key': 'digitizers.channel_1_D.raw.samples',
'dim': ['samplesId'],
'extract': 'peaks'},),
"MCP2apd": ({'source': 'SCS_UTC1_ADQ/ADC/1:network',
'key': 'digitizers.channel_1_C.apd.pulseIntegral',
'dim': ['apdId'],
'extract': 'peaks'},),
"MCP2raw": ({'source': 'SCS_UTC1_ADQ/ADC/1:network',
'key': 'digitizers.channel_1_C.raw.samples',
'dim': ['samplesId'],
'extract': 'peaks'},),
"MCP3apd": ({'source': 'SCS_UTC1_ADQ/ADC/1:network',
'key': 'digitizers.channel_1_B.apd.pulseIntegral',
'dim': ['apdId'],
'extract': 'peaks'},),
"MCP3raw": ({'source': 'SCS_UTC1_ADQ/ADC/1:network',
'key': 'digitizers.channel_1_B.raw.samples',
'dim': ['samplesId'],
'extract': 'peaks'},),
"MCP4apd": ({'source': 'SCS_UTC1_ADQ/ADC/1:network',
'key': 'digitizers.channel_1_A.apd.pulseIntegral',
'dim': ['apdId'],
'extract': 'peaks'},),
"MCP4raw": ({'source': 'SCS_UTC1_ADQ/ADC/1:network',
'key': 'digitizers.channel_1_A.raw.samples',
'dim': ['samplesId'],
'extract': 'peaks'},),
# FastADC
"FastADC0peaks": ({'source': 'SCS_UTC1_MCP/ADC/1:channel_0.output',
'key': 'data.peaks',
'dim': ['peakId'],
'extract': 'peaks'},),
"FastADC0raw": ({'source': 'SCS_UTC1_MCP/ADC/1:channel_0.output',
'key': 'data.rawData',
'dim': ['fadc_samplesId'],
'extract': 'peaks'},),
"FastADC1peaks": ({'source': 'SCS_UTC1_MCP/ADC/1:channel_1.output',
'key': 'data.peaks',
'dim': ['peakId'],
'extract': 'peaks'},),
"FastADC1raw": ({'source': 'SCS_UTC1_MCP/ADC/1:channel_1.output',
'key': 'data.rawData',
'dim': ['fadc_samplesId'],
'extract': 'peaks'},),
"FastADC2peaks": ({'source': 'SCS_UTC1_MCP/ADC/1:channel_2.output',
'key': 'data.peaks',
'dim': ['peakId'],
'extract': 'peaks'},),
"FastADC2raw": ({'source': 'SCS_UTC1_MCP/ADC/1:channel_2.output',
'key': 'data.rawData',
'dim': ['fadc_samplesId'],
'extract': 'peaks'},),
"FastADC3peaks": ({'source': 'SCS_UTC1_MCP/ADC/1:channel_3.output',
'key': 'data.peaks',
'dim': ['peakId'],
'extract': 'peaks'},),
"FastADC3raw": ({'source': 'SCS_UTC1_MCP/ADC/1:channel_3.output',
'key': 'data.rawData',
'dim': ['fadc_samplesId'],
'extract': 'peaks'},),
"FastADC4peaks": ({'source': 'SCS_UTC1_MCP/ADC/1:channel_4.output',
'key': 'data.peaks',
'dim': ['peakId'],
'extract': 'peaks'},),
"FastADC4raw": ({'source': 'SCS_UTC1_MCP/ADC/1:channel_4.output',
'key': 'data.rawData',
'dim': ['fadc_samplesId'],
'extract': 'peaks'},),
"FastADC5peaks": ({'source': 'SCS_UTC1_MCP/ADC/1:channel_5.output',
'key': 'data.peaks',
'dim': ['peakId'],
'extract': 'peaks'},),
"FastADC5raw": ({'source': 'SCS_UTC1_MCP/ADC/1:channel_5.output',
'key': 'data.rawData',
'dim': ['fadc_samplesId'],
'extract': 'peaks'},),
"FastADC6peaks": ({'source': 'SCS_UTC1_MCP/ADC/1:channel_6.output',
'key': 'data.peaks',
'dim': ['peakId'],
'extract': 'peaks'},),
"FastADC6raw": ({'source': 'SCS_UTC1_MCP/ADC/1:channel_6.output',
'key': 'data.rawData',
'dim': ['fadc_samplesId'],
'extract': 'peaks'},),
"FastADC7peaks": ({'source': 'SCS_UTC1_MCP/ADC/1:channel_7.output',
'key': 'data.peaks',
'dim': ['peakId'],
'extract': 'peaks'},),
"FastADC7raw": ({'source': 'SCS_UTC1_MCP/ADC/1:channel_7.output',
'key': 'data.rawData',
'dim': ['fadc_samplesId'],
'extract': 'peaks'},),
"FastADC8peaks": ({'source': 'SCS_UTC1_MCP/ADC/1:channel_8.output',
'key': 'data.peaks',
'dim': ['peakId'],
'extract': 'peaks'},),
"FastADC8raw": ({'source': 'SCS_UTC1_MCP/ADC/1:channel_8.output',
'key': 'data.rawData',
'dim': ['fadc_samplesId'],
'extract': 'peaks'},),
"FastADC9peaks": ({'source': 'SCS_UTC1_MCP/ADC/1:channel_9.output',
'key': 'data.peaks',
'dim': ['peakId'],
'extract': 'peaks'},),
"FastADC9raw": ({'source': 'SCS_UTC1_MCP/ADC/1:channel_9.output',
'key': 'data.rawData',
'dim': ['fadc_samplesId'],
'extract': 'peaks'},),
# FastADC 2
"FastADC2_0peaks": ({'source': 'SCS_UTC2_FADC/ADC/1:channel_0.output',
'key': 'data.peaks',
'dim': ['peakId'],
'extract': 'peaks'},),
"FastADC2_0raw": ({'source': 'SCS_UTC2_FADC/ADC/1:channel_0.output',
'key': 'data.rawData',
'dim': ['fadc2_samplesId'],
'extract': 'peaks'},),
"FastADC2_1peaks": ({'source': 'SCS_UTC2_FADC/ADC/1:channel_1.output',
'key': 'data.peaks',
'dim': ['peakId'],
'extract': 'peaks'},),
"FastADC2_1raw": ({'source': 'SCS_UTC2_FADC/ADC/1:channel_1.output',
'key': 'data.rawData',
'dim': ['fadc2_samplesId'],
'extract': 'peaks'},),
"FastADC2_2peaks": ({'source': 'SCS_UTC2_FADC/ADC/1:channel_2.output',
'key': 'data.peaks',
'dim': ['peakId'],
'extract': 'peaks'},),
"FastADC2_2raw": ({'source': 'SCS_UTC2_FADC/ADC/1:channel_2.output',
'key': 'data.rawData',
'dim': ['fadc2_samplesId'],
'extract': 'peaks'},),
"FastADC2_3peaks": ({'source': 'SCS_UTC2_FADC/ADC/1:channel_3.output',
'key': 'data.peaks',
'dim': ['peakId'],
'extract': 'peaks'},),
"FastADC2_3raw": ({'source': 'SCS_UTC2_FADC/ADC/1:channel_3.output',
'key': 'data.rawData',
'dim': ['fadc2_samplesId'],
'extract': 'peaks'},),
"FastADC2_4peaks": ({'source': 'SCS_UTC2_FADC/ADC/1:channel_4.output',
'key': 'data.peaks',
'dim': ['peakId'],
'extract': 'peaks'},),
"FastADC2_4raw": ({'source': 'SCS_UTC2_FADC/ADC/1:channel_4.output',
'key': 'data.rawData',
'dim': ['fadc2_samplesId'],
'extract': 'peaks'},),
"FastADC2_5peaks": ({'source': 'SCS_UTC2_FADC/ADC/1:channel_5.output',
'key': 'data.peaks',
'dim': ['peakId'],
'extract': 'peaks'},),
"FastADC2_5raw": ({'source': 'SCS_UTC2_FADC/ADC/1:channel_5.output',
'key': 'data.rawData',
'dim': ['fadc2_samplesId'],
'extract': 'peaks'},),
"FastADC2_6peaks": ({'source': 'SCS_UTC2_FADC/ADC/1:channel_6.output',
'key': 'data.peaks',
'dim': ['peakId'],
'extract': 'peaks'},),
"FastADC2_6raw": ({'source': 'SCS_UTC2_FADC/ADC/1:channel_6.output',
'key': 'data.rawData',
'dim': ['fadc2_samplesId'],
'extract': 'peaks'},),
"FastADC2_7peaks": ({'source': 'SCS_UTC2_FADC/ADC/1:channel_7.output',
'key': 'data.peaks',
'dim': ['peakId'],
'extract': 'peaks'},),
"FastADC2_7raw": ({'source': 'SCS_UTC2_FADC/ADC/1:channel_7.output',
'key': 'data.rawData',
'dim': ['fadc2_samplesId'],
'extract': 'peaks'},),
"FastADC2_8peaks": ({'source': 'SCS_UTC2_FADC/ADC/1:channel_8.output',
'key': 'data.peaks',
'dim': ['peakId'],
'extract': 'peaks'},),
"FastADC2_8raw": ({'source': 'SCS_UTC2_FADC/ADC/1:channel_8.output',
'key': 'data.rawData',
'dim': ['fadc2_samplesId'],
'extract': 'peaks'},),
"FastADC2_9peaks": ({'source': 'SCS_UTC2_FADC/ADC/1:channel_9.output',
'key': 'data.peaks',
'dim': ['peakId'],
'extract': 'peaks'},),
"FastADC2_9raw": ({'source': 'SCS_UTC2_FADC/ADC/1:channel_9.output',
'key': 'data.rawData',
'dim': ['fadc2_samplesId'],
'extract': 'peaks'},),
"FFT_PD2raw": ({'source': 'SCS_FFT_DIAG/ADC/PD2:output',
'key': 'data.rawData',
'dim': ['fftpd2_samplesId'],
'extract': 'peaks'},),
"FFT_MCPraw": ({'source': 'SCS_FFT_MCP/ADC/MCP:output',
'key': 'data.rawData',
'dim': ['fftmcp_samplesId'],
'extract': 'peaks'},),
"FFT_REFLraw": ({'source': 'SCS_FFT_REFL/ADC/DIODE_UP:output',
'key': 'data.rawData',
'dim': ['fftrefl_samplesId'],
'extract': 'peaks'},),
"I0_ILHraw": ({'source': 'SCS_ILH_LAS/ADC/I0_ILH:output',
'key': 'data.rawData',
'dim': ['i0ilh_samplesId'],
'extract': 'peaks'},),
"I0_LINraw": ({'source': 'SCS_LIN_LAS/ADC/I0:output',
'key': 'data.rawData',
'dim': ['i0lin_samplesId'],
'extract': 'peaks'},),
"REFLECTOMETERraw": ({'source': 'SCS_LIN_LAS/ADC/REFLECTOMETER:output',
'key': 'data.rawData',
'dim': ['linrefl_samplesId'],
'extract': 'peaks'},),
"CHEM_PDFILTERraw": ({'source': 'SCS_CHEM_DIAG/ADC/DIODE_FILTER:output',
'key': 'data.rawData',
'dim': ['chempdfilt_samplesId'],
'extract': 'peaks'},),
"CHEM_PDNOFILTERraw": ({'source': 'SCS_CHEM_DIAG/ADC/DIODE_NOFILTER:output',
'key': 'data.rawData',
'dim': ['chempdnofilt_samplesId'],
'extract': 'peaks'},),
"CHEM_APDraw": ({'source': 'SCS_CHEM_DET/ADC/APD:output',
'key': 'data.rawData',
'dim': ['chemapd_samplesId'],
'extract': 'peaks'},),
"XOX_EOSPDraw": ({'source': 'SCS_PAM_XOX/ADC/EOS_PD:output',
'key': 'data.rawData',
'dim': ['xoxeos_samplesId'],
'extract': 'peaks'},),
"XRD_MCP_SMALLraw": ({'source': 'SCS_XRD_DET/ADC/MCP_SMALL:output',
'key': 'data.rawData',
'dim': ['mcpsmall_samplesId'],
'extract': 'peaks'},),
"XRD_MCP_BIGraw": ({'source': 'SCS_XRD_DET/ADC/MCP_BIG:output',
'key': 'data.rawData',
'dim': ['mcpbig_samplesId'],
'extract': 'peaks'},),
"XRD_PD2raw": ({'source': 'SCS_XRD_DET/ADC/DIODE2:output',
'key': 'data.rawData',
'dim': ['pd2_samplesId'],
'extract': 'peaks'},),
"XRD_PD6raw": ({'source': 'SCS_XRD_DET/ADC/DIODE6:output',
'key': 'data.rawData',
'dim': ['pd6_samplesId'],
'extract': 'peaks'},),
# KARABACON
"KARABACON": ({'source': 'SCS_DAQ_SCAN/MDL/KARABACON',
'key': 'actualStep.value',
'dim': None},),
# GOTTHARD
"Gotthard1": ({'source': 'SCS_PAM_XOX/DET/GOTTHARD_RECEIVER1:daqOutput',
'key': 'data.adc',
'dim': ['gott_pId', 'pixelId']},),
"Gotthard2": ({'source': 'SCS_PAM_XOX/DET/GOTTHARD_RECEIVER2:daqOutput',
'key': 'data.adc',
'dim': ['gott_pId', 'pixelId']},),
"GH21": ({'source': 'SCS_XOX_GH21/CORR/RECEIVER:daqOutput',
'key': 'data.adc',
'dim': ['gh2_pId', 'pixelId']},
{'source': 'SCS_XOX_GH21/CORR/GOTTHARD2_RECEIVER1:daqOutput',
'key': 'data.adc',
'dim': ['gh2_pId', 'pixelId']},),
"GH22": ({'source': 'SCS_XOX_GH22/CORR/RECEIVER:daqOutput',
'key': 'data.adc',
'dim': ['gh2_pId', 'pixelId']},
{'source': 'SCS_XOX_GH22/CORR/GOTTHARD2_RECEIVER2:daqOutput',
'key': 'data.adc',
'dim': ['gh2_pId', 'pixelId']},),
"GH21_raw": ({'source': 'SCS_XOX_GH21/DET/GOTTHARD2_RECEIVER1:daqOutput',
'key': 'data.adc',
'dim': ['gh2_pId', 'pixelId']},),
"GH22_raw": ({'source': 'SCS_XOX_GH22/DET/GOTTHARD2_RECEIVER2:daqOutput',
'key': 'data.adc',
'dim': ['gh2_pId', 'pixelId']},),
# BASLER RACER
"Racer1": ({'source': 'SCS_PAM_XOX/CAM/RACER1:daqOutput',
'key': 'data.image.pixels',
'dim': ['racer1_pId', 'racer1_x']},),
# BBO2 RY (interferometric timing tool, Nov 2020, p2711)
"BBO_RY": ({'source': 'SCS_CDIDET_GRD/MOTOR/IMAGER',
'key': 'actualPosition.value',
'dim': None},)
}
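# --- Illustration (not part of the module) ---
# Each mnemonic maps to a tuple of candidate entries (source/key/dim, plus an
# optional 'extract' hint); additional tuple elements list alternative source
# names. A lookup could look like this (exact usage in the toolbox may differ):
#
# entry = mnemonics["SCS_SA3"][0]
# entry['source']   # 'SCS_BLU_XGM/XGM/DOOCS:output'
# entry['key']      # 'data.intensitySa3TD'
# entry['dim']      # ['XGMbunchId']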
BSD 3-Clause License
Copyright (c) 2019, Michael Schneider
Copyright (c) 2020, SCS-team
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from .azimuthal_integrator import *
from .bam_detectors import *
from .digitizers import *
from .dssc import *
from .dssc_data import *
from .dssc_misc import *
from .dssc_processing import *
from .gotthard2 import *
from .hrixs import *
from .pes import *
from .viking import *
from .xgm import *
__all__ = (
azimuthal_integrator.__all__
+ bam_detectors.__all__
+ digitizers.__all__
+ dssc.__all__
+ dssc_data.__all__
+ dssc_misc.__all__
+ dssc_processing.__all__
+ gotthard2.__all__
+ hrixs.__all__
+ pes.__all__
+ viking.__all__
+ xgm.__all__
)
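# --- Illustration (assumption, not part of the package) ---
# Aggregating the submodules' __all__ lists re-exports every public detector
# helper from this subpackage, so (assuming the package is named
# toolbox_scs.detectors, as the relative imports suggest) one can write:
#
# import toolbox_scs.detectors as tbdet
# ai = tbdet.AzimuthalIntegrator(imageshape=(128, 128), center=(64, 64),
#                                polar_range=(0, 180), nrings=30)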
import logging
import numpy as np
__all__ = [
'AzimuthalIntegrator',
'AzimuthalIntegratorDSSC'
]
log = logging.getLogger(__name__)
class AzimuthalIntegrator(object):
def __init__(self, imageshape, center, polar_range,
aspect=204/236, **kwargs):
'''
Create a reusable integrator for repeated azimuthal integration of
similar images. The array indices are calculated once for the given
parameter set, which allows fast re-evaluation on subsequent images.
Parameters
==========
imageshape : tuple of ints
The shape of the images to be integrated over.
center : tuple of ints
center coordinates in pixels
polar_range : tuple of ints
start and stop polar angle (in degrees) to restrict integration to
wedges
dr : int, optional
radial width of the integration slices. Takes non-square DSSC
pixels into account.
nrings : int, optional
Number of integration rings. Can be given as an alternative to dr
aspect: float, default 204/236 for DSSC
aspect ratio of the pixel pitch
Returns
=======
ai : azimuthal_integrator instance
Instance can directly be called with image data:
> az_intensity = ai(image)
radial distances and the polar mask are accessible as attributes:
> ai.distance
> ai.polar_mask
'''
self.xcoord = None
self.ycoord = None
self._calc_dist_array(imageshape, center, aspect)
self._calc_polar_mask(polar_range)
self._calc_indices(**kwargs)
def _calc_dist_array(self, shape, center, aspect):
'''Calculate pixel coordinates for the given shape.'''
self.center = center
self.shape = shape
self.aspect = aspect
cx, cy = center
log.info(f'azimuthal center: {center}')
sx, sy = shape
xcoord, ycoord = np.ogrid[:sx, :sy]
self.xcoord = xcoord - cx
self.ycoord = ycoord - cy
# distance from center, hexagonal pixel shape taken into account
self.dist_array = np.hypot(self.xcoord * aspect, self.ycoord)
def _calc_indices(self, **kwargs):
'''Calculates the list of indices for the flattened image array.'''
maxdist = self.dist_array.max()
mindist = self.dist_array.min()
dr = kwargs.get('dr', None)
nrings = kwargs.get('nrings', None)
if (dr is None) and (nrings is None):
raise AssertionError('Either <dr> or <nrings> needs to be given.')
if (dr is not None) and (nrings is not None):
log.warning('Both <dr> and <nrings> given. <dr> takes precedence.')
if (dr is None):
dr = maxdist / nrings
idx = np.indices(dimensions=self.shape)
self.index_array = np.ravel_multi_index(idx, self.shape)
self.distance = np.array([])
self.flat_indices = []
for dist in np.arange(mindist, maxdist + dr, dr):
ring_mask = (self.polar_mask
* (self.dist_array >= (dist - dr))
* (self.dist_array < dist))
self.flat_indices.append(self.index_array[ring_mask])
self.distance = np.append(self.distance, dist)
def _calc_polar_mask(self, polar_range):
self.polar_range = polar_range
prange = np.abs(polar_range[1] - polar_range[0])
if prange > 180:
raise ValueError('Integration angle too wide, should be within 180'
' degrees')
if prange < 1e-6:
raise ValueError('Integration angle too narrow')
if prange == 180:
self.polar_mask = np.ones(self.shape, dtype=bool)
else:
tmin, tmax = np.deg2rad(np.sort(polar_range)) % np.pi
polar_array = np.arctan2(self.xcoord, self.ycoord)
polar_array = np.mod(polar_array, np.pi)
self.polar_mask = (polar_array > tmin) * (polar_array < tmax)
def calc_q(self, distance, wavelength):
'''Calculate momentum transfer coordinate.
Parameters
==========
distance : float
Sample - detector distance in meter
wavelength : float
wavelength of scattered light in meter
Returns
=======
deltaq : np.ndarray
Momentum transfer coordinate in 1/m
'''
res = 4 * np.pi \
* np.sin(np.arctan(self.distance / distance) / 2) / wavelength
return res
def __call__(self, image):
assert self.shape == image.shape, 'image shape does not match'
image_flat = np.ravel(image)
return np.array([np.nansum(image_flat[indices])
for indices in self.flat_indices])
class AzimuthalIntegratorDSSC(AzimuthalIntegrator):
def __init__(self, geom, polar_range, dxdy=(0, 0), **kwargs):
'''
Create a reusable integrator for repeated azimuthal integration of
similar images. The array indices are calculated once for the given
parameter set, which allows fast re-evaluation on subsequent images.
Directly uses an extra_geom.detectors.DSSC_1MGeometry instance for
correct pixel positions.
Parameters
==========
geom : extra_geom.detectors.DSSC_1MGeometry
loaded geometry instance
polar_range : tuple of ints
start and stop polar angle (in degrees) to restrict integration to
wedges
dr : int, optional
radial width of the integration slices. Takes non-square DSSC
pixels into account.
nrings : int, optional
Number of integration rings. Can be given as an alternative to dr
dxdy : tuple of floats, default (0, 0)
global coordinate shift to adjust center outside of geom object
(meter)
Returns
=======
ai : azimuthal_integrator instance
Instance can directly be called with image data:
> az_intensity = ai(image)
radial distances and the polar mask are accessible as attributes:
> ai.distance
> ai.polar_mask
'''
self.xcoord = None
self.ycoord = None
self._calc_dist_array(geom, dxdy)
self._calc_polar_mask(polar_range)
self._calc_indices(**kwargs)
def _calc_dist_array(self, geom, dxdy):
self.dxdy = dxdy
pos = geom.get_pixel_positions()
self.shape = pos.shape[:-1]
self.xcoord = pos[..., 0] + dxdy[0]
self.ycoord = pos[..., 1] + dxdy[1]
self.dist_array = np.hypot(self.xcoord, self.ycoord)
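# --- Usage sketch (illustration only, values are made up) ---
# Integrate a plain 2D image with AzimuthalIntegrator. Note that for this
# pixel-based integrator ai.distance is in pixel units; calc_q(distance,
# wavelength) assumes radial distances in meters, as provided by
# AzimuthalIntegratorDSSC via the extra_geom pixel positions.
#
# import numpy as np
# image = np.random.rand(128, 256)
# ai = AzimuthalIntegrator(imageshape=image.shape, center=(64, 128),
#                          polar_range=(0, 180), nrings=50)
# profile = ai(image)      # one azimuthally summed intensity per ring
# radii = ai.distance      # ring radii (pixel units here)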
""" Beam Arrival Monitor related sub-routines
Copyright (2021) SCS Team.
(contributions preferably comply with pep8 code structure
guidelines.)
"""
import logging
import xarray as xr
import numpy as np
from ..misc.bunch_pattern_external import is_pulse_at
from ..mnemonics_machinery import mnemonics_for_run
from ..constants import mnemonics as _mnemonics
from ..misc.bunch_pattern import (npulses_has_changed,
get_unique_sase_pId, load_bpt)
from toolbox_scs.load import get_array
__all__ = [
'get_bam',
'get_bam_params',
]
log = logging.getLogger(__name__)
def get_bam(run, mnemonics=None, merge_with=None, bunchPattern='sase3',
pulseIds=None):
"""
Load beam arrival monitor (BAM) data and align their pulse ID
according to the bunch pattern. Sources can be loaded on the fly
via the mnemonics argument, or processed from an existing data set
(merge_with).
Parameters
----------
run: extra_data.DataCollection
DataCollection containing the bam data.
mnemonics: str or list of str
mnemonics for BAM, e.g. "BAM1932M" or ["BAM414", "BAM1932M"].
The arrays are either taken from merge_with or loaded from
the DataCollection run.
merge_with: xarray Dataset
If provided, the resulting Dataset will be merged with this
one. If merge_with contains variables in mnemonics, they will
be selected, aligned and merged.
bunchPattern: str
'sase1' or 'sase3' or 'scs_ppl', bunch pattern
used to extract peaks. The pulse ID dimension will be named
'sa1_pId', 'sa3_pId' or 'ol_pId', respectively.
pulseIds: list, 1D array
Pulse Ids. If None, they are automatically loaded.
Returns
-------
xarray Dataset with pulse-resolved BAM variables aligned,
merged with Dataset *merge_with* if provided.
Example
-------
>>> import toolbox_scs as tb
>>> run = tb.open_run(2711, 303)
>>> bam = tb.get_bam(run, 'BAM1932S')
"""
bam_mnemos = ['BAM4', 'BAM1', 'BAM2']
mnemonics = [mnemonics] if isinstance(mnemonics, str) else mnemonics
m2 = []
for m in mnemonics:
if any([(k in m) for k in bam_mnemos]):
m2.append(m)
mnemonics = list(set(m2))
if len(mnemonics) == 0:
log.info('no BAM mnemonics to process. Skipping.')
return merge_with
# Prepare the dataset of non-BAM data to merge with
if bool(merge_with):
ds_mw = merge_with.drop(mnemonics, errors='ignore')
else:
ds_mw = xr.Dataset()
dim_names = {'sase3': 'sa3_pId', 'sase1': 'sa1_pId',
'scs_ppl': 'ol_pId'}
bpt = load_bpt(run, ds_mw)
if bpt is not None:
mask = is_pulse_at(bpt, bunchPattern)
mask = mask.rename({'pulse_slot': dim_names[bunchPattern]})
ds = xr.Dataset()
run_mnemonics = mnemonics_for_run(run)
for m in mnemonics:
if merge_with is not None and m in merge_with:
da_bam = merge_with[m]
else:
da_bam = get_array(run, m)
if len(da_bam.dims) == 2:
if 'BAMbunchId' not in da_bam.dims:
continue
da_bam = da_bam.sel(BAMbunchId=slice(0, None, 2))
# align the pulse Id
if bpt is not None:
n = mask.sizes[dim_names[bunchPattern]]
da_bam = da_bam.isel(BAMbunchId=slice(0, n))
da_bam = da_bam.assign_coords(BAMbunchId=np.arange(0, n))
da_bam = da_bam.rename(BAMbunchId=dim_names[bunchPattern])
da_bam = da_bam.where(mask, drop=True)
# make sure unit is picosecond
if run_mnemonics[m]['key'] != 'data.lowChargeArrivalTime':
da_bam *= 1e-3
else:
# The 1D values (mean, std dev...) are in fs, need to convert to ps
mnemo = run_mnemonics[m]
first_val = run[mnemo['source']][mnemo['key']].train_from_index(0)[1]
if first_val == da_bam[0]:
da_bam *= 1e-3
ds = ds.merge(da_bam, join='inner')
# merge with non-BAM dataset
ds = ds_mw.merge(ds, join='inner')
return ds
'''
def get_bam_old(run, mnemonics=None, merge_with=None, bunchPattern='sase3'):
"""
Load beam arrival monitor (BAM) data and align their pulse ID
according to the bunch pattern. Sources can be loaded on the fly
via the mnemonics argument, or processed from an existing data set
(merge_with).
Parameters
----------
run: extra_data.DataCollection
DataCollection containing the bam data.
mnemonics: str or list of str
mnemonics for BAM, e.g. "BAM1932M" or ["BAM414", "BAM1932M"].
If None, defaults to "BAM1932M" in case no merge_with dataset
is provided.
merge_with: xarray Dataset
If provided, the resulting Dataset will be merged with this
one. The BAM variables of merge_with (if any) will also be
selected, aligned and merged.
bunchPattern: str
'sase1' or 'sase3' or 'scs_ppl', bunch pattern
used to extract peaks. The pulse ID dimension will be named
'sa1_pId', 'sa3_pId' or 'ol_pId', respectively.
Returns
-------
xarray Dataset with pulse-resolved BAM variables aligned,
merged with Dataset *merge_with* if provided.
Example
-------
>>> import toolbox_scs as tb
>>> import toolbox_scs.detectors as tbdet
>>> run, _ = tb.load(2711, 303)
>>> bam = tbdet.get_bam(run)
"""
# get the list of mnemonics to process
mnemonics = mnemonics_to_process(mnemonics, merge_with, 'BAM')
if len(mnemonics) == 0:
log.info('No array with unaligned BAM data to extract. Skipping.')
return merge_with
else:
log.info(f'Extracting BAM data from {mnemonics}.')
# Prepare the dataset of non-BAM data to merge with
if bool(merge_with):
mw_ds = merge_with.drop(mnemonics, errors='ignore')
else:
mw_ds = xr.Dataset()
run_mnemonics = mnemonics_for_run(run)
# extract the XFEL pulses: slice(0,5400,2)
roi = np.s_[:5400:2]
ds = xr.Dataset()
for m in mnemonics:
if bool(merge_with) and m in merge_with:
val = merge_with[m].isel({run_mnemonics[m]['dim'][0]: roi})
log.debug(f'Using {m} from merge_with dataset.')
else:
val = run.get_array(*run_mnemonics[m].values(), roi=roi, name=m)
log.debug(f'Loading {m} from DataCollection.')
val[run_mnemonics[m]['dim'][0]] = np.arange(2700)
# Since winter shutdown 2021-2022, units changed from ps to fs
# Here we convert to ps
if run_mnemonics[m]['key'] != 'data.lowChargeArrivalTime':
val *= 1e-3
ds = ds.merge(val, join='inner')
# check if bunch pattern table exists
if bool(merge_with) and 'bunchPatternTable' in merge_with:
bpt = merge_with['bunchPatternTable']
log.debug('Using bpt from merge_with dataset.')
elif 'bunchPatternTable' in run_mnemonics:
bpt = run.get_array(*run_mnemonics['bunchPatternTable'].values())
log.debug('Loaded bpt from extra_data run.')
else:
bpt = None
# align the pulse Id
if bpt is not None and len(ds.variables) > 0:
dim_names = {'sase3': 'sa3_pId', 'sase1': 'sa1_pId',
'scs_ppl': 'ol_pId'}
mask = is_pulse_at(bpt, bunchPattern)
mask = mask.rename({'pulse_slot': dim_names[bunchPattern]})
ds = ds.rename({run_mnemonics['BAM1932M']['dim'][0]:
dim_names[bunchPattern]})
ds = ds.where(mask, drop=True)
# merge with non-BAM dataset
ds = mw_ds.merge(ds, join='inner')
return ds
'''
def get_bam_params(run, mnemo_or_source='BAM1932S'):
"""
Extract the run values of bamStatus[1-3] and bamError.
Parameters
----------
run: extra_data.DataCollection
DataCollection containing the bam data.
mnemo_or_source: str
mnemonic of the BAM, e.g. 'BAM414', or source name,
e.g. 'SCS_ILH_LAS/DOOCS/BAM_414_B2'.
Returns
-------
params: dict
dictionary containing the extracted parameters.
Note
----
The extracted parameters are run values; they do not reflect any
change that may occur during the run.
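Example
-------
Illustrative usage; the proposal / run numbers are placeholders.
>>> import toolbox_scs as tb
>>> run = tb.open_run(2711, 303)
>>> params = get_bam_params(run, 'BAM1932S')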
"""
if mnemo_or_source in _mnemonics:
run_mnemonics = mnemonics_for_run(run)
source = run_mnemonics[mnemo_or_source]['source'].split(':')[0]
else:
source = mnemo_or_source
res = {}
res['status1'] = run.get_run_value(source, 'bamStatus1.value')
res['status2'] = run.get_run_value(source, 'bamStatus2.value')
res['status3'] = run.get_run_value(source, 'bamStatus3.value')
res['error'] = run.get_run_value(source, 'bamError.value')
return res
""" Digitizers related sub-routines
Copyright (2021) SCS Team.
(contributions preferably comply with pep8 code structure
guidelines.)
"""
import logging
import os
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
from scipy.signal import find_peaks
from scipy.signal import fftconvolve
from ..misc.bunch_pattern_external import is_pulse_at
from ..util.exceptions import ToolBoxValueError
from ..mnemonics_machinery import (mnemonics_to_process,
mnemonics_for_run)
from extra_data import open_run
from extra_data.read_machinery import find_proposal
from extra.components import XrayPulses, OpticalLaserPulses
__all__ = [
'check_peak_params',
'get_digitizer_peaks',
'get_laser_peaks',
'get_peaks',
'get_tim_peaks',
'digitizer_signal_description',
'get_dig_avg_trace',
'extract_digitizer_peaks',
'load_processed_peaks',
'check_processed_peak_params'
]
log = logging.getLogger(__name__)
def peaks_from_raw_trace(traces, pulseStart, pulseStop, baseStart, baseStop,
period=None, npulses=None, extra_dim=None):
"""
Computes peaks from raw digitizer traces by trapezoidal integration.
Parameters
----------
traces: xarray DataArray or numpy array containing raw traces. If
numpy array is provided, the second dimension is that of the samples.
pulseStart: int or list or 1D-numpy array
trace index of integration start. If 1d array, each value is the start
of one peak. The period and npulses parameters are ignored.
pulseStop: int
trace index of integration stop.
baseStart: int
trace index of background start.
baseStop: int
trace index of background stop.
period: int
number of samples between two peaks. Ignored if pulseStart is a 1D array.
npulses: int
number of pulses. Ignored if pulseStart is a 1D array.
extra_dim: str
Name given to the dimension along the peaks. Defaults to 'pulseId'.
Returns
-------
xarray DataArray
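Example
-------
Minimal sketch with synthetic traces (all values are arbitrary):
>>> import numpy as np
>>> traces = np.zeros((10, 1000))        # 10 trains, 1000 samples
>>> for start in (100, 300, 500):        # three fake pulses, 200 samples apart
...     traces[:, start:start+20] = 1.
>>> peaks = peaks_from_raw_trace(traces, pulseStart=100, pulseStop=120,
...                              baseStart=50, baseStop=90,
...                              period=200, npulses=3)
>>> peaks.shape
(10, 3)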
"""
assert len(traces.shape) == 2
if isinstance(traces, xr.DataArray):
ntid = traces.sizes['trainId']
coords = traces.coords
traces = traces.values
if traces.shape[0] != ntid:
traces = traces.T
else:
coords = None
if hasattr(pulseStart, '__len__'):
pulseStart = np.array(pulseStart)
pulses = pulseStart - pulseStart[0]
pulseStart = pulseStart[0]
else:
pulses = range(0, npulses*period, period)
if extra_dim is None:
extra_dim = 'pulseId'
results = xr.DataArray(np.empty((traces.shape[0], len(pulses))),
coords=coords,
dims=['trainId', extra_dim])
for i, p in enumerate(pulses):
a = pulseStart + p
b = pulseStop + p
bkga = baseStart + p
bkgb = baseStop + p
if b > traces.shape[1]:
break
bg = np.outer(np.median(traces[:, bkga:bkgb], axis=1),
np.ones(b-a))
results[:, i] = np.trapz(traces[:, a:b] - bg, axis=1)
return results
def peaks_from_apd(array, params, digitizer, bpt, bunchPattern):
"""
Extract peak-integrated data according to the bunch pattern.
Parameters
----------
array: xarray DataArray
2D array containing peak-integrated data
params: dict
peak-integration parameters of the digitizer
digitizer: str
digitizer type, one of {'FastADC', 'ADQ412'}
bpt: xarray DataArray
bunch pattern table
bunchPattern: str
one of {'sase1', 'sase3', 'scs_ppl'}, used to select pulses and
assign name of the pulse id dimension.
Returns
-------
xarray DataArray with pulse id coordinates.
"""
if params['enable'] == 0 or (array == 1.).all():
raise ValueError('The digitizer did not record integrated peaks. '
'Consider using raw traces from the same channel '
'for peak integration.')
if digitizer == 'FastADC':
min_distance = 24
if digitizer == 'ADQ412':
min_distance = 440
period = params['period']
if period % min_distance != 0:
log.warning(f'Warning: the pulse period ({period} samples) of '
'digitizer is not a multiple of the pulse separation at '
f'4.5 MHz ({min_distance} samples). Pulse id assignment '
'is likely to fail.')
stride = int(period/min_distance)
npulses_apd = params['npulses']
dim_names = {'sase3': 'sa3_pId', 'sase1': 'sa1_pId', 'scs_ppl': 'ol_pId'}
pulse_dim = dim_names[bunchPattern]
arr_dim = [dim for dim in array.dims if dim != 'trainId'][0]
if npulses_apd > array.sizes[arr_dim]:
log.warning(f'The digitizer was set to record {npulses_apd} pulses '
f'but the array length is only {array.sizes[arr_dim]}.')
npulses_apd = array.sizes[arr_dim]
mask = is_pulse_at(bpt, bunchPattern).rename({'pulse_slot': pulse_dim})
mask = mask.where(mask.trainId.isin(array.trainId), drop=True)
mask = mask.assign_coords({pulse_dim: np.arange(bpt.sizes['pulse_slot'])})
pid = np.sort(np.unique(np.where(mask)[1]))
npulses_bpt = len(pid)
apd_coords = np.arange(pid[0], pid[0] + stride*npulses_apd, stride)
noalign = False
if len(np.intersect1d(apd_coords, pid, assume_unique=True)) < npulses_bpt:
log.warning('Not all pulses were recorded. The digitizer '
'was set to record pulse ids '
f'{apd_coords[apd_coords<bpt.sizes["pulse_slot"]]} but the'
'bunch pattern for'
f' {bunchPattern} is {pid}. Skipping pulse ID alignment.')
noalign = True
array = array.isel({arr_dim: slice(0, npulses_apd)})
array = array.where(array != 1.)
if noalign:
return array
array = array.rename(
{arr_dim: pulse_dim}).assign_coords({pulse_dim: apd_coords})
array, mask = xr.align(array, mask, join='inner')
array = array.where(mask, drop=True)
return array
def get_peaks(run,
data,
mnemonic,
useRaw=True,
autoFind=True,
integParams=None,
bunchPattern='sase3',
bpt=None,
extra_dim=None,
indices=None,
):
"""
Extract peaks from one source (channel) of a digitizer.
Parameters
----------
run: extra_data.DataCollection
DataCollection containing the digitizer data
data: xarray DataArray
array containing the raw traces or peak-integrated values from the
digitizer, either loaded from the DataCollection or taken from an
existing dataset.
mnemonic: str or dict
ToolBox mnemonic or dict with source and key as in
{'source': 'SCS_UTC1_ADQ/ADC/1:network',
'key': 'digitizers.channel_1_A.raw.samples'}
useRaw: bool
If True, extract peaks from raw traces. If False, uses the APD (or
peaks) data from the digitizer.
autoFind: bool
If True, finds integration parameters by inspecting the average raw
trace. Only valid if useRaw is True.
integParams: dict
dictionary containing the integration parameters for raw trace
integration: 'pulseStart', 'pulseStop', 'baseStart', 'baseStop',
'period', 'npulses'. Not used if autoFind is True. All keys are
required when bunch pattern is missing.
bunchPattern: str
match the peaks to the bunch pattern: 'sase1', 'sase3', 'scs_ppl'.
This will dictate the name of the pulse ID coordinates: 'sa1_pId',
'sa3_pId' or 'ol_pId'.
bpt: xarray DataArray
bunch pattern table
extra_dim: str
Name given to the dimension along the peaks. If None, the name is given
according to the bunchPattern.
indices: array, slice
indices from the peak-integrated data to retrieve. Only required
when bunch pattern is missing and useRaw is False.
Returns
-------
xarray.DataArray containing digitizer peaks with pulse coordinates
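Example
-------
Illustrative usage; the run numbers are placeholders and the source /
key combination is only an example of a raw digitizer channel.
>>> import toolbox_scs as tb
>>> run = tb.open_run(2711, 303)
>>> arr = run.get_array('SCS_UTC1_ADQ/ADC/1:network',
...                     'digitizers.channel_1_D.raw.samples')
>>> peaks = get_peaks(run, arr,
...                   {'source': 'SCS_UTC1_ADQ/ADC/1:network',
...                    'key': 'digitizers.channel_1_D.raw.samples'},
...                   useRaw=True, bunchPattern='sase3')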
"""
arr = data
dim = [d for d in arr.dims if d != 'trainId'][0]
# Load bunch pattern table
run_mnemonics = mnemonics_for_run(run)
if bpt is None and bunchPattern != 'None':
if 'bunchPatternTable' in run_mnemonics:
m = run_mnemonics['bunchPatternTable']
bpt = run.get_array(m['source'], m['key'], m['dim'])
pattern = bunchPattern
else:
pattern = bunchPattern
if bunchPattern == 'None':
bpt = None
# Find digitizer type
m = mnemonic if isinstance(mnemonic, dict) else run_mnemonics[mnemonic]
digitizer = digitizer_type(run, m.get('source'))
# 1. Peak-integrated data from digitizer
if useRaw is False:
# 1.1 No bunch pattern provided
if bpt is None:
log.info('Missing bunch pattern info.')
if indices is None:
raise TypeError('indices argument must be provided '
'when bunch pattern info is missing.')
if extra_dim is None:
extra_dim = 'pulseId'
return arr.isel({dim: indices}).rename({dim: extra_dim})
# 1.2 Bunch pattern is provided
if isinstance(mnemonic, dict):
peak_params = channel_peak_params(run, mnemonic.get('source'),
mnemonic.get('key'))
else:
peak_params = channel_peak_params(run, mnemonic)
log.debug(f'Digitizer peak integration parameters: {peak_params}')
return peaks_from_apd(arr, peak_params, digitizer, bpt, bunchPattern)
# 2. Use raw data from digitizer
# minimum pulse period @ 4.5MHz, according to digitizer type
min_distance = 1
if digitizer == 'FastADC':
min_distance = 24
if digitizer == 'ADQ412':
min_distance = 440
if autoFind:
stride = int(np.max([1, np.floor(arr.sizes['trainId']/200)]))
trace = arr.isel(trainId=slice(0, None, stride)).mean(dim='trainId')
try:
integParams = find_integ_params(trace)
except ValueError as err:
log.warning(f'{err}, trying with averaged trace over all trains.')
trace = arr.mean(dim='trainId')
integParams = find_integ_params(trace)
log.debug(f'Auto find peaks result: {integParams}')
required_keys = ['pulseStart', 'pulseStop', 'baseStart',
'baseStop', 'period', 'npulses']
if integParams is None or not all(name in integParams
for name in required_keys):
raise TypeError('All keys of integParams argument '
f'{required_keys} are required when '
'bunch pattern info is missing.')
# 2.1. No bunch pattern provided
if bpt is None:
log.info('Missing bunch pattern info.')
log.debug(f'Retrieving {integParams["npulses"]} pulses.')
if extra_dim is None:
extra_dim = 'pulseId'
return peaks_from_raw_trace(arr, integParams['pulseStart'],
integParams['pulseStop'],
integParams['baseStart'],
integParams['baseStop'],
integParams['period'],
integParams['npulses'],
extra_dim=extra_dim)
# 2.2 Bunch pattern is provided
# load mask and extract pulse Id:
dim_names = {'sase3': 'sa3_pId', 'sase1': 'sa1_pId',
'scs_ppl': 'ol_pId', 'None': 'pulseId'}
extra_dim = dim_names[pattern]
valid_tid = np.intersect1d(arr.trainId, bpt.trainId, assume_unique=True)
mask = is_pulse_at(bpt, pattern)
pattern_changed = ~(mask == mask[0]).all().values
if not pattern_changed:
pid = np.nonzero(mask[0].values)[0]
valid_arr = arr
else:
mask = is_pulse_at(bpt.sel(trainId=valid_tid), pattern)
mask = mask.rename({'pulse_slot': extra_dim})
mask = mask.assign_coords(
{extra_dim: np.arange(bpt.sizes['pulse_slot'])})
mask_on = mask.where(mask, drop=True).fillna(False).astype(bool)
log.info(f'Bunch pattern of {pattern} changed during the run.')
pid = mask_on.coords[extra_dim]
# select trains containing pulses
valid_arr = arr.sel(trainId=mask_on.trainId)
npulses = len(pid)
log.debug(f'Bunch pattern: {npulses} pulses for {pattern}.')
if npulses == 1:
period_bpt = 0
else:
period_bpt = np.min(np.diff(pid))
if autoFind and period_bpt*min_distance != integParams['period']:
log.warning('The period from the bunch pattern is different than '
'that found by the peak-finding algorithm. Either '
'the algorithm failed or the bunch pattern source '
f'({bunchPattern}) is not correct.')
# create array of sample indices for peak integration
sample_id = (pid-pid[0])*min_distance
# override auto find parameters
if isinstance(integParams['pulseStart'], (int, np.integer)):
integParams['pulseStart'] = integParams['pulseStart'] + sample_id
peaks = peaks_from_raw_trace(valid_arr, integParams['pulseStart'],
integParams['pulseStop'],
integParams['baseStart'],
integParams['baseStop'],
integParams['period'],
integParams['npulses'],
extra_dim)
if pattern_changed:
peaks = peaks.where(mask_on, drop=True)
return peaks.assign_coords({extra_dim: pid})
def get_dig_avg_trace(run, mnemonic, ntrains=None):
"""
Compute the average over ntrains evenly spaced across all trains
of a digitizer trace.
Parameters
----------
run: extra_data.DataCollection
DataCollection containing the digitizer data.
mnemonic: str
ToolBox mnemonic of the digitizer data, e.g. 'MCP2apd'.
ntrains: int
Number of trains used to calculate the average raw trace.
If None, all trains are used.
Returns
-------
trace: DataArray
The average digitizer trace
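Example
-------
Illustrative usage; run numbers and mnemonic are placeholders.
>>> import toolbox_scs as tb
>>> run = tb.open_run(2711, 303)
>>> trace = get_dig_avg_trace(run, 'MCP2raw', ntrains=200)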
"""
run_mnemonics = mnemonics_for_run(run)
if ntrains is None:
sel = run
else:
total_tid = len(run.train_ids)
stride = int(np.max([1, np.floor(total_tid/ntrains)]))
sel = run.select_trains(np.s_[0:None:stride])
m = run_mnemonics[mnemonic]
raw_trace = sel.get_array(m['source'], m['key'], m['dim'])
raw_trace = raw_trace.mean(dim='trainId')
return raw_trace
def channel_peak_params(run, source, key=None, channel=None, board=None):
"""
Extract peak-integration parameters used by a channel of the digitizer.
Parameters
----------
run: extra_data.DataCollection
DataCollection containing the digitizer data.
source: str
ToolBox mnemonic of a digitizer data, e.g. "MCP2apd" or
"FastADC4peaks", or name of digitizer source, e.g.
'SCS_UTC1_ADQ/ADC/1:network'.
key: str
optional, used in combination with source (if source is not a ToolBox
mnemonic) instead of digitizer, channel and board.
channel: int or str
The digitizer channel for which to retrieve the parameters. If None,
inferred from the source mnemonic or source + key arguments.
board: int
Board of the ADQ412. If None, inferred from the source mnemonic or
source + key arguments.
Returns
-------
dict with peak integration parameters.
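Example
-------
Illustrative usage; run numbers and mnemonic are placeholders.
>>> import toolbox_scs as tb
>>> run = tb.open_run(2711, 303)
>>> params = channel_peak_params(run, 'MCP2apd')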
"""
run_mnemonics = mnemonics_for_run(run)
if source in run_mnemonics:
m = run_mnemonics[source]
source = m['source']
key = m['key']
if 'network' in source:
return adq412_channel_peak_params(run, source, key, channel, board)
else:
return fastADC_channel_peak_params(run, source, channel)
def fastADC_channel_peak_params(run, source, channel=None):
"""
Extract peak-integration parameters used by a channel of the Fast ADC.
Parameters
----------
run: extra_data.DataCollection
DataCollection containing the digitizer data.
source: str
Name of Fast ADC source, e.g. 'SCS_UTC1_MCP/ADC/1:channel_5.output'.
channel: int
The Fast ADC channel for which to retrieve the parameters. If None,
inferred from the source.
Returns
-------
dict with peak integration parameters.
"""
fastADC_keys = {
'baseStart': ('baselineSettings.baseStart.value',
'baseStart.value'),
'baseStop': ('baseStop.value',),
'baseLength': ('baselineSettings.length.value',),
'enable': ('enablePeakComputation.value',),
'pulseStart': ('initialDelay.value',),
'pulseLength': ('peakSamples.value',),
'npulses': ('numPulses.value',),
'period': ('pulsePeriod.value',)
}
if channel is None:
channel = int(source.split(':')[1].split('.')[0].split('_')[1])
baseName = f'channel_{channel}.'
source = source.split(':')[0]
data = run.select(source).train_from_index(0)[1][source]
result = {}
for mnemo, versions in fastADC_keys.items():
for v in versions:
key = baseName + v
if key in data:
result[mnemo] = data[key]
if 'baseLength' in result:
result['baseStop'] = result['baseStart'] + \
result.pop('baseLength')
if 'pulseLength' in result:
result['pulseStop'] = result['pulseStart'] + \
result.pop('pulseLength')
return result
def adq412_channel_peak_params(run, source, key=None,
channel=None, board=None):
"""
Extract peak-integration parameters used by a channel of the ADQ412.
Parameters
----------
run: extra_data.DataCollection
DataCollection containing the digitizer data.
source: str
Name of ADQ412 source, e.g. 'SCS_UTC1_ADQ/ADC/1:network'.
key: str
optional, e.g. 'digitizers.channel_1_D.apd.pulseIntegral', used in
combination with source instead of channel and board.
channel: int or str
The ADQ412 channel for which to retrieve the parameters. If None,
inferred from the source mnemonic or source + key arguments.
board: int
Board of the ADQ412. If None, inferred from the source mnemonic or
source + key arguments.
Returns
-------
dict with peak integration parameters.
"""
if key is None:
if channel is None or board is None:
raise ValueError('key or channel + board arguments are '
'missing.')
else:
k = key.split('.')[1].split('_')
ch_to_int = {'A': 0, 'B': 1, 'C': 2, 'D': 3}
channel = ch_to_int[k[2]]
board = k[1]
baseName = f'board{board}.apd.channel_{channel}.'
source = source.split(':')[0]
adq412_keys = {
'baseStart': (baseName + 'baseStart.value',),
'baseStop': (baseName + 'baseStop.value',),
'enable': (baseName + 'enable.value',),
'pulseStart': (baseName + 'pulseStart.value',),
'pulseStop': (baseName + 'pulseStop.value',),
'initialDelay': (baseName + 'initialDelay.value',),
'upperLimit': (baseName + 'upperLimit.value',),
'npulses': (f'board{board}.apd.numberOfPulses.value',)
}
data = run.select(source).train_from_index(0)[1][source]
result = {}
for mnemo, versions in adq412_keys.items():
for key in versions:
if key in data:
result[mnemo] = data[key]
result['period'] = result.pop('upperLimit') - \
result.pop('initialDelay')
return result
def find_integ_params(trace, height=None, width=1):
"""
Find integration parameters for peak integration of a raw
digitizer trace. Based on scipy find_peaks().
Parameters
----------
trace: numpy array or xarray DataArray
The digitizer raw trace used to find peaks
height: int
minimum threshold for peak determination
width: int
minimum width of peak
Returns
-------
dict with keys 'pulseStart', 'pulseStop', 'baseStart', 'baseStop',
'period', 'npulses' and values in number of samples.
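Example
-------
Minimal sketch with a synthetic trace (all values are arbitrary):
>>> import numpy as np
>>> rng = np.random.default_rng(0)
>>> trace = rng.normal(0, 0.01, 10000)
>>> for i in range(20):                  # 20 pulses, 440 samples apart
...     trace[1000 + i*440:1020 + i*440] += 1.
>>> params = find_integ_params(trace)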
"""
if isinstance(trace, xr.DataArray):
trace = trace.values
trace_norm = trace - np.median(trace)
trace_norm = -trace_norm if np.mean(trace_norm) < 0 else trace_norm
SNR = np.max(np.abs(trace_norm)) / np.std(trace_norm[:100])
if SNR < 10:
log.warning('signal-to-noise ratio too low: cannot '
'automatically find peaks.')
return {'pulseStart': 2, 'pulseStop': 3,
'baseStart': 0, 'baseStop': 1,
'period': 0, 'npulses': 1}
# Compute autocorrelation using fftconvolve
# from "https://stackoverflow.com/questions/12323959/"
# "fast-cross-correlation-method-in-python"
if len(trace_norm) % 2 == 0:
n = len(trace_norm)
else:
n = len(trace_norm) - 1
hl = int(n/2)
a = trace_norm[:n]
b = np.zeros(2*n)
b[hl: hl + n] = a
# Do an array flipped convolution, which is a correlation.
ac_trace = fftconvolve(b, a[::-1], mode='valid')
# slower approach:
# ac_trace = np.correlate(trace_norm, trace_norm, mode='same')
n = int(len(ac_trace)/2)
# estimate quality of ac_trace and define min height to detect peaks
factor = np.abs(ac_trace.max() / np.median(ac_trace))
factor = 3 if factor > 20 else 1.5
ac_peaks = find_peaks(ac_trace[n:], height=ac_trace[n:].max()/factor)
if len(ac_peaks[0]) == 0:
period = 0
distance = 1
elif len(ac_peaks[0]) == 1:
period = ac_peaks[0][0]
distance = max(1, period-6)
else:
period = int(np.median(ac_peaks[0][1:] - ac_peaks[0][:-1]))
distance = max(1, period-6) # smaller than period to account for all peaks
# define min height to detect peaks depending on signal quality
f = np.max([3, np.min([(SNR/10), 4])])
height = trace_norm.max() / f
peaks, dic = find_peaks(trace_norm, distance=distance,
height=height, width=1)
# filter out peaks that are not periodically spaced
idx = np.ones(len(peaks), dtype=bool)
idx[1:] = np.isclose(peaks[1:] - peaks[:-1],
np.ones(len(peaks[1:]))*period, atol=6)
peaks = peaks[idx]
for d in dic:
dic[d] = dic[d][idx]
npulses = len(peaks)
if npulses == 0:
raise ValueError('Could not automatically find peaks.')
pulseStart = np.mean(dic['left_ips'] - peaks + peaks[0], dtype=int)
pulseStop = np.mean(dic['right_ips'] - peaks + peaks[0], dtype=int)
baseStop = pulseStart - np.mean(peaks - dic['left_ips'])/2 - 1
baseStop = np.min([baseStop, pulseStart]).astype(int)
baseStop = np.max([baseStop, 0]).astype(int)
baseStart = baseStop - np.mean(dic['widths'])/2
baseStart = np.max([baseStart, 0]).astype(int)
result = {'pulseStart': pulseStart, 'pulseStop': pulseStop,
'baseStart': baseStart, 'baseStop': baseStop,
'period': period, 'npulses': npulses}
return result
def get_peak_params(run, mnemonic, raw_trace=None, ntrains=None):
"""
Get the peak region and baseline region of a raw digitizer trace used
to compute the peak integration. These regions are either those of the
digitizer peak-integration settings or those determined in get_tim_peaks
or get_fast_adc_peaks when the inputs are raw traces.
Parameters
----------
run: extra_data.DataCollection
DataCollection containing the digitizer data.
mnemonic: str
ToolBox mnemonic of the digitizer data, e.g. 'MCP2apd'.
raw_trace: optional, 1D numpy array or xarray DataArray
Raw trace to display. If None, the average raw trace over
ntrains of the corresponding channel is loaded (this can
be time-consuming).
ntrains: optional, int
Only used if raw_trace is None. Number of trains used to
calculate the average raw trace of the corresponding channel.
"""
run_mnemonics = mnemonics_for_run(run)
if mnemonic not in run_mnemonics:
raise ToolBoxValueError("mnemonic must be a ToolBox mnemonic")
if "raw" not in mnemonic:
params = channel_peak_params(run, mnemonic)
if 'enable' in params and params['enable'] == 0:
log.warning('The digitizer did not record peak-integrated data.')
else:
if raw_trace is None:
raw_trace = get_dig_avg_trace(run, mnemonic, ntrains)
params = find_integ_params(raw_trace)
return params
def find_peak_integration_parameters(run, mnemonic, raw_trace=None,
integParams=None, pattern=None):
'''
Finds peak integration parameters.
'''
run_mnemonics = mnemonics_for_run(run)
digitizer = digitizer_type(run, run_mnemonics[mnemonic]['source'])
pulse_period = 24 if digitizer == "FastADC" else 440
if integParams is None: # load raw trace and find peaks
autoFind = True
if raw_trace is None:
raw_trace = get_dig_avg_trace(run, mnemonic)
params = get_peak_params(run, mnemonic, raw_trace)
else: # inspect provided parameters
autoFind = False
required_keys = ['pulseStart', 'pulseStop', 'baseStart',
'baseStop']
add_text = ''
if pattern is None and not hasattr(integParams['pulseStart'],
'__len__'):
required_keys += ['period', 'npulses']
add_text = 'Bunch pattern not provided. '
if not all(name in integParams for name in required_keys):
raise ValueError(add_text + 'All keys of integParams argument '
f'{required_keys} are required.')
params = integParams.copy()
# extract the pulse ids from the parameters (starting at 0)
if hasattr(params['pulseStart'], '__len__'):
if params.get('npulses') is not None and (
params.get('npulses') != len(params['pulseStart']) ):
log.warning('The number of pulses does not match the length '
'of pulseStart. Using length of pulseStart as '
'the number of pulses.')
params['npulses'] = len(params['pulseStart'])
pulse_ids_params = ((np.array(params['pulseStart'])
- params['pulseStart'][0]) / pulse_period).astype(int)
else:
pulse_ids_params = np.arange(0, params['npulses'] * params['period'] / pulse_period,
params['period'] / pulse_period).astype(int)
# Extract pulse_ids and npulses from bunch pattern
pulse_ids, npulses, period = None, None, None
regular = True
if pattern is not None:
bunchPattern = 'sase3' if hasattr(pattern, 'sase') else 'scs_ppl'
if pattern.is_constant_pattern() is False:
log.info('The number of pulses changed during the run.')
pulse_ids = np.unique(pattern.pulse_ids(labelled=False, copy=False))
npulses, period = None, None
regular = False
else:
pulse_ids = pattern.peek_pulse_ids(labelled=False)
npulses = len(pulse_ids)
if npulses > 1:
periods = np.diff(pulse_ids)
if len(np.unique(periods)) > 1:
regular = False
else:
period = np.unique(periods)[0] * pulse_period
# Compare parameters with bunch pattern
if len(pulse_ids_params) == len(pulse_ids):
if not (pulse_ids_params == pulse_ids - pulse_ids[0]).all():
log.warning('The provided pulseStart parameters do not match the '
f'{bunchPattern} bunch pattern pulse ids. Using '
'bunch pattern parameters.')
pulse_ids_params = pulse_ids
if (npulses != params.get('npulses') or
period != params.get('period')):
if autoFind:
add_text = 'Automatically found '
else:
add_text = 'Provided '
log.warning(add_text + 'integration parameters '
f'(npulses={params.get("npulses")}, '
f'period={params.get("period")}) do not match the '
f'{bunchPattern} bunch pattern (npulses={npulses}, '
f'period={period}). Using bunch pattern parameters.')
pulse_ids_params = pulse_ids
params['npulses'] = npulses
params['period'] = period
if not hasattr(params['pulseStart'], '__len__'):
start = params['pulseStart']
else:
start = params['pulseStart'][0]
params['pulseStart'] = np.array([int(start + (pid - pulse_ids_params[0]) * pulse_period)
for pid in pulse_ids_params])
return params, pulse_ids_params, regular, raw_trace
def check_peak_params(run, mnemonic, raw_trace=None, ntrains=200, params=None,
plot=True, show_all=False, bunchPattern='sase3'):
"""
Checks and plots the peak parameters (pulse window and baseline window
of a raw digitizer trace) used to compute the peak integration. These
parameters are either set by the digitizer peak-integration settings,
or are determined by a peak finding algorithm (used in get_tim_peaks
or get_fast_adc_peaks) when the inputs are raw traces. The parameters
can also be provided manually for visual inspection. The plot either
shows the first and last pulse of the trace or the entire trace.
Parameters
----------
run: extra_data.DataCollection
DataCollection containing the digitizer data.
mnemonic: str
ToolBox mnemonic of the digitizer data, e.g. 'MCP2apd'.
raw_trace: optional, 1D numpy array or xarray DataArray
Raw trace to display. If None, the average raw trace over
ntrains of the corresponding channel is loaded (this can
be time-consuming).
ntrains: optional, int
Only used if raw_trace is None. Number of trains used to
calculate the average raw trace of the corresponding channel.
plot: bool
If True, displays the raw trace and peak integration regions.
show_all: bool
If True, displays the entire raw trace and all peak integration
regions (this can be time-consuming).
If False, shows the first and last pulse according to the bunchPattern.
bunchPattern: optional, str
Only used if plot is True. Checks the bunch pattern against
the digitizer peak parameters and shows potential mismatch.
Returns
-------
dictionary of peak integration parameters
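Example
-------
Illustrative usage; run numbers and mnemonic are placeholders.
>>> import toolbox_scs as tb
>>> run = tb.open_run(2711, 303)
>>> params = check_peak_params(run, 'MCP2apd', bunchPattern='sase3',
...                            plot=True)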
"""
'''
run_mnemonics = mnemonics_for_run(run)
if raw_trace is None:
raw_trace = get_dig_avg_trace(run, mnemonic, ntrains)
if params is None:
integParams = get_peak_params(run, mnemonic, raw_trace)
title = 'Auto-find peak params'
else:
title = 'Provided peak params'
integParams = params.copy()
if 'enable' in integParams and integParams['enable'] == 0:
log.warning('The digitizer did not record peak-integrated data.')
if not plot:
return integParams
digitizer = digitizer_type(run, run_mnemonics[mnemonic]['source'])
pulse_period = 24 if digitizer == "FastADC" else 440
pattern = None
try:
if bunchPattern == 'sase3':
pattern = XrayPulses(run)
if bunchPattern == 'scs_ppl':
pattern = OpticalLaserPulses(run)
except Exception as e:
log.warning(e)
bunchPattern = None
# Extract pulse_ids, npulses and period from bunch pattern
pulse_ids, npulses, period = None, None, None
regular = True
if pattern is not None:
if pattern.is_constant_pattern() is False:
log.info('The number of pulses changed during the run.')
pulse_ids = np.unique(pattern.pulse_ids(labelled=False, copy=False))
npulses, period = None, None
regular = False
else:
pulse_ids = pattern.peek_pulse_ids(labelled=False)
npulses = len(pulse_ids)
if npulses > 1:
periods = np.diff(pulse_ids)
if len(np.unique(periods)) > 1:
regular = False
else:
period = np.unique(periods)[0] * pulse_period
bp_params = {}
bp_params['npulses'] = npulses
bp_params['period'] = period
bp_params['pulse_ids'] = pulse_ids
bp_params['regular'] = regular
if regular:
print(f'bunch pattern {bunchPattern}: {bp_params["npulses"]} pulses,'
f' {bp_params["period"]} samples between two pulses')
else:
print(f'bunch pattern {bunchPattern}: Not a regular pattern. '
f'{bp_params["npulses"]} pulses, pulse_ids='
f'{bp_params["pulse_ids"]}.')
if (npulses != integParams.get('npulses') or
period != integParams.get('period')):
log.warning(f'Integration parameters '
f'(npulses={integParams.get("npulses")}, '
f'period={integParams.get("period")}) do not match the '
f'the bunch pattern (npulses={npulses}, '
f'period={period}). Using bunch pattern parameters.')
integParams['npulses'] = npulses
integParams['period'] = period
start = integParams['pulseStart']
integParams['pulseStart'] = [start + (pid - pulse_ids[0]) * pulse_period
for pid in pulse_ids]
else:
bp_params = None
print(f'{title}: {integParams["npulses"]} pulses, {integParams["period"]}'
' samples between two pulses')
fig, ax = plotPeakIntegrationWindow(raw_trace, integParams, bp_params, show_all)
ax[0].set_ylabel(mnemonic.replace('peaks', 'raw').replace('apd', 'raw'))
fig.suptitle(title, size=12)
'''
run_mnemonics = mnemonics_for_run(run)
digitizer = digitizer_type(run, run_mnemonics[mnemonic]['source'])
pulse_period = 24 if digitizer == "FastADC" else 440
if params is None:
title = 'Auto-find peak parameters'
else:
title = 'Provided peak parameters'
pattern = None
try:
if bunchPattern == 'sase3':
pattern = XrayPulses(run)
if bunchPattern == 'scs_ppl':
pattern = OpticalLaserPulses(run)
except Exception as e:
log.warning(e)
bunchPattern = None
integParams, pulse_ids, regular, raw_trace = find_peak_integration_parameters(
run, mnemonic, raw_trace, params, pattern)
if bunchPattern:
if regular:
print(f'bunch pattern {bunchPattern}: {len(pulse_ids)} pulses,'
f' {(pulse_ids[1] - pulse_ids[0]) * pulse_period} samples between two pulses')
print(title + ': ' + f' {integParams["npulses"]} pulses, '
f'{integParams["period"]} samples between two pulses')
else:
print(f'bunch pattern {bunchPattern}: Not a regular pattern. '
f'{len(pulse_ids)} pulses, pulse_ids='
f'{pulse_ids}.')
if plot:
if raw_trace is None:
raw_trace = get_dig_avg_trace(run, mnemonic)
fig, ax = plotPeakIntegrationWindow(raw_trace, integParams,
show_all=show_all)
fig.suptitle(title, size=12)
return integParams
def plotPeakIntegrationWindow(raw_trace, params, show_all=False):
if hasattr(params['pulseStart'], '__len__'):
pulseStarts = np.array(params['pulseStart'])
pulseStops = params['pulseStop'] + pulseStarts - params['pulseStart'][0]
baseStarts = params['baseStart'] + pulseStarts - params['pulseStart'][0]
baseStops = params['baseStop'] + pulseStarts - params['pulseStart'][0]
else:
n = params['npulses']
p = params['period']
pulseStarts = [params['pulseStart'] + i*p for i in range(n)]
pulseStops = [params['pulseStop'] + i*p for i in range(n)]
baseStarts = [params['baseStart'] + i*p for i in range(n)]
baseStops = [params['baseStop'] + i*p for i in range(n)]
if show_all:
fig, ax = plt.subplots(figsize=(6, 3), constrained_layout=True)
for i in range(len(pulseStarts)):
lbl = 'baseline' if i == 0 else None
lp = 'peak' if i == 0 else None
ax.axvline(baseStarts[i], ls='--', color='k')
ax.axvline(baseStops[i], ls='--', color='k')
ax.axvspan(baseStarts[i], baseStops[i],
alpha=0.5, color='grey', label=lbl)
ax.axvline(pulseStarts[i], ls='--', color='r')
ax.axvline(pulseStops[i], ls='--', color='r')
ax.axvspan(pulseStarts[i], pulseStops[i],
alpha=0.2, color='r', label=lp)
ax.plot(raw_trace, color='C0', label='raw trace')
ax.legend(fontsize=8)
return fig, [ax]
fig, ax = plt.subplots(1, 2, figsize=(6, 3), constrained_layout=True)
for plot in range(2):
title = 'First pulse' if plot == 0 else 'Last pulse'
i = 0 if plot == 0 else -1
ax[plot].axvline(baseStarts[i], ls='--', color='k')
ax[plot].axvline(baseStops[i], ls='--', color='k')
ax[plot].axvspan(baseStarts[i], baseStops[i],
alpha=0.5, color='grey', label='baseline')
ax[plot].axvline(pulseStarts[i], ls='--', color='r')
ax[plot].axvline(pulseStops[i], ls='--', color='r')
ax[plot].axvspan(pulseStarts[i], pulseStops[i],
alpha=0.2, color='r', label='peak')
xmin = np.max([0, baseStarts[i]-200])
xmax = np.min([pulseStops[i]+200, raw_trace.size])
ax[plot].plot(np.arange(xmin, xmax), raw_trace[xmin:xmax], color='C0',
label=title)
ax[plot].legend(fontsize=8)
ax[plot].set_xlim(xmin, xmax)
ax[plot].set_xlabel('digitizer samples')
ax[plot].set_title(title, size=10)
return fig, ax
def digitizer_type(run, source):
"""
Finds the digitizer type based on the class Id / name of the source.
Example source: 'SCS_UTC1_MCP/ADC/1'. Defaults to ADQ412 if not found.
"""
ret = None
if '_MCP/ADC/1' in source:
ret = 'FastADC'
if '_ADQ/ADC/1' in source:
ret = 'ADQ412'
if ret is None:
digi_dict = {'FastAdc': 'FastADC',
'FastAdcLegacy': 'FastADC',
'AdqDigitizer': 'ADQ412',
'PyADCChannel': 'FastADC',
'PyADCChannelLegacy': 'FastADC'}
try:
source = source.split(':')[0]
classId = run.get_run_value(source, 'classId.value')
ret = digi_dict.get(classId)
except Exception as e:
log.warning(str(e))
log.warning(f'Could not find digitizer type from source {source}.')
ret = 'ADQ412'
return ret
def get_tim_peaks(run, mnemonic=None, merge_with=None,
bunchPattern='sase3', integParams=None,
keepAllSase=False):
"""
Automatically computes TIM peaks. Sources can be loaded on the
fly via the mnemonics argument, or processed from an existing data
set (merge_with). The bunch pattern table is used to assign the
pulse id coordinates.
Parameters
----------
run: extra_data.DataCollection
DataCollection containing the digitizer data.
mnemonic: str
mnemonics for TIM, e.g. "MCP2apd".
merge_with: xarray Dataset
If provided, the resulting Dataset will be merged with this
one. The TIM variables of merge_with (if any) will also be
computed and merged.
bunchPattern: str
'sase1' or 'sase3' or 'scs_ppl', bunch pattern
used to extract peaks. The pulse ID dimension will be named
'sa1_pId', 'sa3_pId' or 'ol_pId', respectively.
integParams: dict
dictionary for raw trace integration, e.g.
{'pulseStart':100, 'pulseStop':200, 'baseStart':50,
'baseStop':99, 'period':24, 'npulses':500}.
If None, integration parameters are computed automatically.
keepAllSase: bool
Only relevant in case of sase-dedicated trains. If True, all
trains are kept, else only those of the bunchPattern are kept.
Returns
-------
xarray Dataset with TIM variables substituted by
the calculated peak values (e.g. "MCP2raw" becomes
"MCP2peaks"), merged with Dataset *merge_with* if provided.
"""
return get_digitizer_peaks(run, mnemonic, merge_with,
bunchPattern, integParams,
keepAllSase)
def get_laser_peaks(run, mnemonic=None, merge_with=None,
bunchPattern='scs_ppl', integParams=None):
"""
Extracts laser photodiode signal (peak intensity) from Fast ADC
digitizer. Sources can be loaded on the fly via the mnemonics
argument, and/or processed from an existing data set (merge_with).
The PP laser bunch pattern is used to assign the pulse id coordinates.
Parameters
----------
run: extra_data.DataCollection
DataCollection containing the digitizer data.
mnemonic: str
mnemonic for FastADC corresponding to laser signal, e.g.
"FastADC2peaks" or 'I0_ILHraw'.
merge_with: xarray Dataset
If provided, the resulting Dataset will be merged with this
one. The FastADC variables of merge_with (if any) will also be
computed and merged.
bunchPattern: str
'sase1' or 'sase3' or 'scs_ppl', bunch pattern
used to extract peaks.
integParams: dict
dictionary for raw trace integration, e.g.
{'pulseStart':100, 'pulseStop':200, 'baseStart':50,
'baseStop':99, 'period':24, 'npulses':500}.
If None, integration parameters are computed
automatically.
Returns
-------
xarray Dataset with all Fast ADC variables substituted by
the calculated peak values (e.g. "FastADC2raw" becomes
"FastADC2peaks").
"""
return get_digitizer_peaks(run, mnemonic, merge_with,
bunchPattern, integParams, False)
def get_digitizer_peaks(run, mnemonic, merge_with=None,
bunchPattern='sase3', integParams=None,
keepAllSase=False):
"""
Automatically computes digitizer peaks. A source can be loaded on the
fly via the mnemonic argument, or processed from an existing data set
(merge_with). The bunch pattern table is used to assign the pulse
id coordinates.
Parameters
----------
run: extra_data.DataCollection
DataCollection containing the digitizer data.
mnemonic: str
mnemonic for FastADC or ADQ412, e.g. "I0_ILHraw" or "MCP3apd".
The data is either loaded from the DataCollection or taken from
merge_with.
merge_with: xarray Dataset
If provided, the resulting Dataset will be merged with this one.
bunchPattern: str
'sase1', 'sase3', 'scs_ppl' or 'None': bunch pattern used to
extract peaks.
integParams: dict
dictionary for raw trace integration, e.g.
{'pulseStart':100, 'pulseStop':200, 'baseStart':50,
'baseStop':99, 'period':24, 'npulses':500}.
If None, integration parameters are computed automatically.
keepAllSase: bool
Only relevant in case of sase-dedicated trains. If True, all
trains are kept, else only those of the bunchPattern are kept.
Returns
-------
xarray Dataset with digitizer peak variables. Raw variables are
substituted by the calculated peak values (e.g. "FastADC2raw" becomes
"FastADC2peaks").
"""
run_mnemonics = mnemonics_for_run(run)
if mnemonic not in run_mnemonics:
log.warning('Mnemonic not found in run. Skipping.')
return merge_with
if bool(merge_with) and mnemonic in merge_with:
for d in merge_with[mnemonic].dims:
if d in ['sa3_pId', 'ol_pId']:
log.warning(f'{mnemonic} already extracted. '
'Skipping.')
return merge_with
# check if bunch pattern table exists
bpt = None
if bool(merge_with) and 'bunchPatternTable' in merge_with:
bpt = merge_with['bunchPatternTable']
log.debug('Using bpt from merge_with dataset.')
elif 'bunchPatternTable' in run_mnemonics:
m = run_mnemonics['bunchPatternTable']
bpt = run.get_array(m['source'], m['key'], m['dim'])
log.debug('Loaded bpt from DataCollection.')
elif 'bunchPatternTable_SA3' in run_mnemonics:
m = run_mnemonics['bunchPatternTable_SA3']
bpt = run.get_array(m['source'], m['key'], m['dim'])
log.debug('Loaded bpt from DataCollection.')
else:
log.warning('Could not load bunch pattern table.')
# prepare resulting dataset
if bool(merge_with):
mw_ds = merge_with.drop(mnemonic, errors='ignore')
else:
mw_ds = xr.Dataset()
# iterate over mnemonics and merge arrays in dataset
autoFind = True if integParams is None else False
m = run_mnemonics[mnemonic]
useRaw = True if 'raw' in mnemonic else False
if bool(merge_with) and mnemonic in merge_with:
data = merge_with[mnemonic]
else:
data = run.get_array(m['source'], m['key'], m['dim'])
peaks = get_peaks(run, data, mnemonic,
useRaw=useRaw,
autoFind=autoFind,
integParams=integParams,
bunchPattern=bunchPattern,
bpt=bpt)
name = mnemonic.replace('raw', 'peaks').replace('apd', 'peaks')
join = 'outer' if keepAllSase else 'inner'
ds = mw_ds.merge(peaks.rename(name), join=join)
return ds
def digitizer_signal_description(run, digitizer=None):
"""
Check for the existence of signal description and return all corresponding
channels in a dictionary.
Parameters
----------
run: extra_data.DataCollection
DataCollection containing the digitizer data.
digitizer: str or list of str (default=None)
Name of the digitizer: one in ['FastADC', 'FastADC2',
'ADQ412', 'ADQ412_2']
If None, all digitizers are used
Returns
-------
signal_description: dictionary containing the signal description of
the digitizer channels.
Example
-------
import toolbox_scs as tb
run = tb.open_run(3481, 100)
signals = tb.digitizer_signal_description(run)
signals_fadc2 = tb.digitizer_signal_description(run, 'FastADC2')
"""
if digitizer not in [None, 'FastADC', 'FastADC2', 'ADQ412', 'ADQ412_2']:
raise ValueError('digitizer must be one in '
'["FastADC", "FastADC2", "ADQ412", "ADQ412_2"]')
if digitizer is None:
digitizer = ['FastADC', 'FastADC2', 'ADQ412', 'ADQ412_2']
else:
digitizer = [digitizer]
def key_fadc(i):
if i > 9:
return None
return f'channel_{i}.signalDescription.value'
def key_adq412(i):
if i > 3:
return None
return f'board1.channel_{i}.description.value'
sources = {'FastADC': ['SCS_UTC1_MCP/ADC/1', key_fadc],
'FastADC2': ['SCS_UTC2_FADC/ADC/1', key_fadc],
'ADQ412': ['SCS_UTC1_ADQ/ADC/1', key_adq412],
'ADQ412_2': ['SCS_UTC2_ADQ/ADC/1', key_adq412]}
res = {}
for d in digitizer:
if sources[d][0] not in run.all_sources:
continue
if sources[d][1](0) not in run.get_run_values(sources[d][0]):
raise ValueError('No signal description available for '
f'{d}: {sources[d][0]}')
for ch in range(10):
val = sources[d][1](ch)
if val is None:
break
desc = run.get_run_value(sources[d][0], val)
res[f'{d}_Ch{ch}'] = desc
return res
def calibrateTIM(data, rollingWindow=200, mcp=1, plot=False, use_apd=True, intstart=None,
intstop=None, bkgstart=None, bkgstop=None, t_offset=None, npulses_apd=None):
''' Calibrate TIM signal (Peak-integrated signal) to the slow ion signal of SCS_XGM
(photocurrent read by Keithley, channel 'pulseEnergy.photonFlux.value').
The aim is to find F so that E_tim_peak[uJ] = F x TIM_peak. For this, we want to
match the SASE3-only average TIM pulse peak per train (TIM_avg) to the slow XGM
signal E_slow.
Since E_slow is the average energy per pulse over all SASE1 and SASE3
pulses (N1 and N3), we first extract the relative contribution C of the SASE3 pulses
by looking at the pulse-resolved signals of the SA3_XGM in the tunnel.
There, the signal of SASE1 is usually strong enough to be above noise level.
Let TIM_avg be the average of the TIM pulses (SASE3 only).
The calibration factor is then defined as: F = E_slow * C * (N1+N3) / ( N3 * TIM_avg ).
If N3 changes during the run, we locate the indices for which N3 is maximum and define
a window where to apply calibration (indices start/stop).
Warning: the calibration does not include the transmission by the KB mirrors!
Inputs:
data: xarray Dataset
rollingWindow: length of running average to calculate TIM_avg
mcp: MCP channel
plot: boolean. If True, plot calibration results.
use_apd: boolean. If False, the TIM pulse peaks are extracted from raw traces using
getTIMapd
intstart: trace index of integration start
intstop: trace index of integration stop
bkgstart: trace index of background start
bkgstop: trace index of background stop
t_offset: index separation between two pulses
npulses_apd: number of pulses
Output:
F: float, TIM calibration factor.
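Example:
(illustrative; assumes `data` is an xarray Dataset produced by the
ToolBox loading routines and contains 'npulses_sase3', 'npulses_sase1',
'SCS_photonFlux' and the MCP peak data)
F = calibrateTIM(data, rollingWindow=200, mcp=1, plot=True)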
'''
start = 0
stop = None
npulses = data['npulses_sase3']
ntrains = npulses.shape[0]
if not np.all(npulses == npulses[0]):
start = np.argmax(npulses.values)
stop = ntrains + np.argmax(npulses.values[::-1]) - 1
if stop - start < rollingWindow:
print('not enough consecutive data points with the largest number of pulses per train')
start += rollingWindow
stop = np.min((ntrains, stop+rollingWindow))
filteredTIM = getTIMapd(data, mcp, use_apd, intstart, intstop, bkgstart, bkgstop, t_offset, npulses_apd)
sa3contrib = saseContribution(data, 'sase3', 'XTD10_XGM')
avgFast = filteredTIM.mean(axis=1).rolling(trainId=rollingWindow).mean()
ratio = ((data['npulses_sase3']+data['npulses_sase1']) *
data['SCS_photonFlux'] * sa3contrib) / (avgFast*data['npulses_sase3'])
F = float(ratio[start:stop].median().values)
if plot:
fig = plt.figure(figsize=(8,5))
ax = plt.subplot(211)
ax.set_title('E[uJ] = {:2e} x TIM (MCP{})'.format(F, mcp))
ax.plot(data['SCS_photonFlux'], label='SCS XGM slow (all SASE)', color='C0')
slow_avg_sase3 = data['SCS_photonFlux']*(data['npulses_sase1']
+data['npulses_sase3'])*sa3contrib/data['npulses_sase3']
ax.plot(slow_avg_sase3, label='SCS XGM slow (SASE3 only)', color='C1')
ax.plot(avgFast*F, label='Calibrated TIM rolling avg', color='C2')
ax.legend(loc='upper left', fontsize=8)
ax.set_ylabel('Energy [$\mu$J]', size=10)
ax.plot(filteredTIM.mean(axis=1)*F, label='Calibrated TIM train avg', alpha=0.2, color='C9')
ax.legend(loc='best', fontsize=8, ncol=2)
plt.xlabel('train in run')
ax = plt.subplot(234)
xgm_fast = selectSASEinXGM(data)
ax.scatter(filteredTIM, xgm_fast, s=5, alpha=0.1, rasterized=True)
fit, cov = np.polyfit(filteredTIM.values.flatten(),xgm_fast.values.flatten(),1, cov=True)
y=np.poly1d(fit)
x=np.linspace(filteredTIM.min(), filteredTIM.max(), 10)
ax.plot(x, y(x), lw=2, color='r')
ax.set_ylabel('Raw HAMP [$\mu$J]', size=10)
ax.set_xlabel('TIM (MCP{}) signal'.format(mcp), size=10)
ax.annotate(s='y(x) = F x + A\n'+
'F = %.3e\n$\Delta$F/F = %.2e\n'%(fit[0],np.abs(np.sqrt(cov[0,0])/fit[0]))+
'A = %.3e'%fit[1],
xy=(0.5,0.6), xycoords='axes fraction', fontsize=10, color='r')
print('TIM calibration factor: %e'%(F))
ax = plt.subplot(235)
ax.hist(filteredTIM.values.flatten()*F, bins=50, rwidth=0.8)
ax.set_ylabel('number of pulses', size=10)
ax.set_xlabel('Pulse energy MCP{} [uJ]'.format(mcp), size=10)
ax.set_yscale('log')
ax = plt.subplot(236)
if not use_apd:
pulseStart = intstart
pulseStop = intstop
else:
pulseStart = data.attrs['run'].get_array(
'SCS_UTC1_ADQ/ADC/1', 'board1.apd.channel_0.pulseStart.value')[0].values
pulseStop = data.attrs['run'].get_array(
'SCS_UTC1_ADQ/ADC/1', 'board1.apd.channel_0.pulseStop.value')[0].values
if 'MCP{}raw'.format(mcp) not in data:
tid, data = data.attrs['run'].train_from_index(0)
trace = data['SCS_UTC1_ADQ/ADC/1:network']['digitizers.channel_1_D.raw.samples']
print('no raw data for MCP{}. Loading trace from MCP1'.format(mcp))
label_trace='MCP1 Voltage [V]'
else:
trace = data['MCP{}raw'.format(mcp)][0]
label_trace='MCP{} Voltage [V]'.format(mcp)
ax.plot(trace[:pulseStop+25], 'o-', ms=2, label='trace')
ax.axvspan(pulseStart, pulseStop, color='C2', alpha=0.2, label='APD region')
ax.axvline(pulseStart, color='gray', ls='--')
ax.axvline(pulseStop, color='gray', ls='--')
ax.set_xlim(pulseStart - 25, pulseStop + 25)
ax.set_ylabel(label_trace, size=10)
ax.set_xlabel('sample #', size=10)
ax.legend(fontsize=8)
return F
''' TIM calibration table
Dict with key= photon energy and value= array of polynomial coefficients for each MCP (1,2,3).
The polynomials correspond to a fit of the logarithm of the calibration factor as a function
of MCP voltage. If P is a polynomial and V the MCP voltage, the calibration factor (in microjoule
per APD signal) is given by -exp(P(V)).
This table was generated from the calibration of March 2019, proposal 900074, semester 201930,
runs 69 - 111 (Ni edge): https://in.xfel.eu/elog/SCS+Beamline/2323
runs 113 - 153 (Co edge): https://in.xfel.eu/elog/SCS+Beamline/2334
runs 163 - 208 (Fe edge): https://in.xfel.eu/elog/SCS+Beamline/2349
'''
tim_calibration_table = {
705.5: np.array([
[-6.85344690e-12, 5.00931986e-08, -1.27206912e-04, 1.15596821e-01, -3.15215367e+01],
[ 1.25613942e-11, -5.41566381e-08, 8.28161004e-05, -7.27230153e-02, 3.10984925e+01],
[ 1.14094964e-12, 7.72658935e-09, -4.27504907e-05, 4.07253378e-02, -7.00773062e+00]]),
779: np.array([
[ 4.57610777e-12, -2.33282497e-08, 4.65978738e-05, -6.43305156e-02, 3.73958623e+01],
[ 2.96325102e-11, -1.61393276e-07, 3.32600044e-04, -3.28468195e-01, 1.28328844e+02],
[ 1.14521506e-11, -5.81980336e-08, 1.12518434e-04, -1.19072484e-01, 5.37601559e+01]]),
851: np.array([
[ 3.15774215e-11, -1.71452934e-07, 3.50316512e-04, -3.40098861e-01, 1.31064501e+02],
[5.36341958e-11, -2.92533156e-07, 6.00574534e-04, -5.71083140e-01, 2.10547161e+02],
[ 3.69445588e-11, -1.97731342e-07, 3.98203522e-04, -3.78338599e-01, 1.41894119e+02]])
}
def timFactorFromTable(voltage, photonEnergy, mcp=1):
''' Returns an energy calibration factor for TIM integrated peak signal (APD)
according to calibration from March 2019, proposal 900074, semester 201930,
runs 69 - 111 (Ni edge): https://in.xfel.eu/elog/SCS+Beamline/2323
runs 113 - 153 (Co edge): https://in.xfel.eu/elog/SCS+Beamline/2334
runs 163 - 208 (Fe edge): https://in.xfel.eu/elog/SCS+Beamline/2349
Uses the tim_calibration_table declared above.
Inputs:
voltage: MCP voltage in volts.
photonEnergy: FEL photon energy in eV. Calibration factor is linearly
interpolated between the known values from the calibration table.
mcp: MCP channel (1, 2, or 3).
Output:
f: calibration factor in microjoule per APD signal
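Example:
(illustrative values; MCP voltage and photon energy are placeholders)
f = timFactorFromTable(voltage=1800., photonEnergy=779, mcp=2)
# multiply the APD signal by f to obtain pulse energies in microjoule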
'''
energies = np.sort([key for key in tim_calibration_table])
if photonEnergy not in energies:
if photonEnergy > energies.max():
photonEnergy = energies.max()
elif photonEnergy < energies.min():
photonEnergy = energies.min()
else:
idx = np.searchsorted(energies, photonEnergy) - 1
polyA = np.poly1d(tim_calibration_table[energies[idx]][mcp-1])
polyB = np.poly1d(tim_calibration_table[energies[idx+1]][mcp-1])
fA = -np.exp(polyA(voltage))
fB = -np.exp(polyB(voltage))
f = fA + (fB-fA)/(energies[idx+1]-energies[idx])*(photonEnergy - energies[idx])
return f
poly = np.poly1d(tim_calibration_table[photonEnergy][mcp-1])
f = -np.exp(poly(voltage))
return f
#############################################################################################
#############################################################################################
#############################################################################################
def extract_digitizer_peaks(proposal, runNB, mnemonic, bunchPattern=None,
integParams=None, save=False,
subdir='usr/processed_runs'):
"""
Extract the peaks from digitizer raw traces and saves them into a file.
The calculation is a trapezoidal integration between 'pulseStart' and
'pulseStop' with subtraction of a baseline defined as the median between
'baseStart' and 'baseStop'.
The integration parameters can either be provided using integParams, or
determined automatically by a peak finding algorithm. If the bunchPattern
argument is provided, the pulse ids are aligned to it.
Parameters
----------
proposal: int
the proposal number
runNB: int
the run number
mnemonic: str
the mnemonic containing raw traces. Example: 'XRD_MCP_BIGraw'
bunchPattern: str
'sase3' or 'scs_ppl'. If provided, checks the bunch pattern table using
Extra XrayPulses or OpticalPulses and aligns the pulse ids.
If None, the pulse ids are not aligned and indexed between 0 and the
number of pulses per train.
integParams: dict
dictionary with keys ['pulseStart', 'pulseStop', 'baseStart',
'baseStop', 'period', 'npulses']. If provided, automatic peak finding
is skipped.
If bunchPattern is not None, 'period' and 'npulses' are adjusted to match
the bunch pattern and align the pulse ids.
save: bool
whether to save the result to a file or not.
subdir: str
subdirectory. The data is stored in
<proposal path>/<subdir>/r{runNB:04d}/f'r{runNB:04d}-digitizers-data.h5'
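Example
-------
Illustrative usage; proposal / run numbers and mnemonic are placeholders.
>>> peaks = extract_digitizer_peaks(2712, 100, 'XRD_MCP_BIGraw',
...                                 bunchPattern='sase3', save=False)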
"""
run = open_run(proposal, runNB)
run_mnemonics = mnemonics_for_run(run)
mnemo = run_mnemonics.get(mnemonic)
if mnemo is None:
log.warning('Mnemonic not found. Skipping.')
return xr.DataArray()
source, key = mnemo['source'], mnemo['key']
extra_dim = {'sase3': 'sa3_pId', 'scs_ppl': 'ol_pId'}.get(bunchPattern)
if extra_dim is None:
extra_dim = 'pulseId'
digitizer = digitizer_type(run, source)
if digitizer == 'FastADC':
pulse_period = 24
if digitizer == 'ADQ412':
pulse_period = 440
pattern = None
try:
if bunchPattern == 'sase3':
pattern = XrayPulses(run)
if bunchPattern == 'scs_ppl':
pattern = OpticalLaserPulses(run)
except Exception as e:
log.warning(e)
'''
pattern = None
try:
if bunchPattern == 'sase3':
pattern = XrayPulses(run)
if bunchPattern == 'scs_ppl':
pattern = OpticalLaserPulses(run)
except Exception as e:
log.warning(e)
bunchPattern = None
# Extract pulse_ids, npulses and period from bunch pattern
pulse_ids, npulses, period = None, None, None
regular = True
if pattern is not None:
if pattern.is_constant_pattern() is False:
log.info('The number of pulses changed during the run.')
pulse_ids = np.unique(pattern.pulse_ids(labelled=False, copy=False))
npulses, period = None, None
regular = False
else:
pulse_ids = pattern.peek_pulse_ids(labelled=False)
npulses = len(pulse_ids)
if npulses > 1:
periods = np.diff(pulse_ids)
if len(np.unique(periods)) > 1:
regular = False
else:
period = np.unique(periods)[0] * pulse_period
# Use integration parameters, adjust them to match bunch pattern if necessary
if integParams is not None:
required_keys = ['pulseStart', 'pulseStop', 'baseStart',
'baseStop']
if bunchPattern is None:
required_keys += ['period', 'npulses']
if not all(name in integParams for name in required_keys):
raise TypeError('All keys of integParams argument '
f'{required_keys} are required.')
params = integParams.copy()
if pattern is not None:
if (npulses != params.get('npulses') or
period != params.get('period')):
log.warning(f'Integration parameters '
f'(npulses={params.get("npulses")}, '
f'period={params.get("period")}) do not match the '
f'the bunch pattern (npulses={npulses}, '
f'period={period}). Using bunch pattern parameters.')
params['npulses'] = npulses
params['period'] = period
else:
pulse_ids = np.arange(0, params['npulses'], params['period'],
dtype=np.uint64)
start = params['pulseStart']
params['pulseStart'] = [start + (pid - pulse_ids[0]) * pulse_period
for pid in pulse_ids]
'''
# load traces and calculate the average
traces = run[source, key].xarray(name=mnemonic, extra_dims=mnemo['dim'])
trace = traces.mean('trainId').rename(mnemonic.replace('raw', 'avg'))
params, pulse_ids, regular, trace = find_peak_integration_parameters(
run, mnemonic, trace, integParams, pattern)
'''
# find peak integration parameters
if integParams is None:
params = find_integ_params(trace)
if (period is not None and params['period'] != period
or npulses is not None and params['npulses'] != npulses):
log.warning(f'Bunch pattern (npulses={npulses}, period={period}) and '
f'found integration parameters (npulses={params["npulses"]}, '
f'period={params["period"]}) do not match. Using bunch '
'pattern parameters.')
params["npulses"] = npulses
params["period"] = period
if pulse_ids is None:
pulse_ids = np.arange(params['npulses'], dtype=np.uint64)
start = params['pulseStart']
params['pulseStart'] = [start + (pid - pulse_ids[0]) * pulse_period
for pid in pulse_ids]
'''
# extract peaks
data = peaks_from_raw_trace(traces, **params, extra_dim=extra_dim)
data = data.rename(mnemonic.replace('raw', 'peaks'))
if regular is True:
data = data.assign_coords({extra_dim: pulse_ids})
else:
mask = pattern.pulse_mask(labelled=False)
mask = xr.DataArray(mask, dims=['trainId', extra_dim],
coords={'trainId': run[source, key].train_ids,
extra_dim: np.arange(mask.shape[1])})
mask = mask.sel({extra_dim: pulse_ids})
data = data.where(mask, drop=True)
data.attrs['params_keys'] = list(params.keys())
if regular:
params['pulseStart'] = params['pulseStart'][0]
for p in params:
if params[p] is None:
params[p] = 'None'
data.attrs[f'params_{data.name}'] = list(params.values())
if save:
save_peaks(proposal, runNB, data, trace, subdir)
return data
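# Minimal usage sketch for the peak extraction above (the proposal, run number
# and mnemonic are placeholders, and the function name is assumed to be
# extract_digitizer_peaks(), as referenced by save_peaks() below):
#
# >>> peaks = extract_digitizer_peaks(900000, 1, 'XRD_MCP_BIGraw',
# ...                                 bunchPattern='scs_ppl', save=False)
# >>> peaks.mean('trainId')  # average peak value per laser pulse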
def save_peaks(proposal, runNB, peaks, avg_trace, subdir):
'''
Save the peaks extracted with extract_digitizer_peaks() as well as the
average raw trace in a dataset at the proposal + subdir location.
If a dataset already exists, the new data is merged with it.
Parameters
----------
proposal: int
the proposal number
runNB: int
the run number
peaks: xarray DataArray
the 2D-array obtained by extract_digitizer_peaks()
avg_trace: xarray DataArray
the average raw trace over the trains
subdir: str
subdirectory. The data is stored in
<proposal path>/<subdir>/r{runNB:04d}/r{runNB:04d}-digitizers-data.h5
Returns
-------
None
'''
root = find_proposal(f'p{proposal:06d}')
path = os.path.join(root, subdir + f'/r{runNB:04d}/')
os.makedirs(path, exist_ok=True)
fname = path + f'r{runNB:04d}-digitizers-data.h5'
ds_peaks = peaks.to_dataset(promote_attrs=True)
if os.path.isfile(fname):
ds = xr.load_dataset(fname)
ds = ds.drop_vars([peaks.name, avg_trace.name], errors='ignore')
for dim in ds.dims:
if all(dim not in ds[v].dims for v in ds):
ds = ds.drop_dims(dim)
dim_name = 'sampleId'
if 'sampleId' in ds.dims and ds.sizes['sampleId'] != len(avg_trace):
dim_name = 'sampleId2'
avg_trace = avg_trace.rename({avg_trace.dims[0]: dim_name})
if f'params_{peaks.name}' in ds.attrs:
del ds.attrs[f'params_{peaks.name}']
ds = xr.merge([ds, ds_peaks, avg_trace],
combine_attrs='drop_conflicts', join='inner')
else:
ds = ds_peaks.merge(avg_trace.rename({avg_trace.dims[0]: 'sampleId'}))
ds.to_netcdf(fname, format='NETCDF4')
print(f'saved data into {fname}.')
return None
def load_processed_peaks(proposal, runNB, mnemonic=None,
data='usr/processed_runs', merge_with=None):
"""
Load processed digitizer peaks data.
Parameters
----------
proposal: int
the proposal number
runNB: int
the run number
mnemonic: str
the mnemonic containing peaks. Example: 'XRD_MCP_BIGpeaks'.
If None, the entire dataset is loaded
data: str
subdirectory. The data is stored in
<proposal path>/<subdir>/r{runNB:04d}/r{runNB:04d}-digitizers-data.h5
merge_with: xarray Dataset
A dataset to merge the data with.
Returns
-------
xarray DataArray if mnemonic is not None and merge_with is None
xarray Dataset if mnemonic is None or merge_with is not None.
Example
-------
# load the mono energy and the MCP_BIG peaks
run, ds = tb.load(proposal, runNB, 'nrj')
ds = tb.load_processed_peaks(proposal, runNB,'XRD_MCP_BIGpeaks', merge_with=ds)
"""
if mnemonic is None:
return load_all_processed_peaks(proposal, runNB, data, merge_with)
root = find_proposal(f'p{proposal:06d}')
path = os.path.join(root, data + f'/r{runNB:04d}/')
fname = path + f'r{runNB:04d}-digitizers-data.h5'
if os.path.isfile(fname):
ds = xr.load_dataset(fname)
if mnemonic not in ds:
print(f'Mnemonic {mnemonic} not found in {fname}')
return {}
da = ds[mnemonic]
da.attrs['params_keys'] = ds.attrs['params_keys']
da.attrs[f'params_{mnemonic}'] = ds.attrs[f'params_{mnemonic}']
if merge_with is not None:
return merge_with.merge(da.to_dataset(promote_attrs=True),
combine_attrs='drop_conflicts', join='inner')
else:
return da
else:
print(f'Mnemonic {mnemonic} not found in {fname}')
return merge_with
def load_all_processed_peaks(proposal, runNB, data='usr/processed_runs',
merge_with=None):
"""
Load processed digitizer peaks dataset. The data contains the peaks,
the average raw trace and the integration parameters (attribute) of
each processed digitizer source.
Parameters
----------
proposal: int
the proposal number
runNB: int
the run number
data: str
subdirectory. The data is stored in
<proposal path>/<subdir>/r{runNB:04d}/r{runNB:04d}-digitizers-data.h5
merge_with: xarray Dataset
A dataset to merge the data with.
Returns
-------
xarray Dataset
"""
root = find_proposal(f'p{proposal:06d}')
path = os.path.join(root, data + f'/r{runNB:04d}/')
fname = path + f'r{runNB:04d}-digitizers-data.h5'
if os.path.isfile(fname):
if merge_with is not None:
return merge_with.merge(xr.load_dataset(fname),
combine_attrs='drop_conflicts', join='inner')
return xr.load_dataset(fname)
else:
print(f'{fname} not found.')
return merge_with
def check_processed_peak_params(proposal, runNB, mnemonic, data='usr/processed_runs',
plot=True, show_all=False):
"""
Check the integration parameters used to generate the processed peak values.
Parameters
----------
proposal: int
the proposal number
runNB: int
the run number
mnemonic: str
the mnemonic containing peaks. Example: 'XRD_MCP_BIGpeaks'.
If None, the entire dataset is loaded
data: str
subdirectory. The data is stored in
<proposal path>/<subdir>/r{runNB:04d}/r{runNB:04d}-digitizers-data.h5
plot: bool
If True, displays the raw trace and peak integration regions.
show_all: bool
If True, displays the entire raw trace and all peak integration
regions (this can be time-consuming).
If False, shows the first and last pulses.
Returns
-------
params: dict
the integration parameters with keys ['pulseStart', 'pulseStop',
'baseStart', 'baseStop', 'period', 'npulses'].
See `extract_digitizer_peaks()`.
"""
root = find_proposal(f'p{proposal:06d}')
path = os.path.join(root, data + f'/r{runNB:04d}/')
fname = path + f'r{runNB:04d}-digitizers-data.h5'
if os.path.isfile(fname):
ds = xr.load_dataset(fname)
if mnemonic.replace('raw', 'peaks') not in ds:
print(f'Mnemonic {mnemonic} not found in {fname}')
return {}
da = ds[mnemonic]
params = dict(zip(ds.attrs['params_keys'], ds.attrs[f'params_{mnemonic}']))
if plot:
plotPeakIntegrationWindow(ds[mnemonic.replace('peaks', 'avg')],
params, show_all=show_all)
return params
else:
print(f'{fname} not found.')
return {}
import multiprocessing
from joblib import Parallel, delayed, parallel_backend
from time import strftime
import tempfile
import shutil
......@@ -7,18 +7,19 @@ import os
import warnings
import psutil
import karabo_data as kd
from karabo_data.read_machinery import find_proposal
from karabo_data.geometry2 import DSSC_1MGeometry
import ToolBox as tb
import extra_data as ed
from extra_data.read_machinery import find_proposal
from extra_geom import DSSC_1MGeometry
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import ImageGrid
import numpy as np
import xarray as xr
import h5py
from imageio import imread
from ..constants import mnemonics as _mnemonics
from .azimuthal_integrator import AzimuthalIntegrator
class DSSC:
def __init__(self, proposal, distance=1):
......@@ -43,8 +44,9 @@ class DSSC:
self.aspect = self.px_pitch_v/self.px_pitch_h # aspect ratio of the DSSC images
self.geom = None
self.mask = None
self.max_fraction_memory = 0.8
self.max_fraction_memory = 0.4
self.filter_mask = None
self.Nworker = 16
print('DSSC configuration')
print(f'Topic: {self.topic}')
......@@ -62,7 +64,7 @@ class DSSC:
shutil.rmtree(self.tempdir)
def open_run(self, run_nr, isDark=False):
""" Open a run with karabo-data and prepare the virtual dataset for multiprocessing
""" Open a run with extra-data and prepare the virtual dataset for multiprocessing
inputs:
run_nr: the run number
......@@ -70,13 +72,12 @@ class DSSC:
"""
print('Opening run data with karabo-data')
print('Opening run data with extra-data')
self.run_nr = run_nr
self.xgm = None
self.filter_mask = None
self.scan_vname = None
self.run = kd.open_run(self.proposal, self.run_nr)
self.run = ed.open_run(self.proposal, self.run_nr)
self.isDark = isDark
self.plot_title = f'{self.proposal} run: {self.run_nr}'
......@@ -100,14 +101,14 @@ class DSSC:
print(f'Temporary directory: {self.tempdir}')
print('Creating virtual dataset')
self.vdslist = self.create_virtual_dssc_datasets(self.run, path=self.tempdir)
self.vds_filenames = self.create_virtual_dssc_datasets(self.run, path=self.tempdir)
# create a dummy scan variable for dark run
# for other type or run, use DSSC.define_run function to overwrite it
self.scan = xr.DataArray(np.ones_like(self.run.train_ids), dims=['trainId'],
coords={'trainId': self.run.train_ids})
coords={'trainId': self.run.train_ids}).to_dataset(
name='scan_variable')
self.scan_vname = 'dummy'
self.vds_scan = None
def define_scan(self, vname, bins):
"""
......@@ -120,29 +121,24 @@ class DSSC:
"""
if type(vname) is dict:
self.scan = self.run.get_array(vname['source'], vname['key'])
scan = self.run.get_array(vname['source'], vname['key'])
elif type(vname) is str:
if vname not in tb.mnemonics:
if vname not in _mnemonics:
raise ValueError(f'{vname} not found in the ToolBox mnemonics table')
self.scan = self.run.get_array(tb.mnemonics[vname]['source'], tb.mnemonics[vname]['key'])
scan = self.run.get_array(_mnemonics[vname]['source'], _mnemonics[vname]['key'])
else:
raise ValueError(f'vname should be a string or a dict. We got {type(vname)}')
if (type(bins) is int) or (type(bins) is float):
self.scan = bins * np.round(self.scan / bins)
scan = bins * np.round(scan / bins)
else:
# TODO: digitize the data
raise ValueError(f'To be implemented')
self.scan_vname = vname
self.vds_scan = os.path.join(self.tempdir, 'scan_variable.h5')
if os.path.isfile(self.vds_scan):
os.remove(self.vds_scan)
self.scan = self.scan.to_dataset(name='scan_variable')
self.scan = scan.to_dataset(name='scan_variable')
self.scan['xgm_pumped'] = self.xgm[:, :self.nbunches:2].mean('dim_0')
self.scan['xgm_unpumped'] = self.xgm[:, 1:self.nbunches:2].mean('dim_0')
self.scan.to_netcdf(self.vds_scan, group='data')
self.scan_counts = xr.DataArray(np.ones(len(self.scan['scan_variable'])),
dims=['scan_variable'],
......@@ -175,8 +171,9 @@ class DSSC:
"""
if self.xgm is None:
self.xgm = self.run.get_array(tb.mnemonics['SCS_SA3']['source'],
tb.mnemonics['SCS_SA3']['key'], roi=kd.by_index[:self.nbunches])
self.xgm = self.run.get_array(_mnemonics['SCS_SA3']['source'],
_mnemonics['SCS_SA3']['key'],
roi=ed.by_index[:self.nbunches])
def plot_xgm_hist(self, nbins=100):
""" Plots an histogram of the SCS XGM dedicated SAS3 data.
......@@ -193,7 +190,7 @@ class DSSC:
plt.figure(figsize=(5,3))
plt.bar(bins_center, hist, align='center', width=width)
plt.xlabel(f"{tb.mnemonics['SCS_SA3']['source']}{tb.mnemonics['SCS_SA3']['key']}")
plt.xlabel(f"{_mnemonics['SCS_SA3']['source']}{_mnemonics['SCS_SA3']['key']}")
plt.ylabel('density')
plt.title(self.plot_title)
......@@ -231,17 +228,28 @@ class DSSC:
print((f'Rejecting {nrejected} out of {len(self.run.train_ids)} trains due to xgm '
f'thresholds: [{self.xgm_low}, {self.xgm_high}]'))
def load_geom(self):
def load_geom(self, geopath=None, quad_pos=None):
""" Loads and return the DSSC geometry.
inputs:
geopath: path to the h5 geometry file. If None uses a default file.
quad_pos: list of quadrants tuple position. If None uses a default position.
output:
return the loaded geometry
"""
quad_pos = [
(-124.100, 3.112), # TR
(-133.068, -110.604), # BR
( 0.988, -125.236), # BL
( 4.528, -4.912) # TL
]
path = '/gpfs/exfel/sw/software/exfel_environments/misc/git/karabo_data/docs/dssc_geo_june19.h5'
self.geom = DSSC_1MGeometry.from_h5_file_and_quad_positions(path, quad_pos)
if quad_pos is None:
quad_pos = [(-124.100, 3.112), # TR
(-133.068, -110.604), # BR
( 0.988, -125.236), # BL
( 4.528, -4.912) # TL
]
if geopath is None:
geopath = '/gpfs/exfel/sw/software/git/EXtra-geom/docs/dssc_geo_june19.h5'
self.geom = DSSC_1MGeometry.from_h5_file_and_quad_positions(geopath, quad_pos)
return self.geom
def load_mask(self, fname, plot=True):
......@@ -266,39 +274,39 @@ class DSSC:
""" Create virtual datasets for each 16 DSSC modules used for the multiprocessing.
input:
run: karabo-data run
run: extra-data run
path: string where the virtual files are created
output:
dictionary of key: module number, value: virtual dataset filename
"""
vds_list = []
for m in tqdm(range(16)):
vds_filename = os.path.join(path, f'dssc{m}_vds.h5')
if os.path.isfile(vds_filename):
os.remove(vds_filename)
module_vds = run.get_virtual_dataset(f'SCS_DET_DSSC1M-1/DET/{m}CH0:xtdf',
'image.data', filename=vds_filename)
vds_list.append([vds_filename, module_vds])
return vds_list
vds_filenames = {}
for module in tqdm(range(16)):
fname = os.path.join(path, f'dssc{module}_vds.h5')
if os.path.isfile(fname):
os.remove(fname)
vds = run.get_virtual_dataset(f'SCS_DET_DSSC1M-1/DET/{module}CH0:xtdf',
'image.data', filename=fname)
vds.file.close() # keep h5 file closed outside 'with' context
vds_filenames[module] = fname
return vds_filenames
def binning(self, do_pulse_mean=True):
""" Bin the DSSC data by the predifined scan type (DSSC.define()) using multiprocessing
"""
if self.vds_scan is None:
# probably a dark run with a dummy scan variable
self.vds_scan = os.path.join(self.tempdir, 'scan_variable.h5')
if os.path.isfile(self.vds_scan):
os.remove(self.vds_scan)
self.scan = self.scan.to_dataset(name='scan_variable')
self.scan.to_netcdf(self.vds_scan, group='data')
# get available memory in GB; we will use at most max_fraction_memory of it
max_GB = psutil.virtual_memory().available/1024**3
print(f'max available memory: {max_GB} GB')
# max_GB / (8byte * 16modules * 128px * 512px * N_pulses)
self.chunksize = int(self.max_fraction_memory*max_GB * 1024**3 // (8 * 16 * 128 * 512 * self.fpt))
# max_GB / (8byte * Nworker * 128px * 512px * N_pulses)
self.chunksize = int(self.max_fraction_memory*max_GB * 1024**3 // (8 * self.Nworker * 128 * 512 * self.fpt))
print('processing', self.chunksize, 'trains per chunk')
......@@ -307,20 +315,29 @@ class DSSC:
jobs.append(dict(
module=m,
fpt=self.fpt,
vdf_module=os.path.join(self.tempdir, f'dssc{m}_vds.h5'),
vds=self.vds_filenames[m],
chunksize=self.chunksize,
vdf_scan=self.vds_scan,
scan=self.scan['scan_variable'],
nbunches=self.nbunches,
run_nr=self.run_nr,
do_pulse_mean=do_pulse_mean
))
if self.Nworker != 16:
with warnings.catch_warnings():
warnings.simplefilter("default")
warnings.warn(('Nworker other than 16 is known to cause issues ' +
'(https://in.xfel.eu/gitlab/SCS/ToolBox/merge_requests/76)'),
RuntimeWarning)
timestamp = strftime('%X')
print(f'start time: {timestamp}')
with multiprocessing.Pool(16) as pool:
module_data = pool.map(process_one_module, jobs)
with parallel_backend('loky', n_jobs=self.Nworker):
module_data = Parallel(verbose=20)(
delayed(process_one_module)(job) for job in tqdm(jobs)
)
print('finished:', strftime('%X'))
# rearrange the multiprocessed data
......@@ -352,9 +369,9 @@ class DSSC:
save_folder = self.save_folder
if self.isDark:
fname = f'run{self.run_nr}_dark.h5' # no scan
fname = f'run{self.run_nr}_dark.nc' # no scan
else:
fname = f'run{self.run_nr}.h5' # run with delay scan (change for other scan types!)
fname = f'run{self.run_nr}.nc' # run with delay scan (change for other scan types!)
save_path = os.path.join(save_folder, fname)
......@@ -365,6 +382,7 @@ class DSSC:
warnings.warn(f'Overwriting file: {save_path}')
os.remove(save_path)
self.module_data.to_netcdf(save_path, group='data')
self.module_data.close()
os.chmod(save_path, 0o664)
print('saving: ', save_path)
else:
......@@ -385,8 +403,10 @@ class DSSC:
self.plot_title = f'{self.proposal} run: {runNB} dark: {dark_runNB}'
dark = xr.open_dataset(os.path.join(save_folder, f'run{dark_runNB}_dark.h5'), group='data')
binned = xr.open_dataset(os.path.join(save_folder, f'run{runNB}.h5'), group='data')
dark = xr.load_dataset(os.path.join(save_folder, f'run{dark_runNB}_dark.nc'), group='data',
engine='netcdf4')
binned = xr.load_dataset(os.path.join(save_folder, f'run{runNB}.nc'), group='data',
engine='netcdf4')
binned['pumped'] = (binned['pumped'] - dark['pumped'].values)
binned['unpumped'] = (binned['unpumped'] - dark['unpumped'].values)
......@@ -497,7 +517,7 @@ class DSSC:
if center is None:
center = c_geom
ai = tb.azimuthal_integrator(im_pumped_mean.shape, center, angle_range, dr=dr)
ai = AzimuthalIntegrator(im_pumped_mean.shape, center, angle_range, dr=dr)
norm = ai(~np.isnan(im_pumped_mean))
az_pump = []
......@@ -592,22 +612,19 @@ class DSSC:
def process_one_module(job):
module = job['module']
fpt = job['fpt']
data_vdf = job['vdf_module']
scan_vdf = job['vdf_scan']
vds = job['vds']
scan = job['scan']
chunksize = job['chunksize']
nbunches = job['nbunches']
do_pulse_mean = job['do_pulse_mean']
image_path = f'INSTRUMENT/SCS_DET_DSSC1M-1/DET/{module}CH0:xtdf/image/data'
npulse_path = f'INDEX/SCS_DET_DSSC1M-1/DET/{module}CH0:xtdf/image/count'
with h5py.File(data_vdf, 'r') as m:
with h5py.File(vds, 'r') as m:
all_trainIds = m['INDEX/trainId'][()]
n_trains = len(all_trainIds)
chunk_start = np.arange(n_trains, step=chunksize, dtype=int)
frames_per_train = m[npulse_path][()]
trains_with_data = all_trainIds[frames_per_train == fpt]
# load scan variable
scan = xr.open_dataset(scan_vdf, group='data')['scan_variable']
scan.name = 'scan'
len_scan = len(scan.groupby(scan))
if do_pulse_mean:
......@@ -631,14 +648,7 @@ def process_one_module(job):
module_data['module'] = module
# crunching
with h5py.File(data_vdf, 'r') as m:
#fpt_calc = int(len(m[image_path]) / n_trains)
#assert fpt_calc == fpt, f'data length does not match expected value (module {module})'
all_trainIds = m['INDEX/trainId'][()]
frames_per_train = m[npulse_path][()]
trains_with_data = all_trainIds[frames_per_train == fpt]
#print(np.unique(pulses_per_train), '/', fpt)
#print(len(trains_with_data))
with h5py.File(vds, 'r') as m:
chunk_start = np.arange(len(all_trainIds), step=chunksize, dtype=int)
trains_start = 0
......
......@@ -5,9 +5,8 @@ import os
import warnings
import psutil
import karabo_data as kd
from karabo_data.read_machinery import find_proposal
import ToolBox as tb
import extra_data as ed
from extra_data.read_machinery import find_proposal
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import ImageGrid
import matplotlib.patches as patches
......@@ -18,6 +17,9 @@ from glob import glob
from imageio import imread
from ..constants import mnemonics as _mnemonics
from ..misc.laser_utils import positionToDelay
class DSSC1module:
def __init__(self, module, proposal):
......@@ -58,18 +60,18 @@ class DSSC1module:
self.maxSaturatedPixel = 1
def open_run(self, run_nr, t0=0.0):
""" Open a run with karabo-data and prepare the virtual dataset for multiprocessing
""" Open a run with extra-data and prepare the virtual dataset for multiprocessing
inputs:
run_nr: the run number
t0: optional t0 in mm
"""
print('Opening run data with karabo-data')
print('Opening run data with extra-data')
self.run_nr = run_nr
self.xgm = None
self.run = kd.open_run(self.proposal, self.run_nr)
self.run = ed.open_run(self.proposal, self.run_nr)
self.plot_title = f'{self.proposal} run: {self.run_nr}'
self.fpt = self.run.detector_info(f'SCS_DET_DSSC1M-1/DET/{self.module}CH0:xtdf')['frames_per_train']
......@@ -90,23 +92,23 @@ class DSSC1module:
print(f'Loading XGM data')
self.xgm = self.run.get_array(tb.mnemonics['SCS_SA3']['source'],
tb.mnemonics['SCS_SA3']['key'],
roi=kd.by_index[:self.nbunches])
self.xgm = self.run.get_array(_mnemonics['SCS_SA3']['source'],
_mnemonics['SCS_SA3']['key'],
roi=ed.by_index[:self.nbunches])
self.xgm = self.xgm.rename({'dim_0':'pulseId'})
self.xgm['pulseId'] = np.arange(0, 2*self.nbunches, 2)
print(f'Loading mono nrj data')
self.nrj = self.run.get_array(tb.mnemonics['nrj']['source'],
tb.mnemonics['nrj']['key'])
self.nrj = self.run.get_array(_mnemonics['nrj']['source'],
_mnemonics['nrj']['key'])
print(f'Loading delay line data')
try:
self.delay_mm = self.run.get_array(tb.mnemonics['PP800_DelayLine']['source'],
tb.mnemonics['PP800_DelayLine']['key'])
self.delay_mm = self.run.get_array(_mnemonics['PP800_DelayLine']['source'],
_mnemonics['PP800_DelayLine']['key'])
except:
self.delay_mm = 0*self.nrj
self.t0 = t0
self.delay_ps = tb.positionToDelay(self.delay_mm, origin=self.t0, invert=True)
self.delay_ps = positionToDelay(self.delay_mm, origin=self.t0, invert=True)
def collect_dssc_module_file(self):
""" Collect the raw DSSC module h5 files.
......@@ -290,6 +292,7 @@ class DSSC1module:
warnings.warn(f'Overwriting file: {save_path}')
os.remove(save_path)
data.to_netcdf(save_path, group='data')
data.close()
os.chmod(save_path, 0o664)
print('saving: ', save_path)
else:
......@@ -306,7 +309,7 @@ class DSSC1module:
save_folder = self.save_folder
self.run_nr = dark_runNB
self.dark_data = xr.open_dataset(os.path.join(save_folder, f'run{dark_runNB}_dark.h5'), group='data')
self.dark_data = xr.load_dataset(os.path.join(save_folder, f'run{dark_runNB}_dark.h5'), group='data')
self.plot_title = f"{self.proposal} dark: {self.dark_data['run'].values}"
def show_rois(self):
......@@ -422,4 +425,4 @@ def process_one_module(job):
module_data['std_data'] += (temp**2).sum(dim='trainId')
module_data['counts'] += n_trains
return module_data
""" Digitizers related sub-routines
Copyright (2021) SCS Team.
(contributions preferably comply with pep8 code structure
guidelines.)
"""
import logging
import os
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
from scipy.signal import find_peaks
from ..misc.bunch_pattern_external import is_pulse_at
from ..mnemonics_machinery import mnemonics_for_run
from extra_data import open_run
from extra_data.read_machinery import find_proposal
from extra.components import XrayPulses, OpticalLaserPulses
__all__ = [
'check_peak_params',
'get_digitizer_peaks',
'get_dig_avg_trace',
'load_processed_peaks',
'check_processed_peak_params'
]
log = logging.getLogger(__name__)
def peaks_from_raw_trace(traces, pulseStart, pulseStop, baseStart, baseStop,
period=None, npulses=None, extra_dim=None,
mode='trapz'):
"""
Computes peaks from raw digitizer traces by integration over a pulse window
(trapezoidal by default, see `mode`) with baseline subtraction.
Parameters
----------
traces: xarray DataArray or numpy array containing raw traces. If
numpy array is provided, the second dimension is that of the samples.
pulseStart: int or list or 1D-numpy array
trace index of integration start. If 1d array, each value is the start
of one peak. The period and npulses parameters are ignored.
pulseStop: int
trace index of integration stop.
baseStart: int
trace index of background start.
baseStop: int
trace index of background stop.
period: int
number of samples between two peaks. Ignored if pulseStart is a 1D-array.
npulses: int
number of pulses. Ignored if pulseStart is a 1D-array.
extra_dim: str
Name given to the dimension along the peaks. Defaults to 'pulseId'.
mode: str in ['trapz', 'sum', 'mean', 'amplitude']
The operation performed over the integration region. `amplitude` is
the difference between the baseline and the peak height.
Returns
-------
xarray DataArray
"""
assert traces.ndim == 2
if isinstance(traces, xr.DataArray):
ntid = traces.sizes['trainId']
coords = traces.coords
traces = traces.values
if traces.shape[0] != ntid:
traces = traces.T
else:
coords = None
if hasattr(pulseStart, '__len__'):
pulseStart = np.array(pulseStart)
pulses = pulseStart - pulseStart[0]
pulseStart = pulseStart[0]
else:
if period is None or period == 0:
period = 1
pulses = range(0, npulses*period, period)
if extra_dim is None:
extra_dim = 'pulseId'
results = xr.DataArray(np.empty((traces.shape[0], len(pulses))),
coords=coords,
dims=['trainId', extra_dim])
func = {'trapz': np.trapz, 'sum': np.sum,
'mean': np.mean, 'amplitude': 'amplitude'}.get(mode)
if func is None:
raise ValueError('mode must be a string in ["trapz", "sum",'
'"mean", "amplitude"]')
for i, p in enumerate(pulses):
a = pulseStart + p
b = pulseStop + p
bkga = baseStart + p
bkgb = baseStop + p
if b > traces.shape[1]:
break
bg = np.outer(np.median(traces[:, bkga:bkgb], axis=1),
np.ones(b-a))
if mode == 'amplitude':
results[:, i] = np.max(traces[:, bkga:b], axis=1) - \
np.min(traces[:, bkga:b], axis=1)
else:
results[:, i] = func(traces[:, a:b] - bg, axis=1)
return results
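# Sketch of peaks_from_raw_trace() on synthetic data (all values invented for
# illustration): two rectangular pulses of 10 samples, 50 samples apart, with
# the baseline taken from the 10 samples preceding the first pulse.
#
# >>> import numpy as np
# >>> traces = np.zeros((5, 200))                     # 5 trains, 200 samples
# >>> traces[:, 20:30] = 1.
# >>> traces[:, 70:80] = 1.
# >>> peaks = peaks_from_raw_trace(traces, pulseStart=20, pulseStop=30,
# ...                              baseStart=10, baseStop=19,
# ...                              period=50, npulses=2)
# >>> peaks.shape                                     # (trainId, pulseId)
# (5, 2)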
def peaks_from_apd(array, params, digitizer, bpt, bunchPattern):
"""
Extract peak-integrated data according to the bunch pattern.
Parameters
----------
array: xarray DataArray
2D array containing peak-integrated data
params: dict
peak-integration parameters of the digitizer
digitizer: str
digitizer type, one of {'FastADC', 'ADQ412'}
bpt: xarray DataArray
bunch pattern table
bunchPattern: str
one of {'sase1', 'sase3', 'scs_ppl'}, used to select pulses and
assign name of the pulse id dimension.
Returns
-------
xarray DataArray with pulse id coordinates.
"""
if params['enable'] == 0 or (array == 1.).all():
raise ValueError('The digitizer did not record integrated peaks. '
'Consider using raw traces from the same channel '
'for peak integration.')
if digitizer == 'FastADC':
min_distance = 24
if digitizer == 'ADQ412':
min_distance = 440
period = params['period']
if period % min_distance != 0:
log.warning(f'Warning: the pulse period ({period} samples) of the '
'digitizer is not a multiple of the pulse separation at '
f'4.5 MHz ({min_distance} samples). Pulse id assignment '
'is likely to fail.')
stride = int(period/min_distance)
npulses_apd = params['npulses']
dim_names = {'sase3': 'sa3_pId', 'sase1': 'sa1_pId', 'scs_ppl': 'ol_pId'}
pulse_dim = dim_names[bunchPattern]
arr_dim = [dim for dim in array.dims if dim != 'trainId'][0]
if npulses_apd > array.sizes[arr_dim]:
log.warning(f'The digitizer was set to record {npulses_apd} pulses '
f'but the array length is only {array.sizes[arr_dim]}.')
npulses_apd = array.sizes[arr_dim]
mask = is_pulse_at(bpt, bunchPattern).rename({'pulse_slot': pulse_dim})
mask = mask.where(mask.trainId.isin(array.trainId), drop=True)
mask = mask.assign_coords({pulse_dim: np.arange(bpt.sizes['pulse_slot'])})
pid = np.sort(np.unique(np.where(mask)[1]))
npulses_bpt = len(pid)
apd_coords = np.arange(pid[0], pid[0] + stride*npulses_apd, stride)
noalign = False
if len(np.intersect1d(apd_coords, pid, assume_unique=True)) < npulses_bpt:
log.warning('Not all pulses were recorded. The digitizer '
'was set to record pulse ids '
f'{apd_coords[apd_coords<bpt.sizes["pulse_slot"]]} but the '
'bunch pattern for'
f' {bunchPattern} is {pid}. Skipping pulse ID alignment.')
noalign = True
array = array.isel({arr_dim: slice(0, npulses_apd)})
array = array.where(array != 1.)
if noalign:
return array
array = array.rename(
{arr_dim: pulse_dim}).assign_coords({pulse_dim: apd_coords})
array, mask = xr.align(array, mask, join='inner')
array = array.where(mask, drop=True)
return array
def get_dig_avg_trace(run, mnemonic, ntrains=None):
"""
Compute the average over ntrains evenly spaced across all trains
of a digitizer trace.
Parameters
----------
run: extra_data.DataCollection
DataCollection containing the digitizer data.
mnemonic: str
ToolBox mnemonic of the digitizer data, e.g. 'MCP2apd'.
ntrains: int
Number of trains used to calculate the average raw trace.
If None, all trains are used.
Returns
-------
trace: DataArray
The average digitizer trace
"""
run_mnemonics = mnemonics_for_run(run)
if ntrains is None:
sel = run
else:
total_tid = len(run.train_ids)
stride = int(np.max([1, np.floor(total_tid/ntrains)]))
sel = run.select_trains(np.s_[0:None:stride])
m = run_mnemonics[mnemonic]
raw_trace = sel.get_array(m['source'], m['key'], m['dim'])
raw_trace = raw_trace.mean(dim='trainId')
return raw_trace
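# Usage sketch (hypothetical proposal, run and mnemonic): average the raw
# trace of a FastADC channel over ~100 evenly spaced trains.
#
# >>> run = open_run(900000, 1)
# >>> avg = get_dig_avg_trace(run, 'FastADC5raw', ntrains=100)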
def channel_peak_params(run, source, key=None, channel=None, board=None):
"""
Extract peak-integration parameters used by a channel of the digitizer.
Parameters
----------
run: extra_data.DataCollection
DataCollection containing the digitizer data.
source: str
ToolBox mnemonic of digitizer data, e.g. "MCP2apd" or
"FastADC4peaks", or name of the digitizer source, e.g.
'SCS_UTC1_ADQ/ADC/1:network'.
key: str
optional, used in combination with source (if source is not a ToolBox
mnemonic) instead of digitizer, channel and board.
channel: int or str
The digitizer channel for which to retrieve the parameters. If None,
inferred from the source mnemonic or source + key arguments.
board: int
Board of the ADQ412. If None, inferred from the source mnemonic or
source + key arguments.
Returns
-------
dict with peak integration parameters.
"""
run_mnemonics = mnemonics_for_run(run)
if source in run_mnemonics:
m = run_mnemonics[source]
source = m['source']
key = m['key']
if 'network' in source:
return adq412_channel_peak_params(run, source, key, channel, board)
else:
return fastADC_channel_peak_params(run, source, channel)
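# Sketch (assumed mnemonic 'MCP2apd'): read back the peak-integration settings
# that the digitizer itself applied to a channel, e.g. to compare them with
# automatically found parameters.
#
# >>> run = open_run(900000, 1)
# >>> settings = channel_peak_params(run, 'MCP2apd')
# >>> settings.get('npulses'), settings.get('period')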
def fastADC_channel_peak_params(run, source, channel=None):
"""
Extract peak-integration parameters used by a channel of the Fast ADC.
Parameters
----------
run: extra_data.DataCollection
DataCollection containing the digitizer data.
source: str
Name of Fast ADC source, e.g. 'SCS_UTC1_MCP/ADC/1:channel_5.output'.
channel: int
The Fast ADC channel for which to retrieve the parameters. If None,
inferred from the source.
Returns
-------
dict with peak integration parameters.
"""
fastADC_keys = {
'baseStart': ('baselineSettings.baseStart.value',
'baseStart.value'),
'baseStop': ('baseStop.value',),
'baseLength': ('baselineSettings.length.value',),
'enable': ('enablePeakComputation.value',),
'pulseStart': ('initialDelay.value',),
'pulseLength': ('peakSamples.value',),
'npulses': ('numPulses.value',),
'period': ('pulsePeriod.value',)
}
if channel is None:
channel = int(source.split(':')[1].split('.')[0].split('_')[1])
baseName = f'channel_{channel}.'
source = source.split(':')[0]
data = run.select(source).train_from_index(0)[1][source]
result = {}
for mnemo, versions in fastADC_keys.items():
for v in versions:
key = baseName + v
if key in data:
result[mnemo] = data[key]
if 'baseLength' in result:
result['baseStop'] = result['baseStart'] + \
result.pop('baseLength')
if 'pulseLength' in result:
result['pulseStop'] = result['pulseStart'] + \
result.pop('pulseLength')
return result
def adq412_channel_peak_params(run, source, key=None,
channel=None, board=None):
"""
Extract peak-integration parameters used by a channel of the ADQ412.
Parameters
----------
run: extra_data.DataCollection
DataCollection containing the digitizer data.
source: str
Name of the ADQ412 source, e.g. 'SCS_UTC1_ADQ/ADC/1:network'.
key: str
optional, e.g. 'digitizers.channel_1_D.apd.pulseIntegral', used in
combination with source instead of channel and board.
channel: int or str
The ADQ412 channel for which to retrieve the parameters. If None,
inferred from the source mnemonic or source + key arguments.
board: int
Board of the ADQ412. If None, inferred from the source mnemonic or
source + key arguments.
Returns
-------
dict with peak integration parameters.
"""
if key is None:
if channel is None or board is None:
raise ValueError('key or channel + board arguments are '
'missing.')
else:
k = key.split('.')[1].split('_')
ch_to_int = {'A': 0, 'B': 1, 'C': 2, 'D': 3}
channel = ch_to_int[k[2]]
board = k[1]
baseName = f'board{board}.apd.channel_{channel}.'
source = source.split(':')[0]
adq412_keys = {
'baseStart': (baseName + 'baseStart.value',),
'baseStop': (baseName + 'baseStop.value',),
'enable': (baseName + 'enable.value',),
'pulseStart': (baseName + 'pulseStart.value',),
'pulseStop': (baseName + 'pulseStop.value',),
'initialDelay': (baseName + 'initialDelay.value',),
'upperLimit': (baseName + 'upperLimit.value',),
'npulses': (f'board{board}.apd.numberOfPulses.value',)
}
data = run.select(source).train_from_index(0)[1][source]
result = {}
for mnemo, versions in adq412_keys.items():
for key in versions:
if key in data:
result[mnemo] = data[key]
result['period'] = result.pop('upperLimit') - \
result.pop('initialDelay')
return result
def find_peaks_in_raw_trace(trace, height=None, width=1, distance=24):
"""
Find integration parameters for peak integration of a raw
digitizer trace. Based on scipy find_peaks().
Parameters
----------
trace: numpy array or xarray DataArray
The digitizer raw trace used to find peaks
height: int
minimum height for peak determination
width: int
minimum width of peak
distance: int
minimum distance between two peaks
Returns
-------
dict with keys 'pulseStart', 'pulseStop', 'baseStart', 'baseStop',
'period', 'npulses' and values in number of samples.
"""
if isinstance(trace, xr.DataArray):
trace = trace.values
trace_norm = trace - np.median(trace)
trace_norm = -trace_norm if np.mean(trace_norm) < 0 else trace_norm
SNR = np.max(np.abs(trace_norm)) / np.std(trace_norm[:100])
if SNR < 10:
log.warning('signal-to-noise ratio too low: cannot '
'automatically find peaks.')
return {'pulseStart': 2, 'pulseStop': 3,
'baseStart': 0, 'baseStop': 1,
'period': 0, 'npulses': 1}
min_height = min(3 * SNR, np.max(np.abs(trace_norm)) / 3)
peaks, prop = find_peaks(trace_norm, distance=distance,
height=min_height,
width=2)
params = {}
start = int(prop['left_ips'][0])
if len(peaks) == 1:
params['pulseStart'] = start
params['period'] = 0
else:
pulse_period = int(np.median(np.diff(peaks)))
pulse_ids = np.digitize(peaks,
np.arange(peaks[0] - pulse_period/2,
peaks[-1] + pulse_period/2,
pulse_period)) - 1
if len(np.unique(np.diff(pulse_ids))) == 1:
# Regular pattern
params['pulseStart'] = start
params['period'] = pulse_period
else:
# Irregular pattern
params['pulseStart'] = start + pulse_ids * pulse_period
params['period'] = 0
params['pulseStop'] = int(prop['right_ips'][0])
params['baseStop'] = (3 * start - peaks[0]) / 2
params['baseStop'] = np.min([params['baseStop'],
int(prop['right_ips'][0])]).astype(int)
params['baseStart'] = params['baseStop'] - np.mean(prop['widths'])/2
params['baseStart'] = np.max([params['baseStart'], 0]).astype(int)
params['npulses'] = len(peaks)
return params
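# Sketch of the automatic peak finding on a synthetic trace (values invented
# for illustration): ten Gaussian pulses, 24 samples apart, on a small noise
# floor so that the signal-to-noise check passes.
#
# >>> import numpy as np
# >>> rng = np.random.default_rng(0)
# >>> x = np.arange(2000)
# >>> trace = rng.normal(0, 0.01, x.size)
# >>> for i in range(10):
# ...     trace += np.exp(-0.5 * ((x - 200 - 24 * i) / 2) ** 2)
# >>> params = find_peaks_in_raw_trace(trace, distance=24)
# >>> # expected: params['npulses'] == 10 and params['period'] == 24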
def find_peak_integration_parameters(run, mnemonic, raw_trace=None,
integParams=None, pattern=None,
ntrains=None):
'''
Finds peak integration parameters, either from the provided integParams or
automatically from the (average) raw trace, and reconciles them with the
bunch pattern if one is given.
'''
run_mnemonics = mnemonics_for_run(run)
digitizer = digitizer_type(run, run_mnemonics[mnemonic]['source'])
pulse_period = 24 if digitizer == "FastADC" else 440
if integParams is None:
# load raw trace and find peaks
autoFind = True
if raw_trace is None:
raw_trace = get_dig_avg_trace(run, mnemonic, ntrains)
params = find_peaks_in_raw_trace(raw_trace, distance=pulse_period)
else:
# inspect provided parameters
autoFind = False
required_keys = ['pulseStart', 'pulseStop', 'baseStart',
'baseStop']
add_text = ''
if pattern is None and not hasattr(integParams['pulseStart'],
'__len__'):
required_keys += ['period', 'npulses']
add_text = 'Bunch pattern not provided. '
if not all(name in integParams for name in required_keys):
raise ValueError(add_text + 'All keys of integParams argument '
f'{required_keys} are required.')
params = integParams.copy()
# extract pulse ids from the parameters (starting at 0)
pulse_ids_params = None
if hasattr(params['pulseStart'], '__len__'):
if params.get('npulses') is not None and (
params.get('npulses') != len(params['pulseStart'])):
log.warning('The number of pulses does not match the length '
'of pulseStart. Using length of pulseStart as '
'the number of pulses.')
params['npulses'] = len(params['pulseStart'])
pulse_ids_params = ((np.array(params['pulseStart']) -
params['pulseStart'][0]) / pulse_period).astype(int)
elif 'npulses' in params and 'period' in params:
if params['npulses'] == 1:
pulse_ids_params = np.array([0])
else:
pulse_ids_params = np.arange(0,
params['npulses'] * params['period'] / pulse_period,
params['period'] / pulse_period).astype(int)
# Extract pulse_ids, period and npulses from bunch pattern
pulse_ids_bp, npulses_bp, period_bp = None, None, 0
regular = True
if pattern is not None:
bunchPattern = 'sase3' if hasattr(pattern, 'sase') else 'scs_ppl'
if pattern.is_constant_pattern() is False:
log.warning('The number of pulses changed during the run.')
pulse_ids_bp = np.unique(pattern.pulse_ids(labelled=False,
copy=False))
npulses_bp, period_bp = None, 0
regular = False
else:
pulse_ids_bp = pattern.peek_pulse_ids(labelled=False)
npulses_bp = len(pulse_ids_bp)
if npulses_bp > 1:
periods = np.diff(pulse_ids_bp)
if len(np.unique(periods)) > 1:
regular = False
else:
period_bp = np.unique(periods)[0] * pulse_period
# Compare parameters with bunch pattern
if pulse_ids_params is None:
params['npulses'] = npulses_bp
params['period'] = period_bp
pulse_ids_params = np.arange(0,
params['npulses'] * params['period'] / pulse_period,
params['period'] / pulse_period).astype(int)
if len(pulse_ids_params) == len(pulse_ids_bp):
if not (pulse_ids_params == pulse_ids_bp - pulse_ids_bp[0]).all():
log.warning('The provided pulseStart parameters do not match '
f'the {bunchPattern} bunch pattern pulse ids. '
'Using bunch pattern parameters.')
pulse_ids_params = pulse_ids_bp
if (npulses_bp != params.get('npulses') or
period_bp != params.get('period')):
if autoFind:
add_text = 'Automatically found '
else:
add_text = 'Provided '
log.warning(add_text + 'integration parameters '
f'(npulses={params.get("npulses")}, ' +
f'period={params.get("period")}) do not match the '
f'{bunchPattern} bunch pattern (npulses='
f'{npulses_bp}, period={period_bp}). Using bunch '
'pattern parameters.')
pulse_ids_params = pulse_ids_bp
params['npulses'] = npulses_bp
params['period'] = period_bp
if not regular:
# Irregular pattern
if hasattr(params['pulseStart'], '__len__'):
start = params['pulseStart'][0]
else:
start = params['pulseStart']
params['pulseStart'] = np.array(
[int(start + (pid - pulse_ids_params[0]) * pulse_period)
for pid in pulse_ids_params])
params['period'] = 0
else:
# Regular pattern
if hasattr(params['pulseStart'], '__len__'):
params['pulseStart'] = params['pulseStart'][0]
if len(pulse_ids_params) == 1:
params['period'] = 0
return params, pulse_ids_params, regular, raw_trace
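# Sketch (hypothetical run and mnemonic): reconcile manually provided
# integration parameters with the optical-laser bunch pattern. 'period' and
# 'npulses' are overridden by the bunch pattern if they disagree with it.
#
# >>> run = open_run(900000, 1)
# >>> manual = {'pulseStart': 1000, 'pulseStop': 1100,
# ...           'baseStart': 900, 'baseStop': 990,
# ...           'period': 440, 'npulses': 10}
# >>> params, pids, regular, trace = find_peak_integration_parameters(
# ...     run, 'MCP2raw', integParams=manual,
# ...     pattern=OpticalLaserPulses(run))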
def check_peak_params(proposal, runNB, mnemonic, raw_trace=None,
ntrains=200, integParams=None, bunchPattern='sase3',
plot=True, show_all=False):
"""
Checks and plots the peak parameters (pulse window and baseline window
of a raw digitizer trace) used to compute the peak integration. These
parameters are either set by the digitizer peak-integration settings,
or are determined by a peak finding algorithm. The parameters
can also be provided manually for visual inspection. The plot either
shows the first and last pulse of the trace or the entire trace.
Parameters
----------
proposal: int
the proposal number
runNB: int
the run number
mnemonic: str
ToolBox mnemonic of the digitizer data, e.g. 'MCP2apd'.
raw_trace: optional, 1D numpy array or xarray DataArray
Raw trace to display. If None, the average raw trace over
ntrains of the corresponding channel is loaded (this can
be time-consuming).
ntrains: optional, int
Only used if raw_trace is None. Number of trains used to
calculate the average raw trace of the corresponding channel.
plot: bool
If True, displays the raw trace and peak integration regions.
show_all: bool
If True, displays the entire raw trace and all peak integration
regions (this can be time-consuming).
If False, shows the first and last pulse according to the bunchPattern.
bunchPattern: optional, str
Only used if plot is True. Checks the bunch pattern against
the digitizer peak parameters and shows potential mismatch.
Returns
-------
dictionary of peak integration parameters
"""
run = open_run(proposal, runNB)
run_mnemonics = mnemonics_for_run(run)
digitizer = digitizer_type(run, run_mnemonics[mnemonic]['source'])
pulse_period = 24 if digitizer == "FastADC" else 440
pattern = None
try:
if bunchPattern == 'sase3':
pattern = XrayPulses(run)
if bunchPattern == 'scs_ppl':
pattern = OpticalLaserPulses(run)
except Exception as e:
log.warning(e)
bunchPattern = None
params, pulse_ids, regular, raw_trace = find_peak_integration_parameters(
run, mnemonic, raw_trace, integParams, pattern)
if bunchPattern:
if regular:
add_text = ''
if len(pulse_ids) > 1:
add_text = f's, {(pulse_ids[1]-pulse_ids[0]) * pulse_period}'\
+ ' samples between two pulses'
print(f'Bunch pattern {bunchPattern}: {len(pulse_ids)} pulse'
+ add_text)
else:
print(f'Bunch pattern {bunchPattern}: Not a regular pattern. '
f'{len(pulse_ids)} pulses, pulse_ids='
f'{pulse_ids}.')
if integParams is None:
title = 'Auto-find peak parameters'
else:
title = 'Provided peak parameters'
no_change = True
for k, v in integParams.items():
no_change = no_change & (v == params[k])
if hasattr(no_change, '__len__'):
no_change = no_change.all()
if no_change is False:
print('The provided parameters did not match the bunch '
'pattern and were adjusted.')
title += ' (adjusted)'
if regular:
add_text = ''
if params['npulses'] > 1:
add_text = f's, {params["period"]} samples between two pulses'
print(title + ': ' + f' {params["npulses"]} pulse' + add_text)
else:
print(f'{title}: Not a regular pattern. '
f'{len(pulse_ids)} pulses, pulse_ids='
f'{pulse_ids}.')
if plot:
if raw_trace is None:
raw_trace = get_dig_avg_trace(run, mnemonic)
fig, ax = plotPeakIntegrationWindow(raw_trace, params,
show_all=show_all)
fig.suptitle(f'p{proposal} r{runNB} ' + title, size=12)
return params
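# Usage sketch (placeholder proposal/run; 'FastADC5raw' is an assumed
# mnemonic): visually verify the automatically found integration windows
# against the optical-laser bunch pattern before extracting peaks.
#
# >>> params = check_peak_params(900000, 1, 'FastADC5raw',
# ...                            bunchPattern='scs_ppl', plot=True)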
def plotPeakIntegrationWindow(raw_trace, params, show_all=False):
if hasattr(params['pulseStart'], '__len__'):
starts = np.array(params['pulseStart'])
stops = params['pulseStop'] + starts - params['pulseStart'][0]
baseStarts = params['baseStart'] + starts - params['pulseStart'][0]
baseStops = params['baseStop'] + starts - params['pulseStart'][0]
else:
n = params['npulses']
p = params['period']
starts = [params['pulseStart'] + i*p for i in range(n)]
stops = [params['pulseStop'] + i*p for i in range(n)]
baseStarts = [params['baseStart'] + i*p for i in range(n)]
baseStops = [params['baseStop'] + i*p for i in range(n)]
if show_all:
fig, ax = plt.subplots(figsize=(6, 3), constrained_layout=True)
for i in range(len(starts)):
lbl = 'baseline' if i == 0 else None
lp = 'peak' if i == 0 else None
ax.axvline(baseStarts[i], ls='--', color='k')
ax.axvline(baseStops[i], ls='--', color='k')
ax.axvspan(baseStarts[i], baseStops[i],
alpha=0.5, color='grey', label=lbl)
ax.axvline(starts[i], ls='--', color='r')
ax.axvline(stops[i], ls='--', color='r')
ax.axvspan(starts[i], stops[i],
alpha=0.2, color='r', label=lp)
ax.plot(raw_trace, color='C0', label='raw trace')
ax.legend(fontsize=8)
return fig, [ax]
fig, ax = plt.subplots(1, 2, figsize=(6, 3), constrained_layout=True)
for plot in range(2):
title = 'First pulse' if plot == 0 else 'Last pulse'
i = 0 if plot == 0 else - 1
for j in range(min(2, len(starts))):
if plot == 1:
j = -j
label = 'baseline' if j == 0 else ''
ax[plot].axvline(baseStarts[i+j], ls='--', color='k')
ax[plot].axvline(baseStops[i+j], ls='--', color='k')
ax[plot].axvspan(baseStarts[i+j], baseStops[i+j],
alpha=0.5, color='grey', label=label)
label = 'peak' if j == 0 else ''
ax[plot].axvline(starts[i+j], ls='--', color='r')
ax[plot].axvline(stops[i+j], ls='--', color='r')
ax[plot].axvspan(starts[i+j], stops[i+j],
alpha=0.2, color='r', label=label)
if len(starts) > 1:
period = starts[1] - starts[0]
xmin = np.max([0, baseStarts[i] - int(1.5*period)])
xmax = np.min([stops[i] + int(1.5*period), raw_trace.size])
else:
xmin = np.max([0, baseStarts[i] - 200])
xmax = np.min([stops[i] + 200, raw_trace.size])
ax[plot].plot(np.arange(xmin, xmax),
raw_trace[xmin:xmax], color='C0', label='raw trace')
ax[plot].legend(fontsize=8)
ax[plot].set_xlim(xmin, xmax)
ax[plot].set_xlabel('digitizer samples')
ax[plot].set_title(title, size=10)
return fig, ax
def digitizer_type(run, source):
"""
Finds the digitizer type based on the class Id / name of the source.
Example source: 'SCS_UTC1_MCP/ADC/1'. Defaults to ADQ412 if not found.
"""
ret = None
if '_MCP/ADC/1' in source:
ret = 'FastADC'
if '_ADQ/ADC/1' in source:
ret = 'ADQ412'
if ret is None:
digi_dict = {'FastAdc': 'FastADC',
'FastAdcLegacy': 'FastADC',
'AdqDigitizer': 'ADQ412',
'PyADCChannel': 'FastADC',
'PyADCChannelLegacy': 'FastADC'}
try:
source = source.split(':')[0]
classId = run.get_run_value(source, 'classId.value')
ret = digi_dict.get(classId)
except Exception as e:
log.warning(str(e))
log.warning(f'Could not find digitizer type from source {source}.')
ret = 'ADQ412'
return ret
def get_digitizer_peaks(proposal, runNB, mnemonic, merge_with=None,
bunchPattern='sase3', integParams=None, mode='trapz',
save=False, subdir='usr/processed_runs'):
"""
Extract the peaks from digitizer raw traces. The result can be merged
into an existing `merge_with` dataset and/or saved into an HDF5 file.
The calculation is a trapezoidal integration between 'pulseStart' and
'pulseStop' with subtraction of a baseline defined as the median between
'baseStart' and 'baseStop'.
The integration parameters can either be provided using integParams, or
computed by a peak finding algorithm if integParams is None.
If the bunchPattern argument is provided, the pulse ids are aligned to it.
If there is a mismatch between the provided parameters or the computed
parameters and the bunch pattern, the bunch pattern parameters prevail.
Parameters
----------
proposal: int
the proposal number
runNB: int
the run number
mnemonic: str
mnemonic for FastADC or ADQ412, e.g. "I0_ILHraw" or "MCP3apd".
The data is either loaded from the DataCollection or taken from
merge_with.
merge_with: xarray Dataset
If provided, the resulting Dataset will be merged with this one.
bunchPattern: str or dict
'sase1', 'sase3' or 'scs_ppl', or None: bunch pattern used to align
the pulse ids.
integParams: dict
dictionary for raw trace integration, e.g.
{'pulseStart':100, 'pulseStop':200, 'baseStart':50,
'baseStop':99, 'period':24, 'npulses':500}.
If None, integration parameters are computed automatically.
mode: str in ['trapz', 'sum', 'mean', 'amplitude']
The operation performed over the integration region. `amplitude` is
the difference between the baseline and the peak height.
save: bool
If True, save the computed peaks and the average raw trace in an h5
file in the <subdir>/r{runNB:04d}/ folder.
subdir: str
subdirectory to save the output in.
Returns
-------
xarray Dataset with digitizer peak variables. Raw variables are
replaced by the calculated peak values (e.g. "FastADC2raw" becomes
"FastADC2peaks").
"""
# prepare resulting dataset
if bool(merge_with):
mw_ds = merge_with.drop(mnemonic, errors='ignore')
else:
mw_ds = xr.Dataset()
run = open_run(proposal, runNB)
run_mnemonics = mnemonics_for_run(run)
mnemo = run_mnemonics.get(mnemonic)
if mnemo is None:
log.warning('Mnemonic not found. Skipping.')
return xr.DataArray()
source, key = mnemo['source'], mnemo['key']
extra_dim = {'sase3': 'sa3_pId', 'scs_ppl': 'ol_pId'}.get(bunchPattern)
if extra_dim is None:
extra_dim = 'pulseId'
pattern = None
try:
if bunchPattern == 'sase3':
pattern = XrayPulses(run)
if bunchPattern == 'scs_ppl':
pattern = OpticalLaserPulses(run)
except Exception as e:
log.warning(e)
# load traces and calculate the average
traces = run[source, key].xarray(name=mnemonic, extra_dims=mnemo['dim'])
trace = traces.mean('trainId').rename(mnemonic.replace('raw', 'avg'))
# find integration parameters
params, pulse_ids, regular, trace = find_peak_integration_parameters(
run, mnemonic, trace, integParams, pattern)
# extract peaks
peaks = peaks_from_raw_trace(traces, **params,
extra_dim=extra_dim, mode=mode)
peaks = peaks.rename(mnemonic.replace('raw', 'peaks'))
# Align pulse id
if regular is True:
peaks = peaks.assign_coords({extra_dim: pulse_ids})
elif pattern:
mask = pattern.pulse_mask(labelled=False)
mask = xr.DataArray(mask, dims=['trainId', extra_dim],
coords={'trainId': run[source, key].train_ids,
extra_dim: np.arange(mask.shape[1])})
mask = mask.sel({extra_dim: pulse_ids})
peaks = peaks.where(mask, drop=True)
# add peak integration parameters attributes
for p in params:
if params[p] is None:
params[p] = 'None'
peaks.attrs[f'{peaks.name}_{p}'] = params[p]
peaks.attrs[f'{peaks.name}_mode'] = mode
# save peaks
if save:
save_peaks(proposal, runNB, peaks, trace, subdir)
# merge with existing dataset
ds = mw_ds.merge(peaks.to_dataset(promote_attrs=True), join='inner',
combine_attrs='drop_conflicts')
return ds
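# Usage sketch for get_digitizer_peaks() (placeholder proposal, run and
# mnemonic; 'tb' refers to toolbox_scs as in the examples above): extract
# SASE3-aligned peaks with automatically found parameters and merge them
# into an existing dataset.
#
# >>> run, ds = tb.load(900000, 1, 'nrj')
# >>> ds = get_digitizer_peaks(900000, 1, 'XRD_MCP_BIGraw', merge_with=ds,
# ...                          bunchPattern='sase3', mode='trapz', save=False)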
def save_peaks(proposal, runNB, peaks, avg_trace, subdir):
'''
Save the peaks extracted with extract_digitizer_peaks() as well as the
average raw trace in a dataset at the proposal + subdir location.
If a dataset already exists, the new data is merged with it.
Parameters
----------
proposal: int
the proposal number
runNB: int
the run number
peaks: xarray DataArray
the 2D-array obtained by extract_digitizer_peaks()
avg_trace: xarray DataArray
the average raw trace over the trains
subdir: str
subdirectory. The data is stored in
<proposal path>/<subdir>/r{runNB:04d}/r{runNB:04d}-digitizers-data.h5
Returns
-------
None
'''
root = find_proposal(f'p{proposal:06d}')
path = os.path.join(root, subdir + f'/r{runNB:04d}/')
os.makedirs(path, exist_ok=True)
fname = path + f'r{runNB:04d}-digitizers-data.h5'
ds_peaks = peaks.to_dataset(promote_attrs=True)
if os.path.isfile(fname):
ds = xr.load_dataset(fname)
ds = ds.drop_vars([peaks.name, avg_trace.name], errors='ignore')
for dim in ds.dims:
if all(dim not in ds[v].dims for v in ds):
ds = ds.drop_dims(dim)
dim_name = 'sampleId'
if 'sampleId' in ds.dims and ds.sizes['sampleId'] != len(avg_trace):
dim_name = 'sampleId2'
avg_trace = avg_trace.rename({avg_trace.dims[0]: dim_name})
if f'params_{peaks.name}' in ds.attrs:
del ds.attrs[f'params_{peaks.name}']
ds = xr.merge([ds, ds_peaks, avg_trace],
combine_attrs='drop_conflicts', join='inner')
else:
ds = ds_peaks.merge(avg_trace.rename({avg_trace.dims[0]: 'sampleId'}))
ds.to_netcdf(fname, format='NETCDF4')
print(f'saved data into {fname}.')
return None
def load_processed_peaks(proposal, runNB, mnemonic=None,
data='usr/processed_runs', merge_with=None):
"""
Load processed digitizer peaks data.
Parameters
----------
proposal: int
the proposal number
runNB: int
the run number
mnemonic: str
the mnemonic containing peaks. Example: 'XRD_MCP_BIGpeaks'.
If None, the entire dataset is loaded
data: str
subdirectory. The data is stored in
<proposal path>/<subdir>/r{runNB:04d}/r{runNB:04d}-digitizers-data.h5
merge_with: xarray Dataset
A dataset to merge the data with.
Returns
-------
xarray DataArray if mnemonic is not None and merge_with is None
xarray Dataset if mnemonic is None or merge_with is not None.
Example
-------
# load the mono energy and the MCP_BIG peaks
run, ds = tb.load(proposal, runNB, 'nrj')
ds = tb.load_processed_peaks(proposal, runNB,'XRD_MCP_BIGpeaks',
merge_with=ds)
"""
if mnemonic is None:
return load_all_processed_peaks(proposal, runNB, data, merge_with)
root = find_proposal(f'p{proposal:06d}')
path = os.path.join(root, data + f'/r{runNB:04d}/')
fname = path + f'r{runNB:04d}-digitizers-data.h5'
if os.path.isfile(fname):
ds = xr.load_dataset(fname)
if mnemonic not in ds:
log.warning(f'Mnemonic {mnemonic} not found in {fname}')
return {}
da = ds[mnemonic]
da.attrs = {k: ds.attrs[k] for k in ds.attrs if f'{mnemonic}_' in k}
if merge_with is not None:
return merge_with.merge(da.to_dataset(promote_attrs=True),
combine_attrs='drop_conflicts',
join='inner')
else:
return da
else:
log.warning(f'Mnemonic {mnemonic} not found in {fname}')
return merge_with
def load_all_processed_peaks(proposal, runNB, data='usr/processed_runs',
merge_with=None):
"""
Load processed digitizer peaks dataset. The data contains the peaks,
the average raw trace and the integration parameters (attribute) of
each processed digitizer source.
Parameters
----------
proposal: int
the proposal number
runNB: int
the run number
data: str
subdirectory. The data is stored in
<proposal path>/<subdir>/r{runNB:04d}/r{runNB:04d}-digitizers-data.h5
merge_with: xarray Dataset
A dataset to merge the data with.
Returns
-------
xarray Dataset
"""
root = find_proposal(f'p{proposal:06d}')
path = os.path.join(root, data + f'/r{runNB:04d}/')
fname = path + f'r{runNB:04d}-digitizers-data.h5'
if os.path.isfile(fname):
if merge_with is not None:
return merge_with.merge(xr.load_dataset(fname),
combine_attrs='drop_conflicts',
join='inner')
return xr.load_dataset(fname)
else:
log.warning(f'{fname} not found.')
return merge_with
def check_processed_peak_params(proposal, runNB, mnemonic,
data='usr/processed_runs',
plot=True, show_all=False):
"""
Check the integration parameters used to generate the processed
peak values.
Parameters
----------
proposal: int
the proposal number
runNB: int
the run number
mnemonic: str
the mnemonic containing peaks. Example: 'XRD_MCP_BIGpeaks'.
If None, the entire dataset is loaded
data: str
subdirectory. The data is stored in
<proposal path>/<subdir>/r{runNB:04d}/r{runNB:04d}-digitizers-data.h5
plot: bool
If True, displays the raw trace and peak integration regions.
show_all: bool
If True, displays the entire raw trace and all peak integration
regions (this can be time-consuming).
If False, shows the first and last pulses.
Returns
-------
params: dict
the integration parameters with keys ['pulseStart', 'pulseStop',
'baseStart', 'baseStop', 'period', 'npulses'].
See `extract_digitizer_peaks()`.
"""
root = find_proposal(f'p{proposal:06d}')
path = os.path.join(root, data + f'/r{runNB:04d}/')
fname = path + f'r{runNB:04d}-digitizers-data.h5'
if os.path.isfile(fname):
ds = xr.load_dataset(fname)
if mnemonic.replace('raw', 'peaks') not in ds:
log.warning(f'Mnemonic {mnemonic} not found in {fname}')
return {}
params = {k.replace(f'{mnemonic}_', ''): ds.attrs[k] for
k in ds.attrs if f'{mnemonic}_' in k}
if plot:
title = 'Processed peak parameters'
fig, ax = plotPeakIntegrationWindow(
ds[mnemonic.replace('peaks', 'avg')],
params,
show_all=show_all)
fig.suptitle(f'p{proposal} r{runNB} ' + title, size=12)
return params
else:
log.warning(f'{fname} not found.')
return {}
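# Sketch (placeholder proposal and run): retrieve the integration parameters
# stored alongside previously processed peaks and display the windows.
#
# >>> params = check_processed_peak_params(900000, 1, 'XRD_MCP_BIGpeaks')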
"""
DSSC-detector class module
The dssc detector class. It provides a namespace for frequently used
evaluation routines while implicitly applying/requiring certain
structure/naming conventions for its objects.
comments:
- contributions should comply with pep8 code structure guidelines.
- Plot routines don't fit into objects since they are rather fluent.
They have been outsourced to dssc_plot.py. They can now be accessed
as toolbox_scs member functions.
"""
import os
import logging
import joblib
import numpy as np
import xarray as xr
import toolbox_scs as tb
from ..util.exceptions import ToolBoxValueError, ToolBoxFileError
from .dssc_data import (
save_xarray, load_xarray, save_attributes_h5,
search_files, get_data_formatted)
from .dssc_misc import (
load_dssc_info, get_xgm_formatted, get_tim_formatted)
from .dssc_processing import (
process_dssc_data, create_empty_dataset)
__all__ = [
"DSSCBinner",
"DSSCFormatter"]
log = logging.getLogger(__name__)
class DSSCBinner:
def __init__(self, proposal_nr, run_nr,
binners={},
xgm_name='SCS_SA3',
tim_names=['MCP1apd', 'MCP2apd', 'MCP3apd'],
dssc_coords_stride=2,
):
"""
A dssc binner object. Loads and bins the dssc data according to the
bins specified in 'binners'. The data can be reduced further through
masking using XGM or TIM data.
Parameters
----------
proposal_nr: int, str
proposal number containing run folders
run_nr: int, str
run number
binners: dictionary
dictionary containing binners constructed using the
'create_dssc_bins' toolbox_scs.detectors-method.
xgm_name: str
a valid mnemonic key of the XGM data to be used to mask the dssc
frames. Since the xgm is used in several methods its name can be
set here globally.
tim_names: list of strings
a list of valid mnemonic keys for an MCP in the TIM. Once the
corresponding data is loaded, the different sources are averaged.
dssc_coords_stride: int, list
defines which dssc frames should be normalized using data from the
xgm. The parameter may be an integer (stride parameter) or a list
that assigns each pulse to its corresponding dssc frame number.
Returns
-------
DSSCbinner: object
Example
-------
1.) quick -> generic bins, no xgm,
>>> import toolbox_scs as tb
>>> run235 = tb.DSSCBinner(proposal_nr=2212, run_nr=235)
2.) detailed -> docs
"""
# ---------------------------------------------------------------------
# object (run) properties
# ---------------------------------------------------------------------
self.proposal = proposal_nr
self.runnr = run_nr
self.info = load_dssc_info(proposal_nr, run_nr)
self.run, _ = tb.load(proposal_nr, run_nr)
self.binners = {}
for b in binners:
self.add_binner(b, binners[b])
self.xgm_name = xgm_name
self.tim_names = tim_names
self.dssc_coords_stride = dssc_coords_stride
self.xgm = None
self.tim = None
self.pulsemask = None
log.debug("Constructed DSSC object")
def __del__(self):
pass
def add_binner(self, name, binner):
"""
Add additional binner to internal dictionary
Parameters
----------
name: str
name of binner to be created
binner: xarray.DataArray
An array that represents a map of how the respective coordinate
should be binned.
Raises
------
ToolBoxValueError: Exception
Raises exception in case the name does not correspond to a valid
binner name. To be generalized.
"""
if name in ['trainId', 'pulse', 'x', 'y']:
self.binners[name] = binner
else:
msg = "Invalid binner name"
log.info(msg+", no binner created")
raise ToolBoxValueError(msg, name)
def load_xgm(self):
"""
load xgm data and construct coordinate array according to corresponding
dssc frame number.
"""
self.xgm = get_xgm_formatted(self.run,
self.xgm_name,
self.dssc_coords_stride)
def load_tim(self):
"""
load tim data and construct coordinate array according to corresponding
dssc frame number.
"""
self.tim = get_tim_formatted(self.proposal,
self.runnr,
self.tim_names,
self.dssc_coords_stride)
def create_pulsemask(self, use_data='xgm', threshold=(0, np.inf)):
"""
creates a mask for dssc frames according to measured xgm intensity.
Once such a mask has been constructed, it will be used in the data
reduction process to drop out-of-bounds pulses.
"""
fpt = self.info['frames_per_train']
n_trains = self.info['number_of_trains']
trainIds = self.info['trainIds']
data = np.ones([n_trains, fpt], dtype=bool)
self.pulsemask = xr.DataArray(data,
dims=['trainId', 'pulse'],
coords={'trainId': trainIds,
'pulse': range(fpt)})
if use_data == 'xgm':
if self.xgm is None:
self.load_xgm()
valid = (self.xgm > threshold[0]) * \
(self.xgm < threshold[1])
if use_data == 'tim':
if self.tim is None:
self.load_tim()
valid = (self.tim > threshold[0]) * \
(self.tim < threshold[1])
self.pulsemask = \
(valid.combine_first(self.pulsemask).astype(bool))[:, 0:fpt]
log.info(f'created pulse mask used during processing')
def get_info(self):
"""
Returns the expected shape of the binned dataset, in case binners have
been defined.
"""
if any(self.binners):
empty = create_empty_dataset(self.info, self.binners)
return empty.dims
else:
log.info("no binner defined yet.")
pass
def _bin_metadata(self, data):
if self.pulsemask is not None:
data = data.where(self.pulsemask)
for b in self.binners:
if b in ['trainId', 'pulse']:
data[b+"_binned"] = self.binners[b]
data = data.groupby(b+"_binned").mean(b)
data = data.rename(name_dict={b+"_binned": b})
log.debug('binned metadata data according to dssc binners.')
return data.transpose('trainId', 'pulse')
def get_xgm_binned(self):
"""
Bin the xgm data according to the binners of the dssc data. The result
can eventually be merged into the final dataset by the DSSCFormatter.
Returns
-------
xgm_data: xarray.DataSet
xarray dataset containing the binned xgm data
"""
if self.xgm is not None:
xgm_data = self.xgm.to_dataset(name='xgm')
xgm_binned = self._bin_metadata(xgm_data)
log.info('binned xgm data according to dssc binners.')
return xgm_binned
else:
log.warning("no xgm data. Use load_xgm() to load the xgm data.")
pass
def get_tim_binned(self):
"""
Bin the tim data according to the binners of the dssc data. The result
can eventually be merged into the final dataset by the DSSCFormatter.
Returns
-------
tim_data: xarray.DataSet
xarray dataset containing the binned tim data
"""
if self.tim is not None:
tim_data = self.tim.to_dataset(name='tim')
tim_binned = self._bin_metadata(tim_data)
log.info('binned tim data according to dssc binners.')
return tim_binned
else:
log.warning("no data. Use load_tim() to load the tim data.")
pass
# -------------------------------------------------------------------------
# Data processing
# -------------------------------------------------------------------------
def process_data(self, modules=[], filepath='./',
chunksize=512, backend='loky', n_jobs=None,
dark_image=None,
xgm_normalization=False, normevery=1
):
"""
Load and bin dssc data according to self.bins. No data is returned by
this method. The condensed data is written to file by the worker
processes directly.
Parameters
----------
modules: list of ints
a list containing the module numbers that should be processed. If
empty, all modules are processed.
filepath: str
the path where the files containing the reduced data should be
stored.
chunksize: int
The number of trains that should be read in one iterative step.
backend: str
joblib multiprocessing backend to be used. At the moment it can be
any of joblibs standard backends: 'loky' (default),
'multiprocessing', 'threading'. Anything else than the default is
experimental and not appropriately implemented in the dbdet member
function 'bin_data'.
n_jobs: int
number of parallel jobs. This is inversely proportional to the
number of CPUs available per job: tasks within one job can use at
most n_CPU_tot/n_jobs CPUs.
Note that when using the default backend there is no need to adjust
this parameter with the current implementation.
dark_image: xarray.DataArray
DataArray with dimensions compatible with the loaded dssc data. If
given, it will be subtracted from the dssc data before the binning.
The dark image needs to be of dimension module, trainId, pulse, x
and y.
xgm_normalization: boolean
if true, the dssc data is normalized by the xgm data before the
binning.
normevery: int
integer stride: every normevery-th dssc frame will be normalized.
"""
log.info("Bin data according to binners")
log.info(f'Process {chunksize} trains per chunk')
mod_list = modules
if len(mod_list) == 0:
mod_list = [i for i in range(16)]
log.info(f'Process modules {mod_list}')
njobs = n_jobs
if njobs is None:
njobs = len(mod_list)
module_jobs = []
for m in mod_list:
dark = dark_image
if dark_image is not None:
dark = dark_image.sel(module=m)
module_jobs.append(dict(
proposal=self.proposal,
run_nr=self.runnr,
module=m,
chunksize=chunksize,
path=filepath,
info=self.info,
dssc_binners=self.binners,
pulsemask=self.pulsemask,
dark_image=dark,
xgm_normalization=xgm_normalization,
xgm_mnemonic=self.xgm_name,
normevery=normevery,
))
log.info(f'using parallelization backend {backend}')
joblib.Parallel(n_jobs=njobs, backend=backend)\
(joblib.delayed(process_dssc_data)(**module_jobs[i])
for i in range(len(mod_list)))
log.info(f'Binning done')
class DSSCFormatter:
def __init__(self, filepath):
"""
Class that handles formatting-related aspects before handing the data
over for analysis.
Parameters
----------
filepath: str
location of processed files.
Raises
------
ToolBoxFileError: Exception
Throws an error in case the given path does not exist.
"""
self._filenames = []
self._filename = ''
if os.path.exists(filepath):
try:
self._filenames = search_files(filepath)
except ToolBoxFileError:
log.info("path did not contain any files")
pass
else:
log.info("path did not exist")
self.data = None
self.data_xarray = {}
self.attributes = {}
def combine_files(self, filenames=[]):
"""
Read the files given in filenames, and store the data in the class
variable 'data'. If no filenames are given, it tries to read the files
stored in the class-internal variable '_filenames'.
Parameters
----------
filenames: list
list of strings containing the names of the files to be combined.
"""
if any(filenames) is True:
self._filenames = filenames
if self._filenames is not None:
self.data = get_data_formatted(self._filenames)
else:
log.info("No matching data found.")
def add_dataArray(self, groups=[]):
"""
Reads additional xarray data from the first file given in the list of
filenames. This assumes that all the files in the folder contain the
same additional data. To be generalized.
Parameters
----------
groups: list
list of strings with the names of the groups in the h5 file,
containing additional xarray data.
"""
if any(self._filenames) is True:
for group in groups:
self.data_xarray[group] = load_xarray(self._filenames[0],
group=group,
form='array')
else:
log.info("No files found in specified folder.")
def add_attributes(self, attributes={}):
"""
Add additional information, such as run-type, as attributes to the
formatted .h5 file.
Parameters
----------
attributes: dictionary
a dictionary, containing information or data of any kind, that
will be added to the formatted .h5 file as attributes.
"""
for key in attributes.keys():
self.attributes[key] = attributes[key]
def save_formatted_data(self, filename):
"""
Create a .h5 file containing the main dataset in the group called
'data'. Additional groups will be created for the content of the
variable 'data_array'. Metadata about the file is added in the form of
attributes.
Parameters
----------
filename: str
the name of the file to be created
"""
save_xarray(filename, self.data, group='data', mode='w')
for arr in self.data_xarray.keys():
save_xarray(filename, self.data_xarray[arr], group=arr, mode='a')
save_attributes_h5(filename, self.attributes)
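# Minimal end-to-end sketch of the binning/formatting workflow defined above.
# Proposal, run, bin values and paths are placeholders; it assumes both
# classes and create_dssc_bins are exported at the toolbox_scs package level.
# >>> import numpy as np
# >>> import toolbox_scs as tb
# >>> bins_pulse = ['pumped', 'unpumped'] * 10
# >>> binner_pulse = tb.create_dssc_bins('pulse',
# >>>                                    np.arange(20, dtype=int),
# >>>                                    bins_pulse)
# >>> bindist = tb.DSSCBinner(2212, 235, binners={'pulse': binner_pulse})
# >>> bindist.create_pulsemask(use_data='xgm', threshold=(300, 8000))
# >>> bindist.process_data(filepath='./processed/', chunksize=128)
# >>> formatter = tb.DSSCFormatter('./processed/')
# >>> formatter.combine_files()
# >>> formatter.add_attributes({'run_type': 'delay scan'})
# >>> formatter.save_formatted_data('./processed/run235_formatted.h5')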
import os
import logging
import h5py
import xarray as xr
from ..util.exceptions import ToolBoxFileError
__all__ = [
'get_data_formatted',
'load_xarray',
'save_attributes_h5',
'save_xarray',
]
log = logging.getLogger(__name__)
def _to_netcdf(fname, data, group, mode):
f_exists = os.path.isfile(fname)
if (f_exists and mode == 'w'):
data.to_netcdf(fname, group=group, mode='w', engine='h5netcdf')
log.warning(f"File {fname} existed: overwritten")
log.info(f"Stored data in file {fname}")
elif f_exists and mode == 'a':
try:
data.to_netcdf(fname, group=group, mode='a', engine='h5netcdf')
log.info(f"Created group {group} in file {fname}")
except (ValueError, TypeError):
msg = f"Group {group} exists and has incompatible dimensions."
log.warning(f"Could not store data: {msg}")
raise ToolBoxFileError(msg, fname)
else:
data.to_netcdf(fname, group=group, mode='w', engine='h5netcdf')
log.info(f"Stored data in file {fname}")
def save_xarray(fname, data, group='data', mode='a'):
"""
Store xarray Dataset in the specified location
Parameters
----------
fname: str
filename
data: xarray.Dataset
The data to be stored
group: str
name of the group in the h5 file (default 'data')
mode: str
write mode: 'w' overwrites an existing file, 'a' adds a new group
Raises
------
ToolBoxFileError: Exception
Raised if the group already exists with incompatible dimensions.
"""
try:
_to_netcdf(fname, data, group, mode)
except ToolBoxFileError as err:
raise err
def save_attributes_h5(fname, data={}):
"""
Adding attributes to a hdf5 file. This function is intended to be used to
attach metadata to a processed run.
Parameters
----------
fname: str
filename as string
data: dictionary
the data that should be added to the file in form of a dictionary.
"""
f = h5py.File(fname, mode='a')
for d in data.keys():
f.attrs[d] = data[d]
f.close()
log.info(f"added attributes to file {fname}")
def load_xarray(fname, group='data', form='dataset'):
"""
Load stored xarray Dataset.
Comment: This function exists because of a problem with the standard
netcdf engine that is malfunctioning due to related software installed
in the exfel-python environment. May be dropped at some point.
Parameters
----------
fname: str
filename as string
group: str
the name of the xarray dataset (group in h5 file).
form: str
specify whether the data to be loaded is a 'dataset' or an 'array'.
"""
f_exists = os.path.isfile(fname)
if f_exists:
if form == 'dataset':
log.debug(f'open xarray dataset {fname}')
return xr.load_dataset(fname, group=group, engine='h5netcdf')
elif form == 'array':
log.debug(f'open xarray dataarray {fname}')
return xr.load_dataarray(fname, group=group, engine='h5netcdf')
else:
msg = "File does not exists."
raise ToolBoxFileError(msg, fname)
def _data_from_list(filenames):
"""
Helper function for data formatting routines. Loads the specified files
given by their names. This subroutine expects the name of the group to be
'data'.
Parameters
----------
filenames: list
list of valid xarray filenames
Returns
-------
data: list
a list containing the loaded data
Raises
------
ToolBoxFileError
raises ToolBoxFileError in case file does not exist.
"""
data = []
for name in filenames:
f_exists = os.path.isfile(name)
if f_exists:
data.append(load_xarray(name, group='data'))
else:
msg = "File does not exists."
raise ToolBoxFileError(msg, name)
return data
def get_data_formatted(filenames=[], data_list=[]):
"""
Combines the given data into one dataset. For any of extra_data's data
types, an xarray.Dataset is returned. The data is sorted along the 'module'
dimension. The array dimensions have the order 'trainId', 'pulse', 'module',
'x', 'y'. This order is required by the extra_geometry package.
Parameters
----------
filenames: list of str
files to be combined as a list of names. Calls '_data_from_list' to
actually load the data.
data_list: list
list containing the already loaded data
Returns
-------
data: xarray.Dataset
A xarray.Dataset containing the combined data.
"""
if any(filenames) is True:
data = _data_from_list(filenames)
elif any(data_list) is True:
data = data_list
mod_list = []
for d in data:
if 'module' in d.attrs.keys():
mod_list.append(d.attrs['module'])
if type(data[0]).__module__ == 'xarray.core.dataset':
data = xr.concat(data, dim='module')
elif type(data[0]).__module__ == 'pandas.core.frame':
pass
elif type(data[0]).__module__ == 'dask.dataframe.core':
pass
if len(mod_list) > 0:  # only if module attributes were found
data = data.assign_coords(module=("module", mod_list))
data = data.sortby("module")
data.attrs.clear()
return data.transpose('trainId', 'pulse', 'module', 'x', 'y')
def search_files(run_folder):
"""
Search folder for h5 files.
Parameters
----------
run_folder: str
the path to a folder containing h5 files.
Returns
-------
a list of the filenames of all .h5 files in the given folder.
Raises
------
ToolBoxFileError: Exception
raises ToolBoxFileError in case there are no .h5 files in the folder,
or the folder does not exist.
"""
try:
filenames = os.listdir(run_folder)
except OSError:
raise ToolBoxFileError("Folder does not exist", run_folder)
files = [run_folder+name for name in filenames if ".h5" in name]
if len(files) == 0:
raise ToolBoxFileError("No .h5 files in folder", run_folder)
return files
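# Usage sketch of the storage helpers above ('module_ds' and the file names
# are placeholders):
# >>> save_xarray('module_data.h5', module_ds, group='data', mode='w')
# >>> save_attributes_h5('module_data.h5', {'module': 3})
# >>> ds = load_xarray('module_data.h5', group='data', form='dataset')
# >>> files = search_files('./processed/')
# >>> combined = get_data_formatted(files)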
"""
DSSC-related sub-routines.
comment: contributions should comply with pep8 code structure guidelines.
"""
import logging
import numpy as np
import xarray as xr
from imageio import imread
import extra_data as ed
from .xgm import get_xgm
from .digitizers import get_digitizer_peaks
__all__ = [
'create_dssc_bins',
'get_xgm_formatted',
'load_dssc_info',
'load_mask',
'quickmask_DSSC_ASIC',
]
log = logging.getLogger(__name__)
def load_dssc_info(proposal, run_nr):
"""
Loads the first data file for DSSC module 1 (this is hardcoded)
and returns the detector_info dictionary
Parameters
----------
proposal: str, int
number of proposal
run_nr: str, int
number of run
Returns
-------
info : dictionary
{'dims': tuple, 'frames_per_train': int, 'total_frames': int}
"""
module = ed.open_run(proposal, run_nr, include='*DSSC01*')
info = module.detector_info('SCS_DET_DSSC1M-1/DET/1CH0:xtdf')
info["number_of_trains"] = len(module.train_ids)
info["trainIds"] = module.train_ids
log.debug("Fetched information for DSSC module nr. 1.")
return info
def create_dssc_bins(name, coordinates, bins):
"""
Creates a single entry for the dssc binner dictionary. The produced xarray
data-array will later be used to perform grouping operations according to
the given bins.
Parameters
----------
name: str
name of the coordinate to be binned.
coordinates: numpy.ndarray
the original coordinate values (1D)
bins: numpy.ndarray
the bins according to which the corresponding dimension should
be grouped.
Returns
-------
da: xarray.DataArray
A pre-formatted xarray.DataArray relating the specified dimension with
its bins.
Examples
--------
>>> import toolbox_scs as tb
>>> run = tb.open_run(2212, 235, include='*DA*')
1.) binner along 'pulse' dimension. Group data into two bins.
>>> bins_pulse = ['pumped', 'unpumped'] * 10
>>> binner_pulse = tb.create_dssc_bins("pulse",
np.linspace(0,19,20, dtype=int),
bins_pulse)
2.) binner along 'train' dimension. Group data into bins corresponding
to the positions of a delay stage for instance.
>>> bins_trainId = tb.get_array(run, 'PP800_PhaseShifter', 0.04)
>>> binner_train = tb.create_dssc_bins("trainId",
run.trainIds,
bins_trainId.values)
"""
if name in ['trainId', 'pulse', 'x', 'y']:
da = xr.DataArray(bins,
dims=[name],
coords={name: coordinates})
log.debug(f'created dssc bin array for dimension {name}')
return da
log.debug(f'could not construct dssc bin array for dimension {name}')
raise ValueError(f'Invalid name {str(name)}: value should be '
'trainId, pulse, x, or y')
def get_xgm_formatted(run_obj, xgm_name, dssc_frame_coords):
"""
Load the xgm data and define coordinates along the pulse dimension.
Parameters
----------
run_obj: extra_data.DataCollection
DataCollection object providing access to the xgm data to be loaded
xgm_name: str
valid mnemonic of a xgm source
dssc_frame_coords: int, list
defines which dssc frames should be normalized using data from the xgm.
Returns
-------
xgm: xarray.DataArray
xgm data with coordinate 'pulse'.
"""
log.debug('load raw xgm data')
xgm = get_xgm(run_obj, xgm_name)[xgm_name]
pulse_dim = [d for d in xgm.dims if d != 'trainId'][0]
xgm = xgm.rename({pulse_dim: 'pulse'})
if type(dssc_frame_coords) == int:
dssc_frame_coords = np.arange(xgm.sizes['pulse']*dssc_frame_coords,
step=dssc_frame_coords, dtype=np.uint64)
xgm['pulse'] = dssc_frame_coords
log.info('loaded formatted xgm data.')
return xgm
def get_tim_formatted(proposal, runNB, tim_names, dssc_frame_coords):
"""
Load the tim data and define coordinates along the pulse dimension.
Parameters
----------
proposal: int
the proposal number
runNB: int
the run number
tim_names: list of str
a list of valid mnemonics for tim data sources
dssc_frame_coords: int
defines which dssc frames should be normalized using data from the tim.
Returns
-------
tim: xarray.DataArray
tim data with coordinate 'pulse'.
"""
log.debug('load tim data')
# open the run here: 'run_obj' was previously undefined in this scope
run_obj = ed.open_run(proposal, runNB)
tim = get_digitizer_peaks(run_obj, tim_names)
# average over all tim sources
tim = -tim.to_array().mean(dim='variable')
pulse_dim = [d for d in tim.dims if d != 'trainId'][0]
tim = tim.rename({pulse_dim: 'pulse'})
if type(dssc_frame_coords) == int:
dssc_frame_coords = np.arange(tim.sizes['pulse']*dssc_frame_coords,
step=dssc_frame_coords, dtype=np.uint64)
tim['pulse'] = dssc_frame_coords
log.info('loaded formatted tim data.')
return tim
def quickmask_DSSC_ASIC(poslist):
'''
Returns a mask for the given DSSC geometry with ASICs given in poslist
blanked. poslist is a list of (module, row, column) tuples. Each module
consists of 2 rows and 8 columns of individual ASICS.
Copyright (c) 2019, Michael Schneider
Copyright (c) 2020, SCS-team
license: BSD 3-Clause License (see LICENSE_BSD for more info)
'''
mask = np.ones([16, 128, 512], dtype=float) # need floats to use NaN
for (module, row, col) in poslist:
mask[module, 64 * row:64 * (row + 1), 64 * col:64 * (col + 1)] = \
np.nan
return mask
def load_mask(fname, dssc_mask):
"""
Load a DSSC mask file.
Copyright (c) 2019, Michael Schneider
Copyright (c) 2020, SCS-team
license: BSD 3-Clause License (see LICENSE_BSD for more info)
Parameters
----------
fname: str
string of the filename of the mask file
Returns
-------
mask: np.ndarray
the mask array with masked pixels set to NaN.
"""
mask = imread(fname)
mask = mask.astype(float)[..., 0] // 255
mask[mask == 0] = np.nan
return mask
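# Usage sketch: blank two ASICs of module 3 and apply the resulting NaN mask
# to a single-module image ('module3_image' is a placeholder array of shape
# (128, 512)):
# >>> mask = quickmask_DSSC_ASIC([(3, 0, 0), (3, 0, 1)])  # shape (16, 128, 512)
# >>> module3_image_masked = module3_image * mask[3]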
""" DSSC visualization routines
Plotting sub-routines frequently done in combination with dssc analysis.
The initial code is based on: https://github.com/dscran/dssc_process
/blob/master/example_image_process_pulsemask.ipynb
Todo: For visualization of statistical information we could eventually
switch to seaborn: https://seaborn.pydata.org/
"""
from time import strftime
import matplotlib.pyplot as plt
import numpy as np
import xarray as xr
def plot_xgm_threshold(xgm,
xgm_min = None, xgm_max = None,
run_nr = '', safe_fig = False):
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(xgm.trainId, xgm, 'o', c='C0', ms=1)
if xgm_min:
ax.axhline(xgm_min, c='r')
if xgm_max:
ax.axhline(xgm_max, c='r')
ax.set_xlabel('trainId')
ax.set_ylabel('xgm')
ax.set_title(f'run: {run_nr}')
if safe_fig == True:
tstamp = strftime('%y%m%d_%H%M')
fig.savefig(f'images/run{run_nr}_scan_{tstamp}.png', dpi=200)
def plot_binner(binner,
yname = 'data', xname='data',
run_nr = ''):
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(binner.values)
ax.set_ylabel(yname)
ax.set_xlabel(xname)
ax.set_title(f'run: {run_nr}')
def plot_binner_hist(binner,
dname = 'data', run_nr = ''):
counts = xr.DataArray(np.ones(len(binner.values)),
dims=[dname],
coords={dname: binner.values},
name='counts')
counts = counts.groupby(dname).sum()
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(counts[dname], counts, 'o', ms=4)
ax.set_xlabel(dname)
ax.set_ylabel('counts')
ax.set_title(f'run {run_nr}')
ax.grid(True)
#if safe_fig == True:
# tstamp = strftime('%y%m%d_%H%M')
# fig.savefig(f'images/run{run_nr}_scan_{tstamp}.png', dpi=200)
def plot_hist_processed(hist_data):
pass
"""
DSSC-related sub-routines.
comment: contributions should comply with pep8 code structure guidelines.
"""
import logging
import os
import numpy as np
import xarray as xr
import pandas as pd
import extra_data as ed
from ..mnemonics_machinery import mnemonics_for_run
from .dssc_data import save_xarray
__all__ = [
'process_dssc_data'
]
log = logging.getLogger(__name__)
def create_empty_dataset(run_info, binners={}):
"""
Create empty (zero-valued) DataSet for a single DSSC module
to iteratively add data to.
Copyright (c) 2020, SCS-team
Parameters
----------
run_info: dictionary
run information as returned by load_dssc_info().
binners: dictionary
binner arrays created by create_dssc_bins().
Returns
-------
xarray.Dataset
"""
fpt = run_info['frames_per_train']
len_x = run_info['dims'][0]
len_y = run_info['dims'][1]
defaults = {"trainId": run_info["trainIds"],
"pulse": np.linspace(0, fpt-1, fpt, dtype=int),
"x": np.linspace(1, len_x, len_x, dtype=int),
"y": np.linspace(1, len_y, len_y, dtype=int)}
dims = []
coords = {}
shape = []
for key in defaults:
dims.append(key)
df = pd.DataFrame({'data': defaults[key]})
if key in binners:
df = pd.DataFrame({'data': binners[key].values})
grouped = df.groupby(['data'])
groups = list(grouped.groups.keys())
coords[key] = groups
shape.append(len(groups))
da_data = xr.DataArray(np.zeros(shape, dtype=np.float64),
coords=coords,
dims=dims)
ds = da_data.to_dataset(name="data")
ds['hist'] = xr.full_like(ds.data[:, :, 0, 0], fill_value=0)
log.debug("Prepared empty dataset for single dssc module")
return ds
def load_chunk_data(sel, sourcename, maxframes=None):
"""
Load selected DSSC data. The flattened multi-index (trains+pulses) is
unraveled before returning the data.
Copyright (c) 2019, Michael Schneider
Copyright (c) 2020, SCS-team
license: BSD 3-Clause License (see LICENSE_BSD for more info)
Parameters
----------
sel: extra_data.DataCollection
a DataCollection or a subset of a DataCollection obtained by its
select_trains() method
sourcename: str
name of the DSSC detector data source to load
Returns
-------
xarray.DataArray
"""
info = sel.detector_info(sourcename)
fpt = info['frames_per_train']
data = sel.get_array(sourcename, 'image.data',
extra_dims=['_empty_', 'x', 'y']
).squeeze()
tids = np.unique(data.trainId)
data = data.rename(dict(trainId='trainId_pulse'))
midx = pd.MultiIndex.from_product([sorted(tids), range(fpt)],
names=('trainId', 'pulse'))
data = xr.DataArray(data,
dict(trainId_pulse=midx)
).unstack('trainId_pulse')
data = data.transpose('trainId', 'pulse', 'x', 'y')
log.debug(f"loaded and formatted chunk of dssc data")
return data.loc[{'pulse': np.s_[:maxframes]}]
def _load_chunk_xgm(sel, xgm_mnemonic, stride):
"""
Load selected xgm data.
Copyright (c) 2020, SCS-team
Parameters
----------
sel: extra_data.DataCollection
a DataCollection or a subset of a DataCollection obtained by its
select_trains() method
xgm_mnemonic: str
a valid mnemonic for an xgm source from the tb.mnemonic dictionary
stride: int
indicating which dssc frames should be normalized using the xgm data.
Returns
-------
xarray.DataArray
"""
mnemonics = mnemonics_for_run(sel)
d = sel.get_array(*mnemonics[xgm_mnemonic].values())
d = d.rename(new_name_or_name_dict={'XGMbunchId': 'pulse'})
frame_coords = np.linspace(0, (d.shape[1]-1)*stride, d.shape[1], dtype=int)
d = d.assign_coords({'pulse': frame_coords})
log.debug(f"loaded and formatted chunk of xgm data")
return d
def process_dssc_data(proposal, run_nr, module, chunksize, info, dssc_binners,
path='./',
pulsemask=None,
dark_image=None,
xgm_mnemonic='SCS_SA3',
xgm_normalization=False,
normevery=1
):
"""
Collects and reduces DSSC data for a single module.
Copyright (c) 2020, SCS-team
Parameters
----------
proposal : int
proposal number
run_nr : int
run number
module : int
DSSC module to process
chunksize : int
number of trains to load simultaneously
info: dictionary
dictionary containing keys 'dims', 'frames_per_train', 'total_frames',
'trainIds', 'number_of_trains'.
dssc_binners: dictionary
a dictionary containing binner objects created by the ToolBox member
function "create_binner()"
path : str
location in which the .h5 files, containing the binned data, should
be stored.
pulsemask : numpy.ndarray
array of booleans to be used to mask dssc data according to xgm data.
dark_image: xarray.DataArray
an xarray dataarray with matching coordinates with the loaded data. If
dark_image is not None it will be subtracted from each individual dssc
frame.
xgm_normalization: bool
true if the data should be divided by the corresponding xgm value.
xgm_mnemonic: str
Mnemonic of the xgm data to be used for normalization.
normevery: int
One out of normevery dssc frames will be normalized.
Returns
-------
module_data: xarray.Dataset
xarray datastructure containing data binned according to bins.
"""
# metadata definition
ntrains = info['number_of_trains']
chunks = np.arange(ntrains, step=chunksize)
sourcename_dssc = f'SCS_DET_DSSC1M-1/DET/{module}CH0:xtdf'
# open extra_data run objects
collection_DSSC = ed.open_run(proposal, run_nr,
include=f'*DSSC{module:02d}*')
collection_DA1 = ed.open_run(proposal, run_nr, include='*DA01*')
log.info(f"Processing dssc module {module}: start")
# create empty dataset for dssc data to be filled iteratively
module_data = create_empty_dataset(info, dssc_binners)
# load data chunk by chunk and merge result into prepared empty dataset
for chunk in chunks:
log.debug(f"Module {module}: "
f"load trains {chunk}:{chunk + chunksize}")
sel = collection_DSSC.select_trains(
ed.by_index[chunk:chunk + chunksize])
chunk_data = load_chunk_data(sel, sourcename_dssc)
chunk_hist = xr.full_like(chunk_data[:, :, 0, 0], fill_value=1)
# ---------------------------------------------------------------------
# optional blocks -> ToDo: see merge request !87
# ---------------------------------------------------------------------
# option 1: prefiltering -> xgm pulse masking
if pulsemask is not None:
log.debug(f'Module {module}: drop out-of-bounds frames')
# relatively slow. ToDo -> wrap using np.ufunc
chunk_data = chunk_data.where(pulsemask)
chunk_hist = chunk_hist.where(pulsemask)
# option 2: subtraction of dark image/s
if dark_image is not None:
log.debug(f'Module {module}: subtract dark')
chunk_data.values = chunk_data.values - dark_image.values
# slower: using xarray directly
# chunk_data = chunk_data - dark_image
# option 3: normalize dssc frames by their xgm values
if xgm_normalization:
log.debug(f'Module {module}: load and format xgm data')
sel_DA1 = collection_DA1.select_trains(
ed.by_index[chunk:chunk + chunksize])
chunk_xgm = _load_chunk_xgm(sel_DA1, xgm_mnemonic, normevery)
log.debug(f'Module {module}: normalize chunk_data using xgm')
# the following line uses numpy's fast vectorized array operations;
# there is overhead coming from rewriting the xarray DataArray
chunk_data.values[:, 0::normevery, :, :] = \
np.divide(chunk_data[:, 0::normevery, :, :], chunk_xgm)
# slow code using xarray directly
# chunk_data = chunk_data / chunk_xgm
# ---------------------------------------------------------------------
# end optional blocks: xarray operations from here on.
# ---------------------------------------------------------------------
# data reduction -> apply binners
log.debug(f'Module {module}: apply binning to chunk_data')
chunk_data = chunk_data.to_dataset(name='data')
chunk_data['hist'] = chunk_hist
for b in dssc_binners:
chunk_data[b+"_binned"] = dssc_binners[b]
chunk_data = chunk_data.groupby(b+"_binned").sum(b)
chunk_data = chunk_data.rename(name_dict={b+"_binned": b})
# add chunk data to loaded data
log.debug(f'Module {module}: merge chunk data')
for var in ['data', 'hist']:
module_data[var] = xr.concat([module_data[var],
chunk_data[var]],
dim='tmp').sum('tmp')
log.debug(f"Module {module}: "
f"processed trains {chunk}:{chunk + chunksize}")
log.debug(f'Module {module}: calculate mean')
module_data['data'] = module_data['data'] / module_data['hist']
module_data = module_data.transpose('trainId', 'pulse', 'x', 'y')
module_data.attrs['module'] = module
log.debug(f'saving module {module}')
if not os.path.isdir(path):
os.mkdir(path)
fname = f'run_{run_nr}_module{module}.h5'
save_xarray(path+fname, module_data, mode='a')
log.info(f"processing module {module}: done")
from joblib import Parallel, delayed, parallel_backend
from time import strftime
import tempfile
import shutil
from tqdm.auto import tqdm
import os
import warnings
import psutil
import karabo_data as kd
from karabo_data.read_machinery import find_proposal
import ToolBox as tb
import extra_data as ed
from extra_data.read_machinery import find_proposal
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import ImageGrid
import numpy as np
import xarray as xr
import h5py
from glob import glob
from imageio import imread
from ..constants import mnemonics as _mnemonics
from .azimuthal_integrator import AzimuthalIntegrator
from ..misc.laser_utils import positionToDelay
class FastCCD:
def __init__(self, proposal, distance=1, raw=False):
@@ -72,7 +74,7 @@ class FastCCD:
shutil.rmtree(self.tempdir)
def open_run(self, run_nr, isDark=False, t0=0.0):
""" Open a run with karabo-data and prepare the virtual dataset for multiprocessing
""" Open a run with extra-data and prepare the virtual dataset for multiprocessing
inputs:
run_nr: the run number
@@ -80,11 +82,11 @@ class FastCCD:
t0: optional t0 in mm
"""
print('Opening run data with karabo-data')
print('Opening run data with extra-data')
self.run_nr = run_nr
self.xgm = None
self.run = kd.open_run(self.proposal, self.run_nr)
self.run = ed.open_run(self.proposal, self.run_nr)
self.plot_title = f'{self.proposal} run: {self.run_nr}'
self.isDark = isDark
self.fpt = 1
@@ -106,17 +108,17 @@ class FastCCD:
print(f'Loading XGM data')
try:
self.xgm = self.run.get_array(tb.mnemonics['SCS_SA3']['source'],
tb.mnemonics['SCS_SA3']['key'],
roi=kd.by_index[:self.nbunches])
self.xgm = self.run.get_array(_mnemonics['SCS_SA3']['source'],
_mnemonics['SCS_SA3']['key'],
roi=ed.by_index[:self.nbunches])
self.xgm = self.xgm.squeeze() # remove the pulseId dimension since XGM should have only 1 value per train
except:
self.xgm = xr.DataArray(np.zeros_like(self.run.train_ids),dims = 'trainId', coords = {"trainId":self.run.train_ids})
print(f'Loading mono nrj data')
try:
self.nrj = self.run.get_array(tb.mnemonics['nrj']['source'],
tb.mnemonics['nrj']['key'])
self.nrj = self.run.get_array(_mnemonics['nrj']['source'],
_mnemonics['nrj']['key'])
except:
self.nrj = xr.DataArray(np.zeros_like(self.run.train_ids),dims = 'trainId', coords = {"trainId":self.run.train_ids})
@@ -124,16 +126,16 @@ class FastCCD:
print(f'Loading delay line data')
try:
self.delay_mm = self.run.get_array(tb.mnemonics['PP800_DelayLine']['source'],
tb.mnemonics['PP800_DelayLine']['key'])
self.delay_mm = self.run.get_array(_mnemonics['PP800_DelayLine']['source'],
_mnemonics['PP800_DelayLine']['key'])
except:
self.delay_mm = xr.DataArray(np.zeros_like(self.run.train_ids),dims = 'trainId', coords = {"trainId":self.run.train_ids})
self.t0 = t0
self.delay_ps = tb.positionToDelay(self.delay_mm, origin=self.t0, invert=True)
self.delay_ps = positionToDelay(self.delay_mm, origin=self.t0, invert=True)
print(f'Loading Fast ADC5 data')
try:
self.FastADC5 = self.run.get_array(tb.mnemonics['FastADC5raw']['source'], tb.mnemonics['FastADC5raw']['key']).max('dim_0')
self.FastADC5 = self.run.get_array(_mnemonics['FastADC5raw']['source'], _mnemonics['FastADC5raw']['key']).max('dim_0')
self.FastADC5[self.FastADC5<35000] = 0
self.FastADC5[self.FastADC5>=35000] = 1
except:
@@ -178,9 +180,9 @@ class FastCCD:
if type(vname) is dict:
self.scan = self.run.get_array(vname['source'], vname['key'])
elif type(vname) is str:
if vname not in tb.mnemonics:
if vname not in _mnemonics:
raise ValueError(f'{vname} not found in the ToolBox mnemonics table')
self.scan = self.run.get_array(tb.mnemonics[vname]['source'], tb.mnemonics[vname]['key'])
self.scan = self.run.get_array(_mnemonics[vname]['source'], _mnemonics[vname]['key'])
else:
raise ValueError(f'vname should be a string or a dict. We got {type(vname)}')
@@ -235,7 +237,7 @@ class FastCCD:
plt.figure(figsize=(5,3))
plt.bar(bins_center, hist, align='center', width=width)
plt.xlabel(f"{tb.mnemonics['SCS_SA3']['source']}{tb.mnemonics['SCS_SA3']['key']}")
plt.xlabel(f"{_mnemonics['SCS_SA3']['source']}{_mnemonics['SCS_SA3']['key']}")
plt.ylabel('density')
plt.title(self.plot_title)
@@ -369,6 +371,7 @@ class FastCCD:
warnings.warn(f'Overwriting file: {save_path}')
os.remove(save_path)
self.module_data.to_netcdf(save_path, group='data')
self.module_data.close()
os.chmod(save_path, 0o664)
print('saving: ', save_path)
else:
@@ -389,10 +392,10 @@ class FastCCD:
self.plot_title = f'{self.proposal} run: {runNB} dark: {dark_runNB}'
binned = xr.open_dataset(os.path.join(save_folder, f'run{runNB}.h5'), group='data', cache=False)
binned = xr.load_dataset(os.path.join(save_folder, f'run{runNB}.h5'), group='data')
if dark_runNB is not None:
dark = xr.open_dataset(os.path.join(save_folder, f'run{dark_runNB}_dark.h5'), group='data', cache=False)
dark = xr.load_dataset(os.path.join(save_folder, f'run{dark_runNB}_dark.h5'), group='data')
binned['pumped'] = self.gain*(binned['pumped'] - dark['pumped'].squeeze(drop=True))
binned['unpumped'] = self.gain*(binned['unpumped'] - dark['unpumped'].squeeze(drop=True))
@@ -498,7 +501,7 @@ class FastCCD:
im_pumped_mean = im_pumped_arranged.mean(axis=0)
im_unpumped_mean = im_unpumped_arranged.mean(axis=0)
ai = tb.azimuthal_integrator(im_pumped_mean.shape, center, angle_range, dr=dr, aspect=1)
ai = AzimuthalIntegrator(im_pumped_mean.shape, center, angle_range, dr=dr, aspect=1)
norm = ai(~np.isnan(im_pumped_mean))
az_pump = []
@@ -630,7 +633,7 @@ def process_one_module(job):
data_pumped = ds.where(ds['FastADC5'] > 0, drop=True).groupby('scan_variable').sum('trainId')
data_unpumped = ds.where(ds['FastADC5'] < 1, drop=True).groupby('scan_variable').sum('trainId')
module_data = data_pumped['fastccd'].to_dataset('pumped')
module_data = data_pumped['fastccd'].to_dataset(name='pumped')
module_data['unpumped'] = data_unpumped['fastccd']
module_data['sum_count_pumped'] = data_pumped['sum_count']
module_data['sum_count_unpumped'] = data_unpumped['sum_count']
@@ -638,4 +641,4 @@ def process_one_module(job):
module_data['xgm_unpumped'] = data_unpumped['xgm']
module_data['workerId'] = workerId
return module_data
\ No newline at end of file
return module_data
""" Gotthard-II detector related sub-routines
Copyright (2024) SCS Team.
(contributions preferably comply with pep8 code structure
guidelines.)
"""
from extra.components import OpticalLaserPulses, XrayPulses
import numpy as np
import xarray as xr
import logging
__all__ = [
'extract_GH2',
]
log = logging.getLogger(__name__)
def extract_GH2(ds, run, firstFrame=0, bunchPattern='scs_ppl',
gh2_dim='gh2_pId', keep_darks=False):
'''
Select and align the frames of the Gotthard-II that have been exposed
to light. `keep_darks` gives the option to keep the non-exposed
frames.
Parameters
----------
ds: xarray.Dataset
The dataset containing GH2 data
run: extra_data.DataCollection
The run containing the bunch pattern source
firstFrame: int
The GH2 frame number corresponding to the first pulse of the train.
bunchPattern: str in ['scs_ppl', 'sase3']
the bunch pattern used to align data. For 'scs_ppl', the gh2_pId
dimension is renamed 'ol_pId', and for 'sase3' gh2_pId is renamed
'sa3_pId'.
gh2_dim: str
The name of the dimension that corresponds to the Gotthard-II frames.
keep_darks: bool
If True, the frames that do not correspond to the bunchPattern are
kept.
Returns
-------
nds: xarray Dataset
The aligned and reduced dataset with only-data-containing GH2
variables.
'''
if gh2_dim not in ds.dims:
log.warning(f'gh2_dim "{gh2_dim}" not in dataset. Skipping.')
return ds
if bunchPattern == 'scs_ppl':
pattern = OpticalLaserPulses(run)
dim = 'ol_pId'
else:
pattern = XrayPulses(run)
dim = 'sa3_pId'
others = [var for var in ds if
gh2_dim not in ds[var].dims or dim in ds[var].coords]
nds = ds.drop_dims(dim, errors='ignore')
nds = nds.drop_vars(others, errors='ignore')
dark_ds = xr.Dataset()
if pattern.is_constant_pattern():
pulse_ids = pattern.peek_pulse_ids(labelled=False)
signal_on_gh_ids = pulse_ids + firstFrame
if keep_darks:
no_signal_ids = [i for i in nds[gh2_dim].values
if i not in signal_on_gh_ids]
dark_ds = nds.isel({gh2_dim: no_signal_ids})
dark_ds = dark_ds.assign_coords({gh2_dim: no_signal_ids})
for n in dark_ds:
dark_ds = dark_ds.rename({n: f'{n}_dark'})
nds = nds.isel({gh2_dim: pulse_ids + firstFrame})
nds = nds.assign_coords({gh2_dim: pulse_ids})
nds = nds.rename({gh2_dim: dim})
else:
log.warning('The number of pulses has changed during the run.')
pulse_ids = np.unique(pattern.pulse_ids(labelled=False, copy=False))
nds = nds.isel({gh2_dim: pulse_ids + firstFrame})
nds = nds.assign_coords({gh2_dim: pulse_ids})
nds = nds.rename({gh2_dim: dim})
mask = pattern.pulse_mask(labelled=False)
mask = xr.DataArray(mask, dims=['trainId', dim],
coords={'trainId': run.train_ids,
dim: np.arange(mask.shape[1])})
mask = mask.sel({dim: pulse_ids})
nds = nds.where(mask, drop=True)
nds = nds.merge(dark_ds, join='inner')
ret = ds[others].merge(nds, join='inner')
return ret
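# Usage sketch (proposal, run and mnemonic are placeholders; it assumes the
# loaded dataset contains a Gotthard-II variable with a 'gh2_pId' dimension):
# >>> import toolbox_scs as tb
# >>> run, ds = tb.load(2953, 100, fields=['Gotthard2'])
# >>> aligned = extract_GH2(ds, run, firstFrame=1, bunchPattern='scs_ppl',
# >>>                       keep_darks=True)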
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import leastsq
from scipy.optimize import curve_fit
from scipy.signal import fftconvolve
import xarray as xr
from extra_data import open_run
import pasha as psh
import toolbox_scs as tb
__all__ = [
'hRIXS',
'MaranaX',
]
# -----------------------------------------------------------------------------
# Curvature
def find_curvature(image, frangex=None, frangey=None,
deg=2, axis=1, offset=100):
# Resolve arguments
x_range = (0, image.shape[1])
if frangex is not None:
x_range = (max(frangex[0], x_range[0]), min(frangex[1], x_range[1]))
y_range = (0, image.shape[0])
if frangey is not None:
y_range = (max(frangey[0], y_range[0]), min(frangey[1], y_range[1]))
axis_range = y_range if axis == 1 else x_range
axis_dim = image.shape[axis - 1]
# Get kernel
integral = image[slice(*y_range), slice(*x_range)].mean(axis=axis)
roi = np.ones([axis_range[1] - axis_range[0], axis_dim])
ref = roi * integral[:, np.newaxis]
# Get sliced image
slice_ = [slice(None), slice(None)]
slice_[axis - 1] = slice(max(axis_range[0] - offset, 0),
min(axis_range[1] + offset, axis_dim))
sliced = image[tuple(slice_)]
if axis == 0:
sliced = sliced.T
# Get curvature factor from cross correlation
crosscorr = fftconvolve(sliced,
ref[::-1, :],
axes=0, )
shifts = np.argmax(crosscorr, axis=0)
curv = np.polyfit(np.arange(axis_dim), shifts, deg=deg)
return curv[:-1][::-1]
def find_curvature(img, args, plot=False, **kwargs):
def parabola(x, a, b, c, s=0, h=0, o=0):
return (a*x + b)*x + c
def gauss(y, x, a, b, c, s, h, o=0):
return h * np.exp(-((y - parabola(x, a, b, c)) / (2 * s))**2) + o
x = np.arange(img.shape[1])[None, :]
y = np.arange(img.shape[0])[:, None]
if plot:
plt.figure(figsize=(10,10))
plt.imshow(img, cmap='gray', aspect='auto', interpolation='nearest', **kwargs)
plt.plot(x[0, :], parabola(x[0, :], *args))
args, _ = leastsq(lambda args: (gauss(y, x, *args) - img).ravel(), args)
if plot:
plt.plot(x[0, :], parabola(x[0, :], *args))
return args
def correct_curvature(image, factor=None, axis=1):
if factor is None:
return
if axis == 1:
image = image.T
ydim, xdim = image.shape
x = np.arange(xdim + 1)
y = np.arange(ydim + 1)
xx, yy = np.meshgrid(x[:-1] + 0.5, y[:-1] + 0.5)
xxn = xx - factor[0] * yy - factor[1] * yy ** 2
ret = np.histogramdd((xxn.flatten(), yy.flatten()),
bins=[x, y],
weights=image.flatten())[0]
return ret if axis == 1 else ret.T
def get_spectrum(image, factor=None, axis=0,
pixel_range=None, energy_range=None, ):
start, stop = (0, image.shape[axis - 1])
if pixel_range is not None:
start = max(pixel_range[0] or start, start)
stop = min(pixel_range[1] or stop, stop)
edge = image.sum(axis=axis)[start:stop]
bins = np.arange(start, stop + 1)
centers = (bins[1:] + bins[:-1]) * 0.5
if factor is not None:
centers, edge = calibrate(centers, edge,
factor=factor,
range_=energy_range)
return centers, edge
# -----------------------------------------------------------------------------
# Energy calibration
def energy_calibration(channels, energies):
return np.polyfit(channels, energies, deg=1)
def calibrate(x, y=None, factor=None, range_=None):
if factor is not None:
x = np.polyval(factor, x)
if y is not None and range_ is not None:
start = np.argmin(np.abs((x - range_[0])))
stop = np.argmin(np.abs((x - range_[1])))
# Calibrated energies have a different direction
x, y = x[stop:start], y[stop:start]
return x, y
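# Sketch of the pixel-to-energy calibration helpers above, with made-up
# reference points; 'centers' and 'edge' would come from get_spectrum():
# >>> channels = np.array([200., 400., 600.])
# >>> energies = np.array([530.2, 529.4, 528.6])  # eV, decreasing with pixel
# >>> factor = energy_calibration(channels, energies)
# >>> x, y = calibrate(centers, edge, factor=factor, range_=(528.0, 531.0))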
# -----------------------------------------------------------------------------
# Gaussian-related functions
FWHM_COEFF = 2 * np.sqrt(2 * np.log(2))
def gaussian_fit(x_data, y_data, offset=0):
"""
Centre-of-mass and width. Lifted from image_processing.imageCentreofMass()
"""
x0 = np.average(x_data, weights=y_data)
sx = np.sqrt(np.average((x_data - x0) ** 2, weights=y_data))
# Gaussian fit
baseline = y_data.min()
p_0 = (y_data.max(), x0 + offset, sx, baseline)
try:
p_f, _ = curve_fit(gauss1d, x_data, y_data, p_0, maxfev=10000)
return p_f
except (RuntimeError, TypeError) as e:
print(e)
return None
def gauss1d(x, height, x0, sigma, offset):
return height * np.exp(-0.5 * ((x - x0) / sigma) ** 2) + offset
def to_fwhm(sigma):
return abs(sigma * FWHM_COEFF)
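# Usage sketch: fit a 1D profile and report its FWHM ('x_data' and 'y_data'
# are placeholders, e.g. a spectrum axis and its intensities):
# >>> p = gaussian_fit(x_data, y_data)
# >>> if p is not None:
# >>>     height, x0, sigma, offset = p
# >>>     fwhm = to_fwhm(sigma)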
def decentroid(res):
res = np.array(res)
ret = np.zeros(shape=(res.max(axis=0) + 1).astype(int))
for cy, cx in res:
if cx > 0 and cy > 0:
ret[int(cy), int(cx)] += 1
return ret
class hRIXS:
"""The hRIXS analysis, especially curvature correction
The objects of this class contain the meta-information about the settings
of the spectrometer, not the actual data, except possibly a dark image
for background subtraction.
The actual data is loaded into `xarray`s, and stays there.
Attributes
----------
PROPOSAL: int
the number of the proposal
DETECTOR: str
the detector to be used. Can be ['hRIXS_det', 'MaranaX']
defaults to 'hRIXS_det' for backward-compatibility.
X_RANGE: slice
the slice to take in the dispersive direction, in pixels. Defaults
to the entire width.
Y_RANGE: slice
the slice to take in the energy direction
THRESHOLD: float
pixel counts above which a hit candidate is assumed, for centroiding.
use None if you want to give it in standard deviations instead.
STD_THRESHOLD:
same as THRESHOLD, in standard deviations.
DBL_THRESHOLD:
threshold controlling whether a detected hit is considered to be a
double hit.
BINS: int
the number of bins used in centroiding
CURVE_A, CURVE_B: float
the coefficients of the parabola for the curvature correction
USE_DARK: bool
whether to do dark subtraction. Is initially `False`, magically
switches to `True` if a dark has been loaded, but may be reset.
ENERGY_INTERCEPT, ENERGY_SLOPE:
The calibration from pixel to energy
FIELDS:
the fields to be loaded from the data. Add additional fields if so
desired.
Example
-------
proposal = 3145
h = hRIXS(proposal)
h.Y_RANGE = slice(700, 900)
h.CURVE_B = -3.695346575286939e-07
h.CURVE_A = 0.024084479232443695
h.ENERGY_SLOPE = 0.018387
h.ENERGY_INTERCEPT = 498.27
h.STD_THRESHOLD = 3.5
"""
DETECTOR_FIELDS = {
'hRIXS_det': ['hRIXS_det', 'hRIXS_index', 'hRIXS_delay', 'hRIXS_norm', 'nrj'],
'MaranaX': ['MaranaX', 'nrj'],
}
def __init__(self, proposalNB, detector='MaranaX'):
self.PROPOSAL = proposalNB
self.DETECTOR = detector
assert detector in self.DETECTOR_FIELDS
# image range
self.X_RANGE = np.s_[:]
self.Y_RANGE = np.s_[:]
# centroid
# centroid_one threshold
self.THRESHOLD = None # pixel counts above which a hit candidate is assumed
self.STD_THRESHOLD = 3.5 # same as THRESHOLD, in standard deviations
self.DBL_THRESHOLD = 0.1 # factor used for double hits in centroid_one
# centroid_two threshold
self.CENTROID_THRESHOLD = [0.2, 1]
self.CURVE_A = 0 # curvature parameters as determined elsewhere
self.CURVE_B = 0
# integral
self.BINS = 100
self.METHOD = 'centroid' # ['centroid', 'integral']
self.USE_DARK = False
# Ignore double hits
self.AVOID_DBL = False
self.ENERGY_INTERCEPT = 0
self.ENERGY_SLOPE = 1
self.FIELDS = self.DETECTOR_FIELDS[detector]
def set_params(self, **params):
for key, value in params.items():
setattr(self, key.upper(), value)
def get_params(self, *params):
if not params:
params = ('proposal', 'x_range', 'y_range',
'threshold', 'std_threshold', 'dbl_threshold',
'curve_a', 'curve_b',
'bins', 'method', 'fields')
return {param: getattr(self, param.upper()) for param in params}
def from_run(self, runNB, proposal=None, extra_fields=(), drop_first=False, subset=None):
"""load a run
Load the run `runNB`. A thin wrapper around `toolbox.load`.
Parameters
----------
drop_first: bool
if True, the first image in the run is removed from the dataset.
Example
-------
data = h.from_run(145) # load run 145
data1 = h.from_run(145) # load run 145
data2 = h.from_run(155) # load run 155
data = xarray.concat([data1, data2], 'trainId') # combine both
"""
if proposal is None:
proposal = self.PROPOSAL
if drop_first:
subset = slice(1, None)
run, data = tb.load(proposal, runNB=runNB, subset=subset,
fields=self.FIELDS + list(extra_fields))
return data
def load_dark(self, runNB, proposal=None):
"""load a dark run
Load the dark run `runNB` from `proposal`. The latter defaults to the
current proposal. The dark is stored in this `hRIXS` object, and
subsequent analyses use it for background subtraction.
Example
-------
h.load_dark(166) # load dark run 166
"""
data = self.from_run(runNB, proposal)
self.dark_image = data[self.DETECTOR].mean(dim='trainId')
self.USE_DARK = True
def find_curvature(self, runNB, proposal=None, plot=True, args=None,
**kwargs):
"""find the curvature correction coefficients
The hRIXS has some aberrations which lead to the spectroscopic lines
being curved on the detector. We approximate these aberrations with
a parabola for later correction.
Load a run and determine the curvature. The curvature is set in `self`,
and returned as a pair of floats.
Parameters
----------
runNB: int
the run number to use
proposal: int
the proposal to use, default to the current proposal
plot: bool
whether to plot the found curvature onto the data
args: pair of float, optional
a starting value to prime the fitting routine
Example
-------
h.find_curvature(155) # use run 155 to fit the curvature
"""
data = self.from_run(runNB, proposal)
image = data[self.DETECTOR].sum(dim='trainId') \
.values[self.X_RANGE, self.Y_RANGE].T
if args is None:
spec = (image - image[:10, :].mean()).mean(axis=1)
mean = np.average(np.arange(len(spec)), weights=spec)
args = (-2e-7, 0.02, mean - 0.02 * image.shape[1] / 2, 3,
spec.max(), image.mean())
args = find_curvature(image, args, plot=plot, **kwargs)
self.CURVE_B, self.CURVE_A, *_ = args
return self.CURVE_A, self.CURVE_B
def centroid_one(self, image):
"""find the position of photons with sub-pixel precision
A photon is supposed to have hit the detector if the intensity within a
2-by-2 square exceeds a threshold. In this case the position of the photon
is calculated as the center-of-mass in a 4-by-4 square.
Return the list of x, y coordinate pairs, corrected by the curvature.
"""
base = image.mean()
corners = image[1:, 1:] + image[:-1, 1:] \
+ image[1:, :-1] + image[:-1, :-1]
if self.THRESHOLD is None:
threshold = corners.mean() + self.STD_THRESHOLD * corners.std()
else:
threshold = self.THRESHOLD
# Threshold for double photons chosen to be the same ratio to single
# photons as found in the ESRF method
SpotHIGH=self.DBL_THRESHOLD*threshold
if self.AVOID_DBL:
SpotHIGH = 100000
middle = corners[1:-1, 1:-1]
candidates = (
(middle > threshold)
* (middle >= corners[:-2, 1:-1]) * (middle > corners[2:, 1:-1])
* (middle >= corners[1:-1, :-2]) * (middle > corners[1:-1, 2:])
* (middle >= corners[:-2, :-2]) * (middle > corners[2:, :-2])
* (middle >= corners[:-2, 2:]) * (middle > corners[2:, 2:]))
cp = np.argwhere(candidates)
if len(cp) > 10000:
raise RuntimeError(
"too many peaks, threshold low or acquisition time too high")
res = []
dres = []
for cy, cx in cp:
spot = image[cy: cy + 4, cx: cx + 4] - base
mx = np.average(np.arange(cx, cx + 4), weights=spot.sum(axis=0))
my = np.average(np.arange(cy, cy + 4), weights=spot.sum(axis=1))
if spot.sum() < SpotHIGH:
res.append((mx, my))
else:
res.append((mx, my))
res.append((mx, my))
dres.append((mx, my))
return res, dres
def centroid_two(self, image, energy):
""" determine position of photon hits on detector
The algorithm is taken from the ESRF RIXS toolbox. The thresholds for
determining photon hits are derived from the incident photon energy.
The function returns arrays containing the single and double hits
as x and y coordinates.
"""
# Prepare
threshold = self.CENTROID_THRESHOLD
# Multiplication factor * ADU/photon
photons = energy/3.6/1.06
SpotLOW = threshold[0] * photons
SpotHIGH = threshold[1] * photons
low_th_px = threshold[0] * photons
high_th_px = threshold[1] * photons
if self.AVOID_DBL:
SpotHIGH = 100000
img = image-image.mean()
gs = 2
# Find potential hits on a clipped image where 2 rows/columns are excluded
# from the edges because centroiding can't be done at the edge
cp = (np.argwhere((img[gs//2 : -gs//2, gs//2 : -gs//2] > low_th_px)*
(img[gs//2 : -gs//2, gs//2 : -gs//2] < high_th_px))+
np.array([gs//2, gs//2]))
if len(cp) > 100000:
raise RuntimeError('Threshold too low or acquisition time too long')
res = []
dres = []
for cy, cx in cp:
spot = img[cy - gs//2 : cy + gs//2 + 1, cx - gs//2 : cx+gs//2 +1]
#spot[spot < 0] = 0
if (spot > img[cy, cx]).sum() == 0:
mx = np.average(np.arange(cx - gs//2, cx + gs//2 + 1),
weights=spot.sum(axis=0))
my = np.average(np.arange(cy - gs//2, cy + gs//2 + 1),
weights=spot.sum(axis=1))
if (spot.sum()>=SpotLOW) and (spot.sum()<SpotHIGH):
res.append((mx, my))
elif (spot.sum()>SpotHIGH):
res.append((mx, my))
res.append((mx, my))
dres.append((mx, my))
return res, dres
def centroid(self, data, bins=None, method='auto'):
"""calculate a spectrum by finding the centroid of individual photons
This takes the `xarray.Dataset` `data` and returns a copy of it, with
a new `xarray.DataArray` named `spectrum` added, which contains the
energy spectrum calculated for each hRIXS image.
The `method` keyword switches between two algorithms: "auto" uses
centroid_two, where the photon-hit thresholds are derived from the
incident photon energy, and "manual" uses centroid_one with the
user-defined thresholds.
Example
-------
h.centroid(data) # find photons in all images of the run
data.spectrum[0, :].plot() # plot the spectrum of the first image
"""
if bins is None:
bins = self.BINS
ret = np.zeros((len(data[self.DETECTOR]), bins))
retd = np.zeros((len(data[self.DETECTOR]), bins))
total_hits = np.zeros((len(data[self.DETECTOR])))
dbl_hits = np.zeros((len(data[self.DETECTOR])))
for i, (image, r, rd) in enumerate(zip(data[self.DETECTOR], ret, retd)):
if method=='manual':
c, d = self.centroid_one(
image.values[self.X_RANGE, self.Y_RANGE])
elif method=='auto':
energy = data['nrj'][i].data
c, d = self.centroid_two(
image.values[self.X_RANGE, self.Y_RANGE], energy)
if not len(c):
continue
rc = np.array(c)
r[:], _ = np.histogram(
rc[:, 0] - self.parabola(rc[:, 1]),
bins=bins, range=(0, self.Y_RANGE.stop - self.Y_RANGE.start))
total_hits[i] = rc.shape[0]
rcd = np.array(d)
dbl_hits[i] = rcd.shape[0]
# Account for case where no double hits are found
if rcd.shape[0] == 0:
continue
else:
rd[:], _ = np.histogram(
rcd[:, 0] - self.parabola(rcd[:, 1]),
bins=bins, range=(0, self.Y_RANGE.stop - self.Y_RANGE.start))
data.coords["energy"] = (
np.linspace(self.Y_RANGE.start, self.Y_RANGE.stop, bins)
* self.ENERGY_SLOPE + self.ENERGY_INTERCEPT)
data['spectrum'] = (("trainId", "energy"), ret)
data['dbl_spectrum'] = (("trainId", "energy"), retd)
data['total_hits'] = ("trainId", total_hits)
data['double_hits'] = ("trainId", dbl_hits)
return data
def parabola(self, x):
return (self.CURVE_B * x + self.CURVE_A) * x
def integrate(self, data):
"""calculate a spectrum by integration
This takes the `xarray` `data` and returns a copy of it, with a new
dataarray named `spectrum` added, which contains the energy spectrum
calculated for each hRIXS image.
First the energy that corresponds to each pixel is calculated.
Then all pixels within an energy range are summed, where the intensity
of one pixel is distributed among the two energy ranges the pixel
spans, proportionally to the overlap between the pixel and bin energy
ranges.
The resulting data is normalized per pixel, i.e. it gives the average
intensity that arrived on one pixel.
Example
-------
h.integrate(data) # create spectrum by summing pixels
data.spectrum[0, :].plot() # plot the spectrum of the first image
"""
bins = self.Y_RANGE.stop - self.Y_RANGE.start
margin = 10
ret = np.zeros((len(data[self.DETECTOR]), bins - 2 * margin))
if self.USE_DARK:
dark_image = self.dark_image.values[self.X_RANGE, self.Y_RANGE]
images = data[self.DETECTOR].values[:, self.X_RANGE, self.Y_RANGE]
x, y = np.ogrid[:images.shape[1], :images.shape[2]]
quo, rem = divmod(y - self.parabola(x), 1)
quo = np.array([quo, quo + 1])
rem = np.array([rem, 1 - rem])
wrong = (quo < margin) | (quo >= bins - margin)
quo[wrong] = margin
rem[wrong] = 0
quo = (quo - margin).astype(int).ravel()
for image, r in zip(images, ret):
if self.USE_DARK:
image = image - dark_image
r[:] = np.bincount(quo, weights=(rem * image).ravel())
ret /= np.bincount(quo, weights=rem.ravel())
data.coords["energy"] = (
np.arange(self.Y_RANGE.start + margin, self.Y_RANGE.stop - margin)
* self.ENERGY_SLOPE + self.ENERGY_INTERCEPT)
data['spectrum'] = (("trainId", "energy"), ret)
return data
aggregators = dict(
hRIXS_det=lambda x, dim: x.sum(dim=dim),
MaranaX=lambda x, dim: x.sum(dim=dim),
Delay=lambda x, dim: x.mean(dim=dim),
hRIXS_delay=lambda x, dim: x.mean(dim=dim),
hRIXS_norm=lambda x, dim: x.sum(dim=dim),
spectrum=lambda x, dim: x.sum(dim=dim),
dbl_spectrum=lambda x, dim: x.sum(dim=dim),
total_hits=lambda x, dim: x.sum(dim=dim),
dbl_hits=lambda x, dim: x.sum(dim=dim),
counts=lambda x, dim: x.sum(dim=dim)
)
def aggregator(self, da, dim):
agg = self.aggregators.get(da.name)
if agg is None:
return None
return agg(da, dim=dim)
def aggregate(self, ds, var=None, dim="trainId"):
"""aggregate (i.e. mostly sum) all data within one dataset
take all images in a dataset and aggregate them and their metadata.
For images, spectra and normalizations that means adding them, for
others (e.g. delays) adding would not make sense, so we treat them
properly. The aggregation functions of each variable are defined
in the aggregators attribute of the class.
If var is specified, group the dataset by var prior to aggregation.
A new variable "counts" gives the number of frames aggregated in
each group.
Parameters
----------
ds: xarray Dataset
the dataset containing RIXS data
var: string
One of the variables in the dataset. If var is specified, the
dataset is grouped by var prior to aggregation. This is useful
for sorting e.g. a dataset that contains multiple delays.
dim: string
the dimension over which to aggregate the data
Example
-------
h.centroid(data) # create spectra from finding photons
agg = h.aggregate(data) # sum all spectra
agg.spectrum.plot() # plot the resulting spectrum
agg2 = h.aggregate(data, 'hRIXS_delay') # group data by delay
agg2.spectrum[0, :].plot() # plot the spectrum for first value
"""
ds["counts"] = xr.ones_like(ds[dim])
if var is not None:
groups = ds.groupby(var)
return groups.map(self.aggregate_ds, dim=dim)
return self.aggregate_ds(ds, dim)
def aggregate_ds(self, ds, dim='trainId'):
ret = ds.map(self.aggregator, dim=dim)
ret = ret.drop_vars([n for n in ret if n not in self.aggregators])
return ret
def normalize(self, data, which="hRIXS_norm"):
""" Adds a 'normalized' variable to the dataset defined as the
ration between 'spectrum' and 'which'
Parameters
----------
data: xarray Dataset
the dataset containing hRIXS data
which: string, default="hRIXS_norm"
one of the variables of the dataset, usually "hRIXS_norm"
or "counts"
"""
return data.assign(normalized=data["spectrum"] / data[which])
class MaranaX(hRIXS):
"""
    A spin-off of the hRIXS class, with parallelized centroiding.
"""
NUM_MAX_HITS = 30
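    # pasha is used with a multiprocessing context; the per-train histograms
    # are written by the worker processes directly into the arrays allocated
    # with psh.alloc() in centroid() and centroid_from_run().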
psh.set_default_context('processes', num_workers=20)
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self._centroid_result = None
def centroid(self, data, bins=None, **kwargs):
        # 0.a Set up constants that we might use
if bins is None:
bins = self.BINS
images = data[self.DETECTOR]
        # 0.b. Allocate output arrays (the naming is admittedly a bit confusing)
num_images, _, Nx = images.shape
self._centroid_result = {
'total_hist': psh.alloc(shape=(num_images, bins)),
'double_hist': psh.alloc(shape=(num_images, bins)),
'total_hits': psh.alloc(shape=(num_images,)),
'double_hits': psh.alloc(shape=(num_images,)),
}
        # 0.c Use an xr.DataArray that pasha understands
data_array = images.assign_coords(nrj=data['nrj'])
# 1. Calculate with parallelization
psh.map(self._centroid_tb_map, data_array)
# 2. Finalize: set the results back to the dataset
data.coords["energy"] = (
np.linspace(self.Y_RANGE.start or 0, self.Y_RANGE.stop or Nx, bins)
* self.ENERGY_SLOPE + self.ENERGY_INTERCEPT)
data['spectrum'] = (("trainId", "energy"),
self._centroid_result['total_hist'])
data['dbl_spectrum'] = (("trainId", "energy"),
self._centroid_result['double_hist'])
data['total_hits'] = ("trainId",
self._centroid_result['total_hits'])
data['double_hits'] = ("trainId",
self._centroid_result['double_hits'])
return data
def _centroid_tb_map(self, _, index, data):
self._centroid_map(index,
image=data.values,
energy=data.coords['nrj'].item())
def _centroid_map(self, index, *, image, energy):
total, double = self._centroid_task(index, image, energy)
# Check if there are results: don't do anything otherwise
if not len(total):
return
self._histogram_task(index, total, double,
default_range=(0, image.shape[1]))
def _centroid_task(self, index, image, energy):
# Calculate the centroid and store it on the allocated output array
total, double = self.centroid_two(image[self.X_RANGE, self.Y_RANGE],
energy)
total, double = np.array(total), np.array(double)
total_hits_list = self._centroid_result.get('total_hits_list')
if total_hits_list is not None and len(total):
total_hits_list[index][:min(len(total), self.NUM_MAX_HITS)] \
= total[:self.NUM_MAX_HITS]
double_hits_list = self._centroid_result.get('double_hits_list')
if double_hits_list is not None and len(double):
double_hits_list[index][:min(len(double), self.NUM_MAX_HITS)] \
= double[:self.NUM_MAX_HITS]
return total, double
def _histogram_task(self, index, total, double, default_range):
# Prepare
_, bins = self._centroid_result['total_hist'].shape
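        # When Y_RANGE is open-ended (start or stop is None), the histogram
        # range falls back to the full image width given by default_range.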
hist_range = (self.Y_RANGE.start or default_range[0],
self.Y_RANGE.stop or default_range[1])
# Calculate total spectrum
self._centroid_result['total_hist'][index], _ = np.histogram(
total[:, 0] - self.parabola(total[:, 1]),
bins=bins, range=hist_range)
self._centroid_result['total_hits'][index] = len(total)
# Calculate double spectrum
double_hits = len(double)
if double_hits:
self._centroid_result['double_hist'][index], _ = np.histogram(
double[:, 0] - self.parabola(double[:, 1]),
bins=bins, range=hist_range)
self._centroid_result['double_hits'][index] = double_hits
def centroid_from_run(self, runNB, proposal=None, extra_fields=(),
drop_first=False, subset=None, bins=None,
return_hits=False):
"""
        Combines `from_run()` and `centroid()`, using `extra_data` and
        `pasha` to avoid loading all image data into memory at once.
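
        Example
        -------
        # a minimal sketch, assuming `m` is a configured MaranaX instance
        # and `runNB` a run number within the configured proposal
        data = m.centroid_from_run(runNB, extra_fields=('hRIXS_delay',),
                                   return_hits=True)
        data.spectrum[0, :].plot()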
"""
        # 0.a Set up constants that we might use
if proposal is None:
proposal = self.PROPOSAL
if drop_first:
subset = slice(1, None)
if bins is None:
bins = self.BINS
run_mnemo = set(self.DETECTOR_FIELDS['MaranaX']) | set(extra_fields)
# 0.b. Open the run with extra-data and select the relevant fields
run = open_run(proposal, runNB)
if subset is not None:
run = run.select_trains(subset)
        # Filter out mnemonics that do not exist in the run files
        run_mnemo = {mnemo for mnemo in run_mnemo
                     if self._is_mnemo_in_run(mnemo, run)}
assert set(self.DETECTOR_FIELDS['MaranaX']).issubset(run_mnemo)
sources = [self._mnemo_to_prop(mnemo)[0] for mnemo in run_mnemo]
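        # require_all=True keeps only trains for which every selected source
        # has data, so the detector images and the extra fields stay aligned.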
selection = run.select([source for source in sources
if source in run.all_sources],
require_all=True)
        # 0.c. Allocate output arrays (the naming is admittedly a bit confusing)
mara_source, mara_key = self._mnemo_to_prop('MaranaX')
num_images, _, Nx = selection[mara_source, mara_key].shape
self._centroid_result = {
'total_hist': psh.alloc(shape=(num_images, bins)),
'double_hist': psh.alloc(shape=(num_images, bins)),
'total_hits': psh.alloc(shape=(num_images,)),
'double_hits': psh.alloc(shape=(num_images,)),
}
if return_hits:
self._centroid_result['total_hits_list'] = psh.alloc(
shape=(num_images, self.NUM_MAX_HITS, 2), fill=np.nan)
self._centroid_result['double_hits_list'] = psh.alloc(
shape=(num_images, self.NUM_MAX_HITS, 2), fill=np.nan)
# 1. Calculate with parallelization
psh.map(self._centroid_ed_map, selection)
        # 2. Finalize: build the dataset from the remaining mnemonics and
        # attach the centroiding results.
data = xr.merge(
[selection.get_array(*self._mnemo_to_prop(mnemo), name=mnemo)
for mnemo in run_mnemo - {'MaranaX'}], join='inner')
data.coords["energy"] = (
np.linspace(self.Y_RANGE.start or 0, self.Y_RANGE.stop or Nx, bins)
* self.ENERGY_SLOPE + self.ENERGY_INTERCEPT)
data['spectrum'] = (("trainId", "energy"),
self._centroid_result['total_hist'])
data['dbl_spectrum'] = (("trainId", "energy"),
self._centroid_result['double_hist'])
data['total_hits'] = ("trainId",
self._centroid_result['total_hits'])
data['double_hits'] = ("trainId",
self._centroid_result['double_hits'])
if return_hits:
data['total_hits_list'] = (
('trainId', 'hits', 'coord'),
self._centroid_result['total_hits_list'])
data['double_hits_list'] = (
('trainId', 'hits', 'coord'),
self._centroid_result['double_hits_list'])
# Add attributes
data.attrs.update(
CENTROID_THRESHOLD=self.CENTROID_THRESHOLD,
NUM_BINS=bins,
CURVATURE_CORRECTION=[self.CURVE_A, self.CURVE_B],
ENERGY_CALIBRATION=[self.ENERGY_SLOPE, self.ENERGY_INTERCEPT],
Y_RANGE=[self.Y_RANGE.start or 0,
self.Y_RANGE.stop or Nx]
)
return data
def _centroid_ed_map(self, _, index, trainId, data):
mara_source, mara_key = self._mnemo_to_prop('MaranaX')
nrj_source, nrj_key = self._mnemo_to_prop('nrj')
self._centroid_map(
index,
image=data[mara_source][mara_key],
energy=data[nrj_source][nrj_key])
@staticmethod
def _mnemo_to_prop(mnemo):
prop = tb.constants.mnemonics[mnemo][0]
return prop['source'], prop['key']
def _is_mnemo_in_run(self, mnemo, run):
source, key = self._mnemo_to_prop(mnemo)
if source not in run.all_sources:
return False
return key in run.keys_for_source(source)