lib2828.py

from euxfel_bunch_pattern import indices_at_sase  # Installable from PyPI
import numpy as np
import string
from scipy.interpolate import interp1d
from pathlib import Path

detectors = np.genfromtxt(Path(__file__).parent / 'detectors.txt', names=True, dtype=('|U5', '|U4', '|U3', '<f8'))

def correct_adq_common_mode(trace, region, sym):
    """Baseline substraction based on common mode.
    
    Since ADQ digitizers always interleave multiple ADCs per channel to sample
    a trace, regular baseline substraction will cause an ADC-dependant common
    mode to appear. This correction directly uses a per-ADC baseline instead
    to perform include this in the substraction. 
    
    Empirical testing has shown that a symmetry of 8 (should be 4 for
    non-interleaved) regularly shows better results in suppressing high
    frequency signals from the common mode. Going 16 or higher is not recommend
    in most cases, as additional artifacts appear. 
    
    Args:
        trace (array_like): Vector to correct, will be modified in-place.
        region (slice): Region to use for computing baseline.
        sym (int): Periodic symmetry of ADC common mode.
        
    Returns:
        (array_like) Corrected vector, same as trace.
    """
    
    for x in range(sym):
        trace[x::sym] -= trace.astype(np.float32)[region][x::sym].mean()
        
    return trace

def separate_pulses(trace, pulse_spacing):
    """Separate vector into pieces on an integer boundary.
    
    Args:
        trace (array_like): Vector to split.
        pulse_spacing (int): Pulse length in samples.
        
    Returns:
        (array_like) Separated vector.
    """

    return trace[:(len(trace) // pulse_spacing) * pulse_spacing].reshape(-1, pulse_spacing)


def preprocess(
    traces, ppt, xgm=None,
    sase=3, xgm_normalize=False,
    adq_baseline_region=np.s_[:1000], adq_baseline_symmetry=8,
    adq_train_region=np.s_[30000:], adq_pulse_region=np.s_[:5000],
    adq_interleaved=False
):
    """Pre-process raw data into "cookiebox images".
    
    Raw digitzer traces containing train data are baseline corrected, split into
    pulses based on the pulse pattern table and optionally normalized by XGM
    fast data.
    
    Important optional parameters:
    
      * adq_baseline_region: This slice *must* refer to a section with no signal.
      * adq_train_region: This slice is used for pulse separation and needs to
            start with the first pulse. Leaving additional room in the front may
            cause empty pulses to be created. As the number of pulses are known,
            it may end with the trace.
      * adq_pulse_region: This slice is applied within each seperated pulse
            spectrum to reduce its size to the actual experimental signal.
      * adq_interleaved: Knowledge of whether the digitizer runs in interleaved
            mode or not is essential to compute the pulse length.
    
    Args:
        traces (Iterable of array_like): Digitizer traces containing TOF spectra.
        ppt (array_like): Pulse pattern table from time server device.
        xgm (array_like, optional): XGM fast data.
        sase (int, optional): Which SASE the experiments run at, 3 by default.
        xgm_normalize (bool, optional): Whether data should be normalized by
            fast XGM intensity, disabled by default.
        adq_baseline_region (slice, optional): Region with no signal to use for
            baseline correction, :1000 by default.
        adq_baseline_symmetry (int, optional): Common mode symmetry for baseline
            correction, 8 by default.
        adq_train_region (slice, optional): Region containing actual signal for
            the entire train.
        adq_pulse_region (slice, optional): Region after pulse separation
            containing actual signal for the pulse.
        adq_interleaved (bool, optional): Whether digitizer operates in interleaved
            mode at 4 GS/s or regular at 2 GS/s.
        
    Returns:
        (array_like, array_like) Detector images of shape (pulse, angle, tof) and
            vector of correspondig pulse IDs taken from PPT indices.
        
    Raises:
        ValueError: When xgm is not passed while xgm_normalize is enabled.
        RuntimeError: When the pulse pattern table shows no pulses in the
            specified SASE.
        AssertionError: When no digitzer trace is obtained.
    """
        
    if xgm is None and xgm_normalize:
        raise ValueError('no xgm data passed for xgm normalization')
    
    # Look for pulses going to the specified SASE in the pulse pattern table.
    pulse_ids = indices_at_sase(ppt, sase=sase)
    num_pulses = len(pulse_ids)
        
    if num_pulses == 0:
        # What are we even doing here?
        raise RuntimeError(f'no pulses in SASE{sase}')
    elif num_pulses > 1:
        # ADQ412s run at a clock factor of 440 relative to the PPT un-interleaved.
        pulse_spacing = (pulse_ids[1] - pulse_ids[0]) * (880 if adq_interleaved else 440)
        
    corr_traces = []
    
    # Perform common-mode correction on traces to get rid of ADC-dependent offsets.
    for i, trace in enumerate(traces):
        # uint16 -> float32 simplifies assumptions about math.
        trace = correct_adq_common_mode(trace.astype(np.float32), adq_baseline_region, adq_baseline_symmetry)
        
        if num_pulses > 1:
            trace = separate_pulses(trace[adq_train_region])[:, adq_pulse_region]
        else:
            trace = trace[adq_train_region][adq_pulse_region].reshape(1, -1)
            
        corr_traces.append(trace)
        
    assert len(corr_traces) > 1, 'at least one digitzer trace required'
        
    # Copy it into nice contiguous memory (pulse, angle, tof)
    spectra = np.zeros((num_pulses, len(corr_traces), corr_traces[0].shape[1]))
    
    for i, traces_by_pulse in enumerate(corr_traces):
        spectra[:, i, :] = traces_by_pulse
        
    if xgm_normalize:
        spectra /= xgm[:num_pulses]
    
    return spectra, pulse_ids


def energy_calibration(spectra, pulse_ids, calib_params, samplerate, energy_nodes, valid_energies):
    def f(t, a, b, c, d):
        return a / t**3 + b / t**2 + c / t + d
    
    def df(t, a, b, c, d):
        return -3 * a / t**4 - 2 * b / t**3 - c / t**2
    
    t = np.arange(spectra.shape[2]) / samplerate
    E = f(1e-9 * t[:, None], *calib_params)[valid_energies].T
    dE = df(1e-9 * t[:, None], *calib_params)[valid_energies].T
    transformed = -spectra[:,:,valid_energies] / dE
    calib_spectra = (np.asarray([interp1d(e, t)(energy_nodes) for e, t in zip(E, transformed.transpose(1, 0, 2))])).transpose(1, 0, 2) if energy_nodes is not None else None
    return E, transformed, energy_nodes, calib_spectra, t[valid_energies]