# bunch_pattern.py

# -*- coding: utf-8 -*-
""" Toolbox for SCS.

    Various utility functions to quickly process data measured at the SCS instruments.

    Copyright (2019) SCS Team.
"""

import numpy as np
import xarray as xr
import ToolBox as tb
import os
from extra_data.read_machinery import find_proposal
from extra_data import RunDirectory

def extractBunchPattern(bp_table=None, key='sase3', runDir=None):
    ''' Build the pulse pattern of a source straight from the bunch pattern table.

        The MDL device BUNCH_DECODER is not used; the bit masks follow the
        euxfel_bunch_pattern package,
        https://git.xfel.eu/gitlab/karaboDevices/euxfel_bunch_pattern

        Inputs:
            bp_table: DataArray corresponding to the mnemonics "bunchPatternTable".
                      If None, the bunch pattern table is loaded using runDir.
            key: str, one of ['sase1', 'sase2', 'sase3', 'scs_ppl']
            runDir: extra-data DataCollection. Required only if bp_table is None.

        Outputs:
            bunchPattern: DataArray (trainId x bunchId) holding, for each train,
                the indices of the sase/laser pulses in ascending order. Unused
                slots are filled with 0 and only the first 1000 columns are kept.
            npulses: DataArray containing the number of pulses for each train
            matched: 2-D boolean DataArray mask with the shape of bp_table,
                True where 'key' has pulses

        Raises:
            ValueError: if key is not one of the accepted values, if bp_table
                and runDir are both None, or if the bunch pattern table source
                is not present in runDir.
    '''
    valid_keys = ['sase1', 'sase2', 'sase3', 'scs_ppl']
    if key not in valid_keys:
        raise ValueError(f'Invalid key "{key}", possible values are {valid_keys}')

    if bp_table is None:
        if runDir is None:
            raise ValueError('bp_table and runDir cannot both be None')
        mnemo = tb.mnemonics['bunchPatternTable']
        source = mnemo['source']
        if source not in runDir.all_sources:
            raise ValueError('Source {} not found in run'.format(source))
        bp_table = runDir.get_array(source, mnemo['key'],
                                    extra_dims=mnemo['dim'])

    # relevant bit masks, see euxfel_bunch_pattern package for details
    destination_mask = 0xf << 18
    destination_t4d = 4 << 18   # SASE1/3 dump
    destination_t5d = 2 << 18   # SASE2 dump
    photon_line_deflection = 1 << 27  # Soft kick (e.g. SA3)
    laser_seed6 = 1 << 13

    if key == 'scs_ppl':
        matched = (bp_table & laser_seed6) != 0
    else:
        # key is 'saseN': the 5th character is the SASE number
        sase = int(key[4])
        if sase == 2:
            destination = destination_t5d
        else:
            destination = destination_t4d
        matched = (bp_table & destination_mask) == destination
        soft_kick = bp_table & photon_line_deflection
        if sase == 1:
            # Pulses to SASE 1 when soft kick is off
            matched = matched & (soft_kick == 0)
        elif sase == 3:
            # Pulses to SASE 3 when soft kick is on
            matched = matched & (soft_kick != 0)

    # Table of pulse indices: start from a table filled with the sentinel
    # value n_slots (one past the largest valid index), write the matching
    # indices, then sort each row so that valid indices come first.
    hits = matched.values
    n_slots = hits.shape[1]
    train_idx, pulse_idx = np.nonzero(hits)
    table = np.full(hits.shape, n_slots, dtype=np.uint64)
    table[train_idx, pulse_idx] = pulse_idx
    table = np.sort(table)
    counts = np.count_nonzero(table < n_slots, axis=1)
    # replace the sentinel by 0 in the unused slots
    table[table == n_slots] = 0

    train_coords = {'trainId': matched.trainId}
    bunchPattern = xr.DataArray(table[:, :1000],
                                dims=['trainId', 'bunchId'],
                                coords=train_coords,
                                name=key)
    npulses = xr.DataArray(counts,
                           dims=['trainId'],
                           coords={'trainId': matched.trainId},
                           name=f'npulses_{key}')
    return bunchPattern, npulses, matched


def pulsePatternInfo(data, plot=False):
    ''' display general information on the pulse patterns operated by SASE1 and SASE3.
        This is useful to track changes of number of pulses or mode of operation of
        SASE1 and SASE3. It also determines which SASE comes first in the train and
        the minimum separation between the two SASE sub-trains.

        Inputs:
            data: xarray Dataset containing pulse pattern info from the bunch decoder MDL:
            {'sase1', 'sase3', 'npulses_sase1', 'npulses_sase3'}
            plot: bool enabling/disabling the plotting of the pulse patterns.
                matplotlib is only imported when plot is True.

        Outputs:
            print of pulse pattern info. If plot==True, plot of the pulse pattern.
            The function returns None.
    '''
    # Which SASE comes first?
    npulses_sa3 = data['npulses_sase3']
    npulses_sa1 = data['npulses_sase1']
    dedicated = False
    if np.all(npulses_sa1.where(npulses_sa3 != 0, drop=True) == 0):
        dedicated = True
        print('No SASE 1 pulses during SASE 3 operation')
    if np.all(npulses_sa3.where(npulses_sa1 != 0, drop=True) == 0):
        dedicated = True
        print('No SASE 3 pulses during SASE 1 operation')
    if not dedicated:
        # Keep only trains that contain pulses and pulse ids larger than 1
        # (presumably to discard the padding of the pattern arrays - TODO
        # confirm); everything else becomes NaN and is skipped by min()/max().
        ids_sa1 = data['sase1'].where(npulses_sa1 != 0).where(data['sase1'] > 1)
        ids_sa3 = data['sase3'].where(npulses_sa3 != 0).where(data['sase3'] > 1)
        pulseIdmin_sa1 = ids_sa1.min().values
        pulseIdmax_sa1 = ids_sa1.max().values
        pulseIdmin_sa3 = ids_sa3.min().values
        pulseIdmax_sa3 = ids_sa3.max().values
        # NOTE(review): the separation below uses 0.220 us per pulse id while
        # the repetition rate further down uses 222e-6 ms (0.222 us); the two
        # constants probably should agree - confirm before changing.
        if pulseIdmin_sa1 > pulseIdmax_sa3:
            t = 0.220*(pulseIdmin_sa1 - pulseIdmax_sa3 + 1)
            print('SASE 3 pulses come before SASE 1 pulses (minimum separation %.1f µs)'%t)
        elif pulseIdmin_sa3 > pulseIdmax_sa1:
            t = 0.220*(pulseIdmin_sa3 - pulseIdmax_sa1 + 1)
            print('SASE 1 pulses come before SASE 3 pulses (minimum separation %.1f µs)'%t)
        else:
            print('Interleaved mode')

    # What is the pulse pattern of each SASE?
    for key in ['sase3', 'sase1']:
        print('\n*** %s pulse pattern: ***'%key.upper())
        npulses = data['npulses_%s'%key]
        sase = data[key]
        if not np.all(npulses == npulses[0]):
            print('Warning: number of pulses per train changed during the run!')
        # take the derivative along the trainId to track changes in pulse number:
        diff = npulses.diff(dim='trainId')
        # only keep trainIds where a change occurred:
        diff = diff.where(diff != 0, drop=True)
        # get a list of indices where a change occurred:
        idx_change = np.argwhere(np.isin(npulses.trainId.values,
                                         diff.trainId.values, assume_unique=True))[:,0]
        # add index 0 to get the initial pulse number per train:
        idx_change = np.insert(idx_change, 0, 0)
        print('npulses\tindex From\tindex To\ttrainId From\ttrainId To\trep. rate [kHz]')
        for i, idx in enumerate(idx_change):
            n = npulses[idx]
            idxFrom = idx
            trainIdFrom = npulses.trainId[idx]
            # a segment ends right before the next change, or at the last train
            if i < len(idx_change)-1:
                idxTo = idx_change[i+1]-1
            else:
                idxTo = npulses.shape[0]-1
            trainIdTo = npulses.trainId[idxTo]
            if n <= 1:
                # a repetition rate needs at least two pulses in the train
                print('%i\t%i\t\t%i\t\t%i\t%i'%(n, idxFrom, idxTo, trainIdFrom, trainIdTo))
            else:
                # spacing of the first two pulses times 222e-6 ms gives kHz
                f = 1/((sase[idxFrom,1] - sase[idxFrom,0])*222e-6)
                print('%i\t%i\t\t%i\t\t%i\t%i\t%.0f'%(n, idxFrom, idxTo, trainIdFrom, trainIdTo, f))
    print('\n')
    if plot:
        # Imported here because the module does not import matplotlib at the
        # top; this also keeps the text-only mode free of that dependency.
        import matplotlib.pyplot as plt

        plt.figure(figsize=(6,3))
        plt.plot(data['npulses_sase3'].trainId, data['npulses_sase3'], 'o-',
                 ms=3, label='SASE 3')
        plt.xlabel('trainId')
        plt.ylabel('pulses per train')
        plt.plot(data['npulses_sase1'].trainId, data['npulses_sase1'], '^-',
                 ms=3, color='C2', label='SASE 1')
        plt.legend()
        plt.tight_layout()
        

def repRate(data=None, runNB=None, proposalNB=None, key='sase3'):
    ''' Calculates the pulse repetition rate (in kHz) in sase
        according to the bunch pattern and assuming a grid of
        4.5 MHz.
        Inputs:
            data: xarray Dataset containing pulse pattern, needed if runNB is None.
                For keys other than 'sase1' and 'sase3' the pattern is extracted
                from the run stored in data.attrs['run'].
            runNB: int or str, run number (e.g. 12, '12' or 'r0012'). Needed if
                data is None. If given, it takes precedence over data.
            proposalNB: int or str, proposal where to find the run (e.g. 2212,
                '2212' or 'p002212'). Needed if runNB is given.
            key: str in [sase1, sase2, sase3, scs_ppl], source for which the
                 repetition rate is calculated
        Output:
            f: repetition rate in kHz, computed from the spacing of the first
               two pulses of the first train that has more than one pulse.
               0 is returned (and a message printed) if no train has more
               than one pulse.
        Raises:
            ValueError: if neither data nor runNB is given, or if runNB is
                given without proposalNB.
    '''
    if runNB is None and data is None:
        raise ValueError('Please provide either the runNB + proposal or the data argument.')
    if runNB is not None and proposalNB is None:
        raise ValueError('Proposal is missing.')
    if runNB is not None:
        # Bring plain numbers (int or digit-only str) to the folder naming
        # scheme; already formatted strings are used as they are.
        if isinstance(runNB, int) or (isinstance(runNB, str) and runNB.isdigit()):
            runNB = 'r{:04d}'.format(int(runNB))
        if isinstance(proposalNB, int) or (isinstance(proposalNB, str)
                                           and proposalNB.isdigit()):
            proposalNB = 'p{:06d}'.format(int(proposalNB))
        runFolder = os.path.join(find_proposal(proposalNB), 'raw', runNB)
        runDir = RunDirectory(runFolder)
        # extractBunchPattern loads the bunch pattern table from runDir and
        # raises ValueError if its source is missing from the run.
        a, b, _ = extractBunchPattern(key=key, runDir=runDir)
    elif key not in ['sase1', 'sase3']:
        a, b, _ = extractBunchPattern(key=key, runDir=data.attrs['run'])
    else:
        a = data[key]
        b = data[f'npulses_{key}']
    # only trains with at least two pulses allow measuring a spacing
    a = a.where(b > 1, drop=True).values
    if len(a) == 0:
        print('Not enough pulses to extract repetition rate')
        return 0
    # 12e-3/54.1666667 (about 2.2154e-4) is the duration in ms of one pulse id
    # step, hence the inverse of the spacing is in kHz.
    f = 1/((a[0,1] - a[0,0])*12e-3/54.1666667)
    return f

def sortBAMdata(data, key='scs_ppl', sa3Offset=0):
    ''' Selects, in the raw beam arrival monitor arrays ('BAM6', 'BAM7', etc...),
        the values that belong to the pulses of a given source, according to
        the bunchPatternTable.

        The BAM arrays contain 7220 values, which corresponds to FLASH burst
        length of 800 us @ 9 MHz. The bunchPatternTable only has 2700 values,
        corresponding to XFEL 600 us burst length @ 4.5 MHz. Hence, the BAM
        arrays are truncated to 5400 with a stride of 2 and matched to the
        bunchPatternTable. If key is one of the sase, the given dimension name
        of the bam arrays is 'sa[sase number]_pId', to match other data (XGM,
        TIM...). If key is 'scs_ppl', the dimension is named 'ol_pId'.

        Inputs:
            data: xarray Dataset containing BAM arrays. The run is read from
                data.attrs['run'].
            key: str, ['sase1', 'sase2', 'sase3', 'scs_ppl']
            sa3Offset: int, used if key=='scs_ppl'. Offset in number of pulse_id
                between the first OL and FEL pulses. An offset of 40 means that
                the first laser pulse comes 40 pulse_id later than the FEL on a
                grid of 4.5 MHz. Negative values shift the laser pulse before
                the FEL one.
        Output:
            ndata: xarray Dataset with same keys as input data (but new bam arrays)
                and a copy of the attributes of data.
    '''
    run = data.attrs['run']
    _, _, mask = extractBunchPattern(key=key, runDir=run)
    if key == 'scs_ppl':
        # shift the laser mask by the position of the first SASE 3 pulse
        # (first train that has SASE 3 pulses) plus the requested offset
        sa3_ids, sa3_npulses, _ = extractBunchPattern(key='sase3', runDir=run)
        first_sa3 = sa3_ids.where(sa3_npulses > 0, drop=True)[0, 0]
        firstSa3_pId = first_sa3.values.astype(int)
        mask = mask.roll(pulse_slot=firstSa3_pId + sa3Offset)
    mask = mask.rename({'pulse_slot': 'BAMbunchId'})

    # name given to the pulse dimension of the sorted BAM arrays
    if 'sase' in key:
        pulse_dim = f'sa{key[4]}_pId'
    elif key == 'scs_ppl':
        pulse_dim = 'ol_pId'
    else:
        pulse_dim = 'bam_pId'

    bam_keys = [k for k in data if 'BAM' in k]
    # every second value of the first 5400, then only the masked pulses
    sorted_bams = [
        data[k]
        .isel(BAMbunchId=slice(0, 5400, 2))
        .where(mask, drop=True)
        .rename({'BAMbunchId': pulse_dim})
        for k in bam_keys
    ]
    # the raw BAM arrays are replaced by their sorted counterparts
    remaining = data.drop(bam_keys)
    ndata = xr.merge(sorted_bams + [remaining], join='inner')
    ndata.attrs.update(data.attrs)
    return ndata