From 5f85ed01303319188126f88dc2c8e9633d3f420f Mon Sep 17 00:00:00 2001
From: Laurent Mercadier <laurent.mercadier@xfel.eu>
Date: Sat, 26 Oct 2019 08:17:13 +0200
Subject: [PATCH 1/7] Simplified getTIMapd() for case with no change of number
 of pulses

---
 xgm.py | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/xgm.py b/xgm.py
index 400ae78..506bfcc 100644
--- a/xgm.py
+++ b/xgm.py
@@ -518,7 +518,7 @@ def mcpPeaks(data, intstart, intstop, bkgstart, bkgstop, mcp=1, t_offset=None, n
                 pattern and determine the t_offset assuming mininum pulse 
                 separation of 220 ns and digitizer resolution of 2 GHz.
             npulses: number of pulses. If None, takes the maximum number of
-                pulses according to the bunch patter (field 'npulses_sase3')
+                pulses according to the bunch pattern (field 'npulses_sase3')
             
         Output:
             results: DataArray with dims trainId x max(sase3 pulses) 
@@ -592,7 +592,32 @@ def getTIMapd(data, mcp=1, use_apd=True, intstart=None, intstop=None,
                        t_offset=t_offset, npulses=npulses)
         return tim
     
+    #1. case where number of SASE 3 pulses were not changed during the run:
     sa3 = data['sase3'].where(data['sase3']>1, drop=True)
+    sa3 -= sa3[0,0]
+    idx = np.unique(sa3, axis=0)
+    npulses_sa3 = data['npulses_sase3']
+    maxpulses = int(npulses_sa3.max().values)
+    if npulses is not None:
+        maxpulses = np.min([npulses, maxpulses])
+    if len(idx)==1:
+        if use_apd:
+            apd = data['MCP{}apd'.format(mcp)]
+            initialDelay = data.attrs['run'].get_array(
+                            'SCS_UTC1_ADQ/ADC/1', 'board1.apd.channel_0.initialDelay.value')[0].values
+            upperLimit = data.attrs['run'].get_array(
+                            'SCS_UTC1_ADQ/ADC/1', 'board1.apd.channel_0.upperLimit.value')[0].values
+            period = upperLimit - initialDelay #period of the apd in number of digitizer samples
+            idx /= int(period/440) #440 samples correspond to the separation between two pulses at 4.5 MHz
+            if len(idx)==1:
+                idx = idx[0].astype(int)
+                tim = apd.isel(apdId=idx)
+                return tim
+        else:
+            apd = mcpPeaks(data, intstart, intstop, bkgstart, bkgstop, mcp=mcp, 
+                           t_offset=t_offset, npulses=maxpulses)
+    
+    #2. case where the number of SASE 3 pulses varied during the run:
     npulses_sa3 = data['npulses_sase3']
     maxpulses = int(npulses_sa3.max().values)
     if npulses is not None:
-- 
GitLab


From 9eaa6b24d596f6146d12e3cc0f34c2ce327d9938 Mon Sep 17 00:00:00 2001
From: Laurent Mercadier <laurent.mercadier@xfel.eu>
Date: Sat, 26 Oct 2019 08:24:15 +0200
Subject: [PATCH 2/7] Cleaned up

---
 xgm.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/xgm.py b/xgm.py
index 506bfcc..881b8bd 100644
--- a/xgm.py
+++ b/xgm.py
@@ -609,12 +609,10 @@ def getTIMapd(data, mcp=1, use_apd=True, intstart=None, intstop=None,
                             'SCS_UTC1_ADQ/ADC/1', 'board1.apd.channel_0.upperLimit.value')[0].values
             period = upperLimit - initialDelay #period of the apd in number of digitizer samples
             idx /= int(period/440) #440 samples correspond to the separation between two pulses at 4.5 MHz
-            if len(idx)==1:
-                idx = idx[0].astype(int)
-                tim = apd.isel(apdId=idx)
-                return tim
+            idx = idx[0].astype(int)
+            return apd.isel(apdId=idx)
         else:
-            apd = mcpPeaks(data, intstart, intstop, bkgstart, bkgstop, mcp=mcp, 
+            return mcpPeaks(data, intstart, intstop, bkgstart, bkgstop, mcp=mcp, 
                            t_offset=t_offset, npulses=maxpulses)
     
     #2. case where the number of SASE 3 pulses varied during the run:
-- 
GitLab


From 3c42d1dbe1c772e7ac219caf9dc18a8a1dce18a5 Mon Sep 17 00:00:00 2001
From: Laurent Mercadier <laurent.mercadier@xfel.eu>
Date: Sat, 26 Oct 2019 23:25:00 +0200
Subject: [PATCH 3/7] Uses mask according to bunch pattern in getTIMapd()

---
 xgm.py | 113 +++++++++++++++++----------------------------------------
 1 file changed, 34 insertions(+), 79 deletions(-)

diff --git a/xgm.py b/xgm.py
index 881b8bd..a41e746 100644
--- a/xgm.py
+++ b/xgm.py
@@ -120,7 +120,7 @@ def cleanXGMdata(data, npulses=None, sase3First=True):
     ''' Cleans the XGM data arrays obtained from load() function.
         The XGM "TD" data arrays have arbitrary size of 1000 and default value 1.0
         when there is no pulse. This function sorts the SASE 1 and SASE 3 pulses.
-        For recent DAQ runs, sase-resolved arrays can be used. For older runs,
+        For DAQ runs after April 2019, sase-resolved arrays can be used. For older runs,
         the function selectSASEinXGM can be used to extract sase-resolved pulses.
         Inputs:
             data: xarray Dataset containing XGM TD arrays.
@@ -176,7 +176,7 @@ def cleanXGMdata(data, npulses=None, sase3First=True):
         dropList.append(key)
         mergeList.append(res)
     mergeList.append(data.drop(dropList))
-    subset = xr.merge(mergeList, join='inner')
+    subset = xr.merge(mergeList, join='outer')
     for k in data.attrs.keys():
         subset.attrs[k] = data.attrs[k]
     return subset
@@ -539,13 +539,15 @@ def mcpPeaks(data, intstart, intstop, bkgstart, bkgstop, mcp=1, t_offset=None, n
             t_offset = 440 * step
         else:
             t_offset = 1
-    results = xr.DataArray(np.empty((data.trainId.shape[0], npulses)), coords=data[keyraw].coords,
+    results = xr.DataArray(np.zeros((data.trainId.shape[0], npulses)), coords=data[keyraw].coords,
                            dims=['trainId', 'MCP{}fromRaw'.format(mcp)])
     for i in range(npulses):
         a = intstart + t_offset*i
         b = intstop + t_offset*i
         bkga = bkgstart + t_offset*i
         bkgb = bkgstop + t_offset*i
+        if b > data.dims['samplesId']:
+            break
         bg = np.outer(np.median(data[keyraw][:,bkga:bkgb], axis=1), np.ones(b-a))
         results[:,i] = np.trapz(data[keyraw][:,a:b] - bg, axis=1)
     return results
@@ -578,6 +580,7 @@ def getTIMapd(data, mcp=1, use_apd=True, intstart=None, intstop=None,
             tim: DataArray of shape trainId only for SASE3 pulses x N 
                  with N=max(number of pulses per train)
     '''
+    #1. case where no bunch pattern is available:
     if 'sase3' not in data:
         print('Missing bunch pattern info!\n')
         if npulses is None:
@@ -586,91 +589,43 @@ def getTIMapd(data, mcp=1, use_apd=True, intstart=None, intstop=None,
         print('Retrieving {} SASE 3 pulses assuming that '.format(npulses) +
                'SASE 3 pulses come first.')
         if use_apd:
-            tim = data['MCP{}apd'.format(mcp)][:,:npulses:stride]
+            tim = data[f'MCP{mcp}apd'][:,:npulses:stride]
         else:
             tim = mcpPeaks(data, intstart, intstop, bkgstart, bkgstop, mcp=mcp, 
                        t_offset=t_offset, npulses=npulses)
         return tim
     
-    #1. case where number of SASE 3 pulses were not changed during the run:
+    #2. If bunch pattern available, define a mask that corresponds to the SASE 3 pulses
+    initialDelay = data.attrs['run'].get_array(
+                    'SCS_UTC1_ADQ/ADC/1', 'board1.apd.channel_0.initialDelay.value')[0].values
+    upperLimit = data.attrs['run'].get_array(
+                    'SCS_UTC1_ADQ/ADC/1', 'board1.apd.channel_0.upperLimit.value')[0].values
+    period = upperLimit - initialDelay #period of the apd in number of digitizer samples
     sa3 = data['sase3'].where(data['sase3']>1, drop=True)
     sa3 -= sa3[0,0]
-    idx = np.unique(sa3, axis=0)
-    npulses_sa3 = data['npulses_sase3']
-    maxpulses = int(npulses_sa3.max().values)
-    if npulses is not None:
-        maxpulses = np.min([npulses, maxpulses])
-    if len(idx)==1:
-        if use_apd:
-            apd = data['MCP{}apd'.format(mcp)]
-            initialDelay = data.attrs['run'].get_array(
-                            'SCS_UTC1_ADQ/ADC/1', 'board1.apd.channel_0.initialDelay.value')[0].values
-            upperLimit = data.attrs['run'].get_array(
-                            'SCS_UTC1_ADQ/ADC/1', 'board1.apd.channel_0.upperLimit.value')[0].values
-            period = upperLimit - initialDelay #period of the apd in number of digitizer samples
-            idx /= int(period/440) #440 samples correspond to the separation between two pulses at 4.5 MHz
-            idx = idx[0].astype(int)
-            return apd.isel(apdId=idx)
-        else:
-            return mcpPeaks(data, intstart, intstop, bkgstart, bkgstop, mcp=mcp, 
-                           t_offset=t_offset, npulses=maxpulses)
+    sa3 = sa3/int(period/440) #440 = samples between two pulses @4.5 MHz with ADQ412 digitizer
+    idxList, inv = np.unique(sa3, axis=0, return_inverse=True)
+    mask = xr.DataArray(np.zeros((data.dims['trainId'], data.dims['apdId']), dtype=bool), 
+                        dims=['trainId', 'apdId'],
+                        coords={'trainId':data.trainId, 
+                                'apdId':np.arange(data.dims['apdId'])}, 
+                        name='mask')
+    mask = mask.sel(trainId=sa3.trainId)
+    for i,idxApd in enumerate(idxList):
+        idxApd = idxApd[idxApd>=0].astype(int)
+        idxTid = inv==i
+        mask[idxTid, idxApd] = True
     
-    #2. case where the number of SASE 3 pulses varied during the run:
-    npulses_sa3 = data['npulses_sase3']
-    maxpulses = int(npulses_sa3.max().values)
-    if npulses is not None:
-        maxpulses = np.min([npulses, maxpulses])
-    step = 1
-    if maxpulses > 1:
-        #Calculate the number of non-lasing pulses between two lasing pulses (step)
-        step = sa3.where(data['npulses_sase3']>1, drop=True)[0,:2].values
-        step = int(step[1] - step[0])
+    #2.1 case where apd is used:
     if use_apd:
-        apd = data['MCP{}apd'.format(mcp)]
-        initialDelay = data.attrs['run'].get_array(
-                        'SCS_UTC1_ADQ/ADC/1', 'board1.apd.channel_0.initialDelay.value')[0].values
-        upperLimit = data.attrs['run'].get_array(
-                        'SCS_UTC1_ADQ/ADC/1', 'board1.apd.channel_0.upperLimit.value')[0].values
-        nsamples = upperLimit - initialDelay
-        npulses_per_apd = int(nsamples/440)
-        sa3 /= npulses_per_apd
+        return data[f'MCP{mcp}apd'].where(mask, drop=True)
+
+    #2.2 case where integration is performed on raw trace:
     else:
-        apd = mcpPeaks(data, intstart, intstop, bkgstart, bkgstop, mcp=mcp, 
-                       t_offset=t_offset, npulses=npulses)
-        sa3 /= step
-    sa3 -= sa3[:,0]
-    sa3 = sa3.astype(int)
-    if np.all(npulses_sa3 == npulses_sa3[0]):
-        tim = apd[:, sa3[0].values[:maxpulses]]
-        return tim
-    
-    stride = 1
-    if use_apd:
-        stride = np.max([stride,int(step/npulses_per_apd)])
-    diff = npulses_sa3.diff(dim='trainId')
-    #only keep trainIds where a change occured:
-    diff = diff.where(diff != 0, drop=True)
-    #get a list of indices where a change occured:
-    idx_change = np.argwhere(np.isin(npulses_sa3.trainId.values,
-                                     diff.trainId.values, assume_unique=True))[:,0]
-    #add index 0 to get the initial pulse number per train:
-    idx_change = np.insert(idx_change, 0, 0)
-    tim = None
-    for i,idx in enumerate(idx_change):
-        if npulses_sa3[idx]==0:
-            continue
-        if i==len(idx_change)-1:
-            l = None
-        else:
-            l = idx_change[i+1]
-        b = npulses_sa3[idx].values
-        temp = apd[idx:l,:maxpulses*stride:stride].copy()
-        temp[:,b:] = np.NaN
-        if tim is None:
-            tim = temp
-        else:
-            tim = xr.concat([tim, temp], dim='trainId')
-    return tim
+        peaks = mcpPeaks(data, intstart, intstop, bkgstart, bkgstop, mcp=mcp, 
+                               t_offset=period, npulses=data.dims['apdId'])
+        mask = mask.rename({'apdId':f'MCP{mcp}fromRaw'})
+        return peaks.where(mask, drop=True)
 
 
 def calibrateTIM(data, rollingWindow=200, mcp=1, plot=False, use_apd=True, intstart=None,
@@ -966,7 +921,7 @@ def matchXgmTimPulseId(data, use_apd=True, intstart=None, intstop=None,
     ndata = cleanXGMdata(data, npulses, sase3First)
     for mcp in range(1,5):
         if 'MCP{}apd'.format(mcp) in data or 'MCP{}raw'.format(mcp) in data:
-            MCPapd = getTIMapd(ndata, mcp=mcp, use_apd=use_apd, intstart=intstart,
+            MCPapd = getTIMapd(data, mcp=mcp, use_apd=use_apd, intstart=intstart,
                                intstop=intstop,bkgstart=bkgstart, bkgstop=bkgstop,
                                t_offset=t_offset, npulses=npulses,
                                stride=stride).rename('MCP{}apd'.format(mcp))
-- 
GitLab


From 971893a80c4d3ee1e7949694257edff13a81876b Mon Sep 17 00:00:00 2001
From: Laurent Mercadier <laurent.mercadier@xfel.eu>
Date: Sun, 27 Oct 2019 18:46:51 +0100
Subject: [PATCH 4/7] Simplifies selectSASEinXGM() by using mask based on bunch
 pattern

---
 xgm.py | 125 +++++++++++----------------------------------------------
 1 file changed, 23 insertions(+), 102 deletions(-)

diff --git a/xgm.py b/xgm.py
index a41e746..d7522cb 100644
--- a/xgm.py
+++ b/xgm.py
@@ -183,7 +183,8 @@ def cleanXGMdata(data, npulses=None, sase3First=True):
 
 
 def selectSASEinXGM(data, sase='sase3', xgm='SCS_XGM', sase3First=True, npulses=None):
-    ''' Extract SASE1- or SASE3-only XGM data.
+    ''' Given an array containing both SASE1 and SASE3 data, extracts SASE1-
+        or SASE3-only XGM data.
         There are various cases depending on i) the mode of operation (10 Hz
         with fresh bunch, dedicated trains to one SASE, pulse on demand), 
         ii) the potential change of number of pulses per train in each SASE
@@ -192,7 +193,7 @@ def selectSASEinXGM(data, sase='sase3', xgm='SCS_XGM', sase3First=True, npulses=
         Inputs:
             data: xarray Dataset containing xgm data
             sase: key of sase to select: {'sase1', 'sase3'}
-            xgm: key of xgm to select: {'SA3_XGM', 'SCS_XGM'}
+            xgm: key of xgm to select: {'XTD10_XGM', 'SCS_XGM'}
             sase3First: bool, optional. Used in case no bunch pattern was recorded
             npulses: int, optional. Required in case no bunch pattern was recorded.
             
@@ -202,6 +203,7 @@ def selectSASEinXGM(data, sase='sase3', xgm='SCS_XGM', sase3First=True, npulses=
             that sase in the run. The missing values, in case of change of number of pulses,
             are filled with NaNs.
     '''
+    #1. case where bunch pattern is missing:
     if sase not in data:
         print('Missing bunch pattern info!')
         if npulses is None:
@@ -221,108 +223,27 @@ def selectSASEinXGM(data, sase='sase3', xgm='SCS_XGM', sase3First=True, npulses=
             else:
                 start=xgmData.shape[1]-npulses
                 return xgmData[:,start:start+npulses]
-    result = None
-    npulses_sa3 = data['npulses_sase3']       
-    npulses_sa1 = data['npulses_sase1']
-    dedicated = 0
-    if np.all(npulses_sa1.where(npulses_sa3 !=0, drop=True) == 0):
-        dedicated += 1
-        print('No SASE 1 pulses during SASE 3 operation')
-    if np.all(npulses_sa3.where(npulses_sa1 !=0, drop=True) == 0):
-        dedicated += 1
-        print('No SASE 3 pulses during SASE 1 operation')
-    #Alternating pattern with dedicated pulses in SASE1 and SASE3:
-    if dedicated==2:
-        if sase=='sase1':
-            result = data[xgm].where(npulses_sa1>0, drop=True)[:,:npulses_sa1.max().values]
-        else:
-            result = data[xgm].where(npulses_sa3>0, drop=True)[:,:npulses_sa3.max().values]
-        result = result.where(result != 1.0)
-        return result
-    # SASE1 and SASE3 bunches in a same train: find minimum indices of first and
-    # maximum indices of last pulse per train
-    else:
-        pulseIdmin_sa1 = data['sase1'].where(npulses_sa1 != 0).where(data['sase1']>1).min().values
-        pulseIdmax_sa1 = data['sase1'].where(npulses_sa1 != 0).where(data['sase1']>1).max().values
-        pulseIdmin_sa3 = data['sase3'].where(npulses_sa3 != 0).where(data['sase3']>1).min().values
-        pulseIdmax_sa3 = data['sase3'].where(npulses_sa3 != 0).where(data['sase3']>1).max().values
-        if pulseIdmin_sa1 > pulseIdmax_sa3:
-            sa3First = True
-        elif pulseIdmin_sa3 > pulseIdmax_sa1:
-            sa3First = False
-        else:
-            print('Interleaved mode, but no sase-dedicated XGM data loaded.')
-            saseStr = 'SA{}'.format(sase[4])
-            xgmStr = xgm.split('_')[0]
-            print('Loading {}_{} data...'.format(xgmStr, saseStr))
-            try:
-                if npulses == None:
-                    npulses = data['npulses_sase{}'.format(sase[4])].max().values
-                if xgmStr == 'XTD10':
-                    source = 'SA3_XTD10_XGM/XGM/DOOCS:output'
-                if xgmStr == 'SCS':
-                    source = 'SCS_BLU_XGM/XGM/DOOCS:output'
-                key = 'data.intensitySa{}TD'.format(sase[4])
-                result = data.attrs['run'].get_array(source, key, extra_dims=['XGMbunchId'])
-                result = result.isel(XGMbunchId=slice(0, npulses))                
-                return result
-            except:
-                print('Could not load {}_{} data. '.format(xgmStr, saseStr) +
-                  'Interleaved mode and no sase-dedicated data is not yet supported.')
-
-    #take the derivative along the trainId to track changes in pulse number:
-    diff = npulses_sa3.diff(dim='trainId')
-    #only keep trainIds where a change occured:
-    diff = diff.where(diff != 0, drop=True)
-    #get a list of indices where a change occured:
-    idx_change_sa3 = np.argwhere(np.isin(npulses_sa3.trainId.values,
-                                     diff.trainId.values, assume_unique=True))[:,0]
-
-    #Same for SASE 1:
-    diff = npulses_sa1.diff(dim='trainId')
-    diff = diff.where(diff !=0, drop=True)
-    idx_change_sa1 = np.argwhere(np.isin(npulses_sa1.trainId.values,
-                                     diff.trainId.values, assume_unique=True))[:,0]
-
-    #create index that locates all changes of pulse number in both SASE1 and 3:
-    #add index 0 to get the initial pulse number per train:
-    idx_change = np.unique(np.concatenate(([0], idx_change_sa3, idx_change_sa1))).astype(int)
+    
+    #2. case where bunch pattern is provided
+    xgm_arr = data[xgm].where(data[xgm] != 1., drop=True)
+    sa3 = data['sase3'].where(data['sase3'] > 1, drop=True)
+    sa3_val=np.unique(sa3)
+    sa3_val = sa3_val[~np.isnan(sa3_val)]
+    sa1 = data['sase1'].where(data['sase1'] > 1, drop=True)
+    sa1_val=np.unique(sa1)
+    sa1_val = sa1_val[~np.isnan(sa1_val)]
+    sa_all = xr.concat([sa1, sa3], dim='bunchId').rename('sa_all')
+    sa_all = xr.DataArray(np.sort(sa_all)[:,:xgm_arr['XGMbunchId'].shape[0]],
+                          dims=['trainId', 'bunchId'],
+                          coords={'trainId':data.trainId},
+                          name='sase_all')
+    mask_sa1 = sa_all.sel(trainId=sa1.trainId).isin(sa1_val).rename({'bunchId':'XGMbunchId'})
+    mask_sa3 = sa_all.sel(trainId=sa3.trainId).isin(sa3_val).rename({'bunchId':'XGMbunchId'})
     if sase=='sase1':
-        npulses = npulses_sa1
-        maxpulses = int(npulses_sa1.max().values)
+        return xgm_arr.where(mask_sa1, drop=True).rename('{}_SA1'.format(xgm.split('_')[0]))
     else:
-        npulses = npulses_sa3
-        maxpulses = int(npulses_sa3.max().values)
-    for i,k in enumerate(idx_change):    
-        #skip if no pulses after the change:
-        if npulses[idx_change[i]]==0:
-            continue
-        #calculate indices
-        if sa3First:
-            a = 0
-            b = int(npulses_sa3[k].values)
-            c = b
-            d = int(c + npulses_sa1[k].values)
-        else:
-            a = int(npulses_sa1[k].values)
-            b = int(a + npulses_sa3[k].values)
-            c = 0
-            d = a
-        if sase=='sase1':
-            a = c
-            b = d
-        if i==len(idx_change)-1:
-            l = None
-        else:
-            l = idx_change[i+1]
-        temp = data[xgm][k:l,a:a+maxpulses].copy()
-        temp[:,b:] = np.NaN
-        if result is None:
-            result = temp
-        else:
-            result = xr.concat([result, temp], dim='trainId')
-    return result
-
+        return xgm_arr.where(mask_sa3, drop=True).rename('{}_SA3'.format(xgm.split('_')[0]))
+    
 
 def saseContribution(data, sase='sase1', xgm='XTD10_XGM'):
     ''' Calculate the relative contribution of SASE 1 or SASE 3 pulses 
-- 
GitLab


From 4c1cf2dfd54f9d0f1cd14287ec3f042140058996 Mon Sep 17 00:00:00 2001
From: Laurent Mercadier <laurent.mercadier@xfel.eu>
Date: Mon, 28 Oct 2019 05:22:22 +0100
Subject: [PATCH 5/7] Fixes some bugs

---
 xgm.py | 125 ++++++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 89 insertions(+), 36 deletions(-)

diff --git a/xgm.py b/xgm.py
index d7522cb..fecea7d 100644
--- a/xgm.py
+++ b/xgm.py
@@ -133,33 +133,42 @@ def cleanXGMdata(data, npulses=None, sase3First=True):
     '''
     dropList = []
     mergeList = []
-    if ("XTD10_SA3" not in data and "XTD10_XGM" in data) or (
-        "SCS_SA3" not in data and "SCS_XGM" in data):
+    dedicated = False
+    if 'sase3' in data:
+        if np.all(data['npulses_sase1'].where(data['npulses_sase3'] !=0,
+                                              drop=True) == 0):
+            dedicated = True
+            print('Dedicated trains, skip loading SASE 1 data.')
+    keys = ["XTD10_XGM", "XTD10_SA3", "XTD10_SA1", 
+            "XTD10_XGM_sigma", "XTD10_SA3_sigma", "XTD10_SA1_sigma",
+            "SCS_XGM", "SCS_SA3", "SCS_SA1",
+            "SCS_XGM_sigma", "SCS_SA3_sigma", "SCS_SA1_sigma"]
+    
+    if ("SCS_SA3" not in data and "SCS_XGM" in data):
         #no SASE-resolved arrays available
-        if 'SCS_XGM' in data:
-            sa3 = selectSASEinXGM(data, xgm='SCS_XGM', sase='sase3', npulses=npulses,
-                   sase3First=sase3First).rename({'XGMbunchId':'sa3_pId'}).rename('SCS_SA3')
-            mergeList.append(sa3)
-            sa1 = selectSASEinXGM(data, xgm='SCS_XGM', sase='sase1', npulses=npulses,
-                   sase3First=sase3First).rename({'XGMbunchId':'sa1_pId'}).rename('SCS_SA1')
+        sa3 = selectSASEinXGM(data, xgm='SCS_XGM', sase='sase3', npulses=npulses,
+               sase3First=sase3First).rename({'XGMbunchId':'sa3_pId'})
+        mergeList.append(sa3)
+        if not dedicated:
+            sa1 = selectSASEinXGM(data, xgm='SCS_XGM', sase='sase1',
+                                  npulses=npulses, sase3First=sase3First).rename(
+                                  {'XGMbunchId':'sa1_pId'})
             mergeList.append(sa1)
-            dropList.append('SCS_XGM')
-
-        if 'XTD10_XGM' in data:
-            sa3 = selectSASEinXGM(data, xgm='XTD10_XGM', sase='sase3', npulses=npulses,
-                       sase3First=sase3First).rename({'XGMbunchId':'sa3_pId'}).rename('XTD10_SA3')
-            mergeList.append(sa3)
-            sa1 = selectSASEinXGM(data, xgm='XTD10_XGM', sase='sase1', npulses=npulses,
-                       sase3First=sase3First).rename({'XGMbunchId':'sa1_pId'}).rename('XTD10_SA1')
+        dropList.append('SCS_XGM')
+        keys.remove('SCS_XGM')
+
+    if ("XTD10_SA3" not in data and "XTD10_XGM" in data):
+        #no SASE-resolved arrays available
+        sa3 = selectSASEinXGM(data, xgm='XTD10_XGM', sase='sase3', npulses=npulses,
+                   sase3First=sase3First).rename({'XGMbunchId':'sa3_pId'})
+        mergeList.append(sa3)
+        if not dedicated:
+            sa1 = selectSASEinXGM(data, xgm='XTD10_XGM', sase='sase1', 
+                                  npulses=npulses, sase3First=sase3First).rename(
+                                  {'XGMbunchId':'sa1_pId'})
             mergeList.append(sa1)
-            dropList.append('XTD10_XGM')
-        keys = []
-        
-    else:
-        keys = ["XTD10_XGM", "XTD10_SA3", "XTD10_SA1",
-                "XTD10_XGM_sigma", "XTD10_SA3_sigma", "XTD10_SA1_sigma"]
-        keys += ["SCS_XGM", "SCS_SA3", "SCS_SA1",
-                 "SCS_XGM_sigma", "SCS_SA3_sigma", "SCS_SA1_sigma"]
+        dropList.append('XTD10_XGM')
+        keys.remove('XTD10_XGM')
         
     for key in keys:
         if key not in data:
@@ -168,6 +177,9 @@ def cleanXGMdata(data, npulses=None, sase3First=True):
             sase = 'sa3_'
         elif "sa1" in key.lower():
             sase = 'sa1_'
+            if dedicated:
+                dropList.append(key)
+                continue
         else:
             dropList.append(key)
             continue
@@ -176,7 +188,7 @@ def cleanXGMdata(data, npulses=None, sase3First=True):
         dropList.append(key)
         mergeList.append(res)
     mergeList.append(data.drop(dropList))
-    subset = xr.merge(mergeList, join='outer')
+    subset = xr.merge(mergeList, join='inner')
     for k in data.attrs.keys():
         subset.attrs[k] = data.attrs[k]
     return subset
@@ -237,13 +249,51 @@ def selectSASEinXGM(data, sase='sase3', xgm='SCS_XGM', sase3First=True, npulses=
                           dims=['trainId', 'bunchId'],
                           coords={'trainId':data.trainId},
                           name='sase_all')
-    mask_sa1 = sa_all.sel(trainId=sa1.trainId).isin(sa1_val).rename({'bunchId':'XGMbunchId'})
-    mask_sa3 = sa_all.sel(trainId=sa3.trainId).isin(sa3_val).rename({'bunchId':'XGMbunchId'})
+    idxListAll, invAll = np.unique(sa_all.fillna(-1), axis=0, return_inverse=True)
+    idxList3 = np.unique(sa3)
+    idxList1 = np.unique(sa1)
+
+    if sase=='sase3':
+        big_sa3 = []
+        for i,idxXGM in enumerate(idxListAll):
+            idxXGM = np.isin(idxXGM, idxList3)
+            idxTid = invAll==i
+            mask = xr.DataArray(np.zeros((data.dims['trainId'], sa_all['bunchId'].shape[0]), dtype=bool), 
+                            dims=['trainId', 'XGMbunchId'],
+                            coords={'trainId':data.trainId, 
+                                    'XGMbunchId':sa_all['bunchId'].values}, 
+                            name='mask')
+            mask[idxTid, idxXGM] = True
+            sa3 = xgm_arr.where(mask, drop=True)
+            if sa3.trainId.size > 0:
+                sa3 = sa3.assign_coords(XGMbunchId=np.arange(sa3.XGMbunchId.size))
+                big_sa3.append(sa3)
+        if len(big_sa3) > 0:
+            da_sa3 = xr.concat(big_sa3, dim='trainId').rename('{}_SA3'.format(xgm.split('_')[0]))
+        else:
+            da_sa3 = xr.DataArray([], dims=['trainId'], name='{}_SA3'.format(xgm.split('_')[0]))
+        return da_sa3
+
     if sase=='sase1':
-        return xgm_arr.where(mask_sa1, drop=True).rename('{}_SA1'.format(xgm.split('_')[0]))
-    else:
-        return xgm_arr.where(mask_sa3, drop=True).rename('{}_SA3'.format(xgm.split('_')[0]))
-    
+        big_sa1 = []
+        for i,idxXGM in enumerate(idxListAll):
+            idxXGM = np.isin(idxXGM, idxList1)
+            idxTid = invAll==i
+            mask = xr.DataArray(np.zeros((data.dims['trainId'], sa_all['bunchId'].shape[0]), dtype=bool), 
+                            dims=['trainId', 'XGMbunchId'],
+                            coords={'trainId':data.trainId, 
+                                    'XGMbunchId':sa_all['bunchId'].values}, 
+                            name='mask')
+            mask[idxTid, idxXGM] = True
+            sa1 = xgm_arr.where(mask, drop=True)
+            if sa1.trainId.size > 0:
+                sa1 = sa1.assign_coords(XGMbunchId=np.arange(sa1.XGMbunchId.size))
+                big_sa1.append(sa1)
+        if len(big_sa1) > 0:
+            da_sa1 = xr.concat(big_sa1, dim='trainId').rename('{}_SA1'.format(xgm.split('_')[0]))
+        else:
+            da_sa1 = xr.DataArray([], dims=['trainId'], name='{}_SA1'.format(xgm.split('_')[0]))  
+        return da_sa1
 
 def saseContribution(data, sase='sase1', xgm='XTD10_XGM'):
     ''' Calculate the relative contribution of SASE 1 or SASE 3 pulses 
@@ -539,14 +589,17 @@ def getTIMapd(data, mcp=1, use_apd=True, intstart=None, intstop=None,
     
     #2.1 case where apd is used:
     if use_apd:
-        return data[f'MCP{mcp}apd'].where(mask, drop=True)
-
+        res = data[f'MCP{mcp}apd'].where(mask, drop=True)
+        res = res.assign_coords(apdId=np.arange(res['apdId'].shape[0]))
+        return res
     #2.2 case where integration is performed on raw trace:
     else:
-        peaks = mcpPeaks(data, intstart, intstop, bkgstart, bkgstop, mcp=mcp, 
-                               t_offset=period, npulses=data.dims['apdId'])
+        peaks = mcpPeaks(data, intstart, intstop, bkgstart, bkgstop, mcp=mcp, t_offset=period,
+                         npulses=data.dims['apdId'])
         mask = mask.rename({'apdId':f'MCP{mcp}fromRaw'})
-        return peaks.where(mask, drop=True)
+        res = peaks.where(mask, drop=True)
+        res = res.assign_coords({f'MCP{mcp}fromRaw':np.arange(res[f'MCP{mcp}fromRaw'].shape[0])})
+        return res
 
 
 def calibrateTIM(data, rollingWindow=200, mcp=1, plot=False, use_apd=True, intstart=None,
-- 
GitLab


From b8e480218e5c4c0dd7406d1a8f62216abb2fe670 Mon Sep 17 00:00:00 2001
From: Laurent Mercadier <laurent.mercadier@xfel.eu>
Date: Mon, 28 Oct 2019 17:56:32 +0100
Subject: [PATCH 6/7] Adds warning if apd parameters were wrong during
 acquisition

---
 xgm.py | 66 ++++++++++++++++++++++++++++++++++++----------------------
 1 file changed, 41 insertions(+), 25 deletions(-)

diff --git a/xgm.py b/xgm.py
index fecea7d..85e0058 100644
--- a/xgm.py
+++ b/xgm.py
@@ -9,7 +9,6 @@ import matplotlib.pyplot as plt
 import numpy as np
 import xarray as xr
 
-
 # Machine
 def pulsePatternInfo(data, plot=False):
     ''' display general information on the pulse patterns operated by SASE1 and SASE3.
@@ -139,6 +138,7 @@ def cleanXGMdata(data, npulses=None, sase3First=True):
                                               drop=True) == 0):
             dedicated = True
             print('Dedicated trains, skip loading SASE 1 data.')
+    #pulse-resolved signals from XGMs
     keys = ["XTD10_XGM", "XTD10_SA3", "XTD10_SA1", 
             "XTD10_XGM_sigma", "XTD10_SA3_sigma", "XTD10_SA1_sigma",
             "SCS_XGM", "SCS_SA3", "SCS_SA1",
@@ -185,6 +185,9 @@ def cleanXGMdata(data, npulses=None, sase3First=True):
             continue
         res = data[key].where(data[key] != 1.0, drop=True).rename(
                 {'XGMbunchId':'{}pId'.format(sase)}).rename(key)
+        res = res.assign_coords(
+                {f'{sase}pId':np.arange(res[f'{sase}pId'].shape[0])})
+        
         dropList.append(key)
         mergeList.append(res)
     mergeList.append(data.drop(dropList))
@@ -567,39 +570,52 @@ def getTIMapd(data, mcp=1, use_apd=True, intstart=None, intstop=None,
         return tim
     
     #2. If bunch pattern available, define a mask that corresponds to the SASE 3 pulses
-    initialDelay = data.attrs['run'].get_array(
-                    'SCS_UTC1_ADQ/ADC/1', 'board1.apd.channel_0.initialDelay.value')[0].values
-    upperLimit = data.attrs['run'].get_array(
-                    'SCS_UTC1_ADQ/ADC/1', 'board1.apd.channel_0.upperLimit.value')[0].values
-    period = upperLimit - initialDelay #period of the apd in number of digitizer samples
     sa3 = data['sase3'].where(data['sase3']>1, drop=True)
     sa3 -= sa3[0,0]
-    sa3 = sa3/int(period/440) #440 = samples between two pulses @4.5 MHz with ADQ412 digitizer
+    #2.1 case where apd is used:
+    if use_apd:
+        pulseId = 'apdId'
+        pulseIdDim = data.dims['apdId']
+        initialDelay = data.attrs['run'].get_array(
+                        'SCS_UTC1_ADQ/ADC/1', 'board1.apd.channel_0.initialDelay.value')[0].values
+        upperLimit = data.attrs['run'].get_array(
+                        'SCS_UTC1_ADQ/ADC/1', 'board1.apd.channel_0.upperLimit.value')[0].values
+        #440 = samples between two pulses @4.5 MHz with ADQ412 digitizer:
+        period = int((upperLimit - initialDelay)/440)
+        #display some warnings if apd parameters do not match pulse pattern:
+        period_from_bunch_pattern = int(np.nanmin(np.diff(sa3)))
+        if period > period_from_bunch_pattern:
+            print(f'Warning: apd parameter was set to record 1 pulse out of {period} @ 4.5 MHz ' +
+                  f'but XFEL delivered 1 pulse out of {period_from_bunch_pattern}.')
+        maxPulses = data['npulses_sase3'].max().values
+        if period*pulseIdDim < period_from_bunch_pattern*maxPulses:
+            print(f'Warning: Number of pulses and/or rep. rate in apd parameters were set ' +
+                  f'too low ({pulseIdDim})to record the {maxPulses} SASE 3 pulses')
+        peaks = data[f'MCP{mcp}apd']
+    
+    #2.2 case where integration is performed on raw trace:
+    else:
+        pulseId = f'MCP{mcp}fromRaw'
+        pulseIdDim = int(np.max(sa3).values) + 1
+        period = int(np.nanmin(np.diff(sa3)))
+        peaks = mcpPeaks(data, intstart, intstop, bkgstart, bkgstop, mcp=mcp, t_offset=period*440,
+                         npulses=pulseIdDim)
+    
+    sa3 = sa3/period
     idxList, inv = np.unique(sa3, axis=0, return_inverse=True)
-    mask = xr.DataArray(np.zeros((data.dims['trainId'], data.dims['apdId']), dtype=bool), 
-                        dims=['trainId', 'apdId'],
+    mask = xr.DataArray(np.zeros((data.dims['trainId'], pulseIdDim), dtype=bool), 
+                        dims=['trainId', pulseId],
                         coords={'trainId':data.trainId, 
-                                'apdId':np.arange(data.dims['apdId'])}, 
-                        name='mask')
+                                pulseId:np.arange(pulseIdDim)})
     mask = mask.sel(trainId=sa3.trainId)
     for i,idxApd in enumerate(idxList):
         idxApd = idxApd[idxApd>=0].astype(int)
         idxTid = inv==i
         mask[idxTid, idxApd] = True
-    
-    #2.1 case where apd is used:
-    if use_apd:
-        res = data[f'MCP{mcp}apd'].where(mask, drop=True)
-        res = res.assign_coords(apdId=np.arange(res['apdId'].shape[0]))
-        return res
-    #2.2 case where integration is performed on raw trace:
-    else:
-        peaks = mcpPeaks(data, intstart, intstop, bkgstart, bkgstop, mcp=mcp, t_offset=period,
-                         npulses=data.dims['apdId'])
-        mask = mask.rename({'apdId':f'MCP{mcp}fromRaw'})
-        res = peaks.where(mask, drop=True)
-        res = res.assign_coords({f'MCP{mcp}fromRaw':np.arange(res[f'MCP{mcp}fromRaw'].shape[0])})
-        return res
+
+    peaks = peaks.where(mask, drop=True)
+    peaks = peaks.assign_coords({pulseId:np.arange(peaks[pulseId].shape[0])})
+    return peaks
 
 
 def calibrateTIM(data, rollingWindow=200, mcp=1, plot=False, use_apd=True, intstart=None,
-- 
GitLab


From 660045ed0cc20fdcce528c56e36c6cdca4d7b8ab Mon Sep 17 00:00:00 2001
From: Laurent Mercadier <laurent.mercadier@xfel.eu>
Date: Tue, 29 Oct 2019 09:46:35 +0100
Subject: [PATCH 7/7] Adds documentation, simplifies selectSASEinXGM()

---
 xgm.py | 97 +++++++++++++++++++++++++---------------------------------
 1 file changed, 41 insertions(+), 56 deletions(-)

diff --git a/xgm.py b/xgm.py
index 85e0058..61eba7e 100644
--- a/xgm.py
+++ b/xgm.py
@@ -199,11 +199,9 @@ def cleanXGMdata(data, npulses=None, sase3First=True):
 
 def selectSASEinXGM(data, sase='sase3', xgm='SCS_XGM', sase3First=True, npulses=None):
     ''' Given an array containing both SASE1 and SASE3 data, extracts SASE1-
-        or SASE3-only XGM data.
-        There are various cases depending on i) the mode of operation (10 Hz
-        with fresh bunch, dedicated trains to one SASE, pulse on demand), 
-        ii) the potential change of number of pulses per train in each SASE
-        and iii) the order (SASE1 first, SASE3 first, interleaved mode).
+        or SASE3-only XGM data. The function tracks the changes of bunch patterns
+        in sase 1 and sase 3 and applies a mask to the XGM array to extract the 
+        relevant pulses. This way, all complicated patterns are accounted for.
         
         Inputs:
             data: xarray Dataset containing xgm data
@@ -240,6 +238,7 @@ def selectSASEinXGM(data, sase='sase3', xgm='SCS_XGM', sase3First=True, npulses=
                 return xgmData[:,start:start+npulses]
     
     #2. case where bunch pattern is provided
+    #2.1 Merge sase1 and sase3 bunch patterns to get indices of all pulses
     xgm_arr = data[xgm].where(data[xgm] != 1., drop=True)
     sa3 = data['sase3'].where(data['sase3'] > 1, drop=True)
     sa3_val=np.unique(sa3)
@@ -252,51 +251,38 @@ def selectSASEinXGM(data, sase='sase3', xgm='SCS_XGM', sase3First=True, npulses=
                           dims=['trainId', 'bunchId'],
                           coords={'trainId':data.trainId},
                           name='sase_all')
-    idxListAll, invAll = np.unique(sa_all.fillna(-1), axis=0, return_inverse=True)
-    idxList3 = np.unique(sa3)
-    idxList1 = np.unique(sa1)
-
     if sase=='sase3':
-        big_sa3 = []
-        for i,idxXGM in enumerate(idxListAll):
-            idxXGM = np.isin(idxXGM, idxList3)
-            idxTid = invAll==i
-            mask = xr.DataArray(np.zeros((data.dims['trainId'], sa_all['bunchId'].shape[0]), dtype=bool), 
-                            dims=['trainId', 'XGMbunchId'],
-                            coords={'trainId':data.trainId, 
-                                    'XGMbunchId':sa_all['bunchId'].values}, 
-                            name='mask')
-            mask[idxTid, idxXGM] = True
-            sa3 = xgm_arr.where(mask, drop=True)
-            if sa3.trainId.size > 0:
-                sa3 = sa3.assign_coords(XGMbunchId=np.arange(sa3.XGMbunchId.size))
-                big_sa3.append(sa3)
-        if len(big_sa3) > 0:
-            da_sa3 = xr.concat(big_sa3, dim='trainId').rename('{}_SA3'.format(xgm.split('_')[0]))
-        else:
-            da_sa3 = xr.DataArray([], dims=['trainId'], name='{}_SA3'.format(xgm.split('_')[0]))
-        return da_sa3
-
-    if sase=='sase1':
-        big_sa1 = []
-        for i,idxXGM in enumerate(idxListAll):
-            idxXGM = np.isin(idxXGM, idxList1)
-            idxTid = invAll==i
-            mask = xr.DataArray(np.zeros((data.dims['trainId'], sa_all['bunchId'].shape[0]), dtype=bool), 
-                            dims=['trainId', 'XGMbunchId'],
-                            coords={'trainId':data.trainId, 
-                                    'XGMbunchId':sa_all['bunchId'].values}, 
-                            name='mask')
-            mask[idxTid, idxXGM] = True
-            sa1 = xgm_arr.where(mask, drop=True)
-            if sa1.trainId.size > 0:
-                sa1 = sa1.assign_coords(XGMbunchId=np.arange(sa1.XGMbunchId.size))
-                big_sa1.append(sa1)
-        if len(big_sa1) > 0:
-            da_sa1 = xr.concat(big_sa1, dim='trainId').rename('{}_SA1'.format(xgm.split('_')[0]))
-        else:
-            da_sa1 = xr.DataArray([], dims=['trainId'], name='{}_SA1'.format(xgm.split('_')[0]))  
-        return da_sa1
+        idxListSase = np.unique(sa3)
+        newName = xgm.split('_')[0] + '_SA3'
+    else:
+        idxListSase = np.unique(sa1)
+        newName = xgm.split('_')[0] + '_SA1'
+        
+    #2.2 track the changes of pulse patterns and the indices at which they occurred (invAll)
+    idxListAll, invAll = np.unique(sa_all.fillna(-1), axis=0, return_inverse=True)
+    
+    #2.3 define a mask, loop over the different patterns and update the mask for each pattern
+    mask = xr.DataArray(np.zeros((data.dims['trainId'], sa_all['bunchId'].shape[0]), dtype=bool), 
+                    dims=['trainId', 'XGMbunchId'],
+                    coords={'trainId':data.trainId, 
+                            'XGMbunchId':sa_all['bunchId'].values}, 
+                    name='mask')
+
+    big_sase = []
+    for i,idxXGM in enumerate(idxListAll):
+        mask.values = np.zeros(mask.shape, dtype=bool)
+        idxXGM = np.isin(idxXGM, idxListSase)
+        idxTid = invAll==i
+        mask[idxTid, idxXGM] = True
+        sa_arr = xgm_arr.where(mask, drop=True)
+        if sa_arr.trainId.size > 0:
+            sa_arr = sa_arr.assign_coords(XGMbunchId=np.arange(sa_arr.XGMbunchId.size))
+            big_sase.append(sa_arr)
+    if len(big_sase) > 0:
+        da_sase = xr.concat(big_sase, dim='trainId').rename(newName)
+    else:
+        da_sase = xr.DataArray([], dims=['trainId'], name=newName)
+    return da_sase
 
 def saseContribution(data, sase='sase1', xgm='XTD10_XGM'):
     ''' Calculate the relative contribution of SASE 1 or SASE 3 pulses 
@@ -533,9 +519,8 @@ def getTIMapd(data, mcp=1, use_apd=True, intstart=None, intstop=None,
     ''' Extract peak-integrated data from TIM where pulses are from SASE3 only.
         If use_apd is False it calculates integration from raw traces. 
         The missing values, in case of change of number of pulses, are filled
-        with NaNs.
-        If no bunch pattern info is available, the function assumes that
-        SASE 3 comes first and that the number of pulses is fixed in both
+        with NaNs. If no bunch pattern info is available, the function assumes
+        that SASE 3 comes first and that the number of pulses is fixed in both
         SASE 1 and 3.
         
         Inputs:
@@ -544,7 +529,7 @@ def getTIMapd(data, mcp=1, use_apd=True, intstart=None, intstop=None,
             intstop: trace index of integration stop
             bkgstart: trace index of background start
             bkgstop: trace index of background stop
-            t_offset: index separation between two pulses
+            t_offset: number of ADC samples between two pulses
             mcp: MCP channel number
             npulses: int, optional. Number of pulses to compute. Required if
                 no bunch pattern info is available.
@@ -602,6 +587,7 @@ def getTIMapd(data, mcp=1, use_apd=True, intstart=None, intstop=None,
                          npulses=pulseIdDim)
     
     sa3 = sa3/period
+    #2.3 track the changes of pulse patterns and the indices at which they occurred (invAll)
     idxList, inv = np.unique(sa3, axis=0, return_inverse=True)
     mask = xr.DataArray(np.zeros((data.dims['trainId'], pulseIdDim), dtype=bool), 
                         dims=['trainId', pulseId],
@@ -993,11 +979,10 @@ def mergeFastAdcPeaks(data, channel, intstart, intstop, bkgstart, bkgstop,
             intstop: trace index of integration stop
             bkgstart: trace index of background start
             bkgstop: trace index of background stop
-            period: Number of samples separation between two pulses. Needed
+            period: Number of ADC samples between two pulses. Needed
                 if bunch pattern info is not available. If None, checks the 
                 pulse pattern and determine the period assuming a resolution
-                of 9.23 ns per sample which leads to 24 samples between
-                two bunches @ 4.5 MHz. 
+                of 9.23 ns per sample = 24 samples between two pulses @ 4.5 MHz. 
             npulses: number of pulses. If None, takes the maximum number of
                 pulses according to the bunch patter (field 'npulses_sase3')
             dim: name of the xr dataset dimension along the peaks
-- 
GitLab