From 5f85ed01303319188126f88dc2c8e9633d3f420f Mon Sep 17 00:00:00 2001 From: Laurent Mercadier <laurent.mercadier@xfel.eu> Date: Sat, 26 Oct 2019 08:17:13 +0200 Subject: [PATCH 1/7] Simplified getTIMapd() for case with no change of number of pulses --- xgm.py | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/xgm.py b/xgm.py index 400ae78..506bfcc 100644 --- a/xgm.py +++ b/xgm.py @@ -518,7 +518,7 @@ def mcpPeaks(data, intstart, intstop, bkgstart, bkgstop, mcp=1, t_offset=None, n pattern and determine the t_offset assuming mininum pulse separation of 220 ns and digitizer resolution of 2 GHz. npulses: number of pulses. If None, takes the maximum number of - pulses according to the bunch patter (field 'npulses_sase3') + pulses according to the bunch pattern (field 'npulses_sase3') Output: results: DataArray with dims trainId x max(sase3 pulses) @@ -592,7 +592,32 @@ def getTIMapd(data, mcp=1, use_apd=True, intstart=None, intstop=None, t_offset=t_offset, npulses=npulses) return tim + #1. case where number of SASE 3 pulses were not changed during the run: sa3 = data['sase3'].where(data['sase3']>1, drop=True) + sa3 -= sa3[0,0] + idx = np.unique(sa3, axis=0) + npulses_sa3 = data['npulses_sase3'] + maxpulses = int(npulses_sa3.max().values) + if npulses is not None: + maxpulses = np.min([npulses, maxpulses]) + if len(idx)==1: + if use_apd: + apd = data['MCP{}apd'.format(mcp)] + initialDelay = data.attrs['run'].get_array( + 'SCS_UTC1_ADQ/ADC/1', 'board1.apd.channel_0.initialDelay.value')[0].values + upperLimit = data.attrs['run'].get_array( + 'SCS_UTC1_ADQ/ADC/1', 'board1.apd.channel_0.upperLimit.value')[0].values + period = upperLimit - initialDelay #period of the apd in number of digitizer samples + idx /= int(period/440) #440 samples correspond to the separation between two pulses at 4.5 MHz + if len(idx)==1: + idx = idx[0].astype(int) + tim = apd.isel(apdId=idx) + return tim + else: + apd = mcpPeaks(data, intstart, intstop, bkgstart, bkgstop, mcp=mcp, + t_offset=t_offset, npulses=maxpulses) + + #2. case where the number of SASE 3 pulses varied during the run: npulses_sa3 = data['npulses_sase3'] maxpulses = int(npulses_sa3.max().values) if npulses is not None: -- GitLab From 9eaa6b24d596f6146d12e3cc0f34c2ce327d9938 Mon Sep 17 00:00:00 2001 From: Laurent Mercadier <laurent.mercadier@xfel.eu> Date: Sat, 26 Oct 2019 08:24:15 +0200 Subject: [PATCH 2/7] Cleaned up --- xgm.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/xgm.py b/xgm.py index 506bfcc..881b8bd 100644 --- a/xgm.py +++ b/xgm.py @@ -609,12 +609,10 @@ def getTIMapd(data, mcp=1, use_apd=True, intstart=None, intstop=None, 'SCS_UTC1_ADQ/ADC/1', 'board1.apd.channel_0.upperLimit.value')[0].values period = upperLimit - initialDelay #period of the apd in number of digitizer samples idx /= int(period/440) #440 samples correspond to the separation between two pulses at 4.5 MHz - if len(idx)==1: - idx = idx[0].astype(int) - tim = apd.isel(apdId=idx) - return tim + idx = idx[0].astype(int) + return apd.isel(apdId=idx) else: - apd = mcpPeaks(data, intstart, intstop, bkgstart, bkgstop, mcp=mcp, + return mcpPeaks(data, intstart, intstop, bkgstart, bkgstop, mcp=mcp, t_offset=t_offset, npulses=maxpulses) #2. case where the number of SASE 3 pulses varied during the run: -- GitLab From 3c42d1dbe1c772e7ac219caf9dc18a8a1dce18a5 Mon Sep 17 00:00:00 2001 From: Laurent Mercadier <laurent.mercadier@xfel.eu> Date: Sat, 26 Oct 2019 23:25:00 +0200 Subject: [PATCH 3/7] Uses mask according to bunch pattern in getTIMapd() --- xgm.py | 113 +++++++++++++++++---------------------------------------- 1 file changed, 34 insertions(+), 79 deletions(-) diff --git a/xgm.py b/xgm.py index 881b8bd..a41e746 100644 --- a/xgm.py +++ b/xgm.py @@ -120,7 +120,7 @@ def cleanXGMdata(data, npulses=None, sase3First=True): ''' Cleans the XGM data arrays obtained from load() function. The XGM "TD" data arrays have arbitrary size of 1000 and default value 1.0 when there is no pulse. This function sorts the SASE 1 and SASE 3 pulses. - For recent DAQ runs, sase-resolved arrays can be used. For older runs, + For DAQ runs after April 2019, sase-resolved arrays can be used. For older runs, the function selectSASEinXGM can be used to extract sase-resolved pulses. Inputs: data: xarray Dataset containing XGM TD arrays. @@ -176,7 +176,7 @@ def cleanXGMdata(data, npulses=None, sase3First=True): dropList.append(key) mergeList.append(res) mergeList.append(data.drop(dropList)) - subset = xr.merge(mergeList, join='inner') + subset = xr.merge(mergeList, join='outer') for k in data.attrs.keys(): subset.attrs[k] = data.attrs[k] return subset @@ -539,13 +539,15 @@ def mcpPeaks(data, intstart, intstop, bkgstart, bkgstop, mcp=1, t_offset=None, n t_offset = 440 * step else: t_offset = 1 - results = xr.DataArray(np.empty((data.trainId.shape[0], npulses)), coords=data[keyraw].coords, + results = xr.DataArray(np.zeros((data.trainId.shape[0], npulses)), coords=data[keyraw].coords, dims=['trainId', 'MCP{}fromRaw'.format(mcp)]) for i in range(npulses): a = intstart + t_offset*i b = intstop + t_offset*i bkga = bkgstart + t_offset*i bkgb = bkgstop + t_offset*i + if b > data.dims['samplesId']: + break bg = np.outer(np.median(data[keyraw][:,bkga:bkgb], axis=1), np.ones(b-a)) results[:,i] = np.trapz(data[keyraw][:,a:b] - bg, axis=1) return results @@ -578,6 +580,7 @@ def getTIMapd(data, mcp=1, use_apd=True, intstart=None, intstop=None, tim: DataArray of shape trainId only for SASE3 pulses x N with N=max(number of pulses per train) ''' + #1. case where no bunch pattern is available: if 'sase3' not in data: print('Missing bunch pattern info!\n') if npulses is None: @@ -586,91 +589,43 @@ def getTIMapd(data, mcp=1, use_apd=True, intstart=None, intstop=None, print('Retrieving {} SASE 3 pulses assuming that '.format(npulses) + 'SASE 3 pulses come first.') if use_apd: - tim = data['MCP{}apd'.format(mcp)][:,:npulses:stride] + tim = data[f'MCP{mcp}apd'][:,:npulses:stride] else: tim = mcpPeaks(data, intstart, intstop, bkgstart, bkgstop, mcp=mcp, t_offset=t_offset, npulses=npulses) return tim - #1. case where number of SASE 3 pulses were not changed during the run: + #2. If bunch pattern available, define a mask that corresponds to the SASE 3 pulses + initialDelay = data.attrs['run'].get_array( + 'SCS_UTC1_ADQ/ADC/1', 'board1.apd.channel_0.initialDelay.value')[0].values + upperLimit = data.attrs['run'].get_array( + 'SCS_UTC1_ADQ/ADC/1', 'board1.apd.channel_0.upperLimit.value')[0].values + period = upperLimit - initialDelay #period of the apd in number of digitizer samples sa3 = data['sase3'].where(data['sase3']>1, drop=True) sa3 -= sa3[0,0] - idx = np.unique(sa3, axis=0) - npulses_sa3 = data['npulses_sase3'] - maxpulses = int(npulses_sa3.max().values) - if npulses is not None: - maxpulses = np.min([npulses, maxpulses]) - if len(idx)==1: - if use_apd: - apd = data['MCP{}apd'.format(mcp)] - initialDelay = data.attrs['run'].get_array( - 'SCS_UTC1_ADQ/ADC/1', 'board1.apd.channel_0.initialDelay.value')[0].values - upperLimit = data.attrs['run'].get_array( - 'SCS_UTC1_ADQ/ADC/1', 'board1.apd.channel_0.upperLimit.value')[0].values - period = upperLimit - initialDelay #period of the apd in number of digitizer samples - idx /= int(period/440) #440 samples correspond to the separation between two pulses at 4.5 MHz - idx = idx[0].astype(int) - return apd.isel(apdId=idx) - else: - return mcpPeaks(data, intstart, intstop, bkgstart, bkgstop, mcp=mcp, - t_offset=t_offset, npulses=maxpulses) + sa3 = sa3/int(period/440) #440 = samples between two pulses @4.5 MHz with ADQ412 digitizer + idxList, inv = np.unique(sa3, axis=0, return_inverse=True) + mask = xr.DataArray(np.zeros((data.dims['trainId'], data.dims['apdId']), dtype=bool), + dims=['trainId', 'apdId'], + coords={'trainId':data.trainId, + 'apdId':np.arange(data.dims['apdId'])}, + name='mask') + mask = mask.sel(trainId=sa3.trainId) + for i,idxApd in enumerate(idxList): + idxApd = idxApd[idxApd>=0].astype(int) + idxTid = inv==i + mask[idxTid, idxApd] = True - #2. case where the number of SASE 3 pulses varied during the run: - npulses_sa3 = data['npulses_sase3'] - maxpulses = int(npulses_sa3.max().values) - if npulses is not None: - maxpulses = np.min([npulses, maxpulses]) - step = 1 - if maxpulses > 1: - #Calculate the number of non-lasing pulses between two lasing pulses (step) - step = sa3.where(data['npulses_sase3']>1, drop=True)[0,:2].values - step = int(step[1] - step[0]) + #2.1 case where apd is used: if use_apd: - apd = data['MCP{}apd'.format(mcp)] - initialDelay = data.attrs['run'].get_array( - 'SCS_UTC1_ADQ/ADC/1', 'board1.apd.channel_0.initialDelay.value')[0].values - upperLimit = data.attrs['run'].get_array( - 'SCS_UTC1_ADQ/ADC/1', 'board1.apd.channel_0.upperLimit.value')[0].values - nsamples = upperLimit - initialDelay - npulses_per_apd = int(nsamples/440) - sa3 /= npulses_per_apd + return data[f'MCP{mcp}apd'].where(mask, drop=True) + + #2.2 case where integration is performed on raw trace: else: - apd = mcpPeaks(data, intstart, intstop, bkgstart, bkgstop, mcp=mcp, - t_offset=t_offset, npulses=npulses) - sa3 /= step - sa3 -= sa3[:,0] - sa3 = sa3.astype(int) - if np.all(npulses_sa3 == npulses_sa3[0]): - tim = apd[:, sa3[0].values[:maxpulses]] - return tim - - stride = 1 - if use_apd: - stride = np.max([stride,int(step/npulses_per_apd)]) - diff = npulses_sa3.diff(dim='trainId') - #only keep trainIds where a change occured: - diff = diff.where(diff != 0, drop=True) - #get a list of indices where a change occured: - idx_change = np.argwhere(np.isin(npulses_sa3.trainId.values, - diff.trainId.values, assume_unique=True))[:,0] - #add index 0 to get the initial pulse number per train: - idx_change = np.insert(idx_change, 0, 0) - tim = None - for i,idx in enumerate(idx_change): - if npulses_sa3[idx]==0: - continue - if i==len(idx_change)-1: - l = None - else: - l = idx_change[i+1] - b = npulses_sa3[idx].values - temp = apd[idx:l,:maxpulses*stride:stride].copy() - temp[:,b:] = np.NaN - if tim is None: - tim = temp - else: - tim = xr.concat([tim, temp], dim='trainId') - return tim + peaks = mcpPeaks(data, intstart, intstop, bkgstart, bkgstop, mcp=mcp, + t_offset=period, npulses=data.dims['apdId']) + mask = mask.rename({'apdId':f'MCP{mcp}fromRaw'}) + return peaks.where(mask, drop=True) def calibrateTIM(data, rollingWindow=200, mcp=1, plot=False, use_apd=True, intstart=None, @@ -966,7 +921,7 @@ def matchXgmTimPulseId(data, use_apd=True, intstart=None, intstop=None, ndata = cleanXGMdata(data, npulses, sase3First) for mcp in range(1,5): if 'MCP{}apd'.format(mcp) in data or 'MCP{}raw'.format(mcp) in data: - MCPapd = getTIMapd(ndata, mcp=mcp, use_apd=use_apd, intstart=intstart, + MCPapd = getTIMapd(data, mcp=mcp, use_apd=use_apd, intstart=intstart, intstop=intstop,bkgstart=bkgstart, bkgstop=bkgstop, t_offset=t_offset, npulses=npulses, stride=stride).rename('MCP{}apd'.format(mcp)) -- GitLab From 971893a80c4d3ee1e7949694257edff13a81876b Mon Sep 17 00:00:00 2001 From: Laurent Mercadier <laurent.mercadier@xfel.eu> Date: Sun, 27 Oct 2019 18:46:51 +0100 Subject: [PATCH 4/7] Simplifies selectSASEinXGM() by using mask based on bunch pattern --- xgm.py | 125 +++++++++++---------------------------------------------- 1 file changed, 23 insertions(+), 102 deletions(-) diff --git a/xgm.py b/xgm.py index a41e746..d7522cb 100644 --- a/xgm.py +++ b/xgm.py @@ -183,7 +183,8 @@ def cleanXGMdata(data, npulses=None, sase3First=True): def selectSASEinXGM(data, sase='sase3', xgm='SCS_XGM', sase3First=True, npulses=None): - ''' Extract SASE1- or SASE3-only XGM data. + ''' Given an array containing both SASE1 and SASE3 data, extracts SASE1- + or SASE3-only XGM data. There are various cases depending on i) the mode of operation (10 Hz with fresh bunch, dedicated trains to one SASE, pulse on demand), ii) the potential change of number of pulses per train in each SASE @@ -192,7 +193,7 @@ def selectSASEinXGM(data, sase='sase3', xgm='SCS_XGM', sase3First=True, npulses= Inputs: data: xarray Dataset containing xgm data sase: key of sase to select: {'sase1', 'sase3'} - xgm: key of xgm to select: {'SA3_XGM', 'SCS_XGM'} + xgm: key of xgm to select: {'XTD10_XGM', 'SCS_XGM'} sase3First: bool, optional. Used in case no bunch pattern was recorded npulses: int, optional. Required in case no bunch pattern was recorded. @@ -202,6 +203,7 @@ def selectSASEinXGM(data, sase='sase3', xgm='SCS_XGM', sase3First=True, npulses= that sase in the run. The missing values, in case of change of number of pulses, are filled with NaNs. ''' + #1. case where bunch pattern is missing: if sase not in data: print('Missing bunch pattern info!') if npulses is None: @@ -221,108 +223,27 @@ def selectSASEinXGM(data, sase='sase3', xgm='SCS_XGM', sase3First=True, npulses= else: start=xgmData.shape[1]-npulses return xgmData[:,start:start+npulses] - result = None - npulses_sa3 = data['npulses_sase3'] - npulses_sa1 = data['npulses_sase1'] - dedicated = 0 - if np.all(npulses_sa1.where(npulses_sa3 !=0, drop=True) == 0): - dedicated += 1 - print('No SASE 1 pulses during SASE 3 operation') - if np.all(npulses_sa3.where(npulses_sa1 !=0, drop=True) == 0): - dedicated += 1 - print('No SASE 3 pulses during SASE 1 operation') - #Alternating pattern with dedicated pulses in SASE1 and SASE3: - if dedicated==2: - if sase=='sase1': - result = data[xgm].where(npulses_sa1>0, drop=True)[:,:npulses_sa1.max().values] - else: - result = data[xgm].where(npulses_sa3>0, drop=True)[:,:npulses_sa3.max().values] - result = result.where(result != 1.0) - return result - # SASE1 and SASE3 bunches in a same train: find minimum indices of first and - # maximum indices of last pulse per train - else: - pulseIdmin_sa1 = data['sase1'].where(npulses_sa1 != 0).where(data['sase1']>1).min().values - pulseIdmax_sa1 = data['sase1'].where(npulses_sa1 != 0).where(data['sase1']>1).max().values - pulseIdmin_sa3 = data['sase3'].where(npulses_sa3 != 0).where(data['sase3']>1).min().values - pulseIdmax_sa3 = data['sase3'].where(npulses_sa3 != 0).where(data['sase3']>1).max().values - if pulseIdmin_sa1 > pulseIdmax_sa3: - sa3First = True - elif pulseIdmin_sa3 > pulseIdmax_sa1: - sa3First = False - else: - print('Interleaved mode, but no sase-dedicated XGM data loaded.') - saseStr = 'SA{}'.format(sase[4]) - xgmStr = xgm.split('_')[0] - print('Loading {}_{} data...'.format(xgmStr, saseStr)) - try: - if npulses == None: - npulses = data['npulses_sase{}'.format(sase[4])].max().values - if xgmStr == 'XTD10': - source = 'SA3_XTD10_XGM/XGM/DOOCS:output' - if xgmStr == 'SCS': - source = 'SCS_BLU_XGM/XGM/DOOCS:output' - key = 'data.intensitySa{}TD'.format(sase[4]) - result = data.attrs['run'].get_array(source, key, extra_dims=['XGMbunchId']) - result = result.isel(XGMbunchId=slice(0, npulses)) - return result - except: - print('Could not load {}_{} data. '.format(xgmStr, saseStr) + - 'Interleaved mode and no sase-dedicated data is not yet supported.') - - #take the derivative along the trainId to track changes in pulse number: - diff = npulses_sa3.diff(dim='trainId') - #only keep trainIds where a change occured: - diff = diff.where(diff != 0, drop=True) - #get a list of indices where a change occured: - idx_change_sa3 = np.argwhere(np.isin(npulses_sa3.trainId.values, - diff.trainId.values, assume_unique=True))[:,0] - - #Same for SASE 1: - diff = npulses_sa1.diff(dim='trainId') - diff = diff.where(diff !=0, drop=True) - idx_change_sa1 = np.argwhere(np.isin(npulses_sa1.trainId.values, - diff.trainId.values, assume_unique=True))[:,0] - - #create index that locates all changes of pulse number in both SASE1 and 3: - #add index 0 to get the initial pulse number per train: - idx_change = np.unique(np.concatenate(([0], idx_change_sa3, idx_change_sa1))).astype(int) + + #2. case where bunch pattern is provided + xgm_arr = data[xgm].where(data[xgm] != 1., drop=True) + sa3 = data['sase3'].where(data['sase3'] > 1, drop=True) + sa3_val=np.unique(sa3) + sa3_val = sa3_val[~np.isnan(sa3_val)] + sa1 = data['sase1'].where(data['sase1'] > 1, drop=True) + sa1_val=np.unique(sa1) + sa1_val = sa1_val[~np.isnan(sa1_val)] + sa_all = xr.concat([sa1, sa3], dim='bunchId').rename('sa_all') + sa_all = xr.DataArray(np.sort(sa_all)[:,:xgm_arr['XGMbunchId'].shape[0]], + dims=['trainId', 'bunchId'], + coords={'trainId':data.trainId}, + name='sase_all') + mask_sa1 = sa_all.sel(trainId=sa1.trainId).isin(sa1_val).rename({'bunchId':'XGMbunchId'}) + mask_sa3 = sa_all.sel(trainId=sa3.trainId).isin(sa3_val).rename({'bunchId':'XGMbunchId'}) if sase=='sase1': - npulses = npulses_sa1 - maxpulses = int(npulses_sa1.max().values) + return xgm_arr.where(mask_sa1, drop=True).rename('{}_SA1'.format(xgm.split('_')[0])) else: - npulses = npulses_sa3 - maxpulses = int(npulses_sa3.max().values) - for i,k in enumerate(idx_change): - #skip if no pulses after the change: - if npulses[idx_change[i]]==0: - continue - #calculate indices - if sa3First: - a = 0 - b = int(npulses_sa3[k].values) - c = b - d = int(c + npulses_sa1[k].values) - else: - a = int(npulses_sa1[k].values) - b = int(a + npulses_sa3[k].values) - c = 0 - d = a - if sase=='sase1': - a = c - b = d - if i==len(idx_change)-1: - l = None - else: - l = idx_change[i+1] - temp = data[xgm][k:l,a:a+maxpulses].copy() - temp[:,b:] = np.NaN - if result is None: - result = temp - else: - result = xr.concat([result, temp], dim='trainId') - return result - + return xgm_arr.where(mask_sa3, drop=True).rename('{}_SA3'.format(xgm.split('_')[0])) + def saseContribution(data, sase='sase1', xgm='XTD10_XGM'): ''' Calculate the relative contribution of SASE 1 or SASE 3 pulses -- GitLab From 4c1cf2dfd54f9d0f1cd14287ec3f042140058996 Mon Sep 17 00:00:00 2001 From: Laurent Mercadier <laurent.mercadier@xfel.eu> Date: Mon, 28 Oct 2019 05:22:22 +0100 Subject: [PATCH 5/7] Fixes some bugs --- xgm.py | 125 ++++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 89 insertions(+), 36 deletions(-) diff --git a/xgm.py b/xgm.py index d7522cb..fecea7d 100644 --- a/xgm.py +++ b/xgm.py @@ -133,33 +133,42 @@ def cleanXGMdata(data, npulses=None, sase3First=True): ''' dropList = [] mergeList = [] - if ("XTD10_SA3" not in data and "XTD10_XGM" in data) or ( - "SCS_SA3" not in data and "SCS_XGM" in data): + dedicated = False + if 'sase3' in data: + if np.all(data['npulses_sase1'].where(data['npulses_sase3'] !=0, + drop=True) == 0): + dedicated = True + print('Dedicated trains, skip loading SASE 1 data.') + keys = ["XTD10_XGM", "XTD10_SA3", "XTD10_SA1", + "XTD10_XGM_sigma", "XTD10_SA3_sigma", "XTD10_SA1_sigma", + "SCS_XGM", "SCS_SA3", "SCS_SA1", + "SCS_XGM_sigma", "SCS_SA3_sigma", "SCS_SA1_sigma"] + + if ("SCS_SA3" not in data and "SCS_XGM" in data): #no SASE-resolved arrays available - if 'SCS_XGM' in data: - sa3 = selectSASEinXGM(data, xgm='SCS_XGM', sase='sase3', npulses=npulses, - sase3First=sase3First).rename({'XGMbunchId':'sa3_pId'}).rename('SCS_SA3') - mergeList.append(sa3) - sa1 = selectSASEinXGM(data, xgm='SCS_XGM', sase='sase1', npulses=npulses, - sase3First=sase3First).rename({'XGMbunchId':'sa1_pId'}).rename('SCS_SA1') + sa3 = selectSASEinXGM(data, xgm='SCS_XGM', sase='sase3', npulses=npulses, + sase3First=sase3First).rename({'XGMbunchId':'sa3_pId'}) + mergeList.append(sa3) + if not dedicated: + sa1 = selectSASEinXGM(data, xgm='SCS_XGM', sase='sase1', + npulses=npulses, sase3First=sase3First).rename( + {'XGMbunchId':'sa1_pId'}) mergeList.append(sa1) - dropList.append('SCS_XGM') - - if 'XTD10_XGM' in data: - sa3 = selectSASEinXGM(data, xgm='XTD10_XGM', sase='sase3', npulses=npulses, - sase3First=sase3First).rename({'XGMbunchId':'sa3_pId'}).rename('XTD10_SA3') - mergeList.append(sa3) - sa1 = selectSASEinXGM(data, xgm='XTD10_XGM', sase='sase1', npulses=npulses, - sase3First=sase3First).rename({'XGMbunchId':'sa1_pId'}).rename('XTD10_SA1') + dropList.append('SCS_XGM') + keys.remove('SCS_XGM') + + if ("XTD10_SA3" not in data and "XTD10_XGM" in data): + #no SASE-resolved arrays available + sa3 = selectSASEinXGM(data, xgm='XTD10_XGM', sase='sase3', npulses=npulses, + sase3First=sase3First).rename({'XGMbunchId':'sa3_pId'}) + mergeList.append(sa3) + if not dedicated: + sa1 = selectSASEinXGM(data, xgm='XTD10_XGM', sase='sase1', + npulses=npulses, sase3First=sase3First).rename( + {'XGMbunchId':'sa1_pId'}) mergeList.append(sa1) - dropList.append('XTD10_XGM') - keys = [] - - else: - keys = ["XTD10_XGM", "XTD10_SA3", "XTD10_SA1", - "XTD10_XGM_sigma", "XTD10_SA3_sigma", "XTD10_SA1_sigma"] - keys += ["SCS_XGM", "SCS_SA3", "SCS_SA1", - "SCS_XGM_sigma", "SCS_SA3_sigma", "SCS_SA1_sigma"] + dropList.append('XTD10_XGM') + keys.remove('XTD10_XGM') for key in keys: if key not in data: @@ -168,6 +177,9 @@ def cleanXGMdata(data, npulses=None, sase3First=True): sase = 'sa3_' elif "sa1" in key.lower(): sase = 'sa1_' + if dedicated: + dropList.append(key) + continue else: dropList.append(key) continue @@ -176,7 +188,7 @@ def cleanXGMdata(data, npulses=None, sase3First=True): dropList.append(key) mergeList.append(res) mergeList.append(data.drop(dropList)) - subset = xr.merge(mergeList, join='outer') + subset = xr.merge(mergeList, join='inner') for k in data.attrs.keys(): subset.attrs[k] = data.attrs[k] return subset @@ -237,13 +249,51 @@ def selectSASEinXGM(data, sase='sase3', xgm='SCS_XGM', sase3First=True, npulses= dims=['trainId', 'bunchId'], coords={'trainId':data.trainId}, name='sase_all') - mask_sa1 = sa_all.sel(trainId=sa1.trainId).isin(sa1_val).rename({'bunchId':'XGMbunchId'}) - mask_sa3 = sa_all.sel(trainId=sa3.trainId).isin(sa3_val).rename({'bunchId':'XGMbunchId'}) + idxListAll, invAll = np.unique(sa_all.fillna(-1), axis=0, return_inverse=True) + idxList3 = np.unique(sa3) + idxList1 = np.unique(sa1) + + if sase=='sase3': + big_sa3 = [] + for i,idxXGM in enumerate(idxListAll): + idxXGM = np.isin(idxXGM, idxList3) + idxTid = invAll==i + mask = xr.DataArray(np.zeros((data.dims['trainId'], sa_all['bunchId'].shape[0]), dtype=bool), + dims=['trainId', 'XGMbunchId'], + coords={'trainId':data.trainId, + 'XGMbunchId':sa_all['bunchId'].values}, + name='mask') + mask[idxTid, idxXGM] = True + sa3 = xgm_arr.where(mask, drop=True) + if sa3.trainId.size > 0: + sa3 = sa3.assign_coords(XGMbunchId=np.arange(sa3.XGMbunchId.size)) + big_sa3.append(sa3) + if len(big_sa3) > 0: + da_sa3 = xr.concat(big_sa3, dim='trainId').rename('{}_SA3'.format(xgm.split('_')[0])) + else: + da_sa3 = xr.DataArray([], dims=['trainId'], name='{}_SA3'.format(xgm.split('_')[0])) + return da_sa3 + if sase=='sase1': - return xgm_arr.where(mask_sa1, drop=True).rename('{}_SA1'.format(xgm.split('_')[0])) - else: - return xgm_arr.where(mask_sa3, drop=True).rename('{}_SA3'.format(xgm.split('_')[0])) - + big_sa1 = [] + for i,idxXGM in enumerate(idxListAll): + idxXGM = np.isin(idxXGM, idxList1) + idxTid = invAll==i + mask = xr.DataArray(np.zeros((data.dims['trainId'], sa_all['bunchId'].shape[0]), dtype=bool), + dims=['trainId', 'XGMbunchId'], + coords={'trainId':data.trainId, + 'XGMbunchId':sa_all['bunchId'].values}, + name='mask') + mask[idxTid, idxXGM] = True + sa1 = xgm_arr.where(mask, drop=True) + if sa1.trainId.size > 0: + sa1 = sa1.assign_coords(XGMbunchId=np.arange(sa1.XGMbunchId.size)) + big_sa1.append(sa1) + if len(big_sa1) > 0: + da_sa1 = xr.concat(big_sa1, dim='trainId').rename('{}_SA1'.format(xgm.split('_')[0])) + else: + da_sa1 = xr.DataArray([], dims=['trainId'], name='{}_SA1'.format(xgm.split('_')[0])) + return da_sa1 def saseContribution(data, sase='sase1', xgm='XTD10_XGM'): ''' Calculate the relative contribution of SASE 1 or SASE 3 pulses @@ -539,14 +589,17 @@ def getTIMapd(data, mcp=1, use_apd=True, intstart=None, intstop=None, #2.1 case where apd is used: if use_apd: - return data[f'MCP{mcp}apd'].where(mask, drop=True) - + res = data[f'MCP{mcp}apd'].where(mask, drop=True) + res = res.assign_coords(apdId=np.arange(res['apdId'].shape[0])) + return res #2.2 case where integration is performed on raw trace: else: - peaks = mcpPeaks(data, intstart, intstop, bkgstart, bkgstop, mcp=mcp, - t_offset=period, npulses=data.dims['apdId']) + peaks = mcpPeaks(data, intstart, intstop, bkgstart, bkgstop, mcp=mcp, t_offset=period, + npulses=data.dims['apdId']) mask = mask.rename({'apdId':f'MCP{mcp}fromRaw'}) - return peaks.where(mask, drop=True) + res = peaks.where(mask, drop=True) + res = res.assign_coords({f'MCP{mcp}fromRaw':np.arange(res[f'MCP{mcp}fromRaw'].shape[0])}) + return res def calibrateTIM(data, rollingWindow=200, mcp=1, plot=False, use_apd=True, intstart=None, -- GitLab From b8e480218e5c4c0dd7406d1a8f62216abb2fe670 Mon Sep 17 00:00:00 2001 From: Laurent Mercadier <laurent.mercadier@xfel.eu> Date: Mon, 28 Oct 2019 17:56:32 +0100 Subject: [PATCH 6/7] Adds warning if apd parameters were wrong during acquisition --- xgm.py | 66 ++++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 41 insertions(+), 25 deletions(-) diff --git a/xgm.py b/xgm.py index fecea7d..85e0058 100644 --- a/xgm.py +++ b/xgm.py @@ -9,7 +9,6 @@ import matplotlib.pyplot as plt import numpy as np import xarray as xr - # Machine def pulsePatternInfo(data, plot=False): ''' display general information on the pulse patterns operated by SASE1 and SASE3. @@ -139,6 +138,7 @@ def cleanXGMdata(data, npulses=None, sase3First=True): drop=True) == 0): dedicated = True print('Dedicated trains, skip loading SASE 1 data.') + #pulse-resolved signals from XGMs keys = ["XTD10_XGM", "XTD10_SA3", "XTD10_SA1", "XTD10_XGM_sigma", "XTD10_SA3_sigma", "XTD10_SA1_sigma", "SCS_XGM", "SCS_SA3", "SCS_SA1", @@ -185,6 +185,9 @@ def cleanXGMdata(data, npulses=None, sase3First=True): continue res = data[key].where(data[key] != 1.0, drop=True).rename( {'XGMbunchId':'{}pId'.format(sase)}).rename(key) + res = res.assign_coords( + {f'{sase}pId':np.arange(res[f'{sase}pId'].shape[0])}) + dropList.append(key) mergeList.append(res) mergeList.append(data.drop(dropList)) @@ -567,39 +570,52 @@ def getTIMapd(data, mcp=1, use_apd=True, intstart=None, intstop=None, return tim #2. If bunch pattern available, define a mask that corresponds to the SASE 3 pulses - initialDelay = data.attrs['run'].get_array( - 'SCS_UTC1_ADQ/ADC/1', 'board1.apd.channel_0.initialDelay.value')[0].values - upperLimit = data.attrs['run'].get_array( - 'SCS_UTC1_ADQ/ADC/1', 'board1.apd.channel_0.upperLimit.value')[0].values - period = upperLimit - initialDelay #period of the apd in number of digitizer samples sa3 = data['sase3'].where(data['sase3']>1, drop=True) sa3 -= sa3[0,0] - sa3 = sa3/int(period/440) #440 = samples between two pulses @4.5 MHz with ADQ412 digitizer + #2.1 case where apd is used: + if use_apd: + pulseId = 'apdId' + pulseIdDim = data.dims['apdId'] + initialDelay = data.attrs['run'].get_array( + 'SCS_UTC1_ADQ/ADC/1', 'board1.apd.channel_0.initialDelay.value')[0].values + upperLimit = data.attrs['run'].get_array( + 'SCS_UTC1_ADQ/ADC/1', 'board1.apd.channel_0.upperLimit.value')[0].values + #440 = samples between two pulses @4.5 MHz with ADQ412 digitizer: + period = int((upperLimit - initialDelay)/440) + #display some warnings if apd parameters do not match pulse pattern: + period_from_bunch_pattern = int(np.nanmin(np.diff(sa3))) + if period > period_from_bunch_pattern: + print(f'Warning: apd parameter was set to record 1 pulse out of {period} @ 4.5 MHz ' + + f'but XFEL delivered 1 pulse out of {period_from_bunch_pattern}.') + maxPulses = data['npulses_sase3'].max().values + if period*pulseIdDim < period_from_bunch_pattern*maxPulses: + print(f'Warning: Number of pulses and/or rep. rate in apd parameters were set ' + + f'too low ({pulseIdDim})to record the {maxPulses} SASE 3 pulses') + peaks = data[f'MCP{mcp}apd'] + + #2.2 case where integration is performed on raw trace: + else: + pulseId = f'MCP{mcp}fromRaw' + pulseIdDim = int(np.max(sa3).values) + 1 + period = int(np.nanmin(np.diff(sa3))) + peaks = mcpPeaks(data, intstart, intstop, bkgstart, bkgstop, mcp=mcp, t_offset=period*440, + npulses=pulseIdDim) + + sa3 = sa3/period idxList, inv = np.unique(sa3, axis=0, return_inverse=True) - mask = xr.DataArray(np.zeros((data.dims['trainId'], data.dims['apdId']), dtype=bool), - dims=['trainId', 'apdId'], + mask = xr.DataArray(np.zeros((data.dims['trainId'], pulseIdDim), dtype=bool), + dims=['trainId', pulseId], coords={'trainId':data.trainId, - 'apdId':np.arange(data.dims['apdId'])}, - name='mask') + pulseId:np.arange(pulseIdDim)}) mask = mask.sel(trainId=sa3.trainId) for i,idxApd in enumerate(idxList): idxApd = idxApd[idxApd>=0].astype(int) idxTid = inv==i mask[idxTid, idxApd] = True - - #2.1 case where apd is used: - if use_apd: - res = data[f'MCP{mcp}apd'].where(mask, drop=True) - res = res.assign_coords(apdId=np.arange(res['apdId'].shape[0])) - return res - #2.2 case where integration is performed on raw trace: - else: - peaks = mcpPeaks(data, intstart, intstop, bkgstart, bkgstop, mcp=mcp, t_offset=period, - npulses=data.dims['apdId']) - mask = mask.rename({'apdId':f'MCP{mcp}fromRaw'}) - res = peaks.where(mask, drop=True) - res = res.assign_coords({f'MCP{mcp}fromRaw':np.arange(res[f'MCP{mcp}fromRaw'].shape[0])}) - return res + + peaks = peaks.where(mask, drop=True) + peaks = peaks.assign_coords({pulseId:np.arange(peaks[pulseId].shape[0])}) + return peaks def calibrateTIM(data, rollingWindow=200, mcp=1, plot=False, use_apd=True, intstart=None, -- GitLab From 660045ed0cc20fdcce528c56e36c6cdca4d7b8ab Mon Sep 17 00:00:00 2001 From: Laurent Mercadier <laurent.mercadier@xfel.eu> Date: Tue, 29 Oct 2019 09:46:35 +0100 Subject: [PATCH 7/7] Adds documentation, simplifies selectSASEfromXGM() --- xgm.py | 97 +++++++++++++++++++++++++--------------------------------- 1 file changed, 41 insertions(+), 56 deletions(-) diff --git a/xgm.py b/xgm.py index 85e0058..61eba7e 100644 --- a/xgm.py +++ b/xgm.py @@ -199,11 +199,9 @@ def cleanXGMdata(data, npulses=None, sase3First=True): def selectSASEinXGM(data, sase='sase3', xgm='SCS_XGM', sase3First=True, npulses=None): ''' Given an array containing both SASE1 and SASE3 data, extracts SASE1- - or SASE3-only XGM data. - There are various cases depending on i) the mode of operation (10 Hz - with fresh bunch, dedicated trains to one SASE, pulse on demand), - ii) the potential change of number of pulses per train in each SASE - and iii) the order (SASE1 first, SASE3 first, interleaved mode). + or SASE3-only XGM data. The function tracks the changes of bunch patterns + in sase 1 and sase 3 and applies a mask to the XGM array to extract the + relevant pulses. This way, all complicated patterns are accounted for. Inputs: data: xarray Dataset containing xgm data @@ -240,6 +238,7 @@ def selectSASEinXGM(data, sase='sase3', xgm='SCS_XGM', sase3First=True, npulses= return xgmData[:,start:start+npulses] #2. case where bunch pattern is provided + #2.1 Merge sase1 and sase3 bunch patterns to get indices of all pulses xgm_arr = data[xgm].where(data[xgm] != 1., drop=True) sa3 = data['sase3'].where(data['sase3'] > 1, drop=True) sa3_val=np.unique(sa3) @@ -252,51 +251,38 @@ def selectSASEinXGM(data, sase='sase3', xgm='SCS_XGM', sase3First=True, npulses= dims=['trainId', 'bunchId'], coords={'trainId':data.trainId}, name='sase_all') - idxListAll, invAll = np.unique(sa_all.fillna(-1), axis=0, return_inverse=True) - idxList3 = np.unique(sa3) - idxList1 = np.unique(sa1) - if sase=='sase3': - big_sa3 = [] - for i,idxXGM in enumerate(idxListAll): - idxXGM = np.isin(idxXGM, idxList3) - idxTid = invAll==i - mask = xr.DataArray(np.zeros((data.dims['trainId'], sa_all['bunchId'].shape[0]), dtype=bool), - dims=['trainId', 'XGMbunchId'], - coords={'trainId':data.trainId, - 'XGMbunchId':sa_all['bunchId'].values}, - name='mask') - mask[idxTid, idxXGM] = True - sa3 = xgm_arr.where(mask, drop=True) - if sa3.trainId.size > 0: - sa3 = sa3.assign_coords(XGMbunchId=np.arange(sa3.XGMbunchId.size)) - big_sa3.append(sa3) - if len(big_sa3) > 0: - da_sa3 = xr.concat(big_sa3, dim='trainId').rename('{}_SA3'.format(xgm.split('_')[0])) - else: - da_sa3 = xr.DataArray([], dims=['trainId'], name='{}_SA3'.format(xgm.split('_')[0])) - return da_sa3 - - if sase=='sase1': - big_sa1 = [] - for i,idxXGM in enumerate(idxListAll): - idxXGM = np.isin(idxXGM, idxList1) - idxTid = invAll==i - mask = xr.DataArray(np.zeros((data.dims['trainId'], sa_all['bunchId'].shape[0]), dtype=bool), - dims=['trainId', 'XGMbunchId'], - coords={'trainId':data.trainId, - 'XGMbunchId':sa_all['bunchId'].values}, - name='mask') - mask[idxTid, idxXGM] = True - sa1 = xgm_arr.where(mask, drop=True) - if sa1.trainId.size > 0: - sa1 = sa1.assign_coords(XGMbunchId=np.arange(sa1.XGMbunchId.size)) - big_sa1.append(sa1) - if len(big_sa1) > 0: - da_sa1 = xr.concat(big_sa1, dim='trainId').rename('{}_SA1'.format(xgm.split('_')[0])) - else: - da_sa1 = xr.DataArray([], dims=['trainId'], name='{}_SA1'.format(xgm.split('_')[0])) - return da_sa1 + idxListSase = np.unique(sa3) + newName = xgm.split('_')[0] + '_SA3' + else: + idxListSase = np.unique(sa1) + newName = xgm.split('_')[0] + '_SA1' + + #2.2 track the changes of pulse patterns and the indices at which they occured (invAll) + idxListAll, invAll = np.unique(sa_all.fillna(-1), axis=0, return_inverse=True) + + #2.3 define a mask, loop over the different patterns and update the mask for each pattern + mask = xr.DataArray(np.zeros((data.dims['trainId'], sa_all['bunchId'].shape[0]), dtype=bool), + dims=['trainId', 'XGMbunchId'], + coords={'trainId':data.trainId, + 'XGMbunchId':sa_all['bunchId'].values}, + name='mask') + + big_sase = [] + for i,idxXGM in enumerate(idxListAll): + mask.values = np.zeros(mask.shape, dtype=bool) + idxXGM = np.isin(idxXGM, idxListSase) + idxTid = invAll==i + mask[idxTid, idxXGM] = True + sa_arr = xgm_arr.where(mask, drop=True) + if sa_arr.trainId.size > 0: + sa_arr = sa_arr.assign_coords(XGMbunchId=np.arange(sa_arr.XGMbunchId.size)) + big_sase.append(sa_arr) + if len(big_sase) > 0: + da_sase = xr.concat(big_sase, dim='trainId').rename(newName) + else: + da_sase = xr.DataArray([], dims=['trainId'], name=newName) + return da_sase def saseContribution(data, sase='sase1', xgm='XTD10_XGM'): ''' Calculate the relative contribution of SASE 1 or SASE 3 pulses @@ -533,9 +519,8 @@ def getTIMapd(data, mcp=1, use_apd=True, intstart=None, intstop=None, ''' Extract peak-integrated data from TIM where pulses are from SASE3 only. If use_apd is False it calculates integration from raw traces. The missing values, in case of change of number of pulses, are filled - with NaNs. - If no bunch pattern info is available, the function assumes that - SASE 3 comes first and that the number of pulses is fixed in both + with NaNs. If no bunch pattern info is available, the function assumes + that SASE 3 comes first and that the number of pulses is fixed in both SASE 1 and 3. Inputs: @@ -544,7 +529,7 @@ def getTIMapd(data, mcp=1, use_apd=True, intstart=None, intstop=None, intstop: trace index of integration stop bkgstart: trace index of background start bkgstop: trace index of background stop - t_offset: index separation between two pulses + t_offset: number of ADC samples between two pulses mcp: MCP channel number npulses: int, optional. Number of pulses to compute. Required if no bunch pattern info is available. @@ -602,6 +587,7 @@ def getTIMapd(data, mcp=1, use_apd=True, intstart=None, intstop=None, npulses=pulseIdDim) sa3 = sa3/period + #2.3 track the changes of pulse patterns and the indices at which they occured (invAll) idxList, inv = np.unique(sa3, axis=0, return_inverse=True) mask = xr.DataArray(np.zeros((data.dims['trainId'], pulseIdDim), dtype=bool), dims=['trainId', pulseId], @@ -993,11 +979,10 @@ def mergeFastAdcPeaks(data, channel, intstart, intstop, bkgstart, bkgstop, intstop: trace index of integration stop bkgstart: trace index of background start bkgstop: trace index of background stop - period: Number of samples separation between two pulses. Needed + period: Number of ADC samples between two pulses. Needed if bunch pattern info is not available. If None, checks the pulse pattern and determine the period assuming a resolution - of 9.23 ns per sample which leads to 24 samples between - two bunches @ 4.5 MHz. + of 9.23 ns per sample = 24 samples between two pulses @ 4.5 MHz. npulses: number of pulses. If None, takes the maximum number of pulses according to the bunch patter (field 'npulses_sase3') dim: name of the xr dataset dimension along the peaks -- GitLab