diff --git a/xgm.py b/xgm.py index 400ae78eda4661fdc424216a1a50e9898f026e87..61eba7e7a3c26304ef90ecd8da298ff0dbaa3d24 100644 --- a/xgm.py +++ b/xgm.py @@ -9,7 +9,6 @@ import matplotlib.pyplot as plt import numpy as np import xarray as xr - # Machine def pulsePatternInfo(data, plot=False): ''' display general information on the pulse patterns operated by SASE1 and SASE3. @@ -120,7 +119,7 @@ def cleanXGMdata(data, npulses=None, sase3First=True): ''' Cleans the XGM data arrays obtained from load() function. The XGM "TD" data arrays have arbitrary size of 1000 and default value 1.0 when there is no pulse. This function sorts the SASE 1 and SASE 3 pulses. - For recent DAQ runs, sase-resolved arrays can be used. For older runs, + For DAQ runs after April 2019, sase-resolved arrays can be used. For older runs, the function selectSASEinXGM can be used to extract sase-resolved pulses. Inputs: data: xarray Dataset containing XGM TD arrays. @@ -133,33 +132,43 @@ def cleanXGMdata(data, npulses=None, sase3First=True): ''' dropList = [] mergeList = [] - if ("XTD10_SA3" not in data and "XTD10_XGM" in data) or ( - "SCS_SA3" not in data and "SCS_XGM" in data): + dedicated = False + if 'sase3' in data: + if np.all(data['npulses_sase1'].where(data['npulses_sase3'] !=0, + drop=True) == 0): + dedicated = True + print('Dedicated trains, skip loading SASE 1 data.') + #pulse-resolved signals from XGMs + keys = ["XTD10_XGM", "XTD10_SA3", "XTD10_SA1", + "XTD10_XGM_sigma", "XTD10_SA3_sigma", "XTD10_SA1_sigma", + "SCS_XGM", "SCS_SA3", "SCS_SA1", + "SCS_XGM_sigma", "SCS_SA3_sigma", "SCS_SA1_sigma"] + + if ("SCS_SA3" not in data and "SCS_XGM" in data): #no SASE-resolved arrays available - if 'SCS_XGM' in data: - sa3 = selectSASEinXGM(data, xgm='SCS_XGM', sase='sase3', npulses=npulses, - sase3First=sase3First).rename({'XGMbunchId':'sa3_pId'}).rename('SCS_SA3') - mergeList.append(sa3) - sa1 = selectSASEinXGM(data, xgm='SCS_XGM', sase='sase1', npulses=npulses, - sase3First=sase3First).rename({'XGMbunchId':'sa1_pId'}).rename('SCS_SA1') + sa3 = selectSASEinXGM(data, xgm='SCS_XGM', sase='sase3', npulses=npulses, + sase3First=sase3First).rename({'XGMbunchId':'sa3_pId'}) + mergeList.append(sa3) + if not dedicated: + sa1 = selectSASEinXGM(data, xgm='SCS_XGM', sase='sase1', + npulses=npulses, sase3First=sase3First).rename( + {'XGMbunchId':'sa1_pId'}) mergeList.append(sa1) - dropList.append('SCS_XGM') + dropList.append('SCS_XGM') + keys.remove('SCS_XGM') - if 'XTD10_XGM' in data: - sa3 = selectSASEinXGM(data, xgm='XTD10_XGM', sase='sase3', npulses=npulses, - sase3First=sase3First).rename({'XGMbunchId':'sa3_pId'}).rename('XTD10_SA3') - mergeList.append(sa3) - sa1 = selectSASEinXGM(data, xgm='XTD10_XGM', sase='sase1', npulses=npulses, - sase3First=sase3First).rename({'XGMbunchId':'sa1_pId'}).rename('XTD10_SA1') + if ("XTD10_SA3" not in data and "XTD10_XGM" in data): + #no SASE-resolved arrays available + sa3 = selectSASEinXGM(data, xgm='XTD10_XGM', sase='sase3', npulses=npulses, + sase3First=sase3First).rename({'XGMbunchId':'sa3_pId'}) + mergeList.append(sa3) + if not dedicated: + sa1 = selectSASEinXGM(data, xgm='XTD10_XGM', sase='sase1', + npulses=npulses, sase3First=sase3First).rename( + {'XGMbunchId':'sa1_pId'}) mergeList.append(sa1) - dropList.append('XTD10_XGM') - keys = [] - - else: - keys = ["XTD10_XGM", "XTD10_SA3", "XTD10_SA1", - "XTD10_XGM_sigma", "XTD10_SA3_sigma", "XTD10_SA1_sigma"] - keys += ["SCS_XGM", "SCS_SA3", "SCS_SA1", - "SCS_XGM_sigma", "SCS_SA3_sigma", "SCS_SA1_sigma"] + dropList.append('XTD10_XGM') + keys.remove('XTD10_XGM') for key in keys: if key not in data: @@ -168,11 +177,17 @@ def cleanXGMdata(data, npulses=None, sase3First=True): sase = 'sa3_' elif "sa1" in key.lower(): sase = 'sa1_' + if dedicated: + dropList.append(key) + continue else: dropList.append(key) continue res = data[key].where(data[key] != 1.0, drop=True).rename( {'XGMbunchId':'{}pId'.format(sase)}).rename(key) + res = res.assign_coords( + {f'{sase}pId':np.arange(res[f'{sase}pId'].shape[0])}) + dropList.append(key) mergeList.append(res) mergeList.append(data.drop(dropList)) @@ -183,16 +198,15 @@ def cleanXGMdata(data, npulses=None, sase3First=True): def selectSASEinXGM(data, sase='sase3', xgm='SCS_XGM', sase3First=True, npulses=None): - ''' Extract SASE1- or SASE3-only XGM data. - There are various cases depending on i) the mode of operation (10 Hz - with fresh bunch, dedicated trains to one SASE, pulse on demand), - ii) the potential change of number of pulses per train in each SASE - and iii) the order (SASE1 first, SASE3 first, interleaved mode). + ''' Given an array containing both SASE1 and SASE3 data, extracts SASE1- + or SASE3-only XGM data. The function tracks the changes of bunch patterns + in sase 1 and sase 3 and applies a mask to the XGM array to extract the + relevant pulses. This way, all complicated patterns are accounted for. Inputs: data: xarray Dataset containing xgm data sase: key of sase to select: {'sase1', 'sase3'} - xgm: key of xgm to select: {'SA3_XGM', 'SCS_XGM'} + xgm: key of xgm to select: {'XTD10_XGM', 'SCS_XGM'} sase3First: bool, optional. Used in case no bunch pattern was recorded npulses: int, optional. Required in case no bunch pattern was recorded. @@ -202,6 +216,7 @@ def selectSASEinXGM(data, sase='sase3', xgm='SCS_XGM', sase3First=True, npulses= that sase in the run. The missing values, in case of change of number of pulses, are filled with NaNs. ''' + #1. case where bunch pattern is missing: if sase not in data: print('Missing bunch pattern info!') if npulses is None: @@ -221,108 +236,53 @@ def selectSASEinXGM(data, sase='sase3', xgm='SCS_XGM', sase3First=True, npulses= else: start=xgmData.shape[1]-npulses return xgmData[:,start:start+npulses] - result = None - npulses_sa3 = data['npulses_sase3'] - npulses_sa1 = data['npulses_sase1'] - dedicated = 0 - if np.all(npulses_sa1.where(npulses_sa3 !=0, drop=True) == 0): - dedicated += 1 - print('No SASE 1 pulses during SASE 3 operation') - if np.all(npulses_sa3.where(npulses_sa1 !=0, drop=True) == 0): - dedicated += 1 - print('No SASE 3 pulses during SASE 1 operation') - #Alternating pattern with dedicated pulses in SASE1 and SASE3: - if dedicated==2: - if sase=='sase1': - result = data[xgm].where(npulses_sa1>0, drop=True)[:,:npulses_sa1.max().values] - else: - result = data[xgm].where(npulses_sa3>0, drop=True)[:,:npulses_sa3.max().values] - result = result.where(result != 1.0) - return result - # SASE1 and SASE3 bunches in a same train: find minimum indices of first and - # maximum indices of last pulse per train + + #2. case where bunch pattern is provided + #2.1 Merge sase1 and sase3 bunch patterns to get indices of all pulses + xgm_arr = data[xgm].where(data[xgm] != 1., drop=True) + sa3 = data['sase3'].where(data['sase3'] > 1, drop=True) + sa3_val=np.unique(sa3) + sa3_val = sa3_val[~np.isnan(sa3_val)] + sa1 = data['sase1'].where(data['sase1'] > 1, drop=True) + sa1_val=np.unique(sa1) + sa1_val = sa1_val[~np.isnan(sa1_val)] + sa_all = xr.concat([sa1, sa3], dim='bunchId').rename('sa_all') + sa_all = xr.DataArray(np.sort(sa_all)[:,:xgm_arr['XGMbunchId'].shape[0]], + dims=['trainId', 'bunchId'], + coords={'trainId':data.trainId}, + name='sase_all') + if sase=='sase3': + idxListSase = np.unique(sa3) + newName = xgm.split('_')[0] + '_SA3' else: - pulseIdmin_sa1 = data['sase1'].where(npulses_sa1 != 0).where(data['sase1']>1).min().values - pulseIdmax_sa1 = data['sase1'].where(npulses_sa1 != 0).where(data['sase1']>1).max().values - pulseIdmin_sa3 = data['sase3'].where(npulses_sa3 != 0).where(data['sase3']>1).min().values - pulseIdmax_sa3 = data['sase3'].where(npulses_sa3 != 0).where(data['sase3']>1).max().values - if pulseIdmin_sa1 > pulseIdmax_sa3: - sa3First = True - elif pulseIdmin_sa3 > pulseIdmax_sa1: - sa3First = False - else: - print('Interleaved mode, but no sase-dedicated XGM data loaded.') - saseStr = 'SA{}'.format(sase[4]) - xgmStr = xgm.split('_')[0] - print('Loading {}_{} data...'.format(xgmStr, saseStr)) - try: - if npulses == None: - npulses = data['npulses_sase{}'.format(sase[4])].max().values - if xgmStr == 'XTD10': - source = 'SA3_XTD10_XGM/XGM/DOOCS:output' - if xgmStr == 'SCS': - source = 'SCS_BLU_XGM/XGM/DOOCS:output' - key = 'data.intensitySa{}TD'.format(sase[4]) - result = data.attrs['run'].get_array(source, key, extra_dims=['XGMbunchId']) - result = result.isel(XGMbunchId=slice(0, npulses)) - return result - except: - print('Could not load {}_{} data. '.format(xgmStr, saseStr) + - 'Interleaved mode and no sase-dedicated data is not yet supported.') - - #take the derivative along the trainId to track changes in pulse number: - diff = npulses_sa3.diff(dim='trainId') - #only keep trainIds where a change occured: - diff = diff.where(diff != 0, drop=True) - #get a list of indices where a change occured: - idx_change_sa3 = np.argwhere(np.isin(npulses_sa3.trainId.values, - diff.trainId.values, assume_unique=True))[:,0] - - #Same for SASE 1: - diff = npulses_sa1.diff(dim='trainId') - diff = diff.where(diff !=0, drop=True) - idx_change_sa1 = np.argwhere(np.isin(npulses_sa1.trainId.values, - diff.trainId.values, assume_unique=True))[:,0] - - #create index that locates all changes of pulse number in both SASE1 and 3: - #add index 0 to get the initial pulse number per train: - idx_change = np.unique(np.concatenate(([0], idx_change_sa3, idx_change_sa1))).astype(int) - if sase=='sase1': - npulses = npulses_sa1 - maxpulses = int(npulses_sa1.max().values) + idxListSase = np.unique(sa1) + newName = xgm.split('_')[0] + '_SA1' + + #2.2 track the changes of pulse patterns and the indices at which they occured (invAll) + idxListAll, invAll = np.unique(sa_all.fillna(-1), axis=0, return_inverse=True) + + #2.3 define a mask, loop over the different patterns and update the mask for each pattern + mask = xr.DataArray(np.zeros((data.dims['trainId'], sa_all['bunchId'].shape[0]), dtype=bool), + dims=['trainId', 'XGMbunchId'], + coords={'trainId':data.trainId, + 'XGMbunchId':sa_all['bunchId'].values}, + name='mask') + + big_sase = [] + for i,idxXGM in enumerate(idxListAll): + mask.values = np.zeros(mask.shape, dtype=bool) + idxXGM = np.isin(idxXGM, idxListSase) + idxTid = invAll==i + mask[idxTid, idxXGM] = True + sa_arr = xgm_arr.where(mask, drop=True) + if sa_arr.trainId.size > 0: + sa_arr = sa_arr.assign_coords(XGMbunchId=np.arange(sa_arr.XGMbunchId.size)) + big_sase.append(sa_arr) + if len(big_sase) > 0: + da_sase = xr.concat(big_sase, dim='trainId').rename(newName) else: - npulses = npulses_sa3 - maxpulses = int(npulses_sa3.max().values) - for i,k in enumerate(idx_change): - #skip if no pulses after the change: - if npulses[idx_change[i]]==0: - continue - #calculate indices - if sa3First: - a = 0 - b = int(npulses_sa3[k].values) - c = b - d = int(c + npulses_sa1[k].values) - else: - a = int(npulses_sa1[k].values) - b = int(a + npulses_sa3[k].values) - c = 0 - d = a - if sase=='sase1': - a = c - b = d - if i==len(idx_change)-1: - l = None - else: - l = idx_change[i+1] - temp = data[xgm][k:l,a:a+maxpulses].copy() - temp[:,b:] = np.NaN - if result is None: - result = temp - else: - result = xr.concat([result, temp], dim='trainId') - return result - + da_sase = xr.DataArray([], dims=['trainId'], name=newName) + return da_sase def saseContribution(data, sase='sase1', xgm='XTD10_XGM'): ''' Calculate the relative contribution of SASE 1 or SASE 3 pulses @@ -518,7 +478,7 @@ def mcpPeaks(data, intstart, intstop, bkgstart, bkgstop, mcp=1, t_offset=None, n pattern and determine the t_offset assuming mininum pulse separation of 220 ns and digitizer resolution of 2 GHz. npulses: number of pulses. If None, takes the maximum number of - pulses according to the bunch patter (field 'npulses_sase3') + pulses according to the bunch pattern (field 'npulses_sase3') Output: results: DataArray with dims trainId x max(sase3 pulses) @@ -539,13 +499,15 @@ def mcpPeaks(data, intstart, intstop, bkgstart, bkgstop, mcp=1, t_offset=None, n t_offset = 440 * step else: t_offset = 1 - results = xr.DataArray(np.empty((data.trainId.shape[0], npulses)), coords=data[keyraw].coords, + results = xr.DataArray(np.zeros((data.trainId.shape[0], npulses)), coords=data[keyraw].coords, dims=['trainId', 'MCP{}fromRaw'.format(mcp)]) for i in range(npulses): a = intstart + t_offset*i b = intstop + t_offset*i bkga = bkgstart + t_offset*i bkgb = bkgstop + t_offset*i + if b > data.dims['samplesId']: + break bg = np.outer(np.median(data[keyraw][:,bkga:bkgb], axis=1), np.ones(b-a)) results[:,i] = np.trapz(data[keyraw][:,a:b] - bg, axis=1) return results @@ -557,9 +519,8 @@ def getTIMapd(data, mcp=1, use_apd=True, intstart=None, intstop=None, ''' Extract peak-integrated data from TIM where pulses are from SASE3 only. If use_apd is False it calculates integration from raw traces. The missing values, in case of change of number of pulses, are filled - with NaNs. - If no bunch pattern info is available, the function assumes that - SASE 3 comes first and that the number of pulses is fixed in both + with NaNs. If no bunch pattern info is available, the function assumes + that SASE 3 comes first and that the number of pulses is fixed in both SASE 1 and 3. Inputs: @@ -568,7 +529,7 @@ def getTIMapd(data, mcp=1, use_apd=True, intstart=None, intstop=None, intstop: trace index of integration stop bkgstart: trace index of background start bkgstop: trace index of background stop - t_offset: index separation between two pulses + t_offset: number of ADC samples between two pulses mcp: MCP channel number npulses: int, optional. Number of pulses to compute. Required if no bunch pattern info is available. @@ -578,6 +539,7 @@ def getTIMapd(data, mcp=1, use_apd=True, intstart=None, intstop=None, tim: DataArray of shape trainId only for SASE3 pulses x N with N=max(number of pulses per train) ''' + #1. case where no bunch pattern is available: if 'sase3' not in data: print('Missing bunch pattern info!\n') if npulses is None: @@ -586,68 +548,60 @@ def getTIMapd(data, mcp=1, use_apd=True, intstart=None, intstop=None, print('Retrieving {} SASE 3 pulses assuming that '.format(npulses) + 'SASE 3 pulses come first.') if use_apd: - tim = data['MCP{}apd'.format(mcp)][:,:npulses:stride] + tim = data[f'MCP{mcp}apd'][:,:npulses:stride] else: tim = mcpPeaks(data, intstart, intstop, bkgstart, bkgstop, mcp=mcp, t_offset=t_offset, npulses=npulses) return tim + #2. If bunch pattern available, define a mask that corresponds to the SASE 3 pulses sa3 = data['sase3'].where(data['sase3']>1, drop=True) - npulses_sa3 = data['npulses_sase3'] - maxpulses = int(npulses_sa3.max().values) - if npulses is not None: - maxpulses = np.min([npulses, maxpulses]) - step = 1 - if maxpulses > 1: - #Calculate the number of non-lasing pulses between two lasing pulses (step) - step = sa3.where(data['npulses_sase3']>1, drop=True)[0,:2].values - step = int(step[1] - step[0]) + sa3 -= sa3[0,0] + #2.1 case where apd is used: if use_apd: - apd = data['MCP{}apd'.format(mcp)] + pulseId = 'apdId' + pulseIdDim = data.dims['apdId'] initialDelay = data.attrs['run'].get_array( 'SCS_UTC1_ADQ/ADC/1', 'board1.apd.channel_0.initialDelay.value')[0].values upperLimit = data.attrs['run'].get_array( 'SCS_UTC1_ADQ/ADC/1', 'board1.apd.channel_0.upperLimit.value')[0].values - nsamples = upperLimit - initialDelay - npulses_per_apd = int(nsamples/440) - sa3 /= npulses_per_apd + #440 = samples between two pulses @4.5 MHz with ADQ412 digitizer: + period = int((upperLimit - initialDelay)/440) + #display some warnings if apd parameters do not match pulse pattern: + period_from_bunch_pattern = int(np.nanmin(np.diff(sa3))) + if period > period_from_bunch_pattern: + print(f'Warning: apd parameter was set to record 1 pulse out of {period} @ 4.5 MHz ' + + f'but XFEL delivered 1 pulse out of {period_from_bunch_pattern}.') + maxPulses = data['npulses_sase3'].max().values + if period*pulseIdDim < period_from_bunch_pattern*maxPulses: + print(f'Warning: Number of pulses and/or rep. rate in apd parameters were set ' + + f'too low ({pulseIdDim})to record the {maxPulses} SASE 3 pulses') + peaks = data[f'MCP{mcp}apd'] + + #2.2 case where integration is performed on raw trace: else: - apd = mcpPeaks(data, intstart, intstop, bkgstart, bkgstop, mcp=mcp, - t_offset=t_offset, npulses=npulses) - sa3 /= step - sa3 -= sa3[:,0] - sa3 = sa3.astype(int) - if np.all(npulses_sa3 == npulses_sa3[0]): - tim = apd[:, sa3[0].values[:maxpulses]] - return tim + pulseId = f'MCP{mcp}fromRaw' + pulseIdDim = int(np.max(sa3).values) + 1 + period = int(np.nanmin(np.diff(sa3))) + peaks = mcpPeaks(data, intstart, intstop, bkgstart, bkgstop, mcp=mcp, t_offset=period*440, + npulses=pulseIdDim) - stride = 1 - if use_apd: - stride = np.max([stride,int(step/npulses_per_apd)]) - diff = npulses_sa3.diff(dim='trainId') - #only keep trainIds where a change occured: - diff = diff.where(diff != 0, drop=True) - #get a list of indices where a change occured: - idx_change = np.argwhere(np.isin(npulses_sa3.trainId.values, - diff.trainId.values, assume_unique=True))[:,0] - #add index 0 to get the initial pulse number per train: - idx_change = np.insert(idx_change, 0, 0) - tim = None - for i,idx in enumerate(idx_change): - if npulses_sa3[idx]==0: - continue - if i==len(idx_change)-1: - l = None - else: - l = idx_change[i+1] - b = npulses_sa3[idx].values - temp = apd[idx:l,:maxpulses*stride:stride].copy() - temp[:,b:] = np.NaN - if tim is None: - tim = temp - else: - tim = xr.concat([tim, temp], dim='trainId') - return tim + sa3 = sa3/period + #2.3 track the changes of pulse patterns and the indices at which they occured (invAll) + idxList, inv = np.unique(sa3, axis=0, return_inverse=True) + mask = xr.DataArray(np.zeros((data.dims['trainId'], pulseIdDim), dtype=bool), + dims=['trainId', pulseId], + coords={'trainId':data.trainId, + pulseId:np.arange(pulseIdDim)}) + mask = mask.sel(trainId=sa3.trainId) + for i,idxApd in enumerate(idxList): + idxApd = idxApd[idxApd>=0].astype(int) + idxTid = inv==i + mask[idxTid, idxApd] = True + + peaks = peaks.where(mask, drop=True) + peaks = peaks.assign_coords({pulseId:np.arange(peaks[pulseId].shape[0])}) + return peaks def calibrateTIM(data, rollingWindow=200, mcp=1, plot=False, use_apd=True, intstart=None, @@ -943,7 +897,7 @@ def matchXgmTimPulseId(data, use_apd=True, intstart=None, intstop=None, ndata = cleanXGMdata(data, npulses, sase3First) for mcp in range(1,5): if 'MCP{}apd'.format(mcp) in data or 'MCP{}raw'.format(mcp) in data: - MCPapd = getTIMapd(ndata, mcp=mcp, use_apd=use_apd, intstart=intstart, + MCPapd = getTIMapd(data, mcp=mcp, use_apd=use_apd, intstart=intstart, intstop=intstop,bkgstart=bkgstart, bkgstop=bkgstop, t_offset=t_offset, npulses=npulses, stride=stride).rename('MCP{}apd'.format(mcp)) @@ -1025,11 +979,10 @@ def mergeFastAdcPeaks(data, channel, intstart, intstop, bkgstart, bkgstop, intstop: trace index of integration stop bkgstart: trace index of background start bkgstop: trace index of background stop - period: Number of samples separation between two pulses. Needed + period: Number of ADC samples between two pulses. Needed if bunch pattern info is not available. If None, checks the pulse pattern and determine the period assuming a resolution - of 9.23 ns per sample which leads to 24 samples between - two bunches @ 4.5 MHz. + of 9.23 ns per sample = 24 samples between two pulses @ 4.5 MHz. npulses: number of pulses. If None, takes the maximum number of pulses according to the bunch patter (field 'npulses_sase3') dim: name of the xr dataset dimension along the peaks