From 8d294e9f2069f0edce9cb9188e94613a5c4671ea Mon Sep 17 00:00:00 2001
From: Laurent Mercadier <laurent.mercadier@xfel.eu>
Date: Fri, 19 Mar 2021 09:40:27 +0100
Subject: [PATCH] Changed arguments of get_tim_peaks and get_fast_adc_peaks to
 take DataCollection, flake8

---
 src/toolbox_scs/detectors/digitizers.py | 326 +++++++++++++-----------
 1 file changed, 184 insertions(+), 142 deletions(-)

diff --git a/src/toolbox_scs/detectors/digitizers.py b/src/toolbox_scs/detectors/digitizers.py
index 0bb2662..5057429 100644
--- a/src/toolbox_scs/detectors/digitizers.py
+++ b/src/toolbox_scs/detectors/digitizers.py
@@ -1,8 +1,8 @@
 """ Digitizers related sub-routines
     Copyright (2021) SCS Team.
-
-    (contributions preferrably comply with pep8 code structure
+
+    (contributions preferably comply with pep8 code structure
     guidelines.)
 """
@@ -20,15 +20,16 @@ from ..misc.bunch_pattern_external import is_sase_1, is_sase_3, is_ppl

 log = logging.getLogger(__name__)

+
 def peaks_from_raw_trace(traces,
-                        intstart,
-                        intstop,
-                        bkgstart,
-                        bkgstop,
-                        period,
-                        npulses,
-                        extra_dim='pulseId'
-                        ):
+                         intstart,
+                         intstop,
+                         bkgstart,
+                         bkgstop,
+                         period,
+                         npulses,
+                         extra_dim='pulseId'
+                         ):
     """
     Computes peaks from raw digitizer traces by trapezoidal integration.

@@ -38,7 +39,7 @@ def peaks_from_raw_trace(traces,
         numpy array is provided, the second dimension is that of the samples.
     intstart: int or list or 1D-numpy array
         trace index of integration start. If 1d array, each value is the start
-        of one peak. The period and npulses parameters are ignored.
+        of one peak. The period and npulses parameters are ignored.
     intstop: int
         trace index of integration stop
     bkgstart: int
@@ -56,10 +57,10 @@ def peaks_from_raw_trace(traces,
     -------
     xarray DataArray
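+
+    Example
+    -------
+    A minimal sketch on synthetic data (all numbers are illustrative):
+    >>> import numpy as np
+    >>> traces = np.zeros((5, 2000))        # 5 trains, 2000 samples
+    >>> traces[:, 1000:1020] = 1.           # one rectangular pulse
+    >>> peaks = peaks_from_raw_trace(traces, intstart=990, intstop=1030,
+    ...                              bkgstart=900, bkgstop=980,
+    ...                              period=440, npulses=1)
+    >>> area = float(peaks[0, 0])           # background-subtracted area, ~20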
     """
-
-    assert len(traces.shape)==2
-
-    if type(traces) is xr.DataArray:
+
+    assert len(traces.shape) == 2
+
+    if isinstance(traces, xr.DataArray):
         ntid = traces.sizes['trainId']
         coords = traces.coords
         traces = traces.values
@@ -67,7 +68,7 @@ def peaks_from_raw_trace(traces,
         traces = traces.T
     else:
         coords = None
-
+
     if hasattr(intstart, '__len__'):
         intstart = np.array(intstart)
         pulses = intstart - intstart[0]
@@ -78,19 +79,19 @@ def peaks_from_raw_trace(traces,
     results = xr.DataArray(np.empty((traces.shape[0], len(pulses))),
                            coords=coords,
                            dims=['trainId', extra_dim])
-
-    for i,p in enumerate(pulses):
+
+    for i, p in enumerate(pulses):
         a = intstart + p
         b = intstop + p
         bkga = bkgstart + p
         bkgb = bkgstop + p
         if b > traces.shape[1]:
             break
-        bg = np.outer(np.median(traces[:,bkga:bkgb], axis=1),
+        bg = np.outer(np.median(traces[:, bkga:bkgb], axis=1),
                       np.ones(b-a))
-        results[:,i] = np.trapz(traces[:,a:b] - bg, axis=1)
+        results[:, i] = np.trapz(traces[:, a:b] - bg, axis=1)
     return results
-
+

 def get_peaks(run,
               data=None,
@@ -113,7 +114,7 @@ def get_peaks(run,
         DataCollection containing the digitizer data
     data: xarray DataArray or str
         array containing the raw traces or peak-integrated values from the
-        digitizer. If str, must be one of the ToolBox mnemonics. If None,
+        digitizer. If str, must be one of the ToolBox mnemonics. If None,
         the data is loaded via the source and key arguments.
     source: str
         Name of digitizer source, e.g. 'SCS_UTC1_ADQ/ADC/1:network'. Only
@@ -137,7 +138,7 @@ def get_peaks(run,
         bunch pattern is missing.
     bunchPattern: string or dict
         match the peaks to the bunch pattern: 'sase1', 'sase3', 'scs_ppl'.
-        This will dictate the name of the pulse ID coordinates: 'sa1_pId',
-        'sa3_pId' or 'scs_ppl'. Alternatively, a dict with source, key and
+        This will dictate the name of the pulse ID coordinates: 'sa1_pId',
+        'sa3_pId' or 'ol_pId'. Alternatively, a dict with source, key and
        pattern can be provided, e.g. {'source':'SCS_RR_UTC/TSYS/TIMESERVER',
        'key':'bunchPatternTable.value', 'pattern':'sase3'}
@@ -147,7 +148,7 @@ def get_peaks(run,
     indices: array, slice
         indices from the peak-integrated data to retrieve. Only required
         when bunch pattern is missing and useRaw is False.
-
+
     Returns
     -------
     xarray.DataArray containing digitizer peaks with pulse coordinates
@@ -158,114 +159,113 @@ def get_peaks(run,
     # load data
     if data is None:
         arr = run.get_array(source, key)
-    elif type(data) is str:
+    elif isinstance(data, str):
         arr = run.get_array(*_mnemonics[data].values())
         source = _mnemonics[data]['source']
         key = _mnemonics[data]['key']
     else:
         arr = data
-    dim = [d for d in arr.dims if d is not 'trainId'][0]
-    #check if bunch pattern is provided
+    dim = [d for d in arr.dims if d != 'trainId'][0]
+    # check if bunch pattern is provided
     bpt = None
-    if type(bunchPattern) is dict:
-        bpt = run.get_array(bunchPattern['source'],
-                            bunchPattern['key']).rename({'dim_0':'pulse_slot'})
+    if isinstance(bunchPattern, dict):
+        bpt = run.get_array(bunchPattern['source'], bunchPattern['key'])
+        bpt = bpt.rename({'dim_0': 'pulse_slot'})
         pattern = bunchPattern['pattern']
     elif _mnemonics['bunchPatternTable']['source'] in run.all_sources:
         bpt = run.get_array(*_mnemonics['bunchPatternTable'].values())
         pattern = bunchPattern
     if bpt is not None:
-        #load mask and extract pulse Id:
-        if pattern is 'sase3':
+        # load mask and extract pulse Id:
+        if pattern == 'sase3':
             mask = is_sase_3(bpt)
             extra_dim = 'sa3_pId'
-        elif pattern is 'sase1':
+        elif pattern == 'sase1':
             mask = is_sase_1(bpt)
             extra_dim = 'sa1_pId'
-        elif pattern is 'scs_ppl':
+        elif pattern == 'scs_ppl':
             mask = is_ppl(bpt)
             extra_dim = 'ol_pId'
         else:
             extra_dim = 'pulseId'
-        valid_tid = mask.where(mask.sum(dim='pulse_slot')>0,
+        valid_tid = mask.where(mask.sum(dim='pulse_slot') > 0,
                                drop=True).trainId
         mask_on = mask.sel(trainId=valid_tid)
-        if (mask_on == mask_on[0]).all().values == False:
+        if not (mask_on == mask_on[0]).all():
             log.debug('Pattern changed during the run!')
         pid = np.unique(np.where(mask_on)[1])
         npulses = len(pid)
         extra_coords = pid
         mask_final = mask_on.where(mask_on, drop=True).fillna(False)
-        mask_final = mask_final.astype(bool).rename({'pulse_slot':extra_dim})
+        mask_final = mask_final.astype(bool).rename({'pulse_slot': extra_dim})
         log.debug(f'Bunch pattern: {npulses} pulses for {pattern}.')
     if extra_dim is None:
         extra_dim = 'pulseId'
-
+
     # 1. Use peak-integrated data from digitizer
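+    # The APD (peak-integrated) data may have been recorded with a period
+    # larger than the actual pulse separation, in which case only every
+    # n-th value corresponds to a real pulse. Illustrative numbers: with
+    # pulses every 8 bunch slots (pid = [0, 8, 16, ...]) and a digitizer
+    # that integrated every 4th slot (period = 4), the stride computed
+    # below is (pid[1] - pid[0]) / period = 2, i.e. every 2nd value is kept.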
     if useRaw is False:
-        #1.1 No bunch pattern provided
+        # 1.1 No bunch pattern provided
         if bpt is None:
             log.debug('Missing bunch pattern info.')
             if indices is None:
-                raise TypeError(f'indices argument must be provided '+
+                raise TypeError('indices argument must be provided ' +
                                 'when bunch pattern info is missing.')
-            return arr.isel({dim:indices}).rename({dim:'pulseId'})
+            return arr.isel({dim: indices}).rename({dim: 'pulseId'})

-        #1.2 Bunch pattern is provided
+        # 1.2 Bunch pattern is provided
         if npulses == 1:
-            return arr.sel({dim:0}).expand_dims(extra_dim).T.assign_coords(
-                {extra_dim:extra_coords})
+            return arr.sel({dim: 0}).expand_dims(extra_dim).T.assign_coords(
+                {extra_dim: extra_coords})

         # verify period used by APD and match it to pid from bunchPattern
-        if digitizer is 'FastADC':
+        if digitizer == 'FastADC':
             adc_source = source.split(':')[0]
-            enable_key = (source.split(':')[1].split('.')[0]
+            enable_key = (source.split(':')[1].split('.')[0] +
                           '.enablePeakComputation.value')
-            if run.get_array(adc_source, enable_key)[0] is False:
-                raise ValueError('The digitizer did not record '+
+            if not run.get_array(adc_source, enable_key)[0]:
+                raise ValueError('The digitizer did not record ' +
                                  'peak-integrated data.')
-            period_key = (source.split(':')[1].split('.')[0] +
+            period_key = (source.split(':')[1].split('.')[0] +
                           '.pulsePeriod.value')
             period = run.get_array(adc_source, period_key)[0].values/24
-        if digitizer is 'ADQ412':
+        if digitizer == 'ADQ412':
             board_source = source.split(':')[0]
             board = key[19]
             channel = key[21]
-            channel_to_number = {'A':0, 'B':1, 'C':2, 'D':3}
+            channel_to_number = {'A': 0, 'B': 1, 'C': 2, 'D': 3}
             channel = channel_to_number[channel]
-            in_del_key = (f'board{board}.apd.channel_{channel}'+
+            in_del_key = (f'board{board}.apd.channel_{channel}' +
                           '.initialDelay.value')
             initialDelay = run.get_array(board_source, in_del_key)[0].values
-            up_lim_key = (f'board{board}.apd.channel_{channel}'+
+            up_lim_key = (f'board{board}.apd.channel_{channel}' +
                           '.upperLimit.value')
             upperLimit = run.get_array(board_source, up_lim_key)[0].values
             period = (upperLimit - initialDelay)/440
         stride = (pid[1] - pid[0]) / period
         if period < 1:
-            log.warning('Warning: the pulse period in digitizer was '+
-                        'smaller than the separation of two pulses '+
+            log.warning('Warning: the pulse period in digitizer was ' +
+                        'smaller than the separation of two pulses ' +
                         'at 4.5 MHz.')
             stride = 1
         if stride < 1:
-            raise ValueError('Pulse period in digitizer was too large '+
-                             'compared to actual pulse separation. Some pulses '+
-                             'were not recorded.')
+            raise ValueError('Pulse period in digitizer was too large ' +
+                             'compared to actual pulse separation. Some ' +
+                             'pulses were not recorded.')
         stride = int(stride)
         log.debug(f'period {period}, stride {stride}')
         if npulses*stride > arr.sizes[dim]:
-            raise ValueError('The number of pulses recorded by digitizer '+
-                             f'that correspond to actual {pattern} pulses '+
+            raise ValueError('The number of pulses recorded by digitizer ' +
+                             f'that correspond to actual {pattern} pulses ' +
                              'is too small.')
-        peaks = arr.isel({dim:slice(0,npulses*stride,stride)}).rename(
-            {dim:extra_dim})
-        peaks = peaks.where(mask_final, drop=True)
-        return peaks.assign_coords({extra_dim:extra_coords})
+        peaks = arr.isel({dim: slice(0, npulses*stride, stride)})
+        peaks = peaks.rename({dim: extra_dim}).where(mask_final, drop=True)
+        return peaks.assign_coords({extra_dim: extra_coords})

     # 2. Use raw data from digitizer
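+    # The bunch pattern gives pulse ids in 4.5 MHz slots; below they are
+    # mapped to trace sample indices via min_distance (samples per slot).
+    # Illustrative numbers: for FastADC (min_distance = 24), pid = [0, 8, 16]
+    # yields sample_id = [0, 192, 384] relative to the first window.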
-    #minimum samples between two pulses, according to digitizer type
+    # minimum samples between two pulses, according to digitizer type
     min_distance = 1
-    if digitizer is 'FastADC':
+    if digitizer == 'FastADC':
         min_distance = 24
-    if digitizer is 'ADQ412':
+    if digitizer == 'ADQ412':
         min_distance = 440
     if autoFind:
         integParams = find_integ_params(arr.mean(dim='trainId'),
@@ -275,12 +275,12 @@ def get_peaks(run,
                                         min_distance=min_distance)

     # 2.1. No bunch pattern provided
     if bpt is None:
         log.debug('Missing bunch pattern info.')
-        required_keys=['intstart', 'intstop', 'bkgstart',
-                       'bkgstop', 'period', 'npulses']
+        required_keys = ['intstart', 'intstop', 'bkgstart',
+                         'bkgstop', 'period', 'npulses']
         if integParams is None or not all(name in integParams
                                           for name in required_keys):
-            raise TypeError('All keys of integParams argument '+
-                            f'{required_keys} are required when '+
+            raise TypeError('All keys of integParams argument ' +
+                            f'{required_keys} are required when ' +
                             'bunch pattern info is missing.')
         log.debug(f'Retrieving {integParams["npulses"]} pulses.')
         if extra_dim is None:
             extra_dim = 'pulseId'
         return peaks_from_raw_trace(arr, integParams['intstart'],
                                     integParams['intstop'],
                                     integParams['bkgstart'],
                                     integParams['bkgstop'],
@@ -292,10 +292,10 @@ def get_peaks(run,
                                     integParams['period'],
                                     integParams['npulses'],
                                     extra_dim=extra_dim)
-
+
     # 2.2 Bunch pattern is provided
     sample_id = (pid-pid[0])*min_distance
-    #override auto find parameters
+    # override auto find parameters
     if isinstance(integParams['intstart'], (int, np.integer)):
         integParams['intstart'] = sample_id + integParams['intstart']
         integParams['period'] = integParams['npulses'] = None
@@ -308,7 +308,7 @@ def get_peaks(run,
                                 integParams['npulses'],
                                 extra_dim=extra_dim)
     peaks = peaks.where(mask_final, drop=True)
-    return peaks.assign_coords({extra_dim:extra_coords})
+    return peaks.assign_coords({extra_dim: extra_coords})


 def find_integ_params(trace, min_distance=1, height=None, width=1):
     """
     Estimates the integration parameters for peak integration of a raw trace.

     Parameters
     ----------
     trace: numpy array or xarray DataArray
         The digitizer raw trace used to find peaks
     min_distance: int
         minimum number of samples between two peaks
     height: int
         minimum threshold for peak determination
     width: int
         minimum width of peak

     Returns
     -------
     dict with keys 'intstart', 'intstop', 'bkgstart', 'bkgstop',
     'period', 'npulses' and values in number of samples.
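+
+    Example
+    -------
+    A minimal sketch on a synthetic trace (all numbers are illustrative):
+    >>> import numpy as np
+    >>> trace = np.zeros(5000)
+    >>> trace[1000:1010] = trace[1440:1450] = 1.
+    >>> params = find_integ_params(trace, min_distance=440)
+    >>> # params['npulses'] == 2, params['period'] ~ 440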
     """
-    if type(trace) is xr.DataArray:
+    if isinstance(trace, xr.DataArray):
         trace = trace.values
     bl = np.median(trace)
     trace_no_bl = trace - bl
     if np.max(trace_no_bl) < np.abs(np.min(trace_no_bl)):
-        posNeg = 'negative'
         trace_no_bl *= -1
         trace = bl + trace_no_bl
-    noise = trace[:100]
-    noise_ptp = np.max(noise) - np.min(noise)
+    # noise = trace[:100]
+    # noise_ptp = np.max(noise) - np.min(noise)
     if height is None:
         height = trace_no_bl.max()/20
     centers, peaks = find_peaks(trace_no_bl, distance=min_distance,
-                               height=height, width=width)
+                                height=height, width=width)
     npulses = len(centers)
-    if npulses==0:
+    if npulses == 0:
         raise ValueError('Could not automatically find peaks.')
-    elif npulses==1:
+    elif npulses == 1:
         period = 0
     else:
         period = np.median(np.diff(centers)).astype(int)
@@ -359,63 +358,82 @@ def find_integ_params(trace, min_distance=1, height=None, width=1):
                 + 0.5*np.mean(peaks['widths'])).astype(int)
     bkgstop = intstart - int(0.5*np.mean(peaks['widths']))
     bkgstart = np.max([bkgstop - min_distance, 0])
-    result = {'intstart':intstart, 'intstop':intstop,
-              'bkgstart':bkgstart, 'bkgstop':bkgstop,
-              'period':period, 'npulses':npulses}
+    result = {'intstart': intstart, 'intstop': intstop,
+              'bkgstart': bkgstart, 'bkgstop': bkgstop,
+              'period': period, 'npulses': npulses}
     return result

-def get_tim_peaks(data, bunchPattern='sase3',
+
+def get_tim_peaks(data=None, run=None, bunchPattern='sase3',
                   integParams=None, keepAllSase=False):
     """
-    Automatically computes TIM peaks from sources in data. Sources
-    can be raw traces (e.g. "MCP2raw") or peak-integrated data
+    Automatically computes TIM peaks from sources in data. Sources
+    can be raw traces (e.g. "MCP2raw") or peak-integrated data
     (e.g. "MCP2apd"). The bunch pattern table is used to assign
     the pulse id coordinates.
-
+
     Parameters
     ----------
-    data: xarray Dataset containing TIM data
+    data: xarray Dataset, str or list of str
+        xarray Dataset containing TIM data. If str or list of str,
+        must be mnemonics for TIM, e.g. "MCP2apd" or "MCP3raw" and
+        run must be provided.
+    run: extra_data.DataCollection
+        DataCollection containing the digitizer data. If provided,
+        data must be a str or list of str. If None, data must be
+        an xarray Dataset containing actual data.
     bunchPattern: str
         'sase1' or 'sase3' or 'scs_ppl', bunch pattern used to
         extract peaks. The pulse ID dimension will be named
         'sa1_pId', 'sa3_pId' or 'ol_pId', respectively.
     integParams: dict
-        dictionnary for raw trace integration, e.g.
-        {'intstart':100, 'intstop':200, 'bkgstart':50,
-        'bkgstop':99, 'period':24, 'npulses':500}.
+        dictionary for raw trace integration, e.g.
+        {'intstart':100, 'intstop':200, 'bkgstart':50,
+        'bkgstop':99, 'period':440, 'npulses':500}.
         If None, integration parameters are computed
        automatically.
     keepAllSase: bool
         Only relevant in case of sase-dedicated trains. If
-        True, the trains for SASE 1 are kept, else they are
-        dropped.
-
+        True, all trains are kept, else only those of the
+        bunchPattern are kept.
+
     Returns
     -------
-    xarray Dataset with all TIM variables substituted by
-    the peak caclulated values (e.g. "MCP2raw" becomes
+    xarray Dataset with all TIM variables substituted by
+    the calculated peak values (e.g. "MCP2raw" becomes
     "MCP2peaks").
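+
+    Example
+    -------
+    A minimal sketch of the new interface (proposal and run number
+    are illustrative):
+    >>> import extra_data as ed
+    >>> run = ed.open_run(2212, 235)
+    >>> ds = get_tim_peaks(data=['MCP1apd', 'MCP2apd'], run=run)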
     """
-    proposal = data.attrs['runFolder'].split('/')[-3]
-    runNB = data.attrs['runFolder'].split('/')[-1]
-    run = ed.open_run(proposal, runNB)
-    peakDict = {}
-    toRemove = []
-    autoFind=True
+    if run is None:
+        if not data:
+            raise ValueError('At least run or data must be provided.')
+        if not isinstance(data, xr.Dataset):
+            raise TypeError(f'data type is {type(data)}. Expected ' +
+                            'xarray Dataset.')
+    else:
+        if not data or not isinstance(data, (str, list)):
+            raise TypeError(f'data is {type(data)}. Expected str or ' +
+                            'list of str.')
+    if isinstance(data, xr.Dataset):
+        proposal = data.attrs['runFolder'].split('/')[-3]
+        runNB = data.attrs['runFolder'].split('/')[-1]
+        run = ed.open_run(proposal, runNB)
+        keys = [k for k in data if "MCP" in k]
+        is_ds = True
+    else:
+        keys = data if isinstance(data, list) else [data]
+        is_ds = False
+    autoFind = True
     if integParams is not None:
         autoFind = False
-    for c in range(1,5):
-        key = f'MCP{c}raw'
-        useRaw = True
-        if key not in data:
-            key = f'MCP{c}apd'
-            useRaw = False
-        if key not in data:
-            continue
+    peakDict = {}
+    toRemove = []
+    for key in keys:
+        useRaw = 'raw' in key
         log.debug(f'Retrieving TIM peaks from {key}...')
         mnemonic = _mnemonics[key]
-        peaks = get_peaks(run, data[key],
+        d = data[key] if is_ds else key
+        peaks = get_peaks(run, d,
                           source=mnemonic['source'],
                           key=mnemonic['key'],
                           digitizer='ADQ412',
@@ -423,76 +441,100 @@
                           useRaw=useRaw,
                           autoFind=autoFind,
                           integParams=integParams,
                           bunchPattern=bunchPattern)
-        peakDict[f'MCP{c}peaks'] = peaks
+        newKey = key.replace('raw', 'peaks').replace('apd', 'peaks')
+        peakDict[newKey] = peaks
         toRemove.append(key)
-    ds = data.drop(toRemove)
+    ds = xr.Dataset()
+    if is_ds:
+        ds = data.drop(toRemove)
     join = 'outer' if keepAllSase else 'inner'
     ds = ds.merge(peakDict, join=join)
     return ds

-def get_fast_adc_peaks(data, bunchPattern='scs_ppl',
+
+def get_fast_adc_peaks(data=None, run=None, bunchPattern='scs_ppl',
                        integParams=None, keepAllSase=False):
     """
     Automatically computes Fast ADC peaks from sources in data.
     Sources can be raw traces (e.g. "FastADC2raw") or peak-
     integrated data (e.g. "FastADC2peaks"). The bunch pattern
     table is used to assign the pulse id coordinates.
-
+
     Parameters
     ----------
-    data: xarray Dataset containing TIM data
+    data: xarray Dataset, str or list of str
+        xarray Dataset containing Fast ADC data. If str or list
+        of str, list of mnemonics for FastADC, e.g. "FastADC5raw"
+        or "FastADC5peaks" and run must be provided.
+    run: extra_data.DataCollection
+        DataCollection containing the digitizer data. If provided,
+        data must be a str or list of str. If None, data must be
+        an xarray Dataset containing actual data.
     bunchPattern: str
         'sase1' or 'sase3' or 'scs_ppl', bunch pattern used to
         extract peaks.
     integParams: dict
-        dictionnary for raw trace integration, e.g.
-        {'intstart':100, 'intstop':200, 'bkgstart':50,
-        'bkgstop':99, 'period':24, 'npulses':500}.
+        dictionary for raw trace integration, e.g.
+        {'intstart':100, 'intstop':200, 'bkgstart':50,
+        'bkgstop':99, 'period':24, 'npulses':500}.
         If None, integration parameters are computed
         automatically.
     keepAllSase: bool
         Only relevant in case of sase-dedicated trains. If
-        True, the trains for SASE 1 are kept, else they are
-        dropped.
-    display: bool
-        If True, displays information
-
+        True, all trains are kept, else only those of the
+        bunchPattern are kept.
+
     Returns
     -------
-    xarray Dataset with all Fast ADC variables substituted by
-    the peak caclulated values (e.g. "FastADC2raw" becomes
+    xarray Dataset with all Fast ADC variables substituted by
+    the calculated peak values (e.g. "FastADC2raw" becomes
     "FastADC2peaks").
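+
+    Example
+    -------
+    A minimal sketch of the new interface (mnemonic and run number
+    are illustrative):
+    >>> import extra_data as ed
+    >>> run = ed.open_run(2212, 235)
+    >>> ds = get_fast_adc_peaks(data='FastADC5raw', run=run,
+    ...                         bunchPattern='scs_ppl')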
Expected ' + + 'xarray Dataset.') + else: + if bool(data) is False or not ( + isinstance(data, str) or isinstance(data, list)): + raise TypeError(f'data is {type(data)}. Expected str or ' + + 'list of str.') + if isinstance(data, xr.Dataset): + proposal = data.attrs['runFolder'].split('/')[-3] + runNB = data.attrs['runFolder'].split('/')[-1] + run = ed.open_run(proposal, runNB) + keys = [k for k in data if "FastADC" in k] + is_ds = True + else: + keys = data if isinstance(data, list) else [data] + is_ds = False + autoFind = True if integParams is not None: autoFind = False - for c in range(1,10): - key = f'FastADC{c}raw' - useRaw = True - if key not in data: - key = f'FastADC{c}peaks' - useRaw = False - if key not in data: - continue - log.debug('Retrieving Fast ADC peaks from {key}...') + peakDict = {} + toRemove = [] + for key in keys: + useRaw = True if 'raw' in key else False + log.debug(f'Retrieving Fast ADC peaks from {key}...') mnemonic = _mnemonics[key] - peaks = get_peaks(run, data[key], + d = data[key] if is_ds else key + peaks = get_peaks(run, d, source=mnemonic['source'], key=mnemonic['key'], - digitizer='FastADC', + digitizer='ADQ412', useRaw=useRaw, autoFind=autoFind, integParams=integParams, bunchPattern=bunchPattern) - peakDict[f'FastADC{c}peaks'] = peaks + newKey = key.replace('raw', 'peaks') + peakDict[newKey] = peaks toRemove.append(key) - ds = data.drop(toRemove) + ds = xr.Dataset() + if is_ds: + ds = data.drop(toRemove) join = 'outer' if keepAllSase else 'inner' ds = ds.merge(peakDict, join=join) return ds -- GitLab