BSD 3-Clause License
Copyright (c) 2019, Michael Schneider
Copyright (c) 2020, SCS-team
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from .azimuthal_integrator import *
from .bam_detectors import *
from .digitizers import *
from .dssc import *
from .dssc_data import *
from .dssc_misc import *
from .dssc_processing import *
from .gotthard2 import *
from .hrixs import *
from .pes import *
from .viking import *
from .xgm import *
__all__ = (
azimuthal_integrator.__all__
+ bam_detectors.__all__
+ digitizers.__all__
+ dssc.__all__
+ dssc_data.__all__
+ dssc_misc.__all__
+ dssc_processing.__all__
+ gotthard2.__all__
+ hrixs.__all__
+ pes.__all__
+ viking.__all__
+ xgm.__all__
)
import logging
import numpy as np
__all__ = [
'AzimuthalIntegrator',
'AzimuthalIntegratorDSSC'
]
log = logging.getLogger(__name__)
class AzimuthalIntegrator(object):
def __init__(self, imageshape, center, polar_range,
aspect=204/236, **kwargs):
'''
Create a reusable integrator for repeated azimuthal integration of
similar images. Calculates array indices for a given parameter set that
allows fast recalculation.
Parameters
==========
imageshape : tuple of ints
The shape of the images to be integrated over.
center : tuple of ints
center coordinates in pixels
polar_range : tuple of ints
start and stop polar angle (in degrees) to restrict integration to
wedges
dr : int, optional
radial width of the integration slices. Takes non-square DSSC
pixels into account.
nrings : int, optional
Number of integration rings. Can be given as an alternative to dr
aspect: float, default 204/236 for DSSC
aspect ratio of the pixel pitch
Returns
=======
ai : AzimuthalIntegrator instance
Instance can directly be called with image data:
> az_intensity = ai(image)
radial distances and the polar mask are accessible as attributes:
> ai.distance
> ai.polar_mask
'''
self.xcoord = None
self.ycoord = None
self._calc_dist_array(imageshape, center, aspect)
self._calc_polar_mask(polar_range)
self._calc_indices(**kwargs)
def _calc_dist_array(self, shape, center, aspect):
'''Calculate pixel coordinates for the given shape.'''
self.center = center
self.shape = shape
self.aspect = aspect
cx, cy = center
log.info(f'azimuthal center: {center}')
sx, sy = shape
xcoord, ycoord = np.ogrid[:sx, :sy]
self.xcoord = xcoord - cx
self.ycoord = ycoord - cy
# distance from center, hexagonal pixel shape taken into account
self.dist_array = np.hypot(self.xcoord * aspect, self.ycoord)
def _calc_indices(self, **kwargs):
'''Calculates the list of indices for the flattened image array.'''
maxdist = self.dist_array.max()
mindist = self.dist_array.min()
dr = kwargs.get('dr', None)
nrings = kwargs.get('nrings', None)
if (dr is None) and (nrings is None):
raise AssertionError('Either <dr> or <nrings> needs to be given.')
if (dr is not None) and (nrings is not None):
log.warning('Both <dr> and <nrings> given. <dr> takes precedence.')
if (dr is None):
dr = maxdist / nrings
idx = np.indices(dimensions=self.shape)
self.index_array = np.ravel_multi_index(idx, self.shape)
self.distance = np.array([])
self.flat_indices = []
for dist in np.arange(mindist, maxdist + dr, dr):
ring_mask = (self.polar_mask
* (self.dist_array >= (dist - dr))
* (self.dist_array < dist))
self.flat_indices.append(self.index_array[ring_mask])
self.distance = np.append(self.distance, dist)
def _calc_polar_mask(self, polar_range):
self.polar_range = polar_range
prange = np.abs(polar_range[1] - polar_range[0])
if prange > 180:
raise ValueError('Integration angle too wide, should be within 180'
' degrees')
if prange < 1e-6:
raise ValueError('Integration angle too narrow')
if prange == 180:
self.polar_mask = np.ones(self.shape, dtype=bool)
else:
tmin, tmax = np.deg2rad(np.sort(polar_range)) % np.pi
polar_array = np.arctan2(self.xcoord, self.ycoord)
polar_array = np.mod(polar_array, np.pi)
self.polar_mask = (polar_array > tmin) * (polar_array < tmax)
def calc_q(self, distance, wavelength):
'''Calculate momentum transfer coordinate.
Parameters
==========
distance : float
Sample-detector distance in meters
wavelength : float
wavelength of scattered light in meters
Returns
=======
deltaq : np.ndarray
Momentum transfer coordinate in 1/m
'''
res = 4 * np.pi \
* np.sin(np.arctan(self.distance / distance) / 2) / wavelength
return res
def __call__(self, image):
assert self.shape == image.shape, 'image shape does not match'
image_flat = np.ravel(image)
return np.array([np.nansum(image_flat[indices])
for indices in self.flat_indices])
class AzimuthalIntegratorDSSC(AzimuthalIntegrator):
def __init__(self, geom, polar_range, dxdy=(0, 0), **kwargs):
'''
Create a reusable integrator for repeated azimuthal integration of
similar images. Calculates array indices for a given parameter set that
allows fast recalculation. Directly uses an
extra_geom.detectors.DSSC_1MGeometry instance for correct pixel
positions.
Parameters
==========
geom : extra_geom.detectors.DSSC_1MGeometry
loaded geometry instance
polar_range : tuple of ints
start and stop polar angle (in degrees) to restrict integration to
wedges
dr : int, optional
radial width of the integration slices. Takes non-square DSSC
pixels into account.
nrings : int, optional
Number of integration rings. Can be given as an alternative to dr
dxdy : tuple of floats, default (0, 0)
global coordinate shift to adjust center outside of geom object
(meter)
Returns
=======
ai : AzimuthalIntegratorDSSC instance
Instance can directly be called with image data:
> az_intensity = ai(image)
radial distances and the polar mask are accessible as attributes:
> ai.distance
> ai.polar_mask
'''
self.xcoord = None
self.ycoord = None
self._calc_dist_array(geom, dxdy)
self._calc_polar_mask(polar_range)
self._calc_indices(**kwargs)
def _calc_dist_array(self, geom, dxdy):
self.dxdy = dxdy
pos = geom.get_pixel_positions()
self.shape = pos.shape[:-1]
self.xcoord = pos[..., 0] + dxdy[0]
self.ycoord = pos[..., 1] + dxdy[1]
self.dist_array = np.hypot(self.xcoord, self.ycoord)
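# A minimal usage sketch of the integrator; the image shape, center and
# ring count below are illustrative values, not settings from a real
# experiment:
#
#   import numpy as np
#   ai = AzimuthalIntegrator(imageshape=(256, 256), center=(128, 128),
#                            polar_range=(0, 180), nrings=100)
#   az_intensity = ai(np.random.rand(256, 256))      # radial profile
#   q = ai.calc_q(distance=3.0, wavelength=1.5e-9)   # momentum transfer in 1/m
#
# For DSSC data, AzimuthalIntegratorDSSC takes a loaded
# extra_geom.detectors.DSSC_1MGeometry instance instead of
# imageshape/center.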
""" Beam Arrival Monitor related sub-routines
Copyright (2021) SCS Team.
(contributions preferably comply with pep8 code structure
guidelines.)
"""
import logging
import xarray as xr
import numpy as np
from ..misc.bunch_pattern_external import is_pulse_at
from ..mnemonics_machinery import mnemonics_for_run
from ..constants import mnemonics as _mnemonics
from ..misc.bunch_pattern import (npulses_has_changed,
get_unique_sase_pId, load_bpt)
from toolbox_scs.load import get_array
__all__ = [
'get_bam',
'get_bam_params',
]
log = logging.getLogger(__name__)
def get_bam(run, mnemonics=None, merge_with=None, bunchPattern='sase3',
pulseIds=None):
"""
Load beam arrival monitor (BAM) data and align their pulse ID
according to the bunch pattern. Sources can be loaded on the fly
via the mnemonics argument, or processed from an existing data set
(merge_with).
Parameters
----------
run: extra_data.DataCollection
DataCollection containing the bam data.
mnemonics: str or list of str
mnemonics for BAM, e.g. "BAM1932M" or ["BAM414", "BAM1932M"].
The arrays are either taken from merge_with or loaded from
the DataCollection run.
merge_with: xarray Dataset
If provided, the resulting Dataset will be merged with this
one. If merge_with contains variables in mnemonics, they will
be selected, aligned and merged.
bunchPattern: str
'sase1' or 'sase3' or 'scs_ppl', bunch pattern
used to extract peaks. The pulse ID dimension will be named
'sa1_pId', 'sa3_pId' or 'ol_pId', respectively.
pulseIds: list, 1D array
Pulse Ids. If None, they are automatically loaded.
Returns
-------
xarray Dataset with pulse-resolved BAM variables aligned,
merged with Dataset *merge_with* if provided.
Example
-------
>>> import toolbox_scs as tb
>>> run = tb.open_run(2711, 303)
>>> bam = tb.get_bam(run, 'BAM1932S')
"""
bam_mnemos = ['BAM4', 'BAM1', 'BAM2']
mnemonics = [mnemonics] if isinstance(mnemonics, str) else mnemonics
m2 = []
for m in mnemonics:
if any([(k in m) for k in bam_mnemos]):
m2.append(m)
mnemonics = list(set(m2))
if len(mnemonics) == 0:
log.info('no BAM mnemonics to process. Skipping.')
return merge_with
# Prepare the dataset of non-BAM data to merge with
if bool(merge_with):
ds_mw = merge_with.drop(mnemonics, errors='ignore')
else:
ds_mw = xr.Dataset()
dim_names = {'sase3': 'sa3_pId', 'sase1': 'sa1_pId',
'scs_ppl': 'ol_pId'}
bpt = load_bpt(run, ds_mw)
if bpt is not None:
mask = is_pulse_at(bpt, bunchPattern)
mask = mask.rename({'pulse_slot': dim_names[bunchPattern]})
ds = xr.Dataset()
run_mnemonics = mnemonics_for_run(run)
for m in mnemonics:
if merge_with is not None and m in merge_with:
da_bam = merge_with[m]
else:
da_bam = get_array(run, m)
if len(da_bam.dims) == 2:
if 'BAMbunchId' not in da_bam.dims:
continue
da_bam = da_bam.sel(BAMbunchId=slice(0, None, 2))
# align the pulse Id
if bpt is not None:
n = mask.sizes[dim_names[bunchPattern]]
da_bam = da_bam.isel(BAMbunchId=slice(0, n))
da_bam = da_bam.assign_coords(BAMbunchId=np.arange(0, n))
da_bam = da_bam.rename(BAMbunchId=dim_names[bunchPattern])
da_bam = da_bam.where(mask, drop=True)
# make sure unit is picosecond
if run_mnemonics[m]['key'] != 'data.lowChargeArrivalTime':
da_bam *= 1e-3
else:
# The 1D values (mean, std dev...) are in fs, need to convert to ps
mnemo = run_mnemonics[m]
first_val = run[mnemo['source']][mnemo['key']].train_from_index(0)[1]
if first_val == da_bam[0]:
da_bam *= 1e-3
ds = ds.merge(da_bam, join='inner')
# merge with non-BAM dataset
ds = ds_mw.merge(ds, join='inner')
return ds
'''
def get_bam_old(run, mnemonics=None, merge_with=None, bunchPattern='sase3'):
"""
Load beam arrival monitor (BAM) data and align their pulse ID
according to the bunch pattern. Sources can be loaded on the fly
via the mnemonics argument, or processed from an existing data set
(merge_with).
Parameters
----------
run: extra_data.DataCollection
DataCollection containing the bam data.
mnemonics: str or list of str
mnemonics for BAM, e.g. "BAM1932M" or ["BAM414", "BAM1932M"].
If None, defaults to "BAM1932M" in case no merge_with dataset
is provided.
merge_with: xarray Dataset
If provided, the resulting Dataset will be merged with this
one. The BAM variables of merge_with (if any) will also be
selected, aligned and merged.
bunchPattern: str
'sase1' or 'sase3' or 'scs_ppl', bunch pattern
used to extract peaks. The pulse ID dimension will be named
'sa1_pId', 'sa3_pId' or 'ol_pId', respectively.
Returns
-------
xarray Dataset with pulse-resolved BAM variables aligned,
merged with Dataset *merge_with* if provided.
Example
-------
>>> import toolbox_scs as tb
>>> import toolbox_scs.detectors as tbdet
>>> run, _ = tb.load(2711, 303)
>>> bam = tbdet.get_bam(run)
"""
# get the list of mnemonics to process
mnemonics = mnemonics_to_process(mnemonics, merge_with, 'BAM')
if len(mnemonics) == 0:
log.info('No array with unaligned BAM data to extract. Skipping.')
return merge_with
else:
log.info(f'Extracting BAM data from {mnemonics}.')
# Prepare the dataset of non-BAM data to merge with
if bool(merge_with):
mw_ds = merge_with.drop(mnemonics, errors='ignore')
else:
mw_ds = xr.Dataset()
run_mnemonics = mnemonics_for_run(run)
# extract the XFEL pulses: slice(0,5400,2)
roi = np.s_[:5400:2]
ds = xr.Dataset()
for m in mnemonics:
if bool(merge_with) and m in merge_with:
val = merge_with[m].isel({run_mnemonics[m]['dim'][0]: roi})
log.debug(f'Using {m} from merge_with dataset.')
else:
val = run.get_array(*run_mnemonics[m].values(), roi=roi, name=m)
log.debug(f'Loading {m} from DataCollection.')
val[run_mnemonics[m]['dim'][0]] = np.arange(2700)
# Since winter shutdown 2021-2022, units changed from ps to fs
# Here we convert to ps
if run_mnemonics[m]['key'] != 'data.lowChargeArrivalTime':
val *= 1e-3
ds = ds.merge(val, join='inner')
# check if bunch pattern table exists
if bool(merge_with) and 'bunchPatternTable' in merge_with:
bpt = merge_with['bunchPatternTable']
log.debug('Using bpt from merge_with dataset.')
elif 'bunchPatternTable' in run_mnemonics:
bpt = run.get_array(*run_mnemonics['bunchPatternTable'].values())
log.debug('Loaded bpt from extra_data run.')
else:
bpt = None
# align the pulse Id
if bpt is not None and len(ds.variables) > 0:
dim_names = {'sase3': 'sa3_pId', 'sase1': 'sa1_pId',
'scs_ppl': 'ol_pId'}
mask = is_pulse_at(bpt, bunchPattern)
mask = mask.rename({'pulse_slot': dim_names[bunchPattern]})
ds = ds.rename({run_mnemonics['BAM1932M']['dim'][0]:
dim_names[bunchPattern]})
ds = ds.where(mask, drop=True)
# merge with non-BAM dataset
ds = mw_ds.merge(ds, join='inner')
return ds
'''
def get_bam_params(run, mnemo_or_source='BAM1932S'):
"""
Extract the run values of bamStatus[1-3] and bamError.
Parameters
----------
run: extra_data.DataCollection
DataCollection containing the bam data.
mnemo_or_source: str
mnemonic of the BAM, e.g. 'BAM414', or source name,
e.g. 'SCS_ILH_LAS/DOOCS/BAM_414_B2'.
Returns
-------
params: dict
dictionary containing the extracted parameters.
Note
----
The extracted parameters are run values; they do not reflect any
possible change during the run.
"""
if mnemo_or_source in _mnemonics:
run_mnemonics = mnemonics_for_run(run)
source = run_mnemonics[mnemo_or_source]['source'].split(':')[0]
else:
source = mnemo_or_source
res = {}
res['status1'] = run.get_run_value(source, 'bamStatus1.value')
res['status2'] = run.get_run_value(source, 'bamStatus2.value')
res['status3'] = run.get_run_value(source, 'bamStatus3.value')
res['error'] = run.get_run_value(source, 'bamError.value')
return res
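# A short usage sketch of get_bam_params; the proposal/run numbers and BAM
# mnemonic are illustrative (reused from the get_bam example above):
#
#   import toolbox_scs as tb
#   run = tb.open_run(2711, 303)
#   params = tb.get_bam_params(run, 'BAM1932S')
#   # params is a dict with keys 'status1', 'status2', 'status3', 'error'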
""" Digitizers related sub-routines
Copyright (2021) SCS Team.
(contributions preferably comply with pep8 code structure
guidelines.)
"""
import logging
import os
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
from scipy.signal import find_peaks
from scipy.signal import fftconvolve
from ..misc.bunch_pattern_external import is_pulse_at
from ..util.exceptions import ToolBoxValueError
from ..mnemonics_machinery import (mnemonics_to_process,
mnemonics_for_run)
from extra_data import open_run
from extra_data.read_machinery import find_proposal
from extra.components import XrayPulses, OpticalLaserPulses
__all__ = [
'check_peak_params',
'get_digitizer_peaks',
'get_laser_peaks',
'get_peaks',
'get_tim_peaks',
'digitizer_signal_description',
'get_dig_avg_trace',
'extract_digitizer_peaks',
'load_processed_peaks',
'check_processed_peak_params'
]
log = logging.getLogger(__name__)
def peaks_from_raw_trace(traces, pulseStart, pulseStop, baseStart, baseStop,
period=None, npulses=None, extra_dim=None):
"""
Computes peaks from raw digitizer traces by trapezoidal integration.
Parameters
----------
traces: xarray DataArray or numpy array containing raw traces. If
numpy array is provided, the second dimension is that of the samples.
pulseStart: int or list or 1D-numpy array
trace index of integration start. If 1d array, each value is the start
of one peak. The period and npulses parameters are ignored.
pulseStop: int
trace index of integration stop.
baseStart: int
trace index of background start.
baseStop: int
trace index of background stop.
period: int
number of samples between two peaks. Ignored if pulseStart is a 1D-array.
npulses: int
number of pulses. Ignored if pulseStart is a 1D-array.
extra_dim: str
Name given to the dimension along the peaks. Defaults to 'pulseId'.
Returns
-------
xarray DataArray
"""
assert len(traces.shape) == 2
if isinstance(traces, xr.DataArray):
ntid = traces.sizes['trainId']
coords = traces.coords
traces = traces.values
if traces.shape[0] != ntid:
traces = traces.T
else:
coords = None
if hasattr(pulseStart, '__len__'):
pulseStart = np.array(pulseStart)
pulses = pulseStart - pulseStart[0]
pulseStart = pulseStart[0]
else:
pulses = range(0, npulses*period, period)
if extra_dim is None:
extra_dim = 'pulseId'
results = xr.DataArray(np.empty((traces.shape[0], len(pulses))),
coords=coords,
dims=['trainId', extra_dim])
for i, p in enumerate(pulses):
a = pulseStart + p
b = pulseStop + p
bkga = baseStart + p
bkgb = baseStop + p
if b > traces.shape[1]:
break
bg = np.outer(np.median(traces[:, bkga:bkgb], axis=1),
np.ones(b-a))
results[:, i] = np.trapz(traces[:, a:b] - bg, axis=1)
return results
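# Minimal sketch of peaks_from_raw_trace on synthetic traces (all numbers
# are illustrative): integrate 10 pulses of 20 samples each, repeated every
# 96 samples, with the baseline taken from the 10 samples preceding each
# pulse.
#
#   traces = np.random.rand(50, 2000)        # (trains, samples)
#   peaks = peaks_from_raw_trace(traces, pulseStart=50, pulseStop=70,
#                                baseStart=40, baseStop=50,
#                                period=96, npulses=10)
#   # peaks is an xarray DataArray of shape (50, 10) with
#   # dims ('trainId', 'pulseId')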
def peaks_from_apd(array, params, digitizer, bpt, bunchPattern):
"""
Extract peak-integrated data according to the bunch pattern.
Parameters
----------
array: xarray DataArray
2D array containing peak-integrated data
params: dict
peak-integration parameters of the digitizer
digitizer: str
digitizer type, one of {'FastADC', 'ADQ412'}
bpt: xarray DataArray
bunch pattern table
bunchPattern: str
one of {'sase1', 'sase3', 'scs_ppl'}, used to select pulses and
assign name of the pulse id dimension.
Returns
-------
xarray DataArray with pulse id coordinates.
"""
if params['enable'] == 0 or (array == 1.).all():
raise ValueError('The digitizer did not record integrated peaks. '
'Consider using raw traces from the same channel '
'for peak integration.')
if digitizer == 'FastADC':
min_distance = 24
if digitizer == 'ADQ412':
min_distance = 440
period = params['period']
if period % min_distance != 0:
log.warning(f'Warning: the pulse period ({period} samples) of '
'digitizer is not a multiple of the pulse separation at '
f'4.5 MHz ({min_distance} samples). Pulse id assignment '
'is likely to fail.')
stride = int(period/min_distance)
npulses_apd = params['npulses']
dim_names = {'sase3': 'sa3_pId', 'sase1': 'sa1_pId', 'scs_ppl': 'ol_pId'}
pulse_dim = dim_names[bunchPattern]
arr_dim = [dim for dim in array.dims if dim != 'trainId'][0]
if npulses_apd > array.sizes[arr_dim]:
log.warning(f'The digitizer was set to record {npulses_apd} pulses '
f'but the array length is only {array.sizes[arr_dim]}.')
npulses_apd = array.sizes[arr_dim]
mask = is_pulse_at(bpt, bunchPattern).rename({'pulse_slot': pulse_dim})
mask = mask.where(mask.trainId.isin(array.trainId), drop=True)
mask = mask.assign_coords({pulse_dim: np.arange(bpt.sizes['pulse_slot'])})
pid = np.sort(np.unique(np.where(mask)[1]))
npulses_bpt = len(pid)
apd_coords = np.arange(pid[0], pid[0] + stride*npulses_apd, stride)
noalign = False
if len(np.intersect1d(apd_coords, pid, assume_unique=True)) < npulses_bpt:
log.warning('Not all pulses were recorded. The digitizer '
'was set to record pulse ids '
f'{apd_coords[apd_coords<bpt.sizes["pulse_slot"]]} but the '
'bunch pattern for'
f' {bunchPattern} is {pid}. Skipping pulse ID alignment.')
noalign = True
array = array.isel({arr_dim: slice(0, npulses_apd)})
array = array.where(array != 1.)
if noalign:
return array
array = array.rename(
{arr_dim: pulse_dim}).assign_coords({pulse_dim: apd_coords})
array, mask = xr.align(array, mask, join='inner')
array = array.where(mask, drop=True)
return array
def get_peaks(run,
data,
mnemonic,
useRaw=True,
autoFind=True,
integParams=None,
bunchPattern='sase3',
bpt=None,
extra_dim=None,
indices=None,
):
"""
Extract peaks from one source (channel) of a digitizer.
Parameters
----------
run: extra_data.DataCollection
DataCollection containing the digitizer data
data: xarray DataArray or str
array containing the raw traces or peak-integrated values from the
digitizer. If str, must be one of the ToolBox mnemonics. If None,
the data is loaded via the source and key arguments.
mnemonic: str or dict
ToolBox mnemonic or dict with source and key as in
{'source': 'SCS_UTC1_ADQ/ADC/1:network',
'key': 'digitizers.channel_1_A.raw.samples'}
useRaw: bool
If True, extract peaks from raw traces. If False, uses the APD (or
peaks) data from the digitizer.
autoFind: bool
If True, finds integration parameters by inspecting the average raw
trace. Only valid if useRaw is True.
integParams: dict
dictionary containing the integration parameters for raw trace
integration: 'pulseStart', 'pulseStop', 'baseStart', 'baseStop',
'period', 'npulses'. Not used if autoFind is True. All keys are
required when bunch pattern is missing.
bunchPattern: str
match the peaks to the bunch pattern: 'sase1', 'sase3', 'scs_ppl'.
This will dictate the name of the pulse ID coordinates: 'sa1_pId',
'sa3_pId' or 'ol_pId'.
bpt: xarray DataArray
bunch pattern table
extra_dim: str
Name given to the dimension along the peaks. If None, the name is given
according to the bunchPattern.
indices: array, slice
indices from the peak-integrated data to retrieve. Only required
when bunch pattern is missing and useRaw is False.
Returns
-------
xarray.DataArray containing digitizer peaks with pulse coordinates
"""
arr = data
dim = [d for d in arr.dims if d != 'trainId'][0]
# Load bunch pattern table
run_mnemonics = mnemonics_for_run(run)
if bpt is None and bunchPattern != 'None':
if 'bunchPatternTable' in run_mnemonics:
m = run_mnemonics['bunchPatternTable']
bpt = run.get_array(m['source'], m['key'], m['dim'])
pattern = bunchPattern
else:
pattern = bunchPattern
if bunchPattern == 'None':
bpt = None
# Find digitizer type
m = mnemonic if isinstance(mnemonic, dict) else run_mnemonics[mnemonic]
digitizer = digitizer_type(run, m.get('source'))
# 1. Peak-integrated data from digitizer
if useRaw is False:
# 1.1 No bunch pattern provided
if bpt is None:
log.info('Missing bunch pattern info.')
if indices is None:
raise TypeError('indices argument must be provided '
'when bunch pattern info is missing.')
if extra_dim is None:
extra_dim = 'pulseId'
return arr.isel({dim: indices}).rename({dim: extra_dim})
# 1.2 Bunch pattern is provided
if isinstance(mnemonic, dict):
peak_params = channel_peak_params(run, mnemonic.get('source'),
mnemonic.get('key'))
else:
peak_params = channel_peak_params(run, mnemonic)
log.debug(f'Digitizer peak integration parameters: {peak_params}')
return peaks_from_apd(arr, peak_params, digitizer, bpt, bunchPattern)
# 2. Use raw data from digitizer
# minimum pulse period @ 4.5MHz, according to digitizer type
min_distance = 1
if digitizer == 'FastADC':
min_distance = 24
if digitizer == 'ADQ412':
min_distance = 440
if autoFind:
stride = int(np.max([1, np.floor(arr.sizes['trainId']/200)]))
trace = arr.isel(trainId=slice(0, None, stride)).mean(dim='trainId')
try:
integParams = find_integ_params(trace)
except ValueError as err:
log.warning(f'{err}, trying with averaged trace over all trains.')
trace = arr.mean(dim='trainId')
integParams = find_integ_params(trace)
log.debug(f'Auto find peaks result: {integParams}')
required_keys = ['pulseStart', 'pulseStop', 'baseStart',
'baseStop', 'period', 'npulses']
if integParams is None or not all(name in integParams
for name in required_keys):
raise TypeError('All keys of integParams argument '
f'{required_keys} are required when '
'bunch pattern info is missing.')
# 2.1. No bunch pattern provided
if bpt is None:
log.info('Missing bunch pattern info.')
log.debug(f'Retrieving {integParams["npulses"]} pulses.')
if extra_dim is None:
extra_dim = 'pulseId'
return peaks_from_raw_trace(arr, integParams['pulseStart'],
integParams['pulseStop'],
integParams['baseStart'],
integParams['baseStop'],
integParams['period'],
integParams['npulses'],
extra_dim=extra_dim)
# 2.2 Bunch pattern is provided
# load mask and extract pulse Id:
dim_names = {'sase3': 'sa3_pId', 'sase1': 'sa1_pId',
'scs_ppl': 'ol_pId', 'None': 'pulseId'}
extra_dim = dim_names[pattern]
valid_tid = np.intersect1d(arr.trainId, bpt.trainId, assume_unique=True)
mask = is_pulse_at(bpt, pattern)
pattern_changed = ~(mask == mask[0]).all().values
if not pattern_changed:
pid = np.nonzero(mask[0].values)[0]
valid_arr = arr
else:
mask = is_pulse_at(bpt.sel(trainId=valid_tid), pattern)
mask = mask.rename({'pulse_slot': extra_dim})
mask = mask.assign_coords(
{extra_dim: np.arange(bpt.sizes['pulse_slot'])})
mask_on = mask.where(mask, drop=True).fillna(False).astype(bool)
log.info(f'Bunch pattern of {pattern} changed during the run.')
pid = mask_on.coords[extra_dim]
# select trains containing pulses
valid_arr = arr.sel(trainId=mask_on.trainId)
npulses = len(pid)
log.debug(f'Bunch pattern: {npulses} pulses for {pattern}.')
if npulses == 1:
period_bpt = 0
else:
period_bpt = np.min(np.diff(pid))
if autoFind and period_bpt*min_distance != integParams['period']:
log.warning('The period from the bunch pattern is different than '
'that found by the peak-finding algorithm. Either '
'the algorithm failed or the bunch pattern source '
f'({bunchPattern}) is not correct.')
# create array of sample indices for peak integration
sample_id = (pid-pid[0])*min_distance
# override auto find parameters
if isinstance(integParams['pulseStart'], (int, np.integer)):
integParams['pulseStart'] = integParams['pulseStart'] + sample_id
peaks = peaks_from_raw_trace(valid_arr, integParams['pulseStart'],
integParams['pulseStop'],
integParams['baseStart'],
integParams['baseStop'],
integParams['period'],
integParams['npulses'],
extra_dim)
if pattern_changed:
peaks = peaks.where(mask_on, drop=True)
return peaks.assign_coords({extra_dim: pid})
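# A hedged sketch of a direct get_peaks call; the 'MCP2raw' mnemonic is
# illustrative. This mirrors what get_digitizer_peaks does internally:
#
#   run_mnemonics = mnemonics_for_run(run)
#   m = run_mnemonics['MCP2raw']
#   data = run.get_array(m['source'], m['key'], m['dim'])
#   peaks = get_peaks(run, data, 'MCP2raw', useRaw=True,
#                     autoFind=True, bunchPattern='sase3')
#   # peaks carries a 'sa3_pId' pulse dimension aligned to the bunch pattern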
def get_dig_avg_trace(run, mnemonic, ntrains=None):
"""
Compute the average over ntrains evenly spaced across all trains
of a digitizer trace.
Parameters
----------
run: extra_data.DataCollection
DataCollection containing the digitizer data.
mnemonic: str
ToolBox mnemonic of the digitizer data, e.g. 'MCP2apd'.
ntrains: int
Number of trains used to calculate the average raw trace.
If None, all trains are used.
Returns
-------
trace: DataArray
The average digitizer trace
"""
run_mnemonics = mnemonics_for_run(run)
if ntrains is None:
sel = run
else:
total_tid = len(run.train_ids)
stride = int(np.max([1, np.floor(total_tid/ntrains)]))
sel = run.select_trains(np.s_[0:None:stride])
m = run_mnemonics[mnemonic]
raw_trace = sel.get_array(m['source'], m['key'], m['dim'])
raw_trace = raw_trace.mean(dim='trainId')
return raw_trace
def channel_peak_params(run, source, key=None, channel=None, board=None):
"""
Extract peak-integration parameters used by a channel of the digitizer.
Parameters
----------
run: extra_data.DataCollection
DataCollection containing the digitizer data.
source: str
ToolBox mnemonic of a digitizer data, e.g. "MCP2apd" or
"FastADC4peaks", or name of digitizer source, e.g.
'SCS_UTC1_ADQ/ADC/1:network'.
key: str
optional, used in combination with source (if source is not a ToolBox
mnemonic) instead of digitizer, channel and board.
channel: int or str
The digitizer channel for which to retrieve the parameters. If None,
inferred from the source mnemonic or source + key arguments.
board: int
Board of the ADQ412. If None, inferred from the source mnemonic or
source + key arguments.
Returns
-------
dict with peak integration parameters.
"""
run_mnemonics = mnemonics_for_run(run)
if source in run_mnemonics:
m = run_mnemonics[source]
source = m['source']
key = m['key']
if 'network' in source:
return adq412_channel_peak_params(run, source, key, channel, board)
else:
return fastADC_channel_peak_params(run, source, channel)
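# A hedged sketch; the 'MCP2apd' mnemonic is illustrative. Reads the
# peak-integration settings that the digitizer itself used for a channel:
#
#   params = channel_peak_params(run, 'MCP2apd')
#   # e.g. {'baseStart': ..., 'baseStop': ..., 'enable': ...,
#   #       'pulseStart': ..., 'pulseStop': ..., 'npulses': ..., 'period': ...}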
def fastADC_channel_peak_params(run, source, channel=None):
"""
Extract peak-integration parameters used by a channel of the Fast ADC.
Parameters
----------
run: extra_data.DataCollection
DataCollection containing the digitizer data.
source: str
Name of Fast ADC source, e.g. 'SCS_UTC1_MCP/ADC/1:channel_5.output'.
channel: int
The Fast ADC channel for which to retrieve the parameters. If None,
inferred from the source.
Returns
-------
dict with peak integration parameters.
"""
fastADC_keys = {
'baseStart': ('baselineSettings.baseStart.value',
'baseStart.value'),
'baseStop': ('baseStop.value',),
'baseLength': ('baselineSettings.length.value',),
'enable': ('enablePeakComputation.value',),
'pulseStart': ('initialDelay.value',),
'pulseLength': ('peakSamples.value',),
'npulses': ('numPulses.value',),
'period': ('pulsePeriod.value',)
}
if channel is None:
channel = int(source.split(':')[1].split('.')[0].split('_')[1])
baseName = f'channel_{channel}.'
source = source.split(':')[0]
data = run.select(source).train_from_index(0)[1][source]
result = {}
for mnemo, versions in fastADC_keys.items():
for v in versions:
key = baseName + v
if key in data:
result[mnemo] = data[key]
if 'baseLength' in result:
result['baseStop'] = result['baseStart'] + \
result.pop('baseLength')
if 'pulseLength' in result:
result['pulseStop'] = result['pulseStart'] + \
result.pop('pulseLength')
return result
def adq412_channel_peak_params(run, source, key=None,
channel=None, board=None):
"""
Extract peak-integration parameters used by a channel of the ADQ412.
Parameters
----------
run: extra_data.DataCollection
DataCollection containing the digitizer data.
source: str
Name of ADQ412 source, e.g. 'SCS_UTC1_ADQ/ADC/1:network'.
key: str
optional, e.g. 'digitizers.channel_1_D.apd.pulseIntegral', used in
combination with source instead of channel and board.
channel: int or str
The ADQ412 channel for which to retrieve the parameters. If None,
inferred from the source mnemonic or source + key arguments.
board: int
Board of the ADQ412. If None, inferred from the source mnemonic or
source + key arguments.
Returns
-------
dict with peak integration parameters.
"""
if key is None:
if channel is None or board is None:
raise ValueError('key or channel + board arguments are '
'missing.')
else:
k = key.split('.')[1].split('_')
ch_to_int = {'A': 0, 'B': 1, 'C': 2, 'D': 3}
channel = ch_to_int[k[2]]
board = k[1]
baseName = f'board{board}.apd.channel_{channel}.'
source = source.split(':')[0]
adq412_keys = {
'baseStart': (baseName + 'baseStart.value',),
'baseStop': (baseName + 'baseStop.value',),
'enable': (baseName + 'enable.value',),
'pulseStart': (baseName + 'pulseStart.value',),
'pulseStop': (baseName + 'pulseStop.value',),
'initialDelay': (baseName + 'initialDelay.value',),
'upperLimit': (baseName + 'upperLimit.value',),
'npulses': (f'board{board}.apd.numberOfPulses.value',)
}
data = run.select(source).train_from_index(0)[1][source]
result = {}
for mnemo, versions in adq412_keys.items():
for key in versions:
if key in data:
result[mnemo] = data[key]
result['period'] = result.pop('upperLimit') - \
result.pop('initialDelay')
return result
def find_integ_params(trace, height=None, width=1):
"""
Find integration parameters for peak integration of a raw
digitizer trace. Based on scipy find_peaks().
Parameters
----------
trace: numpy array or xarray DataArray
The digitizer raw trace used to find peaks
height: int
minimum threshold for peak determination
width: int
minimum width of peak
Returns
-------
dict with keys 'pulseStart', 'pulseStop', 'baseStart', 'baseStop',
'period', 'npulses' and values in number of samples.
"""
if isinstance(trace, xr.DataArray):
trace = trace.values
trace_norm = trace - np.median(trace)
trace_norm = -trace_norm if np.mean(trace_norm) < 0 else trace_norm
SNR = np.max(np.abs(trace_norm)) / np.std(trace_norm[:100])
if SNR < 10:
log.warning('signal-to-noise ratio too low: cannot '
'automatically find peaks.')
return {'pulseStart': 2, 'pulseStop': 3,
'baseStart': 0, 'baseStop': 1,
'period': 0, 'npulses': 1}
# Compute autocorrelation using fftconvolve
# from "https://stackoverflow.com/questions/12323959/"
# "fast-cross-correlation-method-in-python"
if len(trace_norm) % 2 == 0:
n = len(trace_norm)
else:
n = len(trace_norm) - 1
hl = int(n/2)
a = trace_norm[:n]
b = np.zeros(2*n)
b[hl: hl + n] = a
# Do an array flipped convolution, which is a correlation.
ac_trace = fftconvolve(b, a[::-1], mode='valid')
# slower approach:
# ac_trace = np.correlate(trace_norm, trace_norm, mode='same')
n = int(len(ac_trace)/2)
# estimate quality of ac_trace and define min height to detect peaks
factor = np.abs(ac_trace.max() / np.median(ac_trace))
factor = 3 if factor > 20 else 1.5
ac_peaks = find_peaks(ac_trace[n:], height=ac_trace[n:].max()/factor)
if len(ac_peaks[0]) == 0:
period = 0
distance = 1
elif len(ac_peaks[0]) == 1:
period = ac_peaks[0][0]
distance = max(1, period-6)
else:
period = int(np.median(ac_peaks[0][1:] - ac_peaks[0][:-1]))
distance = max(1, period-6) # smaller than period to account for all peaks
# define min height to detect peaks depending on signal quality
f = np.max([3, np.min([(SNR/10), 4])])
height = trace_norm.max() / f
peaks, dic = find_peaks(trace_norm, distance=distance,
height=height, width=1)
# filter out peaks that are not periodically spaced
idx = np.ones(len(peaks), dtype=bool)
idx[1:] = np.isclose(peaks[1:] - peaks[:-1],
np.ones(len(peaks[1:]))*period, atol=6)
peaks = peaks[idx]
for d in dic:
dic[d] = dic[d][idx]
npulses = len(peaks)
if npulses == 0:
raise ValueError('Could not automatically find peaks.')
pulseStart = np.mean(dic['left_ips'] - peaks + peaks[0], dtype=int)
pulseStop = np.mean(dic['right_ips'] - peaks + peaks[0], dtype=int)
baseStop = pulseStart - np.mean(peaks - dic['left_ips'])/2 - 1
baseStop = np.min([baseStop, pulseStart]).astype(int)
baseStop = np.max([baseStop, 0]).astype(int)
baseStart = baseStop - np.mean(dic['widths'])/2
baseStart = np.max([baseStart, 0]).astype(int)
result = {'pulseStart': pulseStart, 'pulseStop': pulseStop,
'baseStart': baseStart, 'baseStop': baseStop,
'period': period, 'npulses': npulses}
return result
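# A hedged sketch of the automatic peak finding on an averaged raw trace;
# the 'FastADC2raw' mnemonic and ntrains value are illustrative:
#
#   trace = get_dig_avg_trace(run, 'FastADC2raw', ntrains=200)
#   params = find_integ_params(trace)
#   # params holds 'pulseStart', 'pulseStop', 'baseStart', 'baseStop',
#   # 'period' and 'npulses', all in digitizer samples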
def get_peak_params(run, mnemonic, raw_trace=None, ntrains=None):
"""
Get the peak region and baseline region of a raw digitizer trace used
to compute the peak integration. These regions are either those of the
digitizer peak-integration settings or those determined in get_tim_peaks
or get_fast_adc_peaks when the inputs are raw traces.
Parameters
----------
run: extra_data.DataCollection
DataCollection containing the digitizer data.
mnemonic: str
ToolBox mnemonic of the digitizer data, e.g. 'MCP2apd'.
raw_trace: optional, 1D numpy array or xarray DataArray
Raw trace to display. If None, the average raw trace over
ntrains of the corresponding channel is loaded (this can
be time-consuming).
ntrains: optional, int
Only used if raw_trace is None. Number of trains used to
calculate the average raw trace of the corresponding channel.
"""
run_mnemonics = mnemonics_for_run(run)
if mnemonic not in run_mnemonics:
raise ToolBoxValueError("mnemonic must be a ToolBox mnemonic")
if "raw" not in mnemonic:
params = channel_peak_params(run, mnemonic)
if 'enable' in params and params['enable'] == 0:
log.warning('The digitizer did not record peak-integrated data.')
else:
if raw_trace is None:
raw_trace = get_dig_avg_trace(run, mnemonic, ntrains)
params = find_integ_params(raw_trace)
return params
def find_peak_integration_parameters(run, mnemonic, raw_trace=None,
integParams=None, pattern=None):
'''
Finds peak integration parameters.
'''
run_mnemonics = mnemonics_for_run(run)
digitizer = digitizer_type(run, run_mnemonics[mnemonic]['source'])
pulse_period = 24 if digitizer == "FastADC" else 440
if integParams is None: # load raw trace and find peaks
autoFind = True
if raw_trace is None:
raw_trace = get_dig_avg_trace(run, mnemonic)
params = get_peak_params(run, mnemonic, raw_trace)
else: # inspect provided parameters
autoFind = False
required_keys = ['pulseStart', 'pulseStop', 'baseStart',
'baseStop']
add_text = ''
if pattern is None and not hasattr(integParams['pulseStart'],
'__len__'):
required_keys += ['period', 'npulses']
add_text = 'Bunch pattern not provided. '
if not all(name in integParams for name in required_keys):
raise ValueError(add_text + 'All keys of integParams argument '
f'{required_keys} are required.')
params = integParams.copy()
# extract the pulse ids from the parameters (starting at 0)
if hasattr(params['pulseStart'], '__len__'):
if params.get('npulses') is not None and (
params.get('npulses') != len(params['pulseStart']) ):
log.warning('The number of pulses does not match the length '
'of pulseStart. Using length of pulseStart as '
'the number of pulses.')
params['npulses'] = len(params['pulseStart'])
pulse_ids_params = ((np.array(params['pulseStart'])
- params['pulseStart'][0]) / pulse_period).astype(int)
else:
pulse_ids_params = np.arange(0, params['npulses'] * params['period'] / pulse_period,
params['period'] / pulse_period).astype(int)
# Extract pulse_ids and npulses from bunch pattern
pulse_ids, npulses, period = None, None, None
regular = True
if pattern is not None:
bunchPattern = 'sase3' if hasattr(pattern, 'sase') else 'scs_ppl'
if pattern.is_constant_pattern() is False:
log.info('The number of pulses changed during the run.')
pulse_ids = np.unique(pattern.pulse_ids(labelled=False, copy=False))
npulses, period = None, None
regular = False
else:
pulse_ids = pattern.peek_pulse_ids(labelled=False)
npulses = len(pulse_ids)
if npulses > 1:
periods = np.diff(pulse_ids)
if len(np.unique(periods)) > 1:
regular = False
else:
period = np.unique(periods)[0] * pulse_period
# Compare parameters with bunch pattern
if len(pulse_ids_params) == len(pulse_ids):
if not (pulse_ids_params == pulse_ids - pulse_ids[0]).all():
log.warning('The provided pulseStart parameters do not match the '
f'{bunchPattern} bunch pattern pulse ids. Using '
'bunch pattern parameters.')
pulse_ids_params = pulse_ids
if (npulses != params.get('npulses') or
period != params.get('period')):
if autoFind:
add_text = 'Automatically found '
else:
add_text = 'Provided '
log.warning(add_text + 'integration parameters '
f'(npulses={params.get("npulses")}, '
f'period={params.get("period")}) do not match the '
f'{bunchPattern} bunch pattern (npulses={npulses}, '
f'period={period}). Using bunch pattern parameters.')
pulse_ids_params = pulse_ids
params['npulses'] = npulses
params['period'] = period
if not hasattr(params['pulseStart'], '__len__'):
start = params['pulseStart']
else:
start = params['pulseStart'][0]
params['pulseStart'] = np.array([int(start + (pid - pulse_ids_params[0]) * pulse_period)
for pid in pulse_ids_params])
return params, pulse_ids_params, regular, raw_trace
def check_peak_params(run, mnemonic, raw_trace=None, ntrains=200, params=None,
plot=True, show_all=False, bunchPattern='sase3'):
"""
Checks and plots the peak parameters (pulse window and baseline window
of a raw digitizer trace) used to compute the peak integration. These
parameters are either set by the digitizer peak-integration settings,
or are determined by a peak finding algorithm (used in get_tim_peaks
or get_fast_adc_peaks) when the inputs are raw traces. The parameters
can also be provided manually for visual inspection. The plot either
shows the first and last pulse of the trace or the entire trace.
Parameters
----------
run: extra_data.DataCollection
DataCollection containing the digitizer data.
mnemonic: str
ToolBox mnemonic of the digitizer data, e.g. 'MCP2apd'.
raw_trace: optional, 1D numpy array or xarray DataArray
Raw trace to display. If None, the average raw trace over
ntrains of the corresponding channel is loaded (this can
be time-consuming).
ntrains: optional, int
Only used if raw_trace is None. Number of trains used to
calculate the average raw trace of the corresponding channel.
plot: bool
If True, displays the raw trace and peak integration regions.
show_all: bool
If True, displays the entire raw trace and all peak integration
regions (this can be time-consuming).
If False, shows the first and last pulse according to the bunchPattern.
bunchPattern: optional, str
Only used if plot is True. Checks the bunch pattern against
the digitizer peak parameters and shows potential mismatch.
Returns
-------
dictionary of peak integration parameters
"""
'''
run_mnemonics = mnemonics_for_run(run)
if raw_trace is None:
raw_trace = get_dig_avg_trace(run, mnemonic, ntrains)
if params is None:
integParams = get_peak_params(run, mnemonic, raw_trace)
title = 'Auto-find peak params'
else:
title = 'Provided peak params'
integParams = params.copy()
if 'enable' in integParams and integParams['enable'] == 0:
log.warning('The digitizer did not record peak-integrated data.')
if not plot:
return integParams
digitizer = digitizer_type(run, run_mnemonics[mnemonic]['source'])
pulse_period = 24 if digitizer == "FastADC" else 440
pattern = None
try:
if bunchPattern == 'sase3':
pattern = XrayPulses(run)
if bunchPattern == 'scs_ppl':
pattern = OpticalLaserPulses(run)
except Exception as e:
log.warning(e)
bunchPattern = None
# Extract pulse_ids, npulses and period from bunch pattern
pulse_ids, npulses, period = None, None, None
regular = True
if pattern is not None:
if pattern.is_constant_pattern() is False:
log.info('The number of pulses changed during the run.')
pulse_ids = np.unique(pattern.pulse_ids(labelled=False, copy=False))
npulses, period = None, None
regular = False
else:
pulse_ids = pattern.peek_pulse_ids(labelled=False)
npulses = len(pulse_ids)
if npulses > 1:
periods = np.diff(pulse_ids)
if len(np.unique(periods)) > 1:
regular = False
else:
period = np.unique(periods)[0] * pulse_period
bp_params = {}
bp_params['npulses'] = npulses
bp_params['period'] = period
bp_params['pulse_ids'] = pulse_ids
bp_params['regular'] = regular
if regular:
print(f'bunch pattern {bunchPattern}: {bp_params["npulses"]} pulses,'
f' {bp_params["period"]} samples between two pulses')
else:
print(f'bunch pattern {bunchPattern}: Not a regular pattern. '
f'{bp_params["npulses"]} pulses, pulse_ids='
f'{bp_params["pulse_ids"]}.')
if (npulses != integParams.get('npulses') or
period != integParams.get('period')):
log.warning(f'Integration parameters '
f'(npulses={integParams.get("npulses")}, '
f'period={integParams.get("period")}) do not match the '
f'the bunch pattern (npulses={npulses}, '
f'period={period}). Using bunch pattern parameters.')
integParams['npulses'] = npulses
integParams['period'] = period
start = integParams['pulseStart']
integParams['pulseStart'] = [start + (pid - pulse_ids[0]) * pulse_period
for pid in pulse_ids]
else:
bp_params = None
print(f'{title}: {integParams["npulses"]} pulses, {integParams["period"]}'
' samples between two pulses')
fig, ax = plotPeakIntegrationWindow(raw_trace, integParams, bp_params, show_all)
ax[0].set_ylabel(mnemonic.replace('peaks', 'raw').replace('apd', 'raw'))
fig.suptitle(title, size=12)
'''
run_mnemonics = mnemonics_for_run(run)
digitizer = digitizer_type(run, run_mnemonics[mnemonic]['source'])
pulse_period = 24 if digitizer == "FastADC" else 440
if params is None:
title = 'Auto-find peak parameters'
else:
title = 'Provided peak parameters'
pattern = None
try:
if bunchPattern == 'sase3':
pattern = XrayPulses(run)
if bunchPattern == 'scs_ppl':
pattern = OpticalLaserPulses(run)
except Exception as e:
log.warning(e)
bunchPattern = None
integParams, pulse_ids, regular, raw_trace = find_peak_integration_parameters(
run, mnemonic, raw_trace, params, pattern)
if bunchPattern:
if regular:
print(f'bunch pattern {bunchPattern}: {len(pulse_ids)} pulses,'
f' {(pulse_ids[1] - pulse_ids[0]) * pulse_period} samples between two pulses')
print(title + ': ' + f' {integParams["npulses"]} pulses, '
f'{integParams["period"]} samples between two pulses')
else:
print(f'bunch pattern {bunchPattern}: Not a regular pattern. '
f'{len(pulse_ids)} pulses, pulse_ids='
f'{pulse_ids}.')
if plot:
if raw_trace is None:
raw_trace = get_dig_avg_trace(run, mnemonic)
fig, ax = plotPeakIntegrationWindow(raw_trace, integParams,
show_all=show_all)
fig.suptitle(title, size=12)
return integParams
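# A hedged sketch; the 'MCP2apd' mnemonic is illustrative. Visually check
# the integration windows against the sase3 bunch pattern:
#
#   params = check_peak_params(run, 'MCP2apd', bunchPattern='sase3')
#   # prints the bunch-pattern and integration parameters and, with
#   # plot=True (default), shows the first and last integration windows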
def plotPeakIntegrationWindow(raw_trace, params, show_all=False):
if hasattr(params['pulseStart'], '__len__'):
pulseStarts = np.array(params['pulseStart'])
pulseStops = params['pulseStop'] + pulseStarts - params['pulseStart'][0]
baseStarts = params['baseStart'] + pulseStarts - params['pulseStart'][0]
baseStops = params['baseStop'] + pulseStarts - params['pulseStart'][0]
else:
n = params['npulses']
p = params['period']
pulseStarts = [params['pulseStart'] + i*p for i in range(n)]
pulseStops = [params['pulseStop'] + i*p for i in range(n)]
baseStarts = [params['baseStart'] + i*p for i in range(n)]
baseStops = [params['baseStop'] + i*p for i in range(n)]
if show_all:
fig, ax = plt.subplots(figsize=(6, 3), constrained_layout=True)
for i in range(len(pulseStarts)):
lbl = 'baseline' if i == 0 else None
lp = 'peak' if i == 0 else None
ax.axvline(baseStarts[i], ls='--', color='k')
ax.axvline(baseStops[i], ls='--', color='k')
ax.axvspan(baseStarts[i], baseStops[i],
alpha=0.5, color='grey', label=lbl)
ax.axvline(pulseStarts[i], ls='--', color='r')
ax.axvline(pulseStops[i], ls='--', color='r')
ax.axvspan(pulseStarts[i], pulseStops[i],
alpha=0.2, color='r', label=lp)
ax.plot(raw_trace, color='C0', label='raw trace')
ax.legend(fontsize=8)
return fig, [ax]
fig, ax = plt.subplots(1, 2, figsize=(6, 3), constrained_layout=True)
for plot in range(2):
title = 'First pulse' if plot == 0 else 'Last pulse'
i = 0 if plot == 0 else -1
ax[plot].axvline(baseStarts[i], ls='--', color='k')
ax[plot].axvline(baseStops[i], ls='--', color='k')
ax[plot].axvspan(baseStarts[i], baseStops[i],
alpha=0.5, color='grey', label='baseline')
ax[plot].axvline(pulseStarts[i], ls='--', color='r')
ax[plot].axvline(pulseStops[i], ls='--', color='r')
ax[plot].axvspan(pulseStarts[i], pulseStops[i],
alpha=0.2, color='r', label='peak')
xmin = np.max([0, baseStarts[i]-200])
xmax = np.min([pulseStops[i]+200, raw_trace.size])
ax[plot].plot(np.arange(xmin, xmax), raw_trace[xmin:xmax], color='C0',
label=title)
ax[plot].legend(fontsize=8)
ax[plot].set_xlim(xmin, xmax)
ax[plot].set_xlabel('digitizer samples')
ax[plot].set_title(title, size=10)
return fig, ax
def digitizer_type(run, source):
"""
Finds the digitizer type based on the class Id / name of the source.
Example source: 'SCS_UTC1_MCP/ADC/1'. Defaults to ADQ412 if not found.
"""
ret = None
if '_MCP/ADC/1' in source:
ret = 'FastADC'
if '_ADQ/ADC/1' in source:
ret = 'ADQ412'
if ret is None:
digi_dict = {'FastAdc': 'FastADC',
'FastAdcLegacy': 'FastADC',
'AdqDigitizer': 'ADQ412',
'PyADCChannel': 'FastADC',
'PyADCChannelLegacy': 'FastADC'}
try:
source = source.split(':')[0]
classId = run.get_run_value(source, 'classId.value')
ret = digi_dict.get(classId)
except Exception as e:
log.warning(str(e))
log.warning(f'Could not find digitizer type from source {source}.')
ret = 'ADQ412'
return ret
def get_tim_peaks(run, mnemonic=None, merge_with=None,
bunchPattern='sase3', integParams=None,
keepAllSase=False):
"""
Automatically computes TIM peaks. Sources can be loaded on the
fly via the mnemonics argument, or processed from an existing data
set (merge_with). The bunch pattern table is used to assign the
pulse id coordinates.
Parameters
----------
run: extra_data.DataCollection
DataCollection containing the digitizer data.
mnemonic: str
mnemonics for TIM, e.g. "MCP2apd".
merge_with: xarray Dataset
If provided, the resulting Dataset will be merged with this
one. The TIM variables of merge_with (if any) will also be
computed and merged.
bunchPattern: str
'sase1' or 'sase3' or 'scs_ppl', bunch pattern
used to extract peaks. The pulse ID dimension will be named
'sa1_pId', 'sa3_pId' or 'ol_pId', respectively.
integParams: dict
dictionary for raw trace integration, e.g.
{'pulseStart':100, 'pulseStop':200, 'baseStart':50,
'baseStop':99, 'period':24, 'npulses':500}.
If None, integration parameters are computed automatically.
keepAllSase: bool
Only relevant in case of sase-dedicated trains. If True, all
trains are kept, else only those of the bunchPattern are kept.
Returns
-------
xarray Dataset with TIM variables substituted by
the calculated peak values (e.g. "MCP2raw" becomes
"MCP2peaks"), merged with Dataset *merge_with* if provided.
"""
return get_digitizer_peaks(run, mnemonic, merge_with,
bunchPattern, integParams,
keepAllSase)
def get_laser_peaks(run, mnemonic=None, merge_with=None,
bunchPattern='scs_ppl', integParams=None):
"""
Extracts laser photodiode signal (peak intensity) from Fast ADC
digitizer. Sources can be loaded on the fly via the mnemonics
argument, and/or processed from an existing data set (merge_with).
The PP laser bunch pattern is used to assign the pulse id coordinates.
Parameters
----------
run: extra_data.DataCollection
DataCollection containing the digitizer data.
mnemonic: str
mnemonic for FastADC corresponding to laser signal, e.g.
"FastADC2peaks" or 'I0_ILHraw'.
merge_with: xarray Dataset
If provided, the resulting Dataset will be merged with this
one. The FastADC variables of merge_with (if any) will also be
computed and merged.
bunchPattern: str
'sase1' or 'sase3' or 'scs_ppl', bunch pattern
used to extract peaks.
integParams: dict
dictionary for raw trace integration, e.g.
{'pulseStart':100, 'pulseStop':200, 'baseStart':50,
'baseStop':99, 'period':24, 'npulses':500}.
If None, integration parameters are computed
automatically.
Returns
-------
xarray Dataset with all Fast ADC variables substituted by
the calculated peak values (e.g. "FastADC2raw" becomes
"FastADC2peaks").
"""
return get_digitizer_peaks(run, mnemonic, merge_with,
bunchPattern, integParams, False)
def get_digitizer_peaks(run, mnemonic, merge_with=None,
bunchPattern='sase3', integParams=None,
keepAllSase=False):
"""
Automatically computes digitizer peaks. A source can be loaded on the
fly via the mnemonic argument, or processed from an existing data set
(merge_with). The bunch pattern table is used to assign the pulse
id coordinates.
Parameters
----------
run: extra_data.DataCollection
DataCollection containing the digitizer data.
mnemonic: str
mnemonic for FastADC or ADQ412, e.g. "I0_ILHraw" or "MCP3apd".
The data is either loaded from the DataCollection or taken from
merge_with.
merge_with: xarray Dataset
If provided, the resulting Dataset will be merged with this one.
bunchPattern: str or dict
'sase1', 'sase3', 'scs_ppl' or 'None': bunch pattern used to extract peaks.
integParams: dict
dictionary for raw trace integration, e.g.
{'pulseStart':100, 'pulseStop':200, 'baseStart':50,
'baseStop':99, 'period':24, 'npulses':500}.
If None, integration parameters are computed automatically.
keepAllSase: bool
Only relevant in case of sase-dedicated trains. If True, all
trains are kept, else only those of the bunchPattern are kept.
Returns
-------
xarray Dataset with digitizer peak variables. Raw variables are
substituted by the calculated peak values (e.g. "FastADC2raw" becomes
"FastADC2peaks").
"""
run_mnemonics = mnemonics_for_run(run)
if mnemonic not in run_mnemonics:
log.warning('Mnemonic not found in run. Skipping.')
return merge_with
if bool(merge_with) and mnemonic in merge_with:
for d in merge_with[mnemonic].dims:
if d in ['sa3_pId', 'ol_pId']:
log.warning(f'{mnemonic} already extracted. '
'Skipping.')
return merge_with
# check if bunch pattern table exists
bpt = None
if bool(merge_with) and 'bunchPatternTable' in merge_with:
bpt = merge_with['bunchPatternTable']
log.debug('Using bpt from merge_with dataset.')
elif 'bunchPatternTable' in run_mnemonics:
m = run_mnemonics['bunchPatternTable']
bpt = run.get_array(m['source'], m['key'], m['dim'])
log.debug('Loaded bpt from DataCollection.')
elif 'bunchPatternTable_SA3' in run_mnemonics:
m = run_mnemonics['bunchPatternTable_SA3']
bpt = run.get_array(m['source'], m['key'], m['dim'])
log.debug('Loaded bpt from DataCollection.')
else:
log.warning('Could not load bunch pattern table.')
# prepare resulting dataset
if bool(merge_with):
mw_ds = merge_with.drop(mnemonic, errors='ignore')
else:
mw_ds = xr.Dataset()
# iterate over mnemonics and merge arrays in dataset
autoFind = True if integParams is None else False
m = run_mnemonics[mnemonic]
useRaw = True if 'raw' in mnemonic else False
if bool(merge_with) and mnemonic in merge_with:
data = merge_with[mnemonic]
else:
data = run.get_array(m['source'], m['key'], m['dim'])
peaks = get_peaks(run, data, mnemonic,
useRaw=useRaw,
autoFind=autoFind,
integParams=integParams,
bunchPattern=bunchPattern,
bpt=bpt)
name = mnemonic.replace('raw', 'peaks').replace('apd', 'peaks')
join = 'outer' if keepAllSase else 'inner'
ds = mw_ds.merge(peaks.rename(name), join=join)
return ds
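# A hedged usage sketch; the proposal/run numbers and mnemonic are
# illustrative:
#
#   import toolbox_scs as tb
#   run = tb.open_run(2711, 303)
#   ds = tb.get_digitizer_peaks(run, 'MCP2apd', bunchPattern='sase3')
#   # ds contains 'MCP2peaks' with a 'sa3_pId' pulse-id dimension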
def digitizer_signal_description(run, digitizer=None):
"""
Check for the existence of signal description and return all corresponding
channels in a dictionary.
Parameters
----------
run: extra_data.DataCollection
DataCollection containing the digitizer data.
digitizer: str or list of str (default=None)
Name of the digitizer: one in ['FastADC', 'FastADC2',
'ADQ412', 'ADQ412_2'].
If None, all digitizers are used.
Returns
-------
signal_description: dictionary containing the signal description of
the digitizer channels.
Example
-------
import toolbox_scs as tb
run = tb.open_run(3481, 100)
signals = tb.digitizer_signal_description(run)
signals_fadc2 = tb.digitizer_signal_description(run, 'FastADC2')
"""
if digitizer not in [None, 'FastADC', 'FastADC2', 'ADQ412', 'ADQ412_2']:
raise ValueError('digitizer must be one in '
'["FastADC", "FastADC2", "ADQ412", "ADQ412_2"]')
if digitizer is None:
digitizer = ['FastADC', 'FastADC2', 'ADQ412', 'ADQ412_2']
else:
digitizer = [digitizer]
def key_fadc(i):
if i > 9:
return None
return f'channel_{i}.signalDescription.value'
def key_adq412(i):
if i > 3:
return None
return f'board1.channel_{i}.description.value'
sources = {'FastADC': ['SCS_UTC1_MCP/ADC/1', key_fadc],
'FastADC2': ['SCS_UTC2_FADC/ADC/1', key_fadc],
'ADQ412': ['SCS_UTC1_ADQ/ADC/1', key_adq412],
'ADQ412_2': ['SCS_UTC2_ADQ/ADC/1', key_adq412]}
res = {}
for d in digitizer:
if sources[d][0] not in run.all_sources:
continue
if sources[d][1](0) not in run.get_run_values(sources[d][0]):
raise ValueError('No signal description available for '
f'{d}: {sources[d][0]}')
for ch in range(10):
val = sources[d][1](ch)
if val is None:
break
desc = run.get_run_value(sources[d][0], val)
res[f'{d}_Ch{ch}'] = desc
return res
def calibrateTIM(data, rollingWindow=200, mcp=1, plot=False, use_apd=True, intstart=None,
intstop=None, bkgstart=None, bkgstop=None, t_offset=None, npulses_apd=None):
''' Calibrate TIM signal (Peak-integrated signal) to the slow ion signal of SCS_XGM
(photocurrent read by Keithley, channel 'pulseEnergy.photonFlux.value').
The aim is to find F so that E_tim_peak[uJ] = F x TIM_peak. For this, we want to
match the SASE3-only average TIM pulse peak per train (TIM_avg) to the slow XGM
signal E_slow.
Since E_slow is the average energy per pulse over all SASE1 and SASE3
pulses (N1 and N3), we first extract the relative contribution C of the SASE3 pulses
by looking at the pulse-resolved signals of the SA3_XGM in the tunnel.
There, the signal of SASE1 is usually strong enough to be above noise level.
Let TIM_avg be the average of the TIM pulses (SASE3 only).
The calibration factor is then defined as: F = E_slow * C * (N1+N3) / ( N3 * TIM_avg ).
If N3 changes during the run, we locate the indices for which N3 is maximum and define
a window in which to apply the calibration (indices start/stop).
Warning: the calibration does not include the transmission by the KB mirrors!
Inputs:
data: xarray Dataset
rollingWindow: length of running average to calculate TIM_avg
mcp: MCP channel
plot: boolean. If True, plot calibration results.
use_apd: boolean. If False, the TIM pulse peaks are extracted from raw traces using
getTIMapd
intstart: trace index of integration start
intstop: trace index of integration stop
bkgstart: trace index of background start
bkgstop: trace index of background stop
t_offset: index separation between two pulses
npulses_apd: number of pulses
Output:
F: float, TIM calibration factor.
'''
start = 0
stop = None
npulses = data['npulses_sase3']
ntrains = npulses.shape[0]
if not np.all(npulses == npulses[0]):
start = np.argmax(npulses.values)
stop = ntrains + np.argmax(npulses.values[::-1]) - 1
if stop - start < rollingWindow:
print('not enough consecutive data points with the largest number of pulses per train')
start += rollingWindow
stop = np.min((ntrains, stop+rollingWindow))
filteredTIM = getTIMapd(data, mcp, use_apd, intstart, intstop, bkgstart, bkgstop, t_offset, npulses_apd)
sa3contrib = saseContribution(data, 'sase3', 'XTD10_XGM')
avgFast = filteredTIM.mean(axis=1).rolling(trainId=rollingWindow).mean()
ratio = ((data['npulses_sase3']+data['npulses_sase1']) *
data['SCS_photonFlux'] * sa3contrib) / (avgFast*data['npulses_sase3'])
F = float(ratio[start:stop].median().values)
if plot:
fig = plt.figure(figsize=(8,5))
ax = plt.subplot(211)
ax.set_title('E[uJ] = {:2e} x TIM (MCP{})'.format(F, mcp))
ax.plot(data['SCS_photonFlux'], label='SCS XGM slow (all SASE)', color='C0')
slow_avg_sase3 = data['SCS_photonFlux']*(data['npulses_sase1']
+data['npulses_sase3'])*sa3contrib/data['npulses_sase3']
ax.plot(slow_avg_sase3, label='SCS XGM slow (SASE3 only)', color='C1')
ax.plot(avgFast*F, label='Calibrated TIM rolling avg', color='C2')
ax.legend(loc='upper left', fontsize=8)
ax.set_ylabel('Energy [$\mu$J]', size=10)
ax.plot(filteredTIM.mean(axis=1)*F, label='Calibrated TIM train avg', alpha=0.2, color='C9')
ax.legend(loc='best', fontsize=8, ncol=2)
plt.xlabel('train in run')
ax = plt.subplot(234)
xgm_fast = selectSASEinXGM(data)
ax.scatter(filteredTIM, xgm_fast, s=5, alpha=0.1, rasterized=True)
fit, cov = np.polyfit(filteredTIM.values.flatten(),xgm_fast.values.flatten(),1, cov=True)
y=np.poly1d(fit)
x=np.linspace(filteredTIM.min(), filteredTIM.max(), 10)
ax.plot(x, y(x), lw=2, color='r')
ax.set_ylabel('Raw HAMP [$\mu$J]', size=10)
ax.set_xlabel('TIM (MCP{}) signal'.format(mcp), size=10)
ax.annotate(s='y(x) = F x + A\n'+
'F = %.3e\n$\Delta$F/F = %.2e\n'%(fit[0],np.abs(np.sqrt(cov[0,0])/fit[0]))+
'A = %.3e'%fit[1],
xy=(0.5,0.6), xycoords='axes fraction', fontsize=10, color='r')
print('TIM calibration factor: %e'%(F))
ax = plt.subplot(235)
ax.hist(filteredTIM.values.flatten()*F, bins=50, rwidth=0.8)
ax.set_ylabel('number of pulses', size=10)
ax.set_xlabel('Pulse energy MCP{} [uJ]'.format(mcp), size=10)
ax.set_yscale('log')
ax = plt.subplot(236)
if not use_apd:
pulseStart = intstart
pulseStop = intstop
else:
pulseStart = data.attrs['run'].get_array(
'SCS_UTC1_ADQ/ADC/1', 'board1.apd.channel_0.pulseStart.value')[0].values
pulseStop = data.attrs['run'].get_array(
'SCS_UTC1_ADQ/ADC/1', 'board1.apd.channel_0.pulseStop.value')[0].values
if 'MCP{}raw'.format(mcp) not in data:
tid, data = data.attrs['run'].train_from_index(0)
trace = data['SCS_UTC1_ADQ/ADC/1:network']['digitizers.channel_1_D.raw.samples']
print('no raw data for MCP{}. Loading trace from MCP1'.format(mcp))
label_trace='MCP1 Voltage [V]'
else:
trace = data['MCP{}raw'.format(mcp)][0]
label_trace='MCP{} Voltage [V]'.format(mcp)
ax.plot(trace[:pulseStop+25], 'o-', ms=2, label='trace')
ax.axvspan(pulseStart, pulseStop, color='C2', alpha=0.2, label='APD region')
ax.axvline(pulseStart, color='gray', ls='--')
ax.axvline(pulseStop, color='gray', ls='--')
ax.set_xlim(pulseStart - 25, pulseStop + 25)
ax.set_ylabel(label_trace, size=10)
ax.set_xlabel('sample #', size=10)
ax.legend(fontsize=8)
return F
''' TIM calibration table
Dict with key= photon energy and value= array of polynomial coefficients for each MCP (1,2,3).
The polynomials correspond to a fit of the logarithm of the calibration factor as a function
of MCP voltage. If P is a polynomial and V the MCP voltage, the calibration factor (in microjoule
per APD signal) is given by -exp(P(V)).
This table was generated from the calibration of March 2019, proposal 900074, semester 201930,
runs 69 - 111 (Ni edge): https://in.xfel.eu/elog/SCS+Beamline/2323
runs 113 - 153 (Co edge): https://in.xfel.eu/elog/SCS+Beamline/2334
runs 163 - 208 (Fe edge): https://in.xfel.eu/elog/SCS+Beamline/2349
'''
tim_calibration_table = {
705.5: np.array([
[-6.85344690e-12, 5.00931986e-08, -1.27206912e-04, 1.15596821e-01, -3.15215367e+01],
[ 1.25613942e-11, -5.41566381e-08, 8.28161004e-05, -7.27230153e-02, 3.10984925e+01],
[ 1.14094964e-12, 7.72658935e-09, -4.27504907e-05, 4.07253378e-02, -7.00773062e+00]]),
779: np.array([
[ 4.57610777e-12, -2.33282497e-08, 4.65978738e-05, -6.43305156e-02, 3.73958623e+01],
[ 2.96325102e-11, -1.61393276e-07, 3.32600044e-04, -3.28468195e-01, 1.28328844e+02],
[ 1.14521506e-11, -5.81980336e-08, 1.12518434e-04, -1.19072484e-01, 5.37601559e+01]]),
851: np.array([
[ 3.15774215e-11, -1.71452934e-07, 3.50316512e-04, -3.40098861e-01, 1.31064501e+02],
[5.36341958e-11, -2.92533156e-07, 6.00574534e-04, -5.71083140e-01, 2.10547161e+02],
[ 3.69445588e-11, -1.97731342e-07, 3.98203522e-04, -3.78338599e-01, 1.41894119e+02]])
}
def timFactorFromTable(voltage, photonEnergy, mcp=1):
''' Returns an energy calibration factor for TIM integrated peak signal (APD)
according to calibration from March 2019, proposal 900074, semester 201930,
runs 69 - 111 (Ni edge): https://in.xfel.eu/elog/SCS+Beamline/2323
runs 113 - 153 (Co edge): https://in.xfel.eu/elog/SCS+Beamline/2334
runs 163 - 208 (Fe edge): https://in.xfel.eu/elog/SCS+Beamline/2349
Uses the tim_calibration_table declared above.
Inputs:
voltage: MCP voltage in volts.
photonEnergy: FEL photon energy in eV. Calibration factor is linearly
interpolated between the known values from the calibration table.
mcp: MCP channel (1, 2, or 3).
Output:
f: calibration factor in microjoule per APD signal
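Example:
# illustrative values only; 780 eV falls between the tabulated 705.5 and
# 851 eV entries, so the factor is linearly interpolated
f = timFactorFromTable(voltage=2000, photonEnergy=780, mcp=1)
# pulse energy [uJ] = f * APD signal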
'''
energies = np.sort([key for key in tim_calibration_table])
if photonEnergy not in energies:
if photonEnergy > energies.max():
photonEnergy = energies.max()
elif photonEnergy < energies.min():
photonEnergy = energies.min()
else:
idx = np.searchsorted(energies, photonEnergy) - 1
polyA = np.poly1d(tim_calibration_table[energies[idx]][mcp-1])
polyB = np.poly1d(tim_calibration_table[energies[idx+1]][mcp-1])
fA = -np.exp(polyA(voltage))
fB = -np.exp(polyB(voltage))
f = fA + (fB-fA)/(energies[idx+1]-energies[idx])*(photonEnergy - energies[idx])
return f
poly = np.poly1d(tim_calibration_table[photonEnergy][mcp-1])
f = -np.exp(poly(voltage))
return f
#############################################################################################
#############################################################################################
#############################################################################################
def extract_digitizer_peaks(proposal, runNB, mnemonic, bunchPattern=None,
integParams=None, save=False,
subdir='usr/processed_runs'):
"""
Extract the peaks from digitizer raw traces and save them into a file.
The calculation is a trapezoidal integration between 'pulseStart' and
'pulseStop' with subtraction of a baseline defined as the median between
'baseStart' and 'baseStop'.
The integration parameters can either be provided using integParams, or
determined automatically by a peak-finding algorithm. If the bunchPattern
argument is provided, the pulse ids are aligned to it.
Parameters
----------
proposal: int
the proposal number
runNB: int
the run number
mnemonic: str
the mnemonic containing raw traces. Example: 'XRD_MCP_BIGraw'
bunchPattern: str
'sase3' or 'scs_ppl'. If provided, checks the bunch pattern table using
Extra XrayPulses or OpticalPulses and aligns the pulse ids.
If None, the pulse ids are not aligned and indexed between 0 and the
number of pulses per train.
integParams: dict
dictionary with keys ['pulseStart', 'pulseStop', 'baseStart',
'baseStop', 'period', 'npulses']. If provided, automatic peak finding is skipped.
If bunchPattern is not None, 'period' and 'npulses' are adjusted to match
the bunch pattern and align the pulse ids.
save: bool
whether to save the result to a file or not.
subdir: str
subdirectory. The data is stored in
<proposal path>/<subdir>/r{runNB:04d}/f'r{runNB:04d}-digitizers-data.h5'
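Example
-------
# hedged usage sketch; the proposal, run number and mnemonic are placeholders
peaks = extract_digitizer_peaks(1234, 56, 'XRD_MCP_BIGraw',
bunchPattern='scs_ppl', save=False)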
"""
# if integParams is None, the integration parameters are determined
# automatically by find_peak_integration_parameters() below
run = open_run(proposal, runNB)
run_mnemonics = mnemonics_for_run(run)
mnemo = run_mnemonics.get(mnemonic)
if mnemo is None:
log.warning('Mnemonic not found. Skipping.')
return xr.DataArray()
source, key = mnemo['source'], mnemo['key']
extra_dim = {'sase3': 'sa3_pId', 'scs_ppl': 'ol_pId'}.get(bunchPattern)
if extra_dim is None:
extra_dim = 'pulseId'
digitizer = digitizer_type(run, source)
if digitizer == 'FastADC':
pulse_period = 24
if digitizer == 'ADQ412':
pulse_period = 440
pattern = None
try:
if bunchPattern == 'sase3':
pattern = XrayPulses(run)
if bunchPattern == 'scs_ppl':
pattern = OpticalLaserPulses(run)
except Exception as e:
log.warning(e)
'''
pattern = None
try:
if bunchPattern == 'sase3':
pattern = XrayPulses(run)
if bunchPattern == 'scs_ppl':
pattern = OpticalLaserPulses(run)
except Exception as e:
log.warning(e)
bunchPattern = None
# Extract pulse_ids, npulses and period from bunch pattern
pulse_ids, npulses, period = None, None, None
regular = True
if pattern is not None:
if pattern.is_constant_pattern() is False:
log.info('The number of pulses changed during the run.')
pulse_ids = np.unique(pattern.pulse_ids(labelled=False, copy=False))
npulses, period = None, None
regular = False
else:
pulse_ids = pattern.peek_pulse_ids(labelled=False)
npulses = len(pulse_ids)
if npulses > 1:
periods = np.diff(pulse_ids)
if len(np.unique(periods)) > 1:
regular = False
else:
period = np.unique(periods)[0] * pulse_period
# Use integration parameters, adjust them to match bunch pattern if necessary
if integParams is not None:
required_keys = ['pulseStart', 'pulseStop', 'baseStart',
'baseStop']
if bunchPattern is None:
required_keys += ['period', 'npulses']
if not all(name in integParams for name in required_keys):
raise TypeError('All keys of integParams argument '
f'{required_keys} are required.')
params = integParams.copy()
if pattern is not None:
if (npulses != params.get('npulses') or
period != params.get('period')):
log.warning(f'Integration parameters '
f'(npulses={params.get("npulses")}, '
f'period={params.get("period")}) do not match the '
f'the bunch pattern (npulses={npulses}, '
f'period={period}). Using bunch pattern parameters.')
params['npulses'] = npulses
params['period'] = period
else:
pulse_ids = np.arange(0, params['npulses'], params['period'],
dtype=np.uint64)
start = params['pulseStart']
params['pulseStart'] = [start + (pid - pulse_ids[0]) * pulse_period
for pid in pulse_ids]
'''
# load traces and calculate the average
traces = run[source, key].xarray(name=mnemonic, extra_dims=mnemo['dim'])
trace = traces.mean('trainId').rename(mnemonic.replace('raw', 'avg'))
params, pulse_ids, regular, trace = find_peak_integration_parameters(
run, mnemonic, trace, integParams, pattern)
'''
# find peak integration parameters
if integParams is None:
params = find_integ_params(trace)
if (period is not None and params['period'] != period
or npulses is not None and params['npulses'] != npulses):
log.warning(f'Bunch pattern (npulses={npulses}, period={period}) and '
f'found integration parameters (npulses={params["npulses"]}, '
f'period={params["period"]}) do not match. Using bunch '
'pattern parameters.')
params["npulses"] = npulses
params["period"] = period
if pulse_ids is None:
pulse_ids = np.arange(params['npulses'], dtype=np.uint64)
start = params['pulseStart']
params['pulseStart'] = [start + (pid - pulse_ids[0]) * pulse_period
for pid in pulse_ids]
'''
# extract peaks
data = peaks_from_raw_trace(traces, **params, extra_dim=extra_dim)
data = data.rename(mnemonic.replace('raw', 'peaks'))
if regular is True:
data = data.assign_coords({extra_dim: pulse_ids})
else:
mask = pattern.pulse_mask(labelled=False)
mask = xr.DataArray(mask, dims=['trainId', extra_dim],
coords={'trainId': run[source, key].train_ids,
extra_dim: np.arange(mask.shape[1])})
mask = mask.sel({extra_dim: pulse_ids})
data = data.where(mask, drop=True)
data.attrs['params_keys'] = list(params.keys())
if regular:
params['pulseStart'] = params['pulseStart'][0]
for p in params:
if params[p] is None:
params[p] = 'None'
data.attrs[f'params_{data.name}'] = list(params.values())
if save:
save_peaks(proposal, runNB, data, trace, subdir)
return data
def save_peaks(proposal, runNB, peaks, avg_trace, subdir):
'''
Save the peaks extracted with extract_digitizer_peaks() as well as the
average raw trace in a dataset at the proposal + subdir location.
If a dataset already exists, the new data is merged with it.
Parameters
----------
proposal: int
the proposal number
runNB: int
the run number
peaks: xarray DataArray
the 2D-array obtained by extract_digitizer_peaks()
avg_trace: xarray DataArray
the average raw trace over the trains
subdir: str
subdirectory. The data is stored in
<proposal path>/<subdir>/r{runNB:04d}/f'r{runNB:04d}-digitizers-data.h5'
Returns
-------
None
'''
root = find_proposal(f'p{proposal:06d}')
path = os.path.join(root, subdir + f'/r{runNB:04d}/')
os.makedirs(path, exist_ok=True)
fname = path + f'r{runNB:04d}-digitizers-data.h5'
ds_peaks = peaks.to_dataset(promote_attrs=True)
if os.path.isfile(fname):
ds = xr.load_dataset(fname)
ds = ds.drop_vars([peaks.name, avg_trace.name], errors='ignore')
for dim in ds.dims:
if all(dim not in ds[v].dims for v in ds):
ds = ds.drop_dims(dim)
dim_name = 'sampleId'
if 'sampleId' in ds.dims and ds.sizes['sampleId'] != len(avg_trace):
dim_name = 'sampleId2'
avg_trace = avg_trace.rename({avg_trace.dims[0]: dim_name})
if f'params_{peaks.name}' in ds.attrs:
del ds.attrs[f'params_{peaks.name}']
ds = xr.merge([ds, ds_peaks, avg_trace],
combine_attrs='drop_conflicts', join='inner')
else:
ds = ds_peaks.merge(avg_trace.rename({avg_trace.dims[0]: 'sampleId'}))
ds.to_netcdf(fname, format='NETCDF4')
print(f'saved data into {fname}.')
return None
def load_processed_peaks(proposal, runNB, mnemonic=None,
data='usr/processed_runs', merge_with=None):
"""
Load processed digitizer peaks data.
Parameters
----------
proposal: int
the proposal number
runNB: int
the run number
mnemonic: str
the mnemonic containing peaks. Example: 'XRD_MCP_BIGpeaks'.
If None, the entire dataset is loaded
data: str
subdirectory. The data is stored in
<proposal path>/<subdir>/r{runNB:04d}/f'r{runNB:04d}-digitizers-data.h5'
merge_with: xarray Dataset
A dataset to merge the data with.
Returns
-------
xarray DataArray if mnemonic is not None and merge_with is None
xarray Dataset if mnemonic is None or merge_with is not None.
Example
-------
# load the mono energy and the MCP_BIG peaks
run, ds = tb.load(proposal, runNB, 'nrj')
ds = tb.load_processed_peaks(proposal, runNB,'XRD_MCP_BIGpeaks', merge_with=ds)
"""
if mnemonic is None:
return load_all_processed_peaks(proposal, runNB, data, merge_with)
root = find_proposal(f'p{proposal:06d}')
path = os.path.join(root, data + f'/r{runNB:04d}/')
fname = path + f'r{runNB:04d}-digitizers-data.h5'
if os.path.isfile(fname):
ds = xr.load_dataset(fname)
if mnemonic not in ds:
print(f'Mnemonic {mnemonic} not found in {fname}')
return {}
da = ds[mnemonic]
da.attrs['params_keys'] = ds.attrs['params_keys']
da.attrs[f'params_{mnemonic}'] = ds.attrs[f'params_{mnemonic}']
if merge_with is not None:
return merge_with.merge(da.to_dataset(promote_attrs=True),
combine_attrs='drop_conflicts', join='inner')
else:
return da
else:
print(f'{fname} not found.')
return merge_with
def load_all_processed_peaks(proposal, runNB, data='usr/processed_runs',
merge_with=None):
"""
Load processed digitizer peaks dataset. The data contains the peaks,
the average raw trace and the integration parameters (attribute) of
each processed digitizer source.
Parameters
----------
proposal: int
the proposal number
runNB: int
the run number
data: str
subdirectory. The data is stored in
<proposal path>/<subdir>/r{runNB:04d}/f'r{runNB:04d}-digitizers-data.h5'
merge_with: xarray Dataset
A dataset to merge the data with.
Returns
-------
xarray Dataset
"""
root = find_proposal(f'p{proposal:06d}')
path = os.path.join(root, data + f'/r{runNB:04d}/')
fname = path + f'r{runNB:04d}-digitizers-data.h5'
if os.path.isfile(fname):
if merge_with is not None:
return merge_with.merge(xr.load_dataset(fname),
combine_attrs='drop_conflicts', join='inner')
return xr.load_dataset(fname)
else:
print(f'{fname} not found.')
return merge_with
def check_processed_peak_params(proposal, runNB, mnemonic, data='usr/processed_runs',
plot=True, show_all=False):
"""
Check the integration parameters used to generate the processed peak values.
Parameters
----------
proposal: int
the proposal number
runNB: int
the run number
mnemonic: str
the mnemonic containing peaks, e.g. 'XRD_MCP_BIGpeaks'. The corresponding
'raw' mnemonic is also accepted.
data: str
subdirectory. The data is stored in
<proposal path>/<subdir>/r{runNB:04d}/f'r{runNB:04d}-digitizers-data.h5'
plot: bool
If True, displays the raw trace and peak integration regions.
show_all: bool
If True, displays the entire raw trace and all peak integration
regions (this can be time-consuming).
If False, shows the first and last pulses.
Returns
-------
params: dict
the integration parameters with keys ['pulseStart', 'pulseStop',
'baseStart', 'baseStop', 'period', 'npulses'].
See `extract_digitizer_peaks()`.
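Example
-------
# hedged usage sketch; the proposal and run number are placeholders
params = check_processed_peak_params(1234, 56, 'XRD_MCP_BIGpeaks')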
"""
root = find_proposal(f'p{proposal:06d}')
path = os.path.join(root, data + f'/r{runNB:04d}/')
fname = path + f'r{runNB:04d}-digitizers-data.h5'
if os.path.isfile(fname):
ds = xr.load_dataset(fname)
peaks_mnemo = mnemonic.replace('raw', 'peaks')
if peaks_mnemo not in ds:
print(f'Mnemonic {mnemonic} not found in {fname}')
return {}
params = dict(zip(ds.attrs['params_keys'], ds.attrs[f'params_{peaks_mnemo}']))
if plot:
plotPeakIntegrationWindow(ds[peaks_mnemo.replace('peaks', 'avg')],
params, show_all=show_all)
return params
else:
print(f'{fname} not found.')
return {}
from joblib import Parallel, delayed, parallel_backend
from time import strftime
import tempfile
import shutil
from tqdm.auto import tqdm
import os
import warnings
import psutil
import extra_data as ed
from extra_data.read_machinery import find_proposal
from extra_geom import DSSC_1MGeometry
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import ImageGrid
import numpy as np
import xarray as xr
import h5py
from imageio import imread
from ..constants import mnemonics as _mnemonics
from .azimuthal_integrator import AzimuthalIntegrator
class DSSC:
def __init__(self, proposal, distance=1):
""" Create a DSSC object to process DSSC data.
inputs:
proposal: (int, str) proposal number or string
distance: (float) distance from sample to DSSC detector in meters
"""
if isinstance(proposal,int):
proposal = 'p{:06d}'.format(proposal)
runFolder = find_proposal(proposal)
self.semester = runFolder.split('/')[-2]
self.proposal = proposal
self.topic = runFolder.split('/')[-3]
self.tempdir = None
self.save_folder = os.path.join(runFolder, 'usr/condensed_runs/')
self.distance = distance
self.px_pitch_h = 236 # horizontal pitch in microns
self.px_pitch_v = 204 # vertical pitch in microns
self.aspect = self.px_pitch_v/self.px_pitch_h # aspect ratio of the DSSC images
self.geom = None
self.mask = None
self.max_fraction_memory = 0.4
self.filter_mask = None
self.Nworker = 16
print('DSSC configuration')
print(f'Topic: {self.topic}')
print(f'Semester: {self.semester}')
print(f'Proposal: {self.proposal}')
print(f'Default save folder: {self.save_folder}')
print(f'Sample to DSSC distance: {self.distance} m')
if not os.path.exists(self.save_folder):
warnings.warn(f'Default save folder does not exist: {self.save_folder}')
def __del__(self):
# delete the temporary folder
if self.tempdir:
shutil.rmtree(self.tempdir)
def open_run(self, run_nr, isDark=False):
""" Open a run with extra-data and prepare the virtual dataset for multiprocessing
inputs:
run_nr: the run number
isDark: a boolean to specify if the run is a dark run or not
"""
print('Opening run data with extra-data')
self.run_nr = run_nr
self.xgm = None
self.filter_mask = None
self.run = ed.open_run(self.proposal, self.run_nr)
self.isDark = isDark
self.plot_title = f'{self.proposal} run: {self.run_nr}'
self.fpt = self.run.detector_info('SCS_DET_DSSC1M-1/DET/0CH0:xtdf')['frames_per_train']
self.nbunches = self.run.get_array('SCS_RR_UTC/MDL/BUNCH_DECODER', 'sase3.nPulses.value')
self.nbunches = np.unique(self.nbunches)
if len(self.nbunches) == 1:
self.nbunches = self.nbunches[0]
else:
warnings.warn('not all trains have same length DSSC data')
print(f'nbunches: {self.nbunches}')
self.nbunches = self.nbunches[-1]
print(f'DSSC frames per train: {self.fpt}')
print(f'SA3 bunches per train: {self.nbunches}')
if self.tempdir is not None:
shutil.rmtree(self.tempdir)
self.tempdir = tempfile.mkdtemp()
print(f'Temporary directory: {self.tempdir}')
print('Creating virtual dataset')
self.vds_filenames = self.create_virtual_dssc_datasets(self.run, path=self.tempdir)
# create a dummy scan variable for dark runs
# for other types of runs, use the DSSC.define_scan function to overwrite it
self.scan = xr.DataArray(np.ones_like(self.run.train_ids), dims=['trainId'],
coords={'trainId': self.run.train_ids}).to_dataset(
name='scan_variable')
self.scan_vname = 'dummy'
def define_scan(self, vname, bins):
"""
Prepare the binning of the DSSC data.
inputs:
vname: variable name for the scan, can be a mnemonic string from ToolBox
or a dictionary with ['source', 'key'] fields
bins: step size (bin edges are not yet implemented)
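Example (hedged sketch; the mnemonic and bin size are only illustrative):
dssc.load_xgm()  # XGM data is used to compute the per-bin pumped/unpumped energies
dssc.define_scan('PP800_DelayLine', bins=0.05)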
"""
if type(vname) is dict:
scan = self.run.get_array(vname['source'], vname['key'])
elif type(vname) is str:
if vname not in _mnemonics:
raise ValueError(f'{vname} not found in the ToolBox mnemonics table')
scan = self.run.get_array(_mnemonics[vname]['source'], _mnemonics[vname]['key'])
else:
raise ValueError(f'vname should be a string or a dict. We got {type(vname)}')
if (type(bins) is int) or (type(bins) is float):
scan = bins * np.round(scan / bins)
else:
# TODO: digitize the data
raise ValueError('Binning with bin edges is not yet implemented')
self.scan_vname = vname
self.scan = scan.to_dataset(name='scan_variable')
self.scan['xgm_pumped'] = self.xgm[:, :self.nbunches:2].mean('dim_0')
self.scan['xgm_unpumped'] = self.xgm[:, 1:self.nbunches:2].mean('dim_0')
self.scan_counts = xr.DataArray(np.ones(len(self.scan['scan_variable'])),
dims=['scan_variable'],
coords={'scan_variable': self.scan['scan_variable'].values},
name='counts')
self.scan_points = self.scan.groupby('scan_variable').mean('trainId').coords['scan_variable'].values
self.scan_points_counts = self.scan_counts.groupby('scan_variable').sum()
self.plot_scan()
def plot_scan(self):
""" Plot a previously defined scan to see the scan range and the statistics.
"""
if self.scan:
fig, (ax1, ax2) = plt.subplots(nrows=2, figsize=[5, 5])
else:
fig, ax1 = plt.subplots(nrows=1, figsize=[5, 2.5])
ax1.plot(self.scan_points, self.scan_points_counts, 'o-', ms=2)
ax1.set_xlabel(f'{self.scan_vname}')
ax1.set_ylabel('# trains')
ax1.set_title(self.plot_title)
if self.scan:
ax2.plot(self.scan['scan_variable'])
ax2.set_xlabel('train #')
ax2.set_ylabel(f'{self.scan_vname}')
def load_xgm(self):
""" Loads pulse resolved dedicated SAS3 data from the SCS XGM.
"""
if self.xgm is None:
self.xgm = self.run.get_array(_mnemonics['SCS_SA3']['source'],
_mnemonics['SCS_SA3']['key'],
roi=ed.by_index[:self.nbunches])
def plot_xgm_hist(self, nbins=100):
""" Plots an histogram of the SCS XGM dedicated SAS3 data.
inputs:
nbins: number of the bins for the histogram.
"""
if self.xgm is None:
self.load_xgm()
hist, bins_edges = np.histogram(self.xgm, nbins, density=True)
width = 1.0 * (bins_edges[1] - bins_edges[0])
bins_center = 0.5*(bins_edges[:-1] + bins_edges[1:])
plt.figure(figsize=(5,3))
plt.bar(bins_center, hist, align='center', width=width)
plt.xlabel(f"{_mnemonics['SCS_SA3']['source']}{_mnemonics['SCS_SA3']['key']}")
plt.ylabel('density')
plt.title(self.plot_title)
def xgm_filter(self, xgm_low=-np.inf, xgm_high=np.inf):
""" Filters the data by train. If one pulse within a train has an SASE3 SCS XGM value below
xgm_low or above xgm_high, that train will be dropped from the dataset.
inputs:
xgm_low: low threshold value
xgm_high: high threshold value
"""
if self.xgm is None:
self.load_xgm()
if self.isDark:
warnings.warn(f'This run was loaded as dark. Filtering on xgm makes no sense. Aborting')
return
self.xgm_low = xgm_low
self.xgm_high = xgm_high
filter_mask = (self.xgm > self.xgm_low) * (self.xgm < self.xgm_high)
if self.filter_mask is not None:
self.filter_mask = self.filter_mask*filter_mask
else:
self.filter_mask = filter_mask
valid = filter_mask.prod('dim_0').astype(bool)
xgm_valid = self.xgm.where(valid)
xgm_valid = xgm_valid.dropna('trainId')
self.scan = self.scan.sel({'trainId': xgm_valid.trainId})
nrejected = len(self.run.train_ids) - len(self.scan)
print((f'Rejecting {nrejected} out of {len(self.run.train_ids)} trains due to xgm '
f'thresholds: [{self.xgm_low}, {self.xgm_high}]'))
def load_geom(self, geopath=None, quad_pos=None):
""" Loads and return the DSSC geometry.
inputs:
geopath: path to the h5 geometry file. If None uses a default file.
quad_pos: list of quadrants tuple position. If None uses a default position.
output:
return the loaded geometry
"""
if quad_pos is None:
quad_pos = [(-124.100, 3.112), # TR
(-133.068, -110.604), # BR
( 0.988, -125.236), # BL
( 4.528, -4.912) # TL
]
if geopath is None:
geopath = '/gpfs/exfel/sw/software/git/EXtra-geom/docs/dssc_geo_june19.h5'
self.geom = DSSC_1MGeometry.from_h5_file_and_quad_positions(geopath, quad_pos)
return self.geom
def load_mask(self, fname, plot=True):
""" Load a DSSC mask file.
input:
fname: string of the filename of the mask file
plot: if True, the loaded mask is plotted
"""
dssc_mask = imread(fname)
dssc_mask = dssc_mask.astype(float)[..., 0] // 255
dssc_mask[dssc_mask==0] = np.nan
self.mask = dssc_mask
if plot:
plt.figure()
plt.imshow(self.mask)
def create_virtual_dssc_datasets(self, run, path=''):
""" Create virtual datasets for each 16 DSSC modules used for the multiprocessing.
input:
run: extra-data run
path: string where the virtual files are created
output:
dictionary of key: module number, value: virtual dataset filename
"""
vds_filenames = {}
for module in tqdm(range(16)):
fname = os.path.join(path, f'dssc{module}_vds.h5')
if os.path.isfile(fname):
os.remove(fname)
vds = run.get_virtual_dataset(f'SCS_DET_DSSC1M-1/DET/{module}CH0:xtdf',
'image.data', filename=fname)
vds.file.close() # keep h5 file closed outside 'with' context
vds_filenames[module] = fname
return vds_filenames
def binning(self, do_pulse_mean=True):
""" Bin the DSSC data by the predifined scan type (DSSC.define()) using multiprocessing
"""
# get available memory in GB; we will use at most max_fraction_memory of it
max_GB = psutil.virtual_memory().available/1024**3
print(f'max available memory: {max_GB} GB')
# max_GB / (8byte * Nworker * 128px * 512px * N_pulses)
self.chunksize = int(self.max_fraction_memory*max_GB * 1024**3 // (8 * self.Nworker * 128 * 512 * self.fpt))
print('processing', self.chunksize, 'trains per chunk')
jobs = []
for m in range(16):
jobs.append(dict(
module=m,
fpt=self.fpt,
vds=self.vds_filenames[m],
chunksize=self.chunksize,
scan=self.scan['scan_variable'],
nbunches=self.nbunches,
run_nr=self.run_nr,
do_pulse_mean=do_pulse_mean
))
if self.Nworker != 16:
with warnings.catch_warnings():
warnings.simplefilter("default")
warnings.warn(('Nworker other than 16 known to cause issue' +
'(https://in.xfel.eu/gitlab/SCS/ToolBox/merge_requests/76)'),
RuntimeWarning)
timestamp = strftime('%X')
print(f'start time: {timestamp}')
with parallel_backend('loky', n_jobs=self.Nworker):
module_data = Parallel(verbose=20)(
delayed(process_one_module)(job) for job in tqdm(jobs)
)
print('finished:', strftime('%X'))
# rearrange the multiprocessed data
self.module_data = xr.concat(module_data, dim='module')
self.module_data['run'] = self.run_nr
self.module_data = self.module_data.transpose('scan_variable', 'module', 'x', 'y')
if do_pulse_mean:
self.module_data = xr.merge([self.module_data, self.scan.groupby('scan_variable').mean('trainId')])
elif self.xgm is not None:
xgm_pumped = self.xgm[:, :self.nbunches:2].mean('trainId').to_dataset(name='xgm_pumped').rename({'dim_0':'scan_variable'})
xgm_unpumped = self.xgm[:, 1:self.nbunches:2].mean('trainId').to_dataset(name='xgm_unpumped').rename({'dim_0':'scan_variable'})
self.module_data = xr.merge([self.module_data, xgm_pumped, xgm_unpumped])
self.module_data = self.module_data.squeeze()
if do_pulse_mean:
self.module_data.attrs['scan_variable'] = self.scan_vname
else:
self.module_data.attrs['scan_variable'] = 'pulse id'
def save(self, save_folder=None, overwrite=False):
""" Save the crunched data.
inputs:
save_folder: string of the folder where to save the data.
overwrite: boolean whether or not to overwrite existing files.
"""
if save_folder is None:
save_folder = self.save_folder
if self.isDark:
fname = f'run{self.run_nr}_dark.nc' # no scan
else:
fname = f'run{self.run_nr}.nc' # run with delay scan (change for other scan types!)
save_path = os.path.join(save_folder, fname)
file_exists = os.path.isfile(save_path)
if not file_exists or (file_exists and overwrite):
if file_exists:
warnings.warn(f'Overwriting file: {save_path}')
os.remove(save_path)
self.module_data.to_netcdf(save_path, group='data')
self.module_data.close()
os.chmod(save_path, 0o664)
print('saving: ', save_path)
else:
print('file', save_path, 'exists and overwrite is False')
def load_binned(self, runNB, dark_runNB, xgm_norm = True, save_folder=None):
""" load previously binned (crunched) DSSC data by DSSC.crunch() and DSSC.save()
inputs:
runNB: run number to load
dark_runNB: run number of the corresponding dark
xgm_norm: normalize by XGM data if True
save_folder: path string where the crunched data are saved
"""
if save_folder is None:
save_folder = self.save_folder
self.plot_title = f'{self.proposal} run: {runNB} dark: {dark_runNB}'
dark = xr.load_dataset(os.path.join(save_folder, f'run{dark_runNB}_dark.nc'), group='data',
engine='netcdf4')
binned = xr.load_dataset(os.path.join(save_folder, f'run{runNB}.nc'), group='data',
engine='netcdf4')
binned['pumped'] = (binned['pumped'] - dark['pumped'].values)
binned['unpumped'] = (binned['unpumped'] - dark['unpumped'].values)
if xgm_norm:
binned['pumped'] = binned['pumped'] / binned['xgm_pumped']
binned['unpumped'] = binned['unpumped'] / binned['xgm_unpumped']
self.scan_points = binned['scan_variable']
self.scan_points_counts = binned['sum_count'][:, 0]
self.scan_vname = binned.attrs['scan_variable']
self.scan = None
self.binned = binned
def plot_DSSC(self, use_mask = True, p_low = 1, p_high = 98, vmin = None, vmax = None):
""" Plot pumped and unpumped DSSC images.
inputs:
use_mask: if True, a mask is applied on the DSSC.
p_low: low percentile value to adjust the contrast scale on the unpumped and pumped image
p_high: high percentile value to adjust the contrast scale on the unpumped and pumped image
vmin: low value of the image scale
vmax: high value of the image scale
"""
if use_mask:
if self.mask is None:
raise ValueError('No mask was loaded !')
mask = self.mask
mask_txt = ' masked'
else:
mask = 1
mask_txt = ''
if self.geom is None:
self.load_geom()
im_pump_mean, _ = self.geom.position_modules_fast(self.binned['pumped'].mean('scan_variable'))
im_unpump_mean, _ = self.geom.position_modules_fast(self.binned['unpumped'].mean('scan_variable'))
self.im_pump_mean = mask*im_pump_mean
self.im_unpump_mean = mask*im_unpump_mean
fig = plt.figure(figsize=(9, 4))
grid = ImageGrid(fig, 111,
nrows_ncols=(1,2),
axes_pad=0.15,
share_all=True,
cbar_location="right",
cbar_mode="single",
cbar_size="7%",
cbar_pad=0.15,
)
_vmin, _vmax = np.percentile(self.im_pump_mean[~np.isnan(self.im_pump_mean)], [p_low, p_high])
if vmin is None:
vmin = _vmin
if vmax is None:
vmax = _vmax
im = grid[0].imshow(self.im_pump_mean, vmin=vmin, vmax=vmax, aspect=self.aspect)
grid[0].set_title('pumped' + mask_txt)
im = grid[1].imshow(self.im_unpump_mean, vmin=vmin, vmax=vmax, aspect=self.aspect)
grid[1].set_title('unpumped' + mask_txt)
grid[-1].cax.colorbar(im)
grid[-1].cax.toggle_label(True)
fig.suptitle(self.plot_title)
def azimuthal_int(self, wl, center=None, angle_range=[0, 180-1e-6], dr=1, use_mask=True):
""" Perform azimuthal integration of 1D binned DSSC run.
inputs:
wl: photon wavelength (in meters, so that delta_q is returned in 1/nm)
center: center of integration (defaults to the center returned by the geometry)
angle_range: polar angle range of integration, in degrees
dr: radial width of the integration rings, in pixels
use_mask: if True, use the loaded mask
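Example (hedged sketch; the wavelength is only illustrative, ~1 keV photons):
dssc.azimuthal_int(wl=1.24e-9, use_mask=True)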
"""
if self.geom is None:
self.load_geom()
if use_mask:
if self.mask is None:
raise ValueError('No mask was loaded !')
mask = self.mask
mask_txt = ' masked'
else:
mask = 1
mask_txt = ''
im_pumped_arranged, c_geom = self.geom.position_modules_fast(self.binned['pumped'].values)
im_unpumped_arranged, c_geom = self.geom.position_modules_fast(self.binned['unpumped'].values)
im_pumped_arranged *= mask
im_unpumped_arranged *= mask
im_pumped_mean = im_pumped_arranged.mean(axis=0)
im_unpumped_mean = im_unpumped_arranged.mean(axis=0)
if center is None:
center = c_geom
ai = AzimuthalIntegrator(im_pumped_mean.shape, center, angle_range, dr=dr)
norm = ai(~np.isnan(im_pumped_mean))
az_pump = []
az_unpump = []
for i in tqdm(range(len(self.binned['scan_variable']))):
az_pump.append(ai(im_pumped_arranged[i]) / norm)
az_unpump.append(ai(im_unpumped_arranged[i]) / norm)
az_pump = np.stack(az_pump)
az_unpump = np.stack(az_unpump)
coords = {'scan_variable': self.binned['scan_variable'], 'distance': ai.distance}
azimuthal = xr.DataArray(az_pump, dims=['scan_variable', 'distance'], coords=coords)
azimuthal = azimuthal.to_dataset(name='pumped')
azimuthal['unpumped'] = xr.DataArray(az_unpump, dims=['scan_variable', 'distance'], coords=coords)
azimuthal = azimuthal.transpose('distance', 'scan_variable')
#t0 = 225.5
#azimuthal['delay'] = (t0 - azimuthal.delay)*6.6
#azimuthal['delay'] = azimuthal.delay
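# momentum transfer q = 2*pi*sin(2theta)/wl in 1/nm, with the scattering angle
# 2theta = arctan(r/distance) and r the radial distance on the detector;
# the 1e-9 factor converts 1/m to 1/nm (wl is assumed to be given in meters)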
azimuthal['delta_q (1/nm)'] = 2e-9 * np.pi * np.sin(
np.arctan(azimuthal.distance * self.px_pitch_v*1e-6 / self.distance)) / wl
azimuthal.attrs = self.binned.attrs
self.azimuthal = azimuthal.swap_dims({'distance': 'delta_q (1/nm)'})
def plot_azimuthal_int(self, kind='difference', lim=None):
""" Plot a computed azimuthal integration.
inputs:
kind: (str) either 'difference' or 'relative' to change the type of plot.
lim: optional [vmin, vmax] color scale limits for the difference/relative panel.
"""
fig, [ax1, ax2, ax3] = plt.subplots(nrows=3, sharex=True, sharey=True)
xr.plot.imshow(self.azimuthal.pumped, ax=ax1, vmin=0, robust=True)
ax1.set_title('pumped')
xr.plot.imshow(self.azimuthal.unpumped, ax=ax2, vmin=0, robust=True)
ax2.set_title('unpumped')
if kind == 'difference':
val = self.azimuthal.pumped - self.azimuthal.unpumped
ax3.set_title('pumped - unpumped')
elif kind == 'relative':
val = (self.azimuthal.pumped - self.azimuthal.unpumped)/self.azimuthal.unpumped
ax3.set_title('(pumped - unpumped)/unpumped')
else:
raise ValueError('kind should be either difference or relative')
if lim is None:
xr.plot.imshow(val, ax=ax3, robust=True)
else:
xr.plot.imshow(val, ax=ax3, vmin=lim[0], vmax=lim[1])
ax3.set_xlabel(self.scan_vname)
fig.suptitle(f'{self.plot_title}')
def plot_azimuthal_line_cut(self, data, qranges, qwidths):
""" Plot line scans on top of the data.
inputs:
data: an azimuthal integrated xarray DataArray with 'delta_q (1/nm)' as one of its dimension.
qranges: a list of q-range centers
qwidths: a list of q-widths, same length as qranges
"""
fig, [ax1, ax2] = plt.subplots(nrows=2, sharex=True, figsize=[8, 7])
xr.plot.imshow(data, ax=ax1, robust=True)
# attributes are not propagated during xarray mathematical operations (https://github.com/pydata/xarray/issues/988)
# so the scan variable name might no longer be present in data
ax1.set_xlabel(self.scan_vname)
fig.suptitle(f'{self.plot_title}')
for i, (qr, qw) in enumerate(zip(qranges, qwidths)):
sel = (data['delta_q (1/nm)'] > (qr - qw/2)) * (data['delta_q (1/nm)'] < (qr + qw/2))
val = data.where(sel).mean('delta_q (1/nm)')
ax2.plot(data.scan_variable, val, c=f'C{i}', label=f'q = {qr:.2f}')
ax1.axhline(qr - qw/2, c=f'C{i}', lw=1)
ax1.axhline(qr + qw/2, c=f'C{i}', lw=1)
ax2.legend()
ax2.set_xlabel(self.scan_vname)
# since 'self' is not picklable, this function has to be outside the DSSC class so that it can be used
# by the multiprocessing pool.map function
def process_one_module(job):
module = job['module']
fpt = job['fpt']
vds = job['vds']
scan = job['scan']
chunksize = job['chunksize']
nbunches = job['nbunches']
do_pulse_mean = job['do_pulse_mean']
image_path = f'INSTRUMENT/SCS_DET_DSSC1M-1/DET/{module}CH0:xtdf/image/data'
npulse_path = f'INDEX/SCS_DET_DSSC1M-1/DET/{module}CH0:xtdf/image/count'
with h5py.File(vds, 'r') as m:
all_trainIds = m['INDEX/trainId'][()]
frames_per_train = m[npulse_path][()]
trains_with_data = all_trainIds[frames_per_train == fpt]
len_scan = len(scan.groupby(scan))
if do_pulse_mean:
# create empty dataset to add actual data to
module_data = xr.DataArray(np.empty([len_scan, 128, 512], dtype=np.float64),
dims=['scan_variable', 'x', 'y'],
coords={'scan_variable': np.unique(scan)})
module_data = module_data.to_dataset(name='pumped')
module_data['unpumped'] = xr.full_like(module_data['pumped'], 0)
module_data['sum_count'] = xr.DataArray(np.zeros_like(np.unique(scan)), dims=['scan_variable'])
module_data['module'] = module
else:
scan = xr.full_like(scan, 1)
len_scan = len(scan.groupby(scan))
module_data = xr.DataArray(np.empty([len_scan, int(nbunches/2), 128, 512], dtype=np.float64),
dims=['scan_variable', 'pulse', 'x', 'y'],
coords={'scan_variable': np.unique(scan)})
module_data = module_data.to_dataset(name='pumped')
module_data['unpumped'] = xr.full_like(module_data['pumped'], 0)
module_data['sum_count'] = xr.full_like(module_data['pumped'][..., 0, 0], 0)
module_data['module'] = module
# crunching
with h5py.File(vds, 'r') as m:
chunk_start = np.arange(len(all_trainIds), step=chunksize, dtype=int)
trains_start = 0
# This line is the strange hack from https://github.com/tqdm/tqdm/issues/485
print(' ', end='', flush=True)
for c0 in tqdm(chunk_start, desc=f'pool.map#{module:02d}', position=module):
chunk_dssc = np.s_[int(c0 * fpt):int((c0 + chunksize) * fpt)] # for dssc data
data = m[image_path][chunk_dssc].squeeze()
data = data.astype(np.float64)
n_trains = int(data.shape[0] // fpt)
trainIds_chunk = np.unique(trains_with_data[trains_start:trains_start + n_trains])
trains_start += n_trains
n_trains_actual = len(trainIds_chunk)
coords = {'trainId': trainIds_chunk}
data = np.reshape(data, [n_trains_actual, fpt, 128, 512])[:, :int(2 * nbunches)]
data = xr.DataArray(data, dims=['trainId', 'pulse', 'x', 'y'], coords=coords)
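# split frames into pumped and unpumped images: with the assumed standard SCS
# intra-dark pattern, pumped frames are every 4th DSSC frame starting at 0 and
# unpumped frames every 4th frame starting at 2 (frames 1, 3, ... are intra-darks)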
if do_pulse_mean:
data_pumped = (data[:, ::4]).mean('pulse')
data_unpumped = (data[:, 2::4]).mean('pulse')
else:
data_pumped = (data[:, ::4])
data_unpumped = (data[:, 2::4])
data = data_pumped.to_dataset(name='pumped')
data['unpumped'] = data_unpumped
data['sum_count'] = xr.full_like(data['unpumped'][..., 0, 0], fill_value=1)
# grouping and summing
data['scan_variable'] = scan # this only adds scan data for matching trainIds
data = data.dropna('trainId')
data = data.groupby('scan_variable').sum('trainId')
where = {'scan_variable': data.scan_variable}
for var in ['pumped', 'unpumped', 'sum_count']:
module_data[var].loc[where] = module_data[var].loc[where] + data[var]
for var in ['pumped', 'unpumped']:
module_data[var] = module_data[var] / module_data.sum_count
#module_data = module_data.drop('sum_count')
if not do_pulse_mean:
module_data = module_data.sum('scan_variable')
module_data = module_data.rename({'pulse':'scan_variable'})
return module_data
import multiprocessing
from time import strftime
from tqdm.auto import tqdm
import os
import warnings
import psutil
import extra_data as ed
from extra_data.read_machinery import find_proposal
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import ImageGrid
import matplotlib.patches as patches
import numpy as np
import xarray as xr
import h5py
from glob import glob
from imageio import imread
from ..constants import mnemonics as _mnemonics
from ..misc.laser_utils import positionToDelay
class DSSC1module:
def __init__(self, module, proposal):
""" Create a DSSC object to process 1 module of DSSC data.
inputs:
module: module number to process
proposal: (int, str) proposal number or string
"""
self.module = module
if isinstance(proposal,int):
proposal = 'p{:06d}'.format(proposal)
self.runFolder = find_proposal(proposal)
self.semester = self.runFolder.split('/')[-2]
self.proposal = proposal
self.topic = self.runFolder.split('/')[-3]
self.save_folder = os.path.join(self.runFolder, 'usr/condensed_runs/')
self.px_pitch_h = 236 # horizontal pitch in microns
self.px_pitch_v = 204 # vertical pitch in microns
self.aspect = self.px_pitch_v/self.px_pitch_h # aspect ratio of the DSSC images
print('DSSC configuration')
print(f'DSSC module: {self.module}')
print(f'Topic: {self.topic}')
print(f'Semester: {self.semester}')
print(f'Proposal: {self.proposal}')
print(f'Default save folder: {self.save_folder}')
if not os.path.exists(self.save_folder):
warnings.warn(f'Default save folder does not exist: {self.save_folder}')
self.dark_data = 0
self.max_fraction_memory = 0.8
self.Nworker = 10
self.rois = None
self.maxSaturatedPixel = 1
def open_run(self, run_nr, t0=0.0):
""" Open a run with extra-data and prepare the virtual dataset for multiprocessing
inputs:
run_nr: the run number
t0: optional t0 in mm
"""
print('Opening run data with extra-data')
self.run_nr = run_nr
self.xgm = None
self.run = ed.open_run(self.proposal, self.run_nr)
self.plot_title = f'{self.proposal} run: {self.run_nr}'
self.fpt = self.run.detector_info(f'SCS_DET_DSSC1M-1/DET/{self.module}CH0:xtdf')['frames_per_train']
self.nbunches = self.run.get_array('SCS_RR_UTC/MDL/BUNCH_DECODER', 'sase3.nPulses.value')
self.nbunches = np.unique(self.nbunches)
if len(self.nbunches) == 1:
self.nbunches = self.nbunches[0]
else:
warnings.warn('not all trains have same length DSSC data')
print(f'nbunches: {self.nbunches}')
self.nbunches = self.nbunches[-1]
print(f'DSSC frames per train: {self.fpt}')
print(f'SA3 bunches per train: {self.nbunches}')
print('Collecting DSSC module files')
self.collect_dssc_module_file()
print(f'Loading XGM data')
self.xgm = self.run.get_array(_mnemonics['SCS_SA3']['source'],
_mnemonics['SCS_SA3']['key'],
roi=ed.by_index[:self.nbunches])
self.xgm = self.xgm.rename({'dim_0':'pulseId'})
self.xgm['pulseId'] = np.arange(0, 2*self.nbunches, 2)
print(f'Loading mono nrj data')
self.nrj = self.run.get_array(_mnemonics['nrj']['source'],
_mnemonics['nrj']['key'])
print(f'Loading delay line data')
try:
self.delay_mm = self.run.get_array(_mnemonics['PP800_DelayLine']['source'],
_mnemonics['PP800_DelayLine']['key'])
except Exception:
self.delay_mm = 0*self.nrj
self.t0 = t0
self.delay_ps = positionToDelay(self.delay_mm, origin=self.t0, invert=True)
def collect_dssc_module_file(self):
""" Collect the raw DSSC module h5 files.
"""
pattern = self.runFolder + f'/raw/r{self.run_nr:04d}/RAW-R{self.run_nr:04d}-DSSC{self.module:02d}-S*.h5'
self.h5list = glob(pattern)
def process(self, dark_pass=None):
""" Process DSSC data from one module using multiprocessing
dark_pass: if None, process the data; if 'mean', compute the dark mean; if 'std', compute the dark std
"""
# get available memory in GB, we will try to use 80 % of it
max_GB = psutil.virtual_memory().available/1024**3
print(f'max available memory: {max_GB} GB')
# max_GB / (8byte * Nworker * 128px * 512px * N_pulses)
self.chunksize = int(self.max_fraction_memory*max_GB * 1024**3 // (8 * self.Nworker * 128 * 512 * self.fpt))
print('processing', self.chunksize, 'trains per chunk')
if dark_pass == 'mean':
rois = None
dark = 0
mask = 1
elif dark_pass == 'std':
dark = self.dark_data['dark_mean']
rois = None
mask = 1
elif dark_pass is None:
dark = self.dark_data['dark_mean']
rois = self.rois
mask = self.dark_data['mask']
else:
raise ValueError(f"dark_pass should be either None or 'mean' or 'std' but not {dark_pass}")
jobs = []
for m,h5fname in enumerate(self.h5list):
jobs.append(dict(
fpt=self.fpt,
module=self.module,
h5fname=h5fname,
chunksize=self.chunksize,
nbunches=self.nbunches,
workerId=m,
Nworker=self.Nworker,
dark_data=dark,
rois=rois,
mask=mask,
maxSaturatedPixel=self.maxSaturatedPixel
))
timestamp = strftime('%X')
print(f'start time: {timestamp}')
with multiprocessing.Pool(self.Nworker) as pool:
res = pool.map(process_one_module, jobs)
print('finished:', strftime('%X'))
# rearrange the multiprocessed data
# the sum over 'worker' only merges the partial, non-overlapping results from each worker
self.module_data = xr.concat(res, dim='worker').sum(dim='worker')
# reorder the dimension
if 'trainId' in self.module_data.dims:
self.module_data = self.module_data.transpose('trainId', 'pulseId', 'x', 'y')
else:
self.module_data = self.module_data.transpose('pulseId', 'x', 'y')
# fix some computation now that we have everything
self.module_data['std_data'] = np.sqrt(self.module_data['std_data']/(self.module_data['counts'] - 1))
self.module_data['dark_corrected_data'] = self.module_data['dark_corrected_data']/self.module_data['counts']
self.module_data['run'] = self.run_nr
if dark_pass == 'mean':
self.dark_data = self.module_data['dark_corrected_data'].to_dataset(name='dark_mean')
self.dark_data['run'] = self.run_nr
elif dark_pass == 'std':
self.dark_data['dark_std'] = self.module_data['std_data']
assert self.dark_data['run'] == self.run_nr, "noise map computed from different darks"
else:
self.module_data['xgm'] = self.xgm
self.module_data['nrj'] = self.nrj
self.module_data['delay_mm'] = self.delay_mm
self.module_data['delay_ps'] = self.delay_ps
self.module_data['t0'] = self.t0
self.plot_title = f"{self.proposal} run: {self.module_data['run'].values} dark: {self.dark_data['run'].values}"
self.module_data.attrs['plot_title'] = self.plot_title
def compute_mask(self, low=0.01, high=0.8):
""" Compute a DSSC module mask from the noise map of a dark run.
"""
if self.dark_data['dark_std'] is None:
raise ValueError('Cannot compute a mask from a missing dark noise map')
self.dark_data['mask_low'] = low
self.dark_data['mask_high'] = high
m_std = self.dark_data['dark_std'].mean('pulseId')
self.dark_data['mask'] = 1 - ((m_std > self.dark_data['mask_high']) + (m_std < self.dark_data['mask_low']))
def plot_module(self, plot_dark=False, low=1, high=98, vmin=None, vmax=None):
""" Plot a module.
inputs:
plot_dark: if true, plot dark instead of run data.
low: low percentile fraction of the display scale
high: high percentile fraction of the display scale
vmin: low value of the display scale, overwrites vmin computed from low
vmax: max value of the display scale, overwrites vmax computed from high
"""
if plot_dark:
mean = self.dark_data['dark_mean'].mean('pulseId')
std = self.dark_data['dark_std']
title = f"{self.proposal} dark: {self.dark_data['run'].values}"
else:
mean = self.module_data['dark_corrected_data'].mean('pulseId')
std = self.module_data['std_data']
title = self.plot_title
fig, (ax1, ax2, ax3, ax4) = plt.subplots(nrows=4, figsize=[5, 4*2.5])
_vmin, _vmax = np.percentile((mean.values[~self.dark_data['mask']]).flatten(), [low, high])
if vmin is None:
vmin = _vmin
if vmax is None:
vmax = _vmax
im = ax1.imshow(mean, vmin=vmin, vmax=vmax)
fig.colorbar(im, ax=ax1)
ax1.set_title('mean')
fig.suptitle(title)
im = ax2.imshow(std.mean('pulseId'), vmin=0, vmax=2)
fig.colorbar(im, ax=ax2)
ax2.set_title('std')
ax3.hist(std.values.flatten(), bins=200, range=[0, 2], density=True)
ax3.axvline(self.dark_data['mask_low'], ls='--', c='k')
ax3.axvline(self.dark_data['mask_high'], ls='--', c='k')
ax3.set_yscale('log')
ax3.set_ylabel('density')
ax3.set_xlabel('std values')
im = ax4.imshow(self.dark_data['mask'])
fig.colorbar(im, ax=ax4)
def save(self, save_folder=None, overwrite=False, isDark=False):
""" Save the crunched data.
inputs:
save_folder: string of the folder where to save the data.
overwrite: boolean whether or not to overwrite existing files.
isDark: save the dark or the process data
"""
if save_folder is None:
save_folder = self.save_folder
if isDark:
fname = f'run{self.run_nr}_dark.h5' # no scan
data = self.dark_data
else:
fname = f'run{self.run_nr}.h5' # run with delay scan (change for other scan types!)
data = self.module_data
save_path = os.path.join(save_folder, fname)
file_exists = os.path.isfile(save_path)
if not file_exists or (file_exists and overwrite):
if file_exists:
warnings.warn(f'Overwriting file: {save_path}')
os.remove(save_path)
data.to_netcdf(save_path, group='data')
data.close()
os.chmod(save_path, 0o664)
print('saving: ', save_path)
else:
print('file', save_path, 'exists and overwrite is False')
def load_dark(self, dark_runNB, save_folder=None):
""" Load dark data.
inputs:
save_folder: string of the folder where the data were saved.
"""
if save_folder is None:
save_folder = self.save_folder
self.run_nr = dark_runNB
self.dark_data = xr.load_dataset(os.path.join(save_folder, f'run{dark_runNB}_dark.h5'), group='data')
self.plot_title = f"{self.proposal} dark: {self.dark_data['run'].values}"
def show_rois(self):
fig, ax1 = plt.subplots(nrows=1, figsize=[5, 2.5])
try:
ax1.imshow(self.module_data['dark_corrected_data'].mean('pulseId') * self.dark_data['mask'])
except Exception:
ax1.imshow(self.dark_data['dark_mean'].mean('pulseId') * self.dark_data['mask'])
for r,v in self.rois.items():
rect = patches.Rectangle((v['y'][0], v['x'][0]),
v['y'][1] - v['y'][0],
v['x'][1] - v['x'][0],
linewidth=1, edgecolor='r', facecolor='none')
ax1.add_patch(rect)
fig.suptitle(self.plot_title)
# since 'self' is not picklable, this function has to be outside the DSSC class so that it can be used
# by the multiprocessing pool.map function
def process_one_module(job):
chunksize = job['chunksize']
Nworker = job['Nworker']
workerId = job['workerId']
dark_data = job['dark_data']
fpt = job['fpt']
module = job['module']
rois = job['rois']
mask = job['mask']
h5fname = job['h5fname']
maxSaturatedPixel = job['maxSaturatedPixel']
image_path = f"INSTRUMENT/SCS_DET_DSSC1M-1/DET/{module}CH0:xtdf/image/data"
npulse_path = f"INDEX/SCS_DET_DSSC1M-1/DET/{module}CH0:xtdf/image/count"
with h5py.File(h5fname, 'r') as m:
all_trainIds = m['INDEX/trainId'][()]
n_trains = len(all_trainIds)
n_chunk = int(np.ceil(n_trains/chunksize)) + 1
chunks = np.linspace(0, n_trains, n_chunk, endpoint=True, dtype=int)
# create empty dataset to add actual data to
module_data = xr.DataArray(np.zeros([fpt, 128, 512], dtype=np.float64),
dims=['pulseId', 'x', 'y'],
coords={'pulseId':np.arange(fpt)}).to_dataset(name='dark_corrected_data')
module_data['std_data'] = xr.DataArray(np.zeros([fpt, 128, 512], dtype=np.float64),
dims=['pulseId', 'x', 'y'])
if rois is not None:
for k in rois.keys():
module_data[k] = xr.DataArray(np.empty([n_trains], dtype=np.float64),
dims=['trainId'], coords = {'trainId': all_trainIds})
module_data['counts'] = 0
# crunching
with h5py.File(h5fname, 'r') as m:
#chunk_start = np.arange(len(all_trainIds), step=job['chunksize'], dtype=int)
trains_start = 0
# This line is the strange hack from https://github.com/tqdm/tqdm/issues/485
print(' ', end='', flush=True)
for k,v in enumerate(tqdm(chunks[:-1], desc=f"pool.map#{workerId:02d}")):
chunk_dssc = np.s_[int(chunks[k] * fpt):int(chunks[k+1] * fpt)] # for dssc data
data = m[image_path][chunk_dssc].squeeze()
trains = m['INDEX/trainId'][np.s_[int(chunks[k]):int(chunks[k+1])]]
n_trains = len(trains)
data = data.astype(np.float64)
data = xr.DataArray(np.reshape(data, [n_trains, fpt, 128, 512]),
dims=['trainId', 'pulseId', 'x', 'y'],
coords={'trainId': trains})
temp = data - dark_data
if rois is not None:
temp2 = temp.where(mask)
for k,v in rois.items():
bkg = dark_data.isel({'x':slice(v['x'][0], v['x'][1]),
'y':slice(v['y'][0], v['y'][1])})
im = data.isel({'x':slice(v['x'][0], v['x'][1]),
'y':slice(v['y'][0], v['y'][1])})
smask = mask.isel({'x':slice(v['x'][0], v['x'][1]),
'y':slice(v['y'][0], v['y'][1])})
im = im.where(smask)
cim = im - bkg.where(smask)
val = cim.sum(dim=['x','y'])
tokeep = (im>254).sum(dim=['x', 'y']) < maxSaturatedPixel
tokeep = tokeep.assign_coords(pulseId = val.coords['pulseId'].values)
todrop = 1-tokeep
Ndropped = todrop.sum().values
percent_dropped = 100*Ndropped/(n_trains * fpt)
print(f'Dropped: {Ndropped}, i.e. {percent_dropped:.2f}%')
gval = val.where(tokeep, np.nan)
module_data[k] = gval
module_data['dark_corrected_data'] += temp.sum(dim='trainId')
module_data['std_data'] += (temp**2).sum(dim='trainId')
module_data['counts'] += n_trains
return module_data
""" Digitizers related sub-routines
Copyright (2021) SCS Team.
(contributions preferably comply with pep8 code structure
guidelines.)
"""
import logging
import os
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
from scipy.signal import find_peaks
from ..misc.bunch_pattern_external import is_pulse_at
from ..mnemonics_machinery import mnemonics_for_run
from extra_data import open_run
from extra_data.read_machinery import find_proposal
from extra.components import XrayPulses, OpticalLaserPulses
__all__ = [
'check_peak_params',
'get_digitizer_peaks',
'get_dig_avg_trace',
'load_processed_peaks',
'check_processed_peak_params'
]
log = logging.getLogger(__name__)
def peaks_from_raw_trace(traces, pulseStart, pulseStop, baseStart, baseStop,
period=None, npulses=None, extra_dim=None,
mode='trapz'):
"""
Computes peaks from raw digitizer traces by trapezoidal integration.
Parameters
----------
traces: xarray DataArray or numpy array containing raw traces. If
numpy array is provided, the second dimension is that of the samples.
pulseStart: int or list or 1D-numpy array
trace index of integration start. If 1d array, each value is the start
of one peak. The period and npulses parameters are ignored.
pulseStop: int
trace index of integration stop.
baseStart: int
trace index of background start.
baseStop: int
trace index of background stop.
period: int
number of samples between two peaks. Ignored if pulseStart is a 1D array.
npulses: int
number of pulses. Ignored if pulseStart is a 1D array.
extra_dim: str
Name given to the dimension along the peaks. Defaults to 'pulseId'.
mode: str in ['trapz', 'sum', 'mean', 'amplitude']
The operation performed over the integration region. `amplitude` is
the difference between the baseline and the peak height.
Returns
-------
xarray DataArray
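Example
-------
# hedged sketch: the sample indices below are placeholders, not real settings
peaks = peaks_from_raw_trace(traces, pulseStart=1000, pulseStop=1010,
baseStart=980, baseStop=995, period=440, npulses=20)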
"""
assert traces.ndim == 2
if isinstance(traces, xr.DataArray):
ntid = traces.sizes['trainId']
coords = traces.coords
traces = traces.values
if traces.shape[0] != ntid:
traces = traces.T
else:
coords = None
if hasattr(pulseStart, '__len__'):
pulseStart = np.array(pulseStart)
pulses = pulseStart - pulseStart[0]
pulseStart = pulseStart[0]
else:
if period is None or period == 0:
period = 1
pulses = range(0, npulses*period, period)
if extra_dim is None:
extra_dim = 'pulseId'
results = xr.DataArray(np.empty((traces.shape[0], len(pulses))),
coords=coords,
dims=['trainId', extra_dim])
func = {'trapz': np.trapz, 'sum': np.sum,
'mean': np.mean, 'amplitude': 'amplitude'}.get(mode)
if func is None:
raise ValueError('mode must be a string in ["trapz", "sum",'
'"mean", "amplitude"]')
for i, p in enumerate(pulses):
a = pulseStart + p
b = pulseStop + p
bkga = baseStart + p
bkgb = baseStop + p
if b > traces.shape[1]:
break
bg = np.outer(np.median(traces[:, bkga:bkgb], axis=1),
np.ones(b-a))
if mode == 'amplitude':
results[:, i] = np.max(traces[:, bkga:b], axis=1) - \
np.min(traces[:, bkga:b], axis=1)
else:
results[:, i] = func(traces[:, a:b] - bg, axis=1)
return results
def peaks_from_apd(array, params, digitizer, bpt, bunchPattern):
"""
Extract peak-integrated data according to the bunch pattern.
Parameters
----------
array: xarray DataArray
2D array containing peak-integrated data
params: dict
peak-integration parameters of the digitizer
digitizer: str
digitizer type, one of {'FastADC', 'ADQ412'}
bpt: xarray DataArray
bunch pattern table
bunchPattern: str
one of {'sase1', 'sase3', 'scs_ppl'}, used to select pulses and
assign name of the pulse id dimension.
Returns
-------
xarray DataArray with pulse id coordinates.
"""
if params['enable'] == 0 or (array == 1.).all():
raise ValueError('The digitizer did not record integrated peaks. '
'Consider using raw traces from the same channel '
'for peak integration.')
if digitizer == 'FastADC':
min_distance = 24
if digitizer == 'ADQ412':
min_distance = 440
period = params['period']
if period % min_distance != 0:
log.warning(f'Warning: the pulse period ({period} samples) of '
'digitizer is not a multiple of the pulse separation at '
f'4.5 MHz ({min_distance} samples). Pulse id assignment '
'is likely to fail.')
stride = int(period/min_distance)
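# stride = pulse period expressed in number of 4.5 MHz bunch slots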
npulses_apd = params['npulses']
dim_names = {'sase3': 'sa3_pId', 'sase1': 'sa1_pId', 'scs_ppl': 'ol_pId'}
pulse_dim = dim_names[bunchPattern]
arr_dim = [dim for dim in array.dims if dim != 'trainId'][0]
if npulses_apd > array.sizes[arr_dim]:
log.warning(f'The digitizer was set to record {npulses_apd} pulses '
f'but the array length is only {array.sizes[arr_dim]}.')
npulses_apd = array.sizes[arr_dim]
mask = is_pulse_at(bpt, bunchPattern).rename({'pulse_slot': pulse_dim})
mask = mask.where(mask.trainId.isin(array.trainId), drop=True)
mask = mask.assign_coords({pulse_dim: np.arange(bpt.sizes['pulse_slot'])})
pid = np.sort(np.unique(np.where(mask)[1]))
npulses_bpt = len(pid)
apd_coords = np.arange(pid[0], pid[0] + stride*npulses_apd, stride)
noalign = False
if len(np.intersect1d(apd_coords, pid, assume_unique=True)) < npulses_bpt:
log.warning('Not all pulses were recorded. The digitizer '
'was set to record pulse ids '
f'{apd_coords[apd_coords<bpt.sizes["pulse_slot"]]} but the'
'bunch pattern for'
f' {bunchPattern} is {pid}. Skipping pulse ID alignment.')
noalign = True
array = array.isel({arr_dim: slice(0, npulses_apd)})
array = array.where(array != 1.)
if noalign:
return array
array = array.rename(
{arr_dim: pulse_dim}).assign_coords({pulse_dim: apd_coords})
array, mask = xr.align(array, mask, join='inner')
array = array.where(mask, drop=True)
return array
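# Hypothetical sketch of how peaks_from_apd is typically driven (names such
# as 'run', 'apd' and 'bpt' are placeholders defined elsewhere):
# >>> params = channel_peak_params(run, 'MCP2apd')
# >>> peaks = peaks_from_apd(apd, params, 'ADQ412', bpt, 'sase3')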
def get_dig_avg_trace(run, mnemonic, ntrains=None):
"""
Compute the average of a digitizer trace over ntrains evenly spaced
trains.
Parameters
----------
run: extra_data.DataCollection
DataCollection containing the digitizer data.
mnemonic: str
ToolBox mnemonic of the digitizer data, e.g. 'MCP2apd'.
ntrains: int
Number of trains used to calculate the average raw trace.
If None, all trains are used.
Returns
-------
trace: DataArray
The average digitizer trace
"""
run_mnemonics = mnemonics_for_run(run)
if ntrains is None:
sel = run
else:
total_tid = len(run.train_ids)
stride = int(np.max([1, np.floor(total_tid/ntrains)]))
sel = run.select_trains(np.s_[0:None:stride])
m = run_mnemonics[mnemonic]
raw_trace = sel.get_array(m['source'], m['key'], m['dim'])
raw_trace = raw_trace.mean(dim='trainId')
return raw_trace
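# Example (illustrative; assumes an opened DataCollection named 'run'):
# >>> avg_trace = get_dig_avg_trace(run, 'FastADC2raw', ntrains=200)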
def channel_peak_params(run, source, key=None, channel=None, board=None):
"""
Extract peak-integration parameters used by a channel of the digitizer.
Parameters
----------
run: extra_data.DataCollection
DataCollection containing the digitizer data.
source: str
ToolBox mnemonic of a digitizer data, e.g. "MCP2apd" or
"FastADC4peaks", or name of digitizer source, e.g.
'SCS_UTC1_ADQ/ADC/1:network'.
key: str
optional, used in combination with source (if source is not a ToolBox
mnemonic) instead of channel and board.
channel: int or str
The digitizer channel for which to retrieve the parameters. If None,
inferred from the source mnemonic or source + key arguments.
board: int
Board of the ADQ412. If None, inferred from the source mnemonic or
source + key arguments.
Returns
-------
dict with peak integration parameters.
"""
run_mnemonics = mnemonics_for_run(run)
if source in run_mnemonics:
m = run_mnemonics[source]
source = m['source']
key = m['key']
if 'network' in source:
return adq412_channel_peak_params(run, source, key, channel, board)
else:
return fastADC_channel_peak_params(run, source, channel)
def fastADC_channel_peak_params(run, source, channel=None):
"""
Extract peak-integration parameters used by a channel of the Fast ADC.
Parameters
----------
run: extra_data.DataCollection
DataCollection containing the digitizer data.
source: str
Name of Fast ADC source, e.g. 'SCS_UTC1_MCP/ADC/1:channel_5.output'.
channel: int
The Fast ADC channel for which to retrieve the parameters. If None,
inferred from the source.
Returns
-------
dict with peak integration parameters.
"""
fastADC_keys = {
'baseStart': ('baselineSettings.baseStart.value',
'baseStart.value'),
'baseStop': ('baseStop.value',),
'baseLength': ('baselineSettings.length.value',),
'enable': ('enablePeakComputation.value',),
'pulseStart': ('initialDelay.value',),
'pulseLength': ('peakSamples.value',),
'npulses': ('numPulses.value',),
'period': ('pulsePeriod.value',)
}
if channel is None:
channel = int(source.split(':')[1].split('.')[0].split('_')[1])
baseName = f'channel_{channel}.'
source = source.split(':')[0]
data = run.select(source).train_from_index(0)[1][source]
result = {}
for mnemo, versions in fastADC_keys.items():
for v in versions:
key = baseName + v
if key in data:
result[mnemo] = data[key]
if 'baseLength' in result:
result['baseStop'] = result['baseStart'] + \
result.pop('baseLength')
if 'pulseLength' in result:
result['pulseStop'] = result['pulseStart'] + \
result.pop('pulseLength')
return result
def adq412_channel_peak_params(run, source, key=None,
channel=None, board=None):
"""
Extract peak-integration parameters used by a channel of the ADQ412.
Parameters
----------
run: extra_data.DataCollection
DataCollection containing the digitizer data.
source: str
Name of ADQ412 source, e.g. 'SCS_UTC1_ADQ/ADC/1:network'.
key: str
optional, e.g. 'digitizers.channel_1_D.apd.pulseIntegral', used in
combination of source instead of channel and board.
channel: int or str
The ADQ412 channel for which to retrieve the parameters. If None,
inferred from the source mnemonic or source + key arguments.
board: int
Board of the ADQ412. If None, inferred from the source mnemonic or
source + key arguments.
Returns
-------
dict with peak integration parameters.
"""
if key is None:
if channel is None or board is None:
raise ValueError('key or channel + board arguments are '
'missing.')
else:
k = key.split('.')[1].split('_')
ch_to_int = {'A': 0, 'B': 1, 'C': 2, 'D': 3}
channel = ch_to_int[k[2]]
board = k[1]
baseName = f'board{board}.apd.channel_{channel}.'
source = source.split(':')[0]
adq412_keys = {
'baseStart': (baseName + 'baseStart.value',),
'baseStop': (baseName + 'baseStop.value',),
'enable': (baseName + 'enable.value',),
'pulseStart': (baseName + 'pulseStart.value',),
'pulseStop': (baseName + 'pulseStop.value',),
'initialDelay': (baseName + 'initialDelay.value',),
'upperLimit': (baseName + 'upperLimit.value',),
'npulses': (f'board{board}.apd.numberOfPulses.value',)
}
data = run.select(source).train_from_index(0)[1][source]
result = {}
for mnemo, versions in adq412_keys.items():
for key in versions:
if key in data:
result[mnemo] = data[key]
result['period'] = result.pop('upperLimit') - \
result.pop('initialDelay')
return result
def find_peaks_in_raw_trace(trace, height=None, width=1, distance=24):
"""
Find integration parameters for peak integration of a raw
digitizer trace. Based on scipy find_peaks().
Parameters
----------
trace: numpy array or xarray DataArray
The digitizer raw trace used to find peaks
height: int
minimum height for peak determination
width: int
minimum width of peak
distance: int
minimum distance between two peaks
Returns
-------
dict with keys 'pulseStart', 'pulseStop', 'baseStart', 'baseStop',
'period', 'npulses' and values in number of samples.
"""
if isinstance(trace, xr.DataArray):
trace = trace.values
trace_norm = trace - np.median(trace)
trace_norm = -trace_norm if np.mean(trace_norm) < 0 else trace_norm
SNR = np.max(np.abs(trace_norm)) / np.std(trace_norm[:100])
if SNR < 10:
log.warning('signal-to-noise ratio too low: cannot '
'automatically find peaks.')
return {'pulseStart': 2, 'pulseStop': 3,
'baseStart': 0, 'baseStop': 1,
'period': 0, 'npulses': 1}
min_height = min(3 * SNR, np.max(np.abs(trace_norm)) / 3)
peaks, prop = find_peaks(trace_norm, distance=distance,
height=min_height,
width=2)
params = {}
start = int(prop['left_ips'][0])
if len(peaks) == 1:
params['pulseStart'] = start
params['period'] = 0
else:
pulse_period = int(np.median(np.diff(peaks)))
pulse_ids = np.digitize(peaks,
np.arange(peaks[0] - pulse_period/2,
peaks[-1] + pulse_period/2,
pulse_period)) - 1
if len(np.unique(np.diff(pulse_ids))) == 1:
# Regular pattern
params['pulseStart'] = start
params['period'] = pulse_period
else:
# Irregular pattern
params['pulseStart'] = start + pulse_ids * pulse_period
params['period'] = 0
params['pulseStop'] = int(prop['right_ips'][0])
params['baseStop'] = (3 * start - peaks[0]) / 2
params['baseStop'] = np.min([params['baseStop'],
int(prop['right_ips'][0])]).astype(int)
params['baseStart'] = params['baseStop'] - np.mean(prop['widths'])/2
params['baseStart'] = np.max([params['baseStart'], 0]).astype(int)
params['npulses'] = len(peaks)
return params
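# Illustrative sketch: find integration windows on an averaged trace and
# reuse them for peak extraction ('run' and 'traces' are placeholders;
# distance=24 corresponds to the FastADC pulse separation used above):
# >>> trace = get_dig_avg_trace(run, 'FastADC2raw', ntrains=200)
# >>> params = find_peaks_in_raw_trace(trace, distance=24)
# >>> peaks = peaks_from_raw_trace(traces, **params, mode='trapz')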
def find_peak_integration_parameters(run, mnemonic, raw_trace=None,
integParams=None, pattern=None,
ntrains=None):
'''
Find the peak integration parameters, either from the provided
integParams or by peak finding on the raw trace, and reconcile them
with the bunch pattern if one is given.
'''
run_mnemonics = mnemonics_for_run(run)
digitizer = digitizer_type(run, run_mnemonics[mnemonic]['source'])
pulse_period = 24 if digitizer == "FastADC" else 440
if integParams is None:
# load raw trace and find peaks
autoFind = True
if raw_trace is None:
raw_trace = get_dig_avg_trace(run, mnemonic, ntrains)
params = find_peaks_in_raw_trace(raw_trace, distance=pulse_period)
else:
# inspect provided parameters
autoFind = False
required_keys = ['pulseStart', 'pulseStop', 'baseStart',
'baseStop']
add_text = ''
if pattern is None and not hasattr(integParams['pulseStart'],
'__len__'):
required_keys += ['period', 'npulses']
add_text = 'Bunch pattern not provided. '
if not all(name in integParams for name in required_keys):
raise ValueError(add_text + 'All keys of integParams argument '
f'{required_keys} are required.')
params = integParams.copy()
# extract pulse ids from the parameters (starting at 0)
pulse_ids_params = None
if hasattr(params['pulseStart'], '__len__'):
if params.get('npulses') is not None and (
params.get('npulses') != len(params['pulseStart'])):
log.warning('The number of pulses does not match the length '
'of pulseStart. Using length of pulseStart as '
'the number of pulses.')
params['npulses'] = len(params['pulseStart'])
pulse_ids_params = ((np.array(params['pulseStart']) -
params['pulseStart'][0]) / pulse_period).astype(int)
elif 'npulses' in params and 'period' in params:
if params['npulses'] == 1:
pulse_ids_params = np.array([0])
else:
pulse_ids_params = np.arange(0,
params['npulses'] * params['period'] / pulse_period,
params['period'] / pulse_period).astype(int)
# Extract pulse_ids, period and npulses from bunch pattern
pulse_ids_bp, npulses_bp, period_bp = None, None, 0
regular = True
if pattern is not None:
bunchPattern = 'sase3' if hasattr(pattern, 'sase') else 'scs_ppl'
if pattern.is_constant_pattern() is False:
log.warning('The number of pulses changed during the run.')
pulse_ids_bp = np.unique(pattern.pulse_ids(labelled=False,
copy=False))
npulses_bp, period_bp = None, 0
regular = False
else:
pulse_ids_bp = pattern.peek_pulse_ids(labelled=False)
npulses_bp = len(pulse_ids_bp)
if npulses_bp > 1:
periods = np.diff(pulse_ids_bp)
if len(np.unique(periods)) > 1:
regular = False
else:
period_bp = np.unique(periods)[0] * pulse_period
# Compare parameters with bunch pattern
if pulse_ids_params is None:
params['npulses'] = npulses_bp
params['period'] = period_bp
pulse_ids_params = np.arange(0,
params['npulses'] * params['period'] / pulse_period,
params['period'] / pulse_period).astype(int)
if len(pulse_ids_params) == len(pulse_ids_bp):
if not (pulse_ids_params == pulse_ids_bp - pulse_ids_bp[0]).all():
log.warning('The provided pulseStart parameters do not match '
f'the {bunchPattern} bunch pattern pulse ids. '
'Using bunch pattern parameters.')
pulse_ids_params = pulse_ids_bp
if (npulses_bp != params.get('npulses') or
period_bp != params.get('period')):
if autoFind:
add_text = 'Automatically found '
else:
add_text = 'Provided '
log.warning(add_text + 'integration parameters '
f'(npulses={params.get("npulses")}, ' +
f'period={params.get("period")}) do not match the '
f'{bunchPattern} bunch pattern (npulses='
f'{npulses_bp}, period={period_bp}). Using bunch '
'pattern parameters.')
pulse_ids_params = pulse_ids_bp
params['npulses'] = npulses_bp
params['period'] = period_bp
if not regular:
# Irregular pattern
if hasattr(params['pulseStart'], '__len__'):
start = params['pulseStart'][0]
else:
start = params['pulseStart']
params['pulseStart'] = np.array(
[int(start + (pid - pulse_ids_params[0]) * pulse_period)
for pid in pulse_ids_params])
params['period'] = 0
else:
# Regular pattern
if hasattr(params['pulseStart'], '__len__'):
params['pulseStart'] = params['pulseStart'][0]
if len(pulse_ids_params) == 1:
params['period'] = 0
return params, pulse_ids_params, regular, raw_trace
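# Illustrative sketch (assumed workflow): reconcile automatically found
# parameters with the SASE 3 bunch pattern. 'run' is a placeholder for an
# opened DataCollection.
# >>> pattern = XrayPulses(run)
# >>> params, pulse_ids, regular, trace = find_peak_integration_parameters(
# ...     run, 'FastADC2raw', pattern=pattern)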
def check_peak_params(proposal, runNB, mnemonic, raw_trace=None,
ntrains=200, integParams=None, bunchPattern='sase3',
plot=True, show_all=False):
"""
Checks and plots the peak parameters (pulse window and baseline window
of a raw digitizer trace) used to compute the peak integration. These
parameters are either set by the digitizer peak-integration settings,
or are determined by a peak-finding algorithm. The parameters
can also be provided manually for visual inspection. The plot either
shows the first and last pulse of the trace or the entire trace.
Parameters
----------
proposal: int
the proposal number
runNB: int
the run number
mnemonic: str
ToolBox mnemonic of the digitizer data, e.g. 'MCP2apd'.
raw_trace: optional, 1D numpy array or xarray DataArray
Raw trace to display. If None, the average raw trace over
ntrains of the corresponding channel is loaded (this can
be time-consuming).
ntrains: optional, int
Only used if raw_trace is None. Number of trains used to
calculate the average raw trace of the corresponding channel.
plot: bool
If True, displays the raw trace and peak integration regions.
show_all: bool
If True, displays the entire raw trace and all peak integration
regions (this can be time-consuming).
If False, shows the first and last pulse according to the bunchPattern.
bunchPattern: optional, str
Only used if plot is True. Checks the bunch pattern against
the digitizer peak parameters and shows potential mismatch.
Returns
-------
dictionary of peak integration parameters
"""
run = open_run(proposal, runNB)
run_mnemonics = mnemonics_for_run(run)
digitizer = digitizer_type(run, run_mnemonics[mnemonic]['source'])
pulse_period = 24 if digitizer == "FastADC" else 440
pattern = None
try:
if bunchPattern == 'sase3':
pattern = XrayPulses(run)
if bunchPattern == 'scs_ppl':
pattern = OpticalLaserPulses(run)
except Exception as e:
log.warning(e)
bunchPattern = None
params, pulse_ids, regular, raw_trace = find_peak_integration_parameters(
run, mnemonic, raw_trace, integParams, pattern)
if bunchPattern:
if regular:
add_text = ''
if len(pulse_ids) > 1:
add_text = f's, {(pulse_ids[1]-pulse_ids[0]) * pulse_period}'\
+ ' samples between two pulses'
print(f'Bunch pattern {bunchPattern}: {len(pulse_ids)} pulse'
+ add_text)
else:
print(f'Bunch pattern {bunchPattern}: Not a regular pattern. '
f'{len(pulse_ids)} pulses, pulse_ids='
f'{pulse_ids}.')
if integParams is None:
title = 'Auto-find peak parameters'
else:
title = 'Provided peak parameters'
no_change = True
for k, v in integParams.items():
no_change = no_change & (v == params[k])
if hasattr(no_change, '__len__'):
no_change = no_change.all()
if not no_change:
print('The provided parameters did not match the bunch '
'pattern and were adjusted.')
title += ' (adjusted)'
if regular:
add_text = ''
if params['npulses'] > 1:
add_text = f's, {params["period"]} samples between two pulses'
print(title + ': ' + f' {params["npulses"]} pulse' + add_text)
else:
print(f'{title}: Not a regular pattern. '
f'{len(pulse_ids)} pulses, pulse_ids='
f'{pulse_ids}.')
if plot:
if raw_trace is None:
raw_trace = get_dig_avg_trace(run, mnemonic)
fig, ax = plotPeakIntegrationWindow(raw_trace, params,
show_all=show_all)
fig.suptitle(f'p{proposal} r{runNB} ' + title, size=12)
return params
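# Example (illustrative; proposal and run numbers are placeholders):
# >>> params = check_peak_params(2212, 235, 'FastADC2raw',
# ...                            bunchPattern='scs_ppl', plot=True)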
def plotPeakIntegrationWindow(raw_trace, params, show_all=False):
if hasattr(params['pulseStart'], '__len__'):
starts = np.array(params['pulseStart'])
stops = params['pulseStop'] + starts - params['pulseStart'][0]
baseStarts = params['baseStart'] + starts - params['pulseStart'][0]
baseStops = params['baseStop'] + starts - params['pulseStart'][0]
else:
n = params['npulses']
p = params['period']
starts = [params['pulseStart'] + i*p for i in range(n)]
stops = [params['pulseStop'] + i*p for i in range(n)]
baseStarts = [params['baseStart'] + i*p for i in range(n)]
baseStops = [params['baseStop'] + i*p for i in range(n)]
if show_all:
fig, ax = plt.subplots(figsize=(6, 3), constrained_layout=True)
for i in range(len(starts)):
lbl = 'baseline' if i == 0 else None
lp = 'peak' if i == 0 else None
ax.axvline(baseStarts[i], ls='--', color='k')
ax.axvline(baseStops[i], ls='--', color='k')
ax.axvspan(baseStarts[i], baseStops[i],
alpha=0.5, color='grey', label=lbl)
ax.axvline(starts[i], ls='--', color='r')
ax.axvline(stops[i], ls='--', color='r')
ax.axvspan(starts[i], stops[i],
alpha=0.2, color='r', label=lp)
ax.plot(raw_trace, color='C0', label='raw trace')
ax.legend(fontsize=8)
return fig, [ax]
fig, ax = plt.subplots(1, 2, figsize=(6, 3), constrained_layout=True)
for plot in range(2):
title = 'First pulse' if plot == 0 else 'Last pulse'
i = 0 if plot == 0 else - 1
for j in range(min(2, len(starts))):
if plot == 1:
j = -j
label = 'baseline' if j == 0 else ''
ax[plot].axvline(baseStarts[i+j], ls='--', color='k')
ax[plot].axvline(baseStops[i+j], ls='--', color='k')
ax[plot].axvspan(baseStarts[i+j], baseStops[i+j],
alpha=0.5, color='grey', label=label)
label = 'peak' if j == 0 else ''
ax[plot].axvline(starts[i+j], ls='--', color='r')
ax[plot].axvline(stops[i+j], ls='--', color='r')
ax[plot].axvspan(starts[i+j], stops[i+j],
alpha=0.2, color='r', label=label)
if len(starts) > 1:
period = starts[1] - starts[0]
xmin = np.max([0, baseStarts[i] - int(1.5*period)])
xmax = np.min([stops[i] + int(1.5*period), raw_trace.size])
else:
xmin = np.max([0, baseStarts[i] - 200])
xmax = np.min([stops[i] + 200, raw_trace.size])
ax[plot].plot(np.arange(xmin, xmax),
raw_trace[xmin:xmax], color='C0', label='raw trace')
ax[plot].legend(fontsize=8)
ax[plot].set_xlim(xmin, xmax)
ax[plot].set_xlabel('digitizer samples')
ax[plot].set_title(title, size=10)
return fig, ax
def digitizer_type(run, source):
"""
Finds the digitizer type based on the class Id / name of the source.
Example source: 'SCS_UTC1_MCP/ADC/1'. Defaults to ADQ412 if not found.
"""
ret = None
if '_MCP/ADC/1' in source:
ret = 'FastADC'
if '_ADQ/ADC/1' in source:
ret = 'ADQ412'
if ret is None:
digi_dict = {'FastAdc': 'FastADC',
'FastAdcLegacy': 'FastADC',
'AdqDigitizer': 'ADQ412',
'PyADCChannel': 'FastADC',
'PyADCChannelLegacy': 'FastADC'}
try:
source = source.split(':')[0]
classId = run.get_run_value(source, 'classId.value')
ret = digi_dict.get(classId)
except Exception as e:
log.warning(str(e))
log.warning(f'Could not find digitizer type from source {source}.')
ret = 'ADQ412'
return ret
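# Example (illustrative): the returned type selects the minimum pulse
# separation used elsewhere in this module (24 samples for 'FastADC',
# 440 samples for 'ADQ412').
# >>> digitizer_type(run, 'SCS_UTC1_ADQ/ADC/1:network')  # -> 'ADQ412'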
def get_digitizer_peaks(proposal, runNB, mnemonic, merge_with=None,
bunchPattern='sase3', integParams=None, mode='trapz',
save=False, subdir='usr/processed_runs'):
"""
Extract the peaks from digitizer raw traces. The result can be merged
to an existing `merge_with` dataset and/or saved into an HDF file.
The calculation is a trapezoidal integration between 'pulseStart' and
'pulseStop' with subtraction of a baseline defined as the median between
'baseStart' and 'baseStop'.
The integration parameters can either be provided using integParams, or
computed by a peak finding algorithm if integParams is None.
If the bunchPattern argument is provided, the pulse ids are aligned to it.
If there is a mismatch between the provided parameters or the computed
parameters and the bunch pattern, the bunch pattern parameters prevail.
Parameters
----------
proposal: int
the proposal number
runNB: int
the run number
mnemonic: str
mnemonic for FastADC or ADQ412, e.g. "I0_ILHraw" or "MCP3apd".
The data is either loaded from the DataCollection or taken from
merge_with.
merge_with: xarray Dataset
If provided, the resulting Dataset will be merged with this one.
bunchPattern: str or dict
'sase1' or 'sase3' or 'scs_ppl', 'None': bunch pattern
integParams: dict
dictionary for raw trace integration, e.g.
{'pulseStart': 100, 'pulseStop': 200, 'baseStart': 50,
'baseStop': 99, 'period': 24, 'npulses': 500}.
If None, integration parameters are computed automatically.
mode: str in ['trapz', 'sum', 'mean', 'amplitude']
The operation performed over the integration region. `amplitude` is
the difference between the baseline and the peak height.
save: bool
If True, save the computed peaks and the average raw trace in a h5
file in the subdir + f'/r{runNB:04d}/ folder.
subdir: str
subdirectory to save the output in.
Returns
-------
xarray Dataset with digitizer peak variables. Raw variables are
substituted by the calculated peak values (e.g. "FastADC2raw" becomes
"FastADC2peaks").
"""
# prepare resulting dataset
if bool(merge_with):
mw_ds = merge_with.drop(mnemonic, errors='ignore')
else:
mw_ds = xr.Dataset()
run = open_run(proposal, runNB)
run_mnemonics = mnemonics_for_run(run)
mnemo = run_mnemonics.get(mnemonic)
if mnemo is None:
log.warning('Mnemonic not found. Skipping.')
return xr.DataArray()
source, key = mnemo['source'], mnemo['key']
extra_dim = {'sase3': 'sa3_pId', 'scs_ppl': 'ol_pId'}.get(bunchPattern)
if extra_dim is None:
extra_dim = 'pulseId'
pattern = None
try:
if bunchPattern == 'sase3':
pattern = XrayPulses(run)
if bunchPattern == 'scs_ppl':
pattern = OpticalLaserPulses(run)
except Exception as e:
log.warning(e)
# load traces and calculate the average
traces = run[source, key].xarray(name=mnemonic, extra_dims=mnemo['dim'])
trace = traces.mean('trainId').rename(mnemonic.replace('raw', 'avg'))
# find integration parameters
params, pulse_ids, regular, trace = find_peak_integration_parameters(
run, mnemonic, trace, integParams, pattern)
# extract peaks
peaks = peaks_from_raw_trace(traces, **params,
extra_dim=extra_dim, mode=mode)
peaks = peaks.rename(mnemonic.replace('raw', 'peaks'))
# Align pulse id
if regular is True:
peaks = peaks.assign_coords({extra_dim: pulse_ids})
elif pattern:
mask = pattern.pulse_mask(labelled=False)
mask = xr.DataArray(mask, dims=['trainId', extra_dim],
coords={'trainId': run[source, key].train_ids,
extra_dim: np.arange(mask.shape[1])})
mask = mask.sel({extra_dim: pulse_ids})
peaks = peaks.where(mask, drop=True)
# add peak integration parameters attributes
for p in params:
if params[p] is None:
params[p] = 'None'
peaks.attrs[f'{peaks.name}_{p}'] = params[p]
peaks.attrs[f'{peaks.name}_mode'] = mode
# save peaks
if save:
save_peaks(proposal, runNB, peaks, trace, subdir)
# merge with existing dataset
ds = mw_ds.merge(peaks.to_dataset(promote_attrs=True), join='inner',
combine_attrs='drop_conflicts')
return ds
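# Example (illustrative; numbers and the existing dataset 'ds' are
# placeholders):
# >>> ds = get_digitizer_peaks(2212, 235, 'FastADC2raw', merge_with=ds,
# ...                          bunchPattern='sase3', mode='trapz')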
def save_peaks(proposal, runNB, peaks, avg_trace, subdir):
'''
Save the peaks extracted with get_digitizer_peaks() as well as the
average raw trace in a dataset at the proposal + subdir location.
If a dataset already exists, the new data is merged with it.
Parameters
----------
proposal: int
the proposal number
runNB: int
the run number
peaks: xarray DataArray
the 2D-array obtained by get_digitizer_peaks()
avg_trace: xarray DataArray
the average raw trace over the trains
subdir: str
subdirectory. The data is stored in
<proposal path>/<subdir>/r{runNB:04d}/r{runNB:04d}-digitizers-data.h5
Returns
-------
None
'''
root = find_proposal(f'p{proposal:06d}')
path = os.path.join(root, subdir + f'/r{runNB:04d}/')
os.makedirs(path, exist_ok=True)
fname = path + f'r{runNB:04d}-digitizers-data.h5'
ds_peaks = peaks.to_dataset(promote_attrs=True)
if os.path.isfile(fname):
ds = xr.load_dataset(fname)
ds = ds.drop_vars([peaks.name, avg_trace.name], errors='ignore')
for dim in ds.dims:
if all(dim not in ds[v].dims for v in ds):
ds = ds.drop_dims(dim)
dim_name = 'sampleId'
if 'sampleId' in ds.dims and ds.sizes['sampleId'] != len(avg_trace):
dim_name = 'sampleId2'
avg_trace = avg_trace.rename({avg_trace.dims[0]: dim_name})
if f'params_{peaks.name}' in ds.attrs:
del ds.attrs[f'params_{peaks.name}']
ds = xr.merge([ds, ds_peaks, avg_trace],
combine_attrs='drop_conflicts', join='inner')
else:
ds = ds_peaks.merge(avg_trace.rename({avg_trace.dims[0]: 'sampleId'}))
ds.to_netcdf(fname, format='NETCDF4')
print(f'saved data into {fname}.')
return None
def load_processed_peaks(proposal, runNB, mnemonic=None,
data='usr/processed_runs', merge_with=None):
"""
Load processed digitizer peaks data.
Parameters
----------
proposal: int
the proposal number
runNB: int
the run number
mnemonic: str
the mnemonic containing peaks. Example: 'XRD_MCP_BIGpeaks'.
If None, the entire dataset is loaded
data: str
subdirectory. The data is stored in
<proposal path>/<subdir>/r{runNB:04d}/r{runNB:04d}-digitizers-data.h5
merge_with: xarray Dataset
A dataset to merge the data with.
Returns
-------
xarray DataArray if mnemonic is not None and merge_with is None
xarray Dataset if mnemonic is None or merge_with is not None.
Example
-------
# load the mono energy and the MCP_BIG peaks
run, ds = tb.load(proposal, runNB, 'nrj')
ds = tb.load_processed_peaks(proposal, runNB,'XRD_MCP_BIGpeaks',
merge_with=ds)
"""
if mnemonic is None:
return load_all_processed_peaks(proposal, runNB, data, merge_with)
root = find_proposal(f'p{proposal:06d}')
path = os.path.join(root, data + f'/r{runNB:04d}/')
fname = path + f'r{runNB:04d}-digitizers-data.h5'
if os.path.isfile(fname):
ds = xr.load_dataset(fname)
if mnemonic not in ds:
log.warning(f'Mnemonic {mnemonic} not found in {fname}')
return {}
da = ds[mnemonic]
da.attrs = {k: ds.attrs[k] for k in ds.attrs if f'{mnemonic}_' in k}
if merge_with is not None:
return merge_with.merge(da.to_dataset(promote_attrs=True),
combine_attrs='drop_conflicts',
join='inner')
else:
return da
else:
log.warning(f'{fname} not found.')
return merge_with
def load_all_processed_peaks(proposal, runNB, data='usr/processed_runs',
merge_with=None):
"""
Load processed digitizer peaks dataset. The data contains the peaks,
the average raw trace and the integration parameters (attribute) of
each processed digitizer source.
Parameters
----------
proposal: int
the proposal number
runNB: int
the run number
data: str
subdirectory. The data is stored in
<proposal path>/<subdir>/r{runNB:04d}/r{runNB:04d}-digitizers-data.h5
merge_with: xarray Dataset
A dataset to merge the data with.
Returns
-------
xarray Dataset
"""
root = find_proposal(f'p{proposal:06d}')
path = os.path.join(root, data + f'/r{runNB:04d}/')
fname = path + f'r{runNB:04d}-digitizers-data.h5'
if os.path.isfile(fname):
if merge_with is not None:
return merge_with.merge(xr.load_dataset(fname),
combine_attrs='drop_conflicts',
join='inner')
return xr.load_dataset(fname)
else:
log.warning(f'{fname} not found.')
return merge_with
def check_processed_peak_params(proposal, runNB, mnemonic,
data='usr/processed_runs',
plot=True, show_all=False):
"""
Check the integration parameters used to generate the processed
peak values.
Parameters
----------
proposal: int
the proposal number
runNB: int
the run number
mnemonic: str
the mnemonic containing peaks. Example: 'XRD_MCP_BIGpeaks'.
If None, the entire dataset is loaded
data: str
subdirectory. The data is stored in
<proposal path>/<subdir>/r{runNB:04d}/r{runNB:04d}-digitizers-data.h5
plot: bool
If True, displays the raw trace and peak integration regions.
show_all: bool
If True, displays the entire raw trace and all peak integration
regions (this can be time-consuming).
If False, shows the first and last pulses.
Returns
-------
params: dict
the integration parameters with keys ['pulseStart', 'pulseStop',
'baseStart', 'baseStop', 'period', 'npulses'].
See `get_digitizer_peaks()`.
"""
root = find_proposal(f'p{proposal:06d}')
path = os.path.join(root, data + f'/r{runNB:04d}/')
fname = path + f'r{runNB:04d}-digitizers-data.h5'
if os.path.isfile(fname):
ds = xr.load_dataset(fname)
if mnemonic.replace('raw', 'peaks') not in ds:
log.warning(f'Mnemonic {mnemonic} not found in {fname}')
return {}
params = {k.replace(f'{mnemonic}_', ''): ds.attrs[k] for
k in ds.attrs if f'{mnemonic}_' in k}
if plot:
title = 'Processed peak parameters'
fig, ax = plotPeakIntegrationWindow(
ds[mnemonic.replace('peaks', 'avg')],
params,
show_all=show_all)
fig.suptitle(f'p{proposal} r{runNB} ' + title, size=12)
return params
else:
log.warning(f'{fname} not found.')
return {}
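# Example (illustrative): inspect the windows that were used to produce a
# processed peaks variable (proposal and run numbers are placeholders):
# >>> params = check_processed_peak_params(2212, 235, 'XRD_MCP_BIGpeaks')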
"""
DSSC-detector class module
The dssc detector class. It represents a namespace for frequent evaluation
while implicitly applying/requiring certain structure/naming conventions to
its objects.
comments:
- contributions should comply with pep8 code structure guidelines.
- Plot routines don't fit into objects since they are rather fluent.
They have been outsourced to dssc_plot.py. They can now be accessed
as toolbox_scs member functions.
"""
import os
import logging
import joblib
import numpy as np
import xarray as xr
import toolbox_scs as tb
from ..util.exceptions import ToolBoxValueError, ToolBoxFileError
from .dssc_data import (
save_xarray, load_xarray, save_attributes_h5,
search_files, get_data_formatted)
from .dssc_misc import (
load_dssc_info, get_xgm_formatted, get_tim_formatted)
from .dssc_processing import (
process_dssc_data, create_empty_dataset)
__all__ = [
"DSSCBinner",
"DSSCFormatter"]
log = logging.getLogger(__name__)
class DSSCBinner:
def __init__(self, proposal_nr, run_nr,
binners={},
xgm_name='SCS_SA3',
tim_names=['MCP1apd', 'MCP2apd', 'MCP3apd'],
dssc_coords_stride=2,
):
"""
A dssc binner object. Loads and bins the dssc data according to the
bins specified in 'binners'. The data can be reduced further through
masking using XGM or TIM data.
Parameters
----------
proposal_nr: int, str
proposal number containing run folders
run_nr: int, str
run number
binners: dictionary
dictionary containing binners constructed using the
'create_dssc_bins' toolbox_scs.detectors-method.
xgm_name: str
a valid mnemonic key of the XGM data to be used to mask the dssc
frames. Since the xgm is used in several methods its name can be
set here globally.
tim_names: list of strings
a list of valid mnemonic keys for an MCP in the TIM. Once the
corresponding data is loaded, the different sources are averaged.
dssc_coords_stride: int, list
defines which dssc frames should be normalized using data from the
xgm. The parameter may be an integer (stride parameter) or a list,
that assigns each pulse to its corresponding dssc frame number.
Returns
-------
DSSCBinner: object
Example
-------
1.) quick -> generic bins, no xgm,
>>> import toolbox_scs as tb
>>> run235 = tb.DSSCBinner(proposal_nr=2212, run_nr=235)
2.) detailed -> docs
"""
# ---------------------------------------------------------------------
# object (run) properties
# ---------------------------------------------------------------------
self.proposal = proposal_nr
self.runnr = run_nr
self.info = load_dssc_info(proposal_nr, run_nr)
self.run, _ = tb.load(proposal_nr, run_nr)
self.binners = {}
for b in binners:
self.add_binner(b, binners[b])
self.xgm_name = xgm_name
self.tim_names = tim_names
self.dssc_coords_stride = dssc_coords_stride
self.xgm = None
self.tim = None
self.pulsemask = None
log.debug("Constructed DSSC object")
def __del__(self):
pass
def add_binner(self, name, binner):
"""
Add additional binner to internal dictionary
Parameters
----------
name: str
name of binner to be created
binner: xarray.DataArray
An array that defines how the respective coordinate should be binned.
Raises
------
ToolBoxValueError: Exception
Raises exception in case the name does not correspond to a valid
binner name. To be generalized.
"""
if name in ['trainId', 'pulse', 'x', 'y']:
self.binners[name] = binner
else:
msg = "Invalid binner name"
log.info(msg+", no binner created")
raise ToolBoxValueError(msg, name)
def load_xgm(self):
"""
load xgm data and construct coordinate array according to corresponding
dssc frame number.
"""
self.xgm = get_xgm_formatted(self.run,
self.xgm_name,
self.dssc_coords_stride)
def load_tim(self):
"""
load tim data and construct coordinate array according to corresponding
dssc frame number.
"""
self.tim = get_tim_formatted(self.proposal,
self.runnr,
self.tim_names,
self.dssc_coords_stride)
def create_pulsemask(self, use_data='xgm', threshold=(0, np.inf)):
"""
creates a mask for dssc frames according to measured xgm intensity.
Once such a mask has been constructed, it will be used in the data
reduction process to drop out-of-bounds pulses.
"""
fpt = self.info['frames_per_train']
n_trains = self.info['number_of_trains']
trainIds = self.info['trainIds']
data = np.ones([n_trains, fpt], dtype=bool)
self.pulsemask = xr.DataArray(data,
dims=['trainId', 'pulse'],
coords={'trainId': trainIds,
'pulse': range(fpt)})
if use_data == 'xgm':
if self.xgm is None:
self.load_xgm()
valid = (self.xgm > threshold[0]) * \
(self.xgm < threshold[1])
if use_data == 'tim':
if self.tim is None:
self.load_tim()
valid = (self.tim > threshold[0]) * \
(self.tim < threshold[1])
self.pulsemask = \
(valid.combine_first(self.pulsemask).astype(bool))[:, 0:fpt]
log.info(f'created pulse mask used during processing')
def get_info(self):
"""
Returns the expected shape of the binned dataset, in case binners have
been defined.
"""
if any(self.binners):
empty = create_empty_dataset(self.info, self.binners)
return empty.dims
else:
log.info("no binner defined yet.")
pass
def _bin_metadata(self, data):
if self.pulsemask is not None:
data = data.where(self.pulsemask)
for b in self.binners:
if b in ['trainId', 'pulse']:
data[b+"_binned"] = self.binners[b]
data = data.groupby(b+"_binned").mean(b)
data = data.rename(name_dict={b+"_binned": b})
log.debug('binned metadata data according to dssc binners.')
return data.transpose('trainId', 'pulse')
def get_xgm_binned(self):
"""
Bin the xgm data according to the binners of the dssc data. The result
can eventually be merged into the final dataset by the DSSCFormatter.
Returns
-------
xgm_data: xarray.DataSet
xarray dataset containing the binned xgm data
"""
if self.xgm is not None:
xgm_data = self.xgm.to_dataset(name='xgm')
xgm_binned = self._bin_metadata(xgm_data)
log.info('binned xgm data according to dssc binners.')
return xgm_binned
else:
log.warning("no xgm data. Use load_xgm() to load the xgm data.")
pass
def get_tim_binned(self):
"""
Bin the tim data according to the binners of the dssc data. The result
can eventually be merged into the final dataset by the DSSCFormatter.
Returns
-------
tim_data: xarray.DataSet
xarray dataset containing the binned tim data
"""
if self.tim is not None:
tim_data = self.tim.to_dataset(name='tim')
tim_binned = self._bin_metadata(tim_data)
log.info('binned tim data according to dssc binners.')
return tim_binned
else:
log.warning("no data. Use load_tim() to load the tim data.")
pass
# -------------------------------------------------------------------------
# Data processing
# -------------------------------------------------------------------------
def process_data(self, modules=[], filepath='./',
chunksize=512, backend='loky', n_jobs=None,
dark_image=None,
xgm_normalization=False, normevery=1
):
"""
Load and bin dssc data according to self.bins. No data is returned by
this method. The condensed data is written to file by the worker
processes directly.
Parameters
----------
modules: list of ints
a list containing the module numbers that should be processed. If
empty, all modules are processed.
filepath: str
the path where the files containing the reduced data should be
stored.
chunksize: int
The number of trains that should be read in one iterative step.
backend: str
joblib multiprocessing backend to be used. At the moment it can be
any of joblibs standard backends: 'loky' (default),
'multiprocessing', 'threading'. Anything else than the default is
experimental and not appropriately implemented in the DSSCBinner member
function 'process_data'.
n_jobs: int
inversely proportional to the number of CPUs available per job.
Tasks within one job can grab a maximum of n_CPU_tot/n_jobs CPUs.
Note that when using the default backend there is no need to adjust
this parameter with the current implementation.
dark_image: xarray.DataArray
DataArray with dimensions compatible with the loaded dssc data. If
given, it will be subtracted from the dssc data before the binning.
The dark image needs to be of dimension module, trainId, pulse, x
and y.
xgm_normalization: boolean
if true, the dssc data is normalized by the xgm data before the
binning.
normevery: int
integer indicating which out of normevery frame will be normalized.
"""
log.info("Bin data according to binners")
log.info(f'Process {chunksize} trains per chunk')
mod_list = modules
if len(mod_list) == 0:
mod_list = [i for i in range(16)]
log.info(f'Process modules {mod_list}')
njobs = n_jobs
if njobs is None:
njobs = len(mod_list)
module_jobs = []
for m in mod_list:
dark = dark_image
if dark_image is not None:
dark = dark_image.sel(module=m)
module_jobs.append(dict(
proposal=self.proposal,
run_nr=self.runnr,
module=m,
chunksize=chunksize,
path=filepath,
info=self.info,
dssc_binners=self.binners,
pulsemask=self.pulsemask,
dark_image=dark,
xgm_normalization=xgm_normalization,
xgm_mnemonic=self.xgm_name,
normevery=normevery,
))
log.info(f'using parallelization backend {backend}')
joblib.Parallel(n_jobs=njobs, backend=backend)\
(joblib.delayed(process_dssc_data)(**module_jobs[i])
for i in range(len(mod_list)))
log.info(f'Binning done')
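# Illustrative end-to-end sketch of the binning workflow (all values are
# placeholders; see the docstrings above and in dssc_misc.py):
# >>> import numpy as np
# >>> import toolbox_scs as tb
# >>> bins_pulse = ['pumped', 'unpumped'] * 10
# >>> binner_pulse = tb.create_dssc_bins('pulse',
# ...                                    np.linspace(0, 19, 20, dtype=int),
# ...                                    bins_pulse)
# >>> bnr = DSSCBinner(2212, 235, binners={'pulse': binner_pulse})
# >>> bnr.create_pulsemask('xgm', threshold=(300, 8000))
# >>> bnr.process_data(filepath='./processed/', chunksize=248)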
class DSSCFormatter:
def __init__(self, filepath):
"""
Class that handles formatting-related aspects before handing the data
over for analysis.
Parameters
----------
filepath: str
location of processed files.
Raises
------
ToolBoxFileError: Exception
Throws an error in case the given path does not exist.
"""
self._filenames = []
self._filename = ''
if os.path.exists(filepath):
try:
self._filenames = search_files(filepath)
except ToolBoxFileError:
log.info("path did not contain any files")
pass
else:
log.info("path did not exist")
self.data = None
self.data_xarray = {}
self.attributes = {}
def combine_files(self, filenames=[]):
"""
Read the files given in filenames, and store the data in the class
variable 'data'. If no filenames are given, it tries to read the files
stored in the class-internal variable '_filenames'.
Parameters
----------
filenames: list
list of strings containing the names of the files to be combined.
"""
if any(filenames) is True:
self._filenames = filenames
if self._filenames is not None:
self.data = get_data_formatted(self._filenames)
else:
log.info("No matching data found.")
def add_dataArray(self, groups=[]):
"""
Reads additional xarray data from the first file given in the list of
filenames. This assumes that all the files in the folder contain the
same additional data. To be generalized.
Parameters
----------
groups: list
list of strings with the names of the groups in the h5 file,
containing additional xarray data.
"""
if any(self._filenames) is True:
for group in groups:
self.data_xarray[group] = load_xarray(self._filenames[0],
group=group,
form='array')
else:
log.info("No files found in specified folder.")
def add_attributes(self, attributes={}):
"""
Add additional information, such as run-type, as attributes to the
formatted .h5 file.
Parameters
----------
attributes: dictionary
a dictionary, containing information or data of any kind, that
will be added to the formatted .h5 file as attributes.
"""
for key in attributes.keys():
self.attributes[key] = attributes[key]
def save_formatted_data(self, filename):
"""
Create a .h5 file containing the main dataset in the group called
'data'. Additional groups will be created for the content of the
variable 'data_array'. Metadata about the file is added in the form of
attributes.
Parameters
----------
filename: str
the name of the file to be created
"""
save_xarray(filename, self.data, group='data', mode='w')
for arr in self.data_xarray.keys():
save_xarray(filename, self.data_xarray[arr], group=arr, mode='a')
save_attributes_h5(filename, self.attributes)
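# Illustrative sketch of the formatting step after processing (paths and
# attribute values are placeholders):
# >>> f = DSSCFormatter('./processed/')
# >>> f.combine_files()
# >>> f.add_attributes({'run_type': 'delay scan'})
# >>> f.save_formatted_data('./processed/run_235_formatted.h5')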
import os
import logging
import h5py
import xarray as xr
from ..util.exceptions import ToolBoxFileError
__all__ = [
'get_data_formatted',
'load_xarray',
'save_attributes_h5',
'save_xarray',
]
log = logging.getLogger(__name__)
def _to_netcdf(fname, data, group, mode):
f_exists = os.path.isfile(fname)
if (f_exists and mode == 'w'):
data.to_netcdf(fname, group=group, mode='w', engine='h5netcdf')
log.warning(f"File {fname} existed: overwritten")
log.info(f"Stored data in file {fname}")
elif f_exists and mode == 'a':
try:
data.to_netcdf(fname, group=group, mode='a', engine='h5netcdf')
log.info(f"Created group {group} in file {fname}")
except (ValueError, TypeError):
msg = f"Group {group} exists and has incompatible dimensions."
log.warning(f"Could not store data: {msg}")
raise ToolBoxFileError(msg, fname)
else:
data.to_netcdf(fname, group=group, mode='w', engine='h5netcdf')
log.info(f"Stored data in file {fname}")
def save_xarray(fname, data, group='data', mode='a'):
"""
Store xarray Dataset in the specified location
Parameters
----------
data: xarray.Dataset
The data to be stored
fname: str
filename
group: str
group in the h5 file under which the data is stored
mode: str
write mode: 'w' overwrites an existing file, 'a' appends a new group to it
Raises
------
ToolBoxFileError: Exception
The group already exists in the file and has incompatible dimensions.
"""
try:
_to_netcdf(fname, data, group, mode)
except ToolBoxFileError as err:
raise err
def save_attributes_h5(fname, data={}):
"""
Adding attributes to a hdf5 file. This function is intended to be used to
attach metadata to a processed run.
Parameters
----------
fname: str
filename as string
data: dictionary
the data that should be added to the file in form of a dictionary.
"""
f = h5py.File(fname, mode='a')
for d in data.keys():
f.attrs[d] = data[d]
f.close()
log.info(f"added attributes to file {fname}")
def load_xarray(fname, group='data', form='dataset'):
"""
Load stored xarray Dataset.
Comment: This function exists because of a problem with the standard
netcdf engine that is malfunctioning due to related software installed
in the exfel-python environment. May be dropped at some point.
Parameters
----------
fname: str
filename as string
group: str
the name of the xarray dataset (group in h5 file).
form: str
specify whether the data to be loaded is a 'dataset' or an 'array'.
"""
f_exists = os.path.isfile(fname)
if f_exists:
if form == 'dataset':
log.debug(f'open xarray dataset {fname}')
return xr.load_dataset(fname, group=group, engine='h5netcdf')
elif form == 'array':
log.debug(f'open xarray dataarray {fname}')
return xr.load_dataarray(fname, group=group, engine='h5netcdf')
else:
msg = "File does not exists."
raise ToolBoxFileError(msg, fname)
def _data_from_list(filenames):
"""
Helper function for data formatting routines. Loads the specified files
given by their names. This subroutine expects the name of the group to be
'data'.
Parameters
----------
filenames: list
list of valid xarray filenames
Returns
-------
data: list
a list containing the loaded data
Raises
------
ToolBoxFileError
raises ToolBoxFileError in case file does not exist.
"""
data = []
for name in filenames:
f_exists = os.path.isfile(name)
if f_exists:
data.append(load_xarray(name, group='data'))
else:
msg = "File does not exists."
raise ToolBoxFileError(msg, name)
return data
def get_data_formatted(filenames=[], data_list=[]):
"""
Combines the given data into one dataset. For any of extra_data's data
types, an xarray.Dataset is returned. The data is sorted along the 'module'
dimension. The array dimensions have the order 'trainId', 'pulse', 'module',
'x', 'y'. This order is required by the extra_geometry package.
Parameters
----------
filenames: list of str
files to be combined as a list of names. Calls '_data_from_list' to
actually load the data.
data_list: list
list containing the already loaded data
Returns
-------
data: xarray.Dataset
A xarray.Dataset containing the combined data.
"""
if any(filenames) is True:
data = _data_from_list(filenames)
elif any(data_list) is True:
data = data_list
mod_list = []
for d in data:
if 'module' in d.attrs.keys():
mod_list.append(d.attrs['module'])
if type(data[0]).__module__ == 'xarray.core.dataset':
data = xr.concat(data, dim='module')
elif type(data[0]).__module__ == 'pandas.core.frame':
pass
elif type(data[0]).__module__ == 'dask.dataframe.core':
pass
if len(mod_list) > 0:
data = data.assign_coords(module=("module", mod_list))
data = data.sortby("module")
data.attrs.clear()
return data.transpose('trainId', 'pulse', 'module', 'x', 'y')
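# Illustrative sketch (folder path is a placeholder): combine the per-module
# files written by process_dssc_data into one dataset sorted by module.
# >>> files = search_files('./processed/')
# >>> data = get_data_formatted(files)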
def search_files(run_folder):
"""
Search folder for h5 files.
Parameters
----------
run_folder: str
the path to a folder containing h5 files.
Returns
-------
a list of the filenames of all .h5 files in the given folder.
Raises
------
ToolBoxFileError: Exception
raises ToolBoxFileError in case there are no .h5 files in the folder,
or the folder does not exist.
"""
try:
filenames = os.listdir(run_folder)
return [run_folder+name for name in filenames if ".h5" in name]
except OSError:
msg = "No files in folder"
raise ToolBoxFileError(msg, run_folder)
"""
DSSC-related sub-routines.
comment: contributions should comply with pep8 code structure guidelines.
"""
import logging
import numpy as np
import xarray as xr
from imageio import imread
import extra_data as ed
from .xgm import get_xgm
from .digitizers import get_digitizer_peaks
__all__ = [
'create_dssc_bins',
'get_xgm_formatted',
'load_dssc_info',
'load_mask',
'quickmask_DSSC_ASIC',
]
log = logging.getLogger(__name__)
def load_dssc_info(proposal, run_nr):
"""
Loads the first data file for DSSC module 1 (this is hardcoded)
and returns the detector_info dictionary
Parameters
----------
proposal: str, int
number of proposal
run_nr: str, int
number of run
Returns
-------
info : dictionary
{'dims': tuple, 'frames_per_train': int, 'total_frames': int}
"""
module = ed.open_run(proposal, run_nr, include='*DSSC01*')
info = module.detector_info('SCS_DET_DSSC1M-1/DET/1CH0:xtdf')
info["number_of_trains"] = len(module.train_ids)
info["trainIds"] = module.train_ids
log.debug("Fetched information for DSSC module nr. 1.")
return info
def create_dssc_bins(name, coordinates, bins):
"""
Creates a single entry for the dssc binner dictionary. The produced xarray
data-array will later be used to perform grouping operations according to
the given bins.
Parameters
----------
name: str
name of the coordinate to be binned.
coordinates: numpy.ndarray
the original coordinate values (1D)
bins: numpy.ndarray
the bins according to which the corresponding dimension should
be grouped.
Returns
-------
da: xarray.DataArray
A pre-formatted xarray.DataArray relating the specified dimension with
its bins.
Examples
--------
>>> import toolbox_scs as tb
>>> run = tb.open_run(2212, 235, include='*DA*')
1.) binner along 'pulse' dimension. Group data into two bins.
>>> bins_pulse = ['pumped', 'unpumped'] * 10
>>> binner_pulse = tb.create_dssc_bins("pulse",
np.linspace(0,19,20, dtype=int),
bins_pulse)
2.) binner along 'train' dimension. Group data into bins corresponding
to the positions of a delay stage for instance.
>>> bins_trainId = tb.get_array(run, 'PP800_PhaseShifter', 0.04)
>>> binner_train = tb.create_dssc_bins("trainId",
run.trainIds,
bins_trainId.values)
"""
if name in ['trainId', 'pulse', 'x', 'y']:
da = xr.DataArray(bins,
dims=[name],
coords={name: coordinates})
log.debug(f'created dssc bin array for dimension {name}')
return da
log.debug(f'could not construct dssc bin array for dimension {name}')
raise ValueError(f'Invalid name {str(name)}: value should be '
'trainId, pulse, x, or y')
def get_xgm_formatted(run_obj, xgm_name, dssc_frame_coords):
"""
Load the xgm data and define coordinates along the pulse dimension.
Parameters
----------
run_obj: extra_data.DataCollection
DataCollection object providing access to the xgm data to be loaded
xgm_name: str
valid mnemonic of a xgm source
dssc_frame_coords: int, list
defines which dssc frames should be normalized using data from the xgm.
Returns
-------
xgm: xarray.DataArray
xgm data with coordinate 'pulse'.
"""
log.debug('load raw xgm data')
xgm = get_xgm(run_obj, xgm_name)[xgm_name]
pulse_dim = [d for d in xgm.dims if d != 'trainId'][0]
xgm = xgm.rename({pulse_dim: 'pulse'})
if isinstance(dssc_frame_coords, int):
dssc_frame_coords = np.arange(xgm.sizes['pulse']*dssc_frame_coords,
step=dssc_frame_coords, dtype=np.uint64)
xgm['pulse'] = dssc_frame_coords
log.info('loaded formatted xgm data.')
return xgm
def get_tim_formatted(proposal, runNB, tim_names, dssc_frame_coords):
"""
Load the tim data and define coordinates along the pulse dimension.
Parameters
----------
proposal: int
the proposal number
runNB: int
the run number
tim_names: list of str
a list of valid mnemonics for tim data sources
dssc_frame_coords: int
defines which dssc frames should be normalized using data from the tim.
Returns
-------
tim: xarray.DataArray
tim data with coordinate 'pulse'.
"""
log.debug('load tim data')
# get_digitizer_peaks expects a single mnemonic, so the tim sources are
# loaded individually and merged before averaging.
tim = xr.merge([get_digitizer_peaks(proposal, runNB, name)
for name in tim_names], join='inner')
# average over all tim sources
tim = -tim.to_array().mean(dim='variable')
pulse_dim = [d for d in tim.dims if d != 'trainId'][0]
tim = tim.rename({pulse_dim: 'pulse'})
if isinstance(dssc_frame_coords, int):
dssc_frame_coords = np.arange(tim.sizes['pulse']*dssc_frame_coords,
step=dssc_frame_coords, dtype=np.uint64)
tim['pulse'] = dssc_frame_coords
log.info('loaded formatted tim data.')
return tim
def quickmask_DSSC_ASIC(poslist):
'''
Returns a mask for the given DSSC geometry with ASICs given in poslist
blanked. poslist is a list of (module, row, column) tuples. Each module
consists of 2 rows and 8 columns of individual ASICS.
Copyright (c) 2019, Michael Schneider
Copyright (c) 2020, SCS-team
license: BSD 3-Clause License (see LICENSE_BSD for more info)
'''
mask = np.ones([16, 128, 512], dtype=float) # need floats to use NaN
for (module, row, col) in poslist:
mask[module, 64 * row:64 * (row + 1), 64 * col:64 * (col + 1)] = \
np.nan
return mask
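# Example (illustrative): blank two ASICs of module 3 (row 0, columns 0 and 1).
# The resulting (16, 128, 512) float mask can be multiplied onto DSSC data.
# >>> mask = quickmask_DSSC_ASIC([(3, 0, 0), (3, 0, 1)])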
def load_mask(fname, dssc_mask):
"""
Load a DSSC mask file.
Copyright (c) 2019, Michael Schneider
Copyright (c) 2020, SCS-team
license: BSD 3-Clause License (see LICENSE_BSD for more info)
Parameters
----------
fname: str
string of the filename of the mask file
Returns
-------
mask: numpy.ndarray
2D array with value 1 for valid pixels and np.nan for masked pixels.
"""
mask = imread(fname)
mask = mask.astype(float)[..., 0] // 255
mask[mask == 0] = np.nan
return mask
""" DSSC visualization routines
Plotting sub-routines frequently done in combination with dssc analysis.
The initial code is based on: https://github.com/dscran/dssc_process
/blob/master/example_image_process_pulsemask.ipynb
Todo: For visualization of statistical information we could eventually
switch to seaborn: https://seaborn.pydata.org/
"""
from time import strftime
import matplotlib.pyplot as plt
import numpy as np
import xarray as xr
def plot_xgm_threshold(xgm,
xgm_min = None, xgm_max = None,
run_nr = '', safe_fig = False):
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(xgm.trainId, xgm, 'o', c='C0', ms=1)
if xgm_min:
ax.axhline(xgm_min, c='r')
if xgm_max:
ax.axhline(xgm_max, c='r')
ax.set_xlabel('trainId')
ax.set_ylabel('xgm')
ax.set_title(f'run: {run_nr}')
if safe_fig == True:
tstamp = strftime('%y%m%d_%H%M')
fig.savefig(f'images/run{run_nr}_scan_{tstamp}.png', dpi=200)
def plot_binner(binner,
yname = 'data', xname='data',
run_nr = ''):
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(binner.values)
ax.set_ylabel(yname)
ax.set_xlabel(xname)
ax.set_title(f'run: {run_nr}')
def plot_binner_hist(binner,
dname = 'data', run_nr = ''):
counts = xr.DataArray(np.ones(len(binner.values)),
dims=[dname],
coords={dname: binner.values},
name='counts')
counts = counts.groupby(dname).sum()
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(counts[dname], counts, 'o', ms=4)
ax.set_xlabel(dname)
ax.set_ylabel('counts')
ax.set_title(f'run {run_nr}')
ax.grid(True)
#if safe_fig == True:
# tstamp = strftime('%y%m%d_%H%M')
# fig.savefig(f'images/run{run_nr}_scan_{tstamp}.png', dpi=200)
def plot_hist_processed(hist_data):
pass
"""
DSSC-related sub-routines.
comment: contributions should comply with pep8 code structure guidelines.
"""
import logging
import os
import numpy as np
import xarray as xr
import pandas as pd
import extra_data as ed
from ..mnemonics_machinery import mnemonics_for_run
from .dssc_data import save_xarray
__all__ = [
'process_dssc_data'
]
log = logging.getLogger(__name__)
def create_empty_dataset(run_info, binners={}):
"""
Create empty (zero-valued) DataSet for a single DSSC module
to iteratively add data to.
Copyright (c) 2020, SCS-team
Parameters
----------
run_info: dictionary
info dictionary as returned by load_dssc_info().
binners: dictionary
dictionary of binner arrays created via create_dssc_bins().
Returns
-------
ds: xarray.Dataset
zero-valued dataset with variables 'data' and 'hist', shaped according
to the given binners.
"""
fpt = run_info['frames_per_train']
len_x = run_info['dims'][0]
len_y = run_info['dims'][1]
defaults = {"trainId": run_info["trainIds"],
"pulse": np.linspace(0, fpt-1, fpt, dtype=int),
"x": np.linspace(1, len_x, len_x, dtype=int),
"y": np.linspace(1, len_y, len_y, dtype=int)}
dims = []
coords = {}
shape = []
for key in defaults:
dims.append(key)
df = pd.DataFrame({'data': defaults[key]})
if key in binners:
df = pd.DataFrame({'data': binners[key].values})
grouped = df.groupby(['data'])
groups = list(grouped.groups.keys())
coords[key] = groups
shape.append(len(groups))
da_data = xr.DataArray(np.zeros(shape, dtype=np.float64),
coords=coords,
dims=dims)
ds = da_data.to_dataset(name="data")
ds['hist'] = xr.full_like(ds.data[:, :, 0, 0], fill_value=0)
log.debug("Prepared empty dataset for single dssc module")
return ds
def load_chunk_data(sel, sourcename, maxframes=None):
"""
Load selected DSSC data. The flattened multi-index (trains+pulses) is
unraveled before returning the data.
Copyright (c) 2019, Michael Schneider
Copyright (c) 2020, SCS-team
license: BSD 3-Clause License (see LICENSE_BSD for more info)
Parameters
----------
sel: extra_data.DataCollection
a DataCollection or a subset of a DataCollection obtained by its
select_trains() method
sourcename: str
Returns
-------
xarray.DataArray
"""
info = sel.detector_info(sourcename)
fpt = info['frames_per_train']
data = sel.get_array(sourcename, 'image.data',
extra_dims=['_empty_', 'x', 'y']
).squeeze()
tids = np.unique(data.trainId)
data = data.rename(dict(trainId='trainId_pulse'))
midx = pd.MultiIndex.from_product([sorted(tids), range(fpt)],
names=('trainId', 'pulse'))
data = xr.DataArray(data,
dict(trainId_pulse=midx)
).unstack('trainId_pulse')
data = data.transpose('trainId', 'pulse', 'x', 'y')
log.debug(f"loaded and formatted chunk of dssc data")
return data.loc[{'pulse': np.s_[:maxframes]}]
def _load_chunk_xgm(sel, xgm_mnemonic, stride):
"""
Load selected xgm data.
Copyright (c) 2020, SCS-team
Parameters
----------
sel: extra_data.DataCollection
a DataCollection or a subset of a DataCollection obtained by its
select_trains() method
xgm_mnemonic: str
a valid mnemonic for an xgm source from the tb.mnemonic dictionary
stride: int
indicating which dssc frames should be normalized using the xgm data.
Returns
-------
xarray.DataArray
"""
mnemonics = mnemonics_for_run(sel)
d = sel.get_array(*mnemonics[xgm_mnemonic].values())
d = d.rename(new_name_or_name_dict={'XGMbunchId': 'pulse'})
frame_coords = np.linspace(0, (d.shape[1]-1)*stride, d.shape[1], dtype=int)
d = d.assign_coords({'pulse': frame_coords})
log.debug(f"loaded and formatted chunk of xgm data")
return d
def process_dssc_data(proposal, run_nr, module, chunksize, info, dssc_binners,
path='./',
pulsemask=None,
dark_image=None,
xgm_mnemonic='SCS_SA3',
xgm_normalization=False,
normevery=1
):
"""
Collects and reduces DSSC data for a single module.
Copyright (c) 2020, SCS-team
Parameters
----------
proposal : int
proposal number
run_nr : int
run number
module : int
DSSC module to process
chunksize : int
number of trains to load simultaneously
info: dictionary
dictionary containing keys 'dims', 'frames_per_train', 'total_frames',
'trainIds', 'number_of_trains'.
dssc_binners: dictionary
a dictionary containing binner objects created by the ToolBox member
function "create_binner()"
path : str
location in which the .h5 files, containing the binned data, should
be stored.
pulsemask : numpy.ndarray
array of booleans to be used to mask dssc data according to xgm data.
dark_image: xarray.DataArray
an xarray DataArray with coordinates matching the loaded data. If
dark_image is not None, it will be subtracted from each individual dssc
frame.
xgm_normalization: bool
true if the data should be divided by the corresponding xgm value.
xgm_mnemonic: str
Mnemonic of the xgm data to be used for normalization.
normevery: int
One out of normevery dssc frames will be normalized.
Returns
-------
module_data: xarray.Dataset
xarray datastructure containing data binned according to bins.
"""
# metadata definition
ntrains = info['number_of_trains']
chunks = np.arange(ntrains, step=chunksize)
sourcename_dssc = f'SCS_DET_DSSC1M-1/DET/{module}CH0:xtdf'
# open extra_data run objects
collection_DSSC = ed.open_run(proposal, run_nr,
include=f'*DSSC{module:02d}*')
collection_DA1 = ed.open_run(proposal, run_nr, include='*DA01*')
log.info(f"Processing dssc module {module}: start")
# create empty dataset for dssc data to be filled iteratively
module_data = create_empty_dataset(info, dssc_binners)
# load data chunk by chunk and merge result into prepared empty dataset
for chunk in chunks:
log.debug(f"Module {module}: "
f"load trains {chunk}:{chunk + chunksize}")
sel = collection_DSSC.select_trains(
ed.by_index[chunk:chunk + chunksize])
chunk_data = load_chunk_data(sel, sourcename_dssc)
chunk_hist = xr.full_like(chunk_data[:, :, 0, 0], fill_value=1)
# ---------------------------------------------------------------------
# optional blocks -> ToDo: see merge request !87
# ---------------------------------------------------------------------
# option 1: prefiltering -> xgm pulse masking
if pulsemask is not None:
log.debug(f'Module {module}: drop out-of-bounds frames')
# relatively slow. ToDo -> wrap using np.ufunc
chunk_data = chunk_data.where(pulsemask)
chunk_hist = chunk_hist.where(pulsemask)
# option 2: subtraction of dark image/s
if dark_image is not None:
log.debug(f'Module {module}: subtract dark')
chunk_data.values = chunk_data.values - dark_image.values
# slower: using xarray directly
# chunk_data = chunk_data - dark_image
# option 3: normalize dssc frames by their xgm values
if xgm_normalization:
log.debug(f'Module {module}: load and format xgm data')
sel_DA1 = collection_DA1.select_trains(
ed.by_index[chunk:chunk + chunksize])
chunk_xgm = _load_chunk_xgm(sel_DA1, xgm_mnemonic, normevery)
log.debug(f'Module {module}: normalize chunk_data using xgm')
# the following line uses numpy's fast vectorized array operations;
# there is some overhead from rewriting the values of the xarray DataArray
chunk_data.values[:, 0::normevery, :, :] = \
np.divide(chunk_data[:, 0::normevery, :, :], chunk_xgm)
# slow code using xarray directly
# chunk_data = chunk_data / chunk_xgm
# ---------------------------------------------------------------------
# end optional blocks: xarray operations from here on.
# ---------------------------------------------------------------------
# data reduction -> apply binners
log.debug(f'Module {module}: apply binning to chunk_data')
chunk_data = chunk_data.to_dataset(name='data')
chunk_data['hist'] = chunk_hist
for b in dssc_binners:
chunk_data[b+"_binned"] = dssc_binners[b]
chunk_data = chunk_data.groupby(b+"_binned").sum(b)
chunk_data = chunk_data.rename(name_dict={b+"_binned": b})
# add chunk data to loaded data
log.debug(f'Module {module}: merge chunk data')
for var in ['data', 'hist']:
module_data[var] = xr.concat([module_data[var],
chunk_data[var]],
dim='tmp').sum('tmp')
log.debug(f"Module {module}: "
f"processed trains {chunk}:{chunk + chunksize}")
log.debug(f'Module {module}: calculate mean')
module_data['data'] = module_data['data'] / module_data['hist']
module_data = module_data.transpose('trainId', 'pulse', 'x', 'y')
module_data.attrs['module'] = module
log.debug(f'saving module {module}')
if not os.path.isdir(path):
os.mkdir(path)
fname = f'run_{run_nr}_module{module}.h5'
save_xarray(path+fname, module_data, mode='a')
log.info(f"processing module {module}: done")
from joblib import Parallel, delayed, parallel_backend
from time import strftime
import shutil
from tqdm.auto import tqdm
import os
import warnings
import psutil
import extra_data as ed
from extra_data.read_machinery import find_proposal
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import ImageGrid
import numpy as np
import xarray as xr
import h5py
from glob import glob
from imageio import imread
from ..constants import mnemonics as _mnemonics
from .azimuthal_integrator import AzimuthalIntegrator
from ..misc.laser_utils import positionToDelay
class FastCCD:
def __init__(self, proposal, distance=1, raw=False):
""" Create a FastCCD object to process FaStCCD data.
inputs:
proposal: (int,str) proposal number string
distance: (float) distance sample to FastCCD detector in meter
raw: use processed data from the calibration pipeline or raw files
"""
if isinstance(proposal,int):
proposal = 'p{:06d}'.format(proposal)
self.runFolder = find_proposal(proposal)
self.semester = self.runFolder.split('/')[-2]
self.proposal = proposal
self.topic = self.runFolder.split('/')[-3]
self.tempdir = None
self.save_folder = os.path.join(self.runFolder, 'usr/condensed_runs/')
self.distance = distance
self.px_pitch_h = 30 # pitch in microns
self.px_pitch_v = 30 # pitch in microns
self.aspect = 1 # aspect ratio of the FastCCD images
self.mask = None
self.max_fraction_memory = 0.8
self.filter_mask = None
self.raw = raw
self.gain = 1
print('FastCCD configuration')
print(f'Topic: {self.topic}')
print(f'Semester: {self.semester}')
print(f'Proposal: {self.proposal}')
print(f'Default save folder: {self.save_folder}')
print(f'Sample to FastCCD distance: {self.distance} m')
print(f'Using raw files: {self.raw}')
if not os.path.exists(self.save_folder):
warnings.warn(f'Default save folder does not exist: {self.save_folder}')
self.max_fraction_memory = 0.8
self.Nworker = 10
self.maxSaturatedPixel = 1
def __del__(self):
# delete the temporary folder
if self.tempdir:
shutil.rmtree(self.tempdir)
def open_run(self, run_nr, isDark=False, t0=0.0):
""" Open a run with extra-data and prepare the virtual dataset for multiprocessing
inputs:
run_nr: the run number
isDark: True if the run is a dark run
t0: optional t0 in mm
"""
print('Opening run data with extra-data')
self.run_nr = run_nr
self.xgm = None
self.run = ed.open_run(self.proposal, self.run_nr)
self.plot_title = f'{self.proposal} run: {self.run_nr}'
self.isDark = isDark
self.fpt = 1
#self.nbunches = self.run.get_array('SCS_RR_UTC/MDL/BUNCH_DECODER', 'sase3.nPulses.value')
#self.nbunches = np.unique(self.nbunches)
self.nbunches = 1
#if len(self.nbunches) == 1:
# self.nbunches = self.nbunches[0]
#else:
# warnings.warn('not all trains have same length FastCCD data')
# print(f'nbunches: {self.nbunches}')
# self.nbunches = self.nbunches[-1]
print(f'FastCCD frames per train: {self.fpt}')
print(f'SA3 bunches per train: {self.nbunches}')
print('Collecting FastCCD module files')
self.collect_fastccd_file()
print(f'Loading XGM data')
try:
self.xgm = self.run.get_array(_mnemonics['SCS_SA3']['source'],
_mnemonics['SCS_SA3']['key'],
roi=ed.by_index[:self.nbunches])
self.xgm = self.xgm.squeeze() # remove the pulseId dimension since XGM should have only 1 value per train
except:
self.xgm = xr.DataArray(np.zeros_like(self.run.train_ids),dims = 'trainId', coords = {"trainId":self.run.train_ids})
print(f'Loading mono nrj data')
try:
self.nrj = self.run.get_array(_mnemonics['nrj']['source'],
_mnemonics['nrj']['key'])
except:
self.nrj = xr.DataArray(np.zeros_like(self.run.train_ids),dims = 'trainId', coords = {"trainId":self.run.train_ids})
print(f'Loading delay line data')
try:
self.delay_mm = self.run.get_array(_mnemonics['PP800_DelayLine']['source'],
_mnemonics['PP800_DelayLine']['key'])
except:
self.delay_mm = xr.DataArray(np.zeros_like(self.run.train_ids),dims = 'trainId', coords = {"trainId":self.run.train_ids})
self.t0 = t0
self.delay_ps = positionToDelay(self.delay_mm, origin=self.t0, invert=True)
print(f'Loading Fast ADC5 data')
try:
self.FastADC5 = self.run.get_array(_mnemonics['FastADC5raw']['source'], _mnemonics['FastADC5raw']['key']).max('dim_0')
self.FastADC5[self.FastADC5<35000] = 0
self.FastADC5[self.FastADC5>=35000] = 1
except:
self.FastADC5 = xr.DataArray(np.zeros_like(self.run.train_ids),dims = 'trainId', coords = {"trainId":self.run.train_ids})
# create a dummy scan variable for dark run
# for other type or run, use FastCCD.define_run function to overwrite it
self.scan = xr.DataArray(np.ones_like(self.run.train_ids), dims=['trainId'],coords={'trainId': self.run.train_ids})
self.scan = self.scan.to_dataset(name='scan_variable')
self.scan_vname = 'dummy'
def load_gain(self, fname):
""" Load a gain map by giving the filename
"""
self.gain = np.load(fname)['arr_0'][:,:,0]
def collect_fastccd_file(self):
""" Collect the raw fastCCD h5 files.
"""
if self.raw:
folder = 'raw'
else:
folder = 'proc'
pattern = self.runFolder + f'/{folder}/r{self.run_nr:04d}/RAW-R{self.run_nr:04d}-DA05-S*.h5'
self.h5list = glob(pattern)
def define_scan(self, vname, bins):
"""
Prepare the binning of the FastCCD data.
inputs:
vname: variable name for the scan, can be a mnemonic string from ToolBox
or a dictionary with ['source', 'key'] fields
bins: step size (binning by bin edges is not yet implemented)
"""
if vname == 'delay_ps':
self.scan = self.delay_ps
else:
if type(vname) is dict:
self.scan = self.run.get_array(vname['source'], vname['key'])
elif type(vname) is str:
if vname not in _mnemonics:
raise ValueError(f'{vname} not found in the ToolBox mnemonics table')
self.scan = self.run.get_array(_mnemonics[vname]['source'], _mnemonics[vname]['key'])
else:
raise ValueError(f'vname should be a string or a dict. We got {type(vname)}')
if (type(bins) is int) or (type(bins) is float):
self.scan = bins * np.round(self.scan / bins)
else:
# TODO: digitize the data
raise ValueError('Binning by bin edges is not yet implemented')
self.scan_vname = vname
self.scan = self.scan.to_dataset(name='scan_variable')
#self.scan['xgm_pumped'] = self.xgm[:, :self.nbunches:2].mean('dim_0')
#self.scan['xgm_unpumped'] = self.xgm[:, 1:self.nbunches:2].mean('dim_0')
#self.scan.to_netcdf(self.vds_scan, group='data')
self.scan_counts = xr.DataArray(np.ones(len(self.scan['scan_variable'])),
dims=['scan_variable'],
coords={'scan_variable': self.scan['scan_variable'].values},
name='counts')
self.scan_points = self.scan.groupby('scan_variable').mean('trainId').coords['scan_variable'].values
self.scan_points_counts = self.scan_counts.groupby('scan_variable').sum()
self.plot_scan()
def plot_scan(self):
""" Plot a previously defined scan to see the scan range and the statistics.
"""
if self.scan:
fig, (ax1, ax2) = plt.subplots(nrows=2, figsize=[5, 5])
else:
fig, ax1 = plt.subplots(nrows=1, figsize=[5, 2.5])
ax1.plot(self.scan_points, self.scan_points_counts, 'o-', ms=2)
ax1.set_xlabel(f'{self.scan_vname}')
ax1.set_ylabel('# trains')
ax1.set_title(self.plot_title)
if self.scan:
ax2.plot(self.scan['scan_variable'])
ax2.set_xlabel('train #')
ax2.set_ylabel(f'{self.scan_vname}')
def plot_xgm_hist(self, nbins=100):
""" Plots an histogram of the SCS XGM dedicated SAS3 data.
inputs:
nbins: number of the bins for the histogram.
"""
hist, bins_edges = np.histogram(self.xgm, nbins, density=True)
width = 1.0 * (bins_edges[1] - bins_edges[0])
bins_center = 0.5*(bins_edges[:-1] + bins_edges[1:])
plt.figure(figsize=(5,3))
plt.bar(bins_center, hist, align='center', width=width)
plt.xlabel(f"{_mnemonics['SCS_SA3']['source']}{_mnemonics['SCS_SA3']['key']}")
plt.ylabel('density')
plt.title(self.plot_title)
def xgm_filter(self, xgm_low=-np.inf, xgm_high=np.inf):
""" Filters the data by train. If one pulse within a train has an SASE3 SCS XGM value below
xgm_low or above xgm_high, that train will be dropped from the dataset.
inputs:
xgm_low: low threshold value
xgm_high: high threshold value
"""
if self.isDark:
warnings.warn(f'This run was loaded as dark. Filtering on xgm makes no sense. Aborting')
return
self.xgm_low = xgm_low
self.xgm_high = xgm_high
filter_mask = (self.xgm > self.xgm_low) * (self.xgm < self.xgm_high)
if self.filter_mask is None:
self.filter_mask = filter_mask
else:
self.filter_mask = self.filter_mask*filter_mask
valid = filter_mask.astype(bool)
self.xgm = self.xgm.where(valid).dropna('trainId')
self.scan = self.scan.sel({'trainId': self.xgm.trainId})
nrejected = len(self.run.train_ids) - len(self.xgm.trainId)
print((f'Rejecting {nrejected} out of {len(self.run.train_ids)} trains due to xgm '
f'thresholds: [{self.xgm_low}, {self.xgm_high}]'))
def load_mask(self, fname, plot=True):
""" Load a FastCCD mask file.
input:
fname: string of the filename of the mask file
plot: if True, the loaded mask is plotted
"""
fccd_mask = imread(fname)
fccd_mask = fccd_mask.astype(float)[..., 0] // 255
fccd_mask[fccd_mask==0] = np.nan
self.mask = fccd_mask
if plot:
plt.figure()
plt.imshow(self.mask)
def binning(self):
""" Bin the FastCCD data by the predifined scan type (FastCCD.define()) using multiprocessing
"""
# get available memory in GB, we will try to use 80 % of it
max_GB = psutil.virtual_memory().available/1024**3
print(f'max available memory: {max_GB} GB')
# chunksize = usable bytes / (Nworker * 16 * 1934 px * 960 px * frames per train)
self.chunksize = int(self.max_fraction_memory*max_GB * 1024**3 // (self.Nworker * 16 * 1934 * 960 * self.fpt))
print('processing', self.chunksize, 'trains per chunk')
assert self.chunksize > 500, "cannot load FastCCD h5 files in memory"
jobs = []
for m,h5fname in enumerate(self.h5list):
jobs.append(dict(
fpt=self.fpt,
h5fname=h5fname,
chunksize=self.chunksize,
nbunches=self.nbunches,
workerId=m,
Nworker=self.Nworker,
scan = self.scan,
xgm = self.xgm,
FastADC5 = self.FastADC5
#maxSaturatedPixel=self.maxSaturatedPixel
))
timestamp = strftime('%X')
print(f'start time: {timestamp}')
with parallel_backend('threading', n_jobs=self.Nworker):
res = Parallel( verbose=20)(
delayed(process_one_module)(job) for job in tqdm(jobs)
)
print('finished:', strftime('%X'))
# rearrange the multiprocessed data
# this gets rid of the worker dimension; no actual sum over workers is involved
self.module_data = xr.concat(res, dim='workerId').sum(dim='workerId')
self.module_data['pumped'] = self.module_data['pumped'] / self.module_data['sum_count_pumped']
self.module_data['unpumped'] = self.module_data['unpumped'] / self.module_data['sum_count_unpumped']
self.module_data['xgm_pumped'] = self.module_data['xgm_pumped'] / self.module_data['sum_count_pumped']
self.module_data['xgm_unpumped'] = self.module_data['xgm_unpumped'] / self.module_data['sum_count_unpumped']
self.module_data['run'] = self.run_nr
self.module_data['t0'] = self.t0
self.plot_title = f"{self.proposal} run: {self.module_data['run'].values}"
self.module_data.attrs['plot_title'] = self.plot_title
self.module_data.attrs['scan_variable'] = self.scan_vname
def save(self, save_folder=None, overwrite=False):
""" Save the crunched data.
inputs:
save_folder: string of the folder where the data should be saved.
overwrite: boolean whether or not to overwrite existing files.
"""
if save_folder is None:
save_folder = self.save_folder
if self.isDark:
fname = f'run{self.run_nr}_dark.h5' # no scan
else:
fname = f'run{self.run_nr}.h5' # run with delay scan (change for other scan types!)
save_path = os.path.join(save_folder, fname)
file_exists = os.path.isfile(save_path)
if not file_exists or (file_exists and overwrite):
if file_exists:
warnings.warn(f'Overwriting file: {save_path}')
os.remove(save_path)
self.module_data.to_netcdf(save_path, group='data')
self.module_data.close()
os.chmod(save_path, 0o664)
print('saving: ', save_path)
else:
print('file', save_path, 'exists and overwrite is False')
def load_binned(self, runNB, dark_runNB=None, xgm_norm = True, save_folder=None):
""" load previously binned (crunched) FastCCD data by FastCCD.crunch() and FastCCD.save()
inputs:
runNB: run number to load
dark_runNB: run number of the corresponding dark
xgm_norm: normalize by XGM data if True
save_folder: path string where the crunched data are saved
"""
if save_folder is None:
save_folder = self.save_folder
self.plot_title = f'{self.proposal} run: {runNB} dark: {dark_runNB}'
binned = xr.load_dataset(os.path.join(save_folder, f'run{runNB}.h5'), group='data')
if dark_runNB is not None:
dark = xr.load_dataset(os.path.join(save_folder, f'run{dark_runNB}_dark.h5'), group='data')
binned['pumped'] = self.gain*(binned['pumped'] - dark['pumped'].squeeze(drop=True))
binned['unpumped'] = self.gain*(binned['unpumped'] - dark['unpumped'].squeeze(drop=True))
if xgm_norm:
binned['pumped'] = binned['pumped']/binned['xgm_pumped']
binned['unpumped'] = binned['unpumped']/binned['xgm_unpumped']
self.scan_points = binned['scan_variable']
self.scan_points_counts = binned['sum_count_pumped'] + binned['sum_count_unpumped']
self.scan_vname = binned.attrs['scan_variable']
self.scan = None
self.binned = binned
def plot_FastCCD(self, use_mask = True, p_low = 1, p_high = 98, vmin = None, vmax = None):
""" Plot pumped and unpumped FastCCD images.
inputs:
use_mask: if True, a mask is applied on the FastCCD.
p_low: low percentile value to adjust the contrast scale on the unpumped and pumped image
p_high: high percentile value to adjust the contrast scale on the unpumped and pumped image
vmin: low value of the image scale
vmax: high value of the image scale
"""
if use_mask:
if self.mask is None:
raise ValueError('No mask was loaded !')
mask = self.mask
mask_txt = ' masked'
else:
mask = 1
mask_txt = ''
im_pump_mean = self.binned['pumped'].mean('scan_variable')
im_unpump_mean = self.binned['unpumped'].mean('scan_variable')
self.im_pump_mean = mask*im_pump_mean
self.im_unpump_mean = mask*im_unpump_mean
fig = plt.figure(figsize=(9, 4))
grid = ImageGrid(fig, 111,
nrows_ncols=(1,2),
axes_pad=0.15,
share_all=True,
cbar_location="right",
cbar_mode="single",
cbar_size="7%",
cbar_pad=0.15,
)
tmp = self.im_pump_mean.values.flatten()
try:
_vmin, _vmax = np.percentile(tmp[~np.isnan(tmp)], [p_low, p_high])
except:
_vmin, _vmax = (None, None)
if vmin is None:
vmin = _vmin
if vmax is None:
vmax = _vmax
im = grid[0].imshow(self.im_pump_mean, vmin=vmin, vmax=vmax, aspect=self.aspect)
grid[0].set_title('pumped' + mask_txt)
im = grid[1].imshow(self.im_unpump_mean, vmin=vmin, vmax=vmax, aspect=self.aspect)
grid[1].set_title('unpumped' + mask_txt)
grid[-1].cax.colorbar(im)
grid[-1].cax.toggle_label(True)
fig.suptitle(self.plot_title)
def azimuthal_int(self, wl, center=None, angle_range=[0, 180-1e-6], dr=1, use_mask=True):
""" Perform azimuthal integration of 1D binned FastCCD run.
inputs:
wl: photon wavelength
center: center of integration
angle_range: angles of integration
dr: radial width of the integration slices, in pixels
use_mask: if True, use the loaded mask
"""
if use_mask:
if self.mask is None:
raise ValueError('No mask was loaded !')
mask = self.mask
mask_txt = ' masked'
else:
mask = 1
mask_txt = ''
im_pumped_arranged = self.binned['pumped'].values
im_unpumped_arranged = self.binned['unpumped'].values
im_pumped_arranged *= mask
im_unpumped_arranged *= mask
im_pumped_mean = im_pumped_arranged.mean(axis=0)
im_unpumped_mean = im_unpumped_arranged.mean(axis=0)
ai = AzimuthalIntegrator(im_pumped_mean.shape, center, angle_range, dr=dr, aspect=1)
norm = ai(~np.isnan(im_pumped_mean))
az_pump = []
az_unpump = []
for i in tqdm(range(len(self.binned['scan_variable']))):
az_pump.append(ai(im_pumped_arranged[i]) / norm)
az_unpump.append(ai(im_unpumped_arranged[i]) / norm)
az_pump = np.stack(az_pump)
az_unpump = np.stack(az_unpump)
coords = {'scan_variable': self.binned['scan_variable'], 'distance': ai.distance}
azimuthal = xr.DataArray(az_pump, dims=['scan_variable', 'distance'], coords=coords)
azimuthal = azimuthal.to_dataset(name='pumped')
azimuthal['unpumped'] = xr.DataArray(az_unpump, dims=['scan_variable', 'distance'], coords=coords)
azimuthal = azimuthal.transpose('distance', 'scan_variable')
#t0 = 225.5
#azimuthal['delay'] = (t0 - azimuthal.delay)*6.6
#azimuthal['delay'] = azimuthal.delay
azimuthal['delta_q (1/nm)'] = 2e-9 * np.pi * np.sin(
np.arctan(azimuthal.distance * self.px_pitch_v*1e-6 / self.distance)) / wl
azimuthal.attrs = self.binned.attrs
self.azimuthal = azimuthal.swap_dims({'distance': 'delta_q (1/nm)'})
def plot_azimuthal_int(self, kind='difference', lim=None):
""" Plot a computed azimuthal integration.
inputs:
kind: (str) either 'difference' or 'relative' to change the type of plot.
lim: (list of two floats) optional color scale limits for the bottom panel.
"""
fig, [ax1, ax2, ax3] = plt.subplots(nrows=3, sharex=True, sharey=True)
xr.plot.imshow(self.azimuthal.pumped, ax=ax1, vmin=0, robust=True)
ax1.set_title('pumped')
xr.plot.imshow(self.azimuthal.unpumped, ax=ax2, vmin=0, robust=True)
ax2.set_title('unpumped')
if kind == 'difference':
val = self.azimuthal.pumped - self.azimuthal.unpumped
ax3.set_title('pumped - unpumped')
elif kind == 'relative':
val = (self.azimuthal.pumped - self.azimuthal.unpumped)/self.azimuthal.unpumped
ax3.set_title('(pumped - unpumped)/unpumped')
else:
raise ValueError('kind should be either difference or relative')
if lim is None:
xr.plot.imshow(val, ax=ax3, robust=True)
else:
xr.plot.imshow(val, ax=ax3, vmin=lim[0], vmax=lim[1])
ax3.set_xlabel(self.scan_vname)
fig.suptitle(f'{self.plot_title}')
def plot_azimuthal_line_cut(self, data, qranges, qwidths):
""" Plot line scans on top of the data.
inputs:
data: an azimuthal integrated xarray DataArray with 'delta_q (1/nm)' as one of its dimension.
qranges: a list of q-range
qwidths: a list of q-widths, same length as qranges
"""
fig, [ax1, ax2] = plt.subplots(nrows=2, sharex=True, figsize=[8, 7])
xr.plot.imshow(data, ax=ax1, robust=True)
# attributes are not propagated during xarray mathematical operations
# (https://github.com/pydata/xarray/issues/988), so the scan variable name
# might no longer be present in data
ax1.set_xlabel(self.scan_vname)
fig.suptitle(f'{self.plot_title}')
for i, (qr, qw) in enumerate(zip(qranges, qwidths)):
sel = (data['delta_q (1/nm)'] > (qr - qw/2)) * (data['delta_q (1/nm)'] < (qr + qw/2))
val = data.where(sel).mean('delta_q (1/nm)')
ax2.plot(data.scan_variable, val, c=f'C{i}', label=f'q = {qr:.2f}')
ax1.axhline(qr - qw/2, c=f'C{i}', lw=1)
ax1.axhline(qr + qw/2, c=f'C{i}', lw=1)
ax2.legend()
ax2.set_xlabel(self.scan_vname)
# since 'self' is not picklable, this function has to be outside the FastCCD class so that it can be used
# by the multiprocessing pool.map function
def process_one_module(job):
fpt = job['fpt']
Nworker = job['Nworker']
workerId = job['workerId']
scan = job['scan']
chunksize = job['chunksize']
nbunches = job['nbunches']
h5fname = job['h5fname']
xgm = job['xgm']
FastADC5 = job['FastADC5']
#maxSaturatedPixel = job['maxSaturatedPixel']
image_path = f'/INSTRUMENT/SCS_CDIDET_FCCD2M/DAQ/FCCD:daqOutput/data/image/pixels'
# crunching
with h5py.File(h5fname, 'r') as m:
fastccd_trains = m['/INSTRUMENT/SCS_CDIDET_FCCD2M/DAQ/FCCD:daqOutput/data/trainId'][()]
data = m[image_path][()].squeeze().astype(np.float64)
unique_trainIds, unique_list = np.unique(fastccd_trains, return_index = True)
unique_nz_list = np.nonzero(unique_trainIds)[0]
fastccd_trains = unique_trainIds[unique_nz_list]
coords = {'trainId': fastccd_trains}
fastccd = xr.DataArray(data[unique_nz_list, :, :], dims=['trainId', 'x', 'y'], coords=coords)
fastccd = fastccd.where(fastccd.sum(('x','y'), skipna=True) > 0)
aligned_vals = xr.align(*[fastccd, xgm, FastADC5], join='inner')
ds = xr.Dataset(dict(zip(['fastccd', 'xgm', 'FastADC5'], aligned_vals)))
ds['sum_count'] = xr.full_like(ds['fastccd'][..., 0, 0], fill_value=1)
# grouping and summing
ds['scan_variable'] = scan['scan_variable'] # this only adds scan data for matching trainIds
ds = ds.dropna('trainId')
#print(ds)
data_pumped = ds.where(ds['FastADC5'] > 0, drop=True).groupby('scan_variable').sum('trainId')
data_unpumped = ds.where(ds['FastADC5'] < 1, drop=True).groupby('scan_variable').sum('trainId')
module_data = data_pumped['fastccd'].to_dataset(name='pumped')
module_data['unpumped'] = data_unpumped['fastccd']
module_data['sum_count_pumped'] = data_pumped['sum_count']
module_data['sum_count_unpumped'] = data_unpumped['sum_count']
module_data['xgm_pumped'] = data_pumped['xgm']
module_data['xgm_unpumped'] = data_unpumped['xgm']
module_data['workerId'] = workerId
return module_data
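# Example (editor's sketch): a typical reduction workflow with the FastCCD
# class defined above. Run numbers, thresholds and t0 are hypothetical.
#     fccd = FastCCD(2599, distance=1.0)
#     fccd.open_run(95, isDark=True)
#     fccd.binning()
#     fccd.save()                          # writes run95_dark.h5
#     fccd.open_run(96, t0=225.5)
#     fccd.define_scan('delay_ps', bins=0.05)
#     fccd.xgm_filter(xgm_low=500)
#     fccd.binning()
#     fccd.save()
#     fccd.load_binned(96, dark_runNB=95)
#     fccd.plot_FastCCD(use_mask=False)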
""" Gotthard-II detector related sub-routines
Copyright (2024) SCS Team.
(contributions preferably comply with pep8 code structure
guidelines.)
"""
from extra.components import OpticalLaserPulses, XrayPulses
import numpy as np
import xarray as xr
import logging
__all__ = [
'extract_GH2',
]
log = logging.getLogger(__name__)
def extract_GH2(ds, run, firstFrame=0, bunchPattern='scs_ppl',
gh2_dim='gh2_pId', keep_darks=False):
'''
Select and align the frames of the Gotthard-II that have been exposed
to light. `keep_darks` gives the option to keep the non-exposed
frames.
Parameters
----------
ds: xarray.Dataset
The dataset containing GH2 data
run: extra_data.DataCollection
The run containing the bunch pattern source
firstFrame: int
The GH2 frame number corresponding to the first pulse of the train.
bunchPattern: str in ['scs_ppl', 'sase3']
the bunch pattern used to align data. For 'scs_ppl', the gh2_pId
dimension is renamed 'ol_pId', and for 'sase3' gh2_pId is renamed
'sa3_pId'.
gh2_dim: str
The name of the dimension that corresponds to the Gotthard-II frames.
keep_darks: bool
If True, the frames that do not correspond to the bunchPattern are
kept.
Returns
-------
nds: xarray Dataset
The aligned and reduced dataset, containing only the GH2 variables
that hold data.
'''
if gh2_dim not in ds.dims:
log.warning(f'gh2_dim "{gh2_dim}" not in dataset. Skipping.')
return ds
if bunchPattern == 'scs_ppl':
pattern = OpticalLaserPulses(run)
dim = 'ol_pId'
else:
pattern = XrayPulses(run)
dim = 'sa3_pId'
others = [var for var in ds if
gh2_dim not in ds[var].dims or dim in ds[var].coords]
nds = ds.drop_dims(dim, errors='ignore')
nds = nds.drop_vars(others, errors='ignore')
dark_ds = xr.Dataset()
if pattern.is_constant_pattern():
pulse_ids = pattern.peek_pulse_ids(labelled=False)
signal_on_gh_ids = pulse_ids + firstFrame
if keep_darks:
no_signal_ids = [i for i in nds[gh2_dim].values
if i not in signal_on_gh_ids]
dark_ds = nds.isel({gh2_dim: no_signal_ids})
dark_ds = dark_ds.assign_coords({gh2_dim: no_signal_ids})
for n in dark_ds:
dark_ds = dark_ds.rename({n: f'{n}_dark'})
nds = nds.isel({gh2_dim: pulse_ids + firstFrame})
nds = nds.assign_coords({gh2_dim: pulse_ids})
nds = nds.rename({gh2_dim: dim})
else:
log.warning('The number of pulses has changed during the run.')
pulse_ids = np.unique(pattern.pulse_ids(labelled=False, copy=False))
nds = nds.isel({gh2_dim: pulse_ids + firstFrame})
nds = nds.assign_coords({gh2_dim: pulse_ids})
nds = nds.rename({gh2_dim: dim})
mask = pattern.pulse_mask(labelled=False)
mask = xr.DataArray(mask, dims=['trainId', dim],
coords={'trainId': run.train_ids,
dim: np.arange(mask.shape[1])})
mask = mask.sel({dim: pulse_ids})
nds = nds.where(mask, drop=True)
nds = nds.merge(dark_ds, join='inner')
ret = ds[others].merge(nds, join='inner')
return ret
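# Example (editor's sketch): aligning Gotthard-II frames to the optical laser
# pulses. The proposal/run numbers and the 'Gotthard2' mnemonic are
# hypothetical; `ds` must contain variables with a 'gh2_pId' dimension.
#     import toolbox_scs as tb
#     run, ds = tb.load(900447, runNB=12, fields=['Gotthard2'])
#     nds = extract_GH2(ds, run, firstFrame=1, bunchPattern='scs_ppl',
#                       keep_darks=True)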
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import leastsq
from scipy.optimize import curve_fit
from scipy.signal import fftconvolve
import xarray as xr
from extra_data import open_run
import pasha as psh
import toolbox_scs as tb
__all__ = [
'hRIXS',
'MaranaX',
]
# -----------------------------------------------------------------------------
# Curvature
def find_curvature(image, frangex=None, frangey=None,
deg=2, axis=1, offset=100):
# Resolve arguments
x_range = (0, image.shape[1])
if frangex is not None:
x_range = (max(frangex[0], x_range[0]), min(frangex[1], x_range[1]))
y_range = (0, image.shape[0])
if frangey is not None:
y_range = (max(frangey[0], y_range[0]), min(frangey[1], y_range[1]))
axis_range = y_range if axis == 1 else x_range
axis_dim = image.shape[axis - 1]
# Get kernel
integral = image[slice(*y_range), slice(*x_range)].mean(axis=axis)
roi = np.ones([axis_range[1] - axis_range[0], axis_dim])
ref = roi * integral[:, np.newaxis]
# Get sliced image
slice_ = [slice(None), slice(None)]
slice_[axis - 1] = slice(max(axis_range[0] - offset, 0),
min(axis_range[1] + offset, axis_dim))
sliced = image[tuple(slice_)]
if axis == 0:
sliced = sliced.T
# Get curvature factor from cross correlation
crosscorr = fftconvolve(sliced,
ref[::-1, :],
axes=0, )
shifts = np.argmax(crosscorr, axis=0)
curv = np.polyfit(np.arange(axis_dim), shifts, deg=deg)
return curv[:-1][::-1]
def find_curvature(img, args, plot=False, **kwargs):
def parabola(x, a, b, c, s=0, h=0, o=0):
return (a*x + b)*x + c
def gauss(y, x, a, b, c, s, h, o=0):
return h * np.exp(-((y - parabola(x, a, b, c)) / (2 * s))**2) + o
x = np.arange(img.shape[1])[None, :]
y = np.arange(img.shape[0])[:, None]
if plot:
plt.figure(figsize=(10,10))
plt.imshow(img, cmap='gray', aspect='auto', interpolation='nearest', **kwargs)
plt.plot(x[0, :], parabola(x[0, :], *args))
args, _ = leastsq(lambda args: (gauss(y, x, *args) - img).ravel(), args)
if plot:
plt.plot(x[0, :], parabola(x[0, :], *args))
return args
def correct_curvature(image, factor=None, axis=1):
if factor is None:
return
if axis == 1:
image = image.T
ydim, xdim = image.shape
x = np.arange(xdim + 1)
y = np.arange(ydim + 1)
xx, yy = np.meshgrid(x[:-1] + 0.5, y[:-1] + 0.5)
xxn = xx - factor[0] * yy - factor[1] * yy ** 2
ret = np.histogramdd((xxn.flatten(), yy.flatten()),
bins=[x, y],
weights=image.flatten())[0]
return ret if axis == 1 else ret.T
def get_spectrum(image, factor=None, axis=0,
pixel_range=None, energy_range=None, ):
start, stop = (0, image.shape[axis - 1])
if pixel_range is not None:
start = max(pixel_range[0] or start, start)
stop = min(pixel_range[1] or stop, stop)
edge = image.sum(axis=axis)[start:stop]
bins = np.arange(start, stop + 1)
centers = (bins[1:] + bins[:-1]) * 0.5
if factor is not None:
centers, edge = calibrate(centers, edge,
factor=factor,
range_=energy_range)
return centers, edge
# -----------------------------------------------------------------------------
# Energy calibration
def energy_calibration(channels, energies):
return np.polyfit(channels, energies, deg=1)
def calibrate(x, y=None, factor=None, range_=None):
if factor is not None:
x = np.polyval(factor, x)
if y is not None and range_ is not None:
start = np.argmin(np.abs((x - range_[0])))
stop = np.argmin(np.abs((x - range_[1])))
# Calibrated energies have a different direction
x, y = x[stop:start], y[stop:start]
return x, y
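# Example (editor's sketch): converting pixel positions to photon energies
# with the helpers above. The channel/energy pairs and `image` (a 2D detector
# array) are hypothetical; the energy axis is assumed to decrease with pixel
# number, which matches the [stop:start] slicing direction in `calibrate`.
#     factor = energy_calibration(channels=[300, 1500],
#                                 energies=[935.0, 920.0])
#     energies, spectrum = get_spectrum(image, factor=factor, axis=0,
#                                       energy_range=(925.0, 933.0))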
# -----------------------------------------------------------------------------
# Gaussian-related functions
FWHM_COEFF = 2 * np.sqrt(2 * np.log(2))
def gaussian_fit(x_data, y_data, offset=0):
"""
Centre-of-mass and width. Lifted from image_processing.imageCentreofMass()
"""
x0 = np.average(x_data, weights=y_data)
sx = np.sqrt(np.average((x_data - x0) ** 2, weights=y_data))
# Gaussian fit
baseline = y_data.min()
p_0 = (y_data.max(), x0 + offset, sx, baseline)
try:
p_f, _ = curve_fit(gauss1d, x_data, y_data, p_0, maxfev=10000)
return p_f
except (RuntimeError, TypeError) as e:
print(e)
return None
def gauss1d(x, height, x0, sigma, offset):
return height * np.exp(-0.5 * ((x - x0) / sigma) ** 2) + offset
def to_fwhm(sigma):
return abs(sigma * FWHM_COEFF)
def decentroid(res):
res = np.array(res)
ret = np.zeros(shape=(res.max(axis=0) + 1).astype(int))
for cy, cx in res:
if cx > 0 and cy > 0:
ret[int(cy), int(cx)] += 1
return ret
class hRIXS:
"""The hRIXS analysis, especially curvature correction
The objects of this class contain the meta-information about the settings
of the spectrometer, not the actual data, except possibly a dark image
for background subtraction.
The actual data is loaded into `xarray`s, and stays there.
Attributes
----------
PROPOSAL: int
the number of the proposal
DETECTOR: str
the detector to be used. Can be ['hRIXS_det', 'MaranaX'];
defaults to 'MaranaX'.
X_RANGE: slice
the slice to take in the dispersive direction, in pixels. Defaults
to the entire width.
Y_RANGE: slice
the slice to take in the energy direction
THRESHOLD: float
pixel counts above which a hit candidate is assumed, for centroiding.
use None if you want to give it in standard deviations instead.
STD_THRESHOLD:
same as THRESHOLD, in standard deviations.
DBL_THRESHOLD:
threshold controlling whether a detected hit is considered to be a
double hit.
BINS: int
the number of bins used in centroiding
CURVE_A, CURVE_B: float
the coefficients of the parabola for the curvature correction
USE_DARK: bool
whether to do dark subtraction. Is initially `False`, magically
switches to `True` if a dark has been loaded, but may be reset.
ENERGY_INTERCEPT, ENERGY_SLOPE:
The calibration from pixel to energy
FIELDS:
the fields to be loaded from the data. Add additional fields if so
desired.
Example
-------
proposal = 3145
h = hRIXS(proposal)
h.Y_RANGE = slice(700, 900)
h.CURVE_B = -3.695346575286939e-07
h.CURVE_A = 0.024084479232443695
h.ENERGY_SLOPE = 0.018387
h.ENERGY_INTERCEPT = 498.27
h.STD_THRESHOLD = 3.5
"""
DETECTOR_FIELDS = {
'hRIXS_det': ['hRIXS_det', 'hRIXS_index', 'hRIXS_delay', 'hRIXS_norm', 'nrj'],
'MaranaX': ['MaranaX', 'nrj'],
}
def __init__(self, proposalNB, detector='MaranaX'):
self.PROPOSAL = proposalNB
self.DETECTOR = detector
assert detector in self.DETECTOR_FIELDS
# image range
self.X_RANGE = np.s_[:]
self.Y_RANGE = np.s_[:]
# centroid
# centroid_one threshold
self.THRESHOLD = None # pixel counts above which a hit candidate is assumed
self.STD_THRESHOLD = 3.5 # same as THRESHOLD, in standard deviations
self.DBL_THRESHOLD = 0.1 # factor used for double hits in centroid_one
# centroid_two threshold
self.CENTROID_THRESHOLD = [0.2, 1]
self.CURVE_A = 0 # curvature parameters as determined elsewhere
self.CURVE_B = 0
# integral
self.BINS = 100
self.METHOD = 'centroid' # ['centroid', 'integral']
self.USE_DARK = False
# Ignore double hits
self.AVOID_DBL = False
self.ENERGY_INTERCEPT = 0
self.ENERGY_SLOPE = 1
self.FIELDS = self.DETECTOR_FIELDS[detector]
def set_params(self, **params):
for key, value in params.items():
setattr(self, key.upper(), value)
def get_params(self, *params):
if not params:
params = ('proposal', 'x_range', 'y_range',
'threshold', 'std_threshold', 'dbl_threshold',
'curve_a', 'curve_b',
'bins', 'method', 'fields')
return {param: getattr(self, param.upper()) for param in params}
def from_run(self, runNB, proposal=None, extra_fields=(), drop_first=False, subset=None):
"""load a run
Load the run `runNB`. A thin wrapper around `toolbox.load`.
Parameters
----------
drop_first: bool
if True, the first image in the run is removed from the dataset.
Example
-------
data = h.from_run(145) # load run 145
data1 = h.from_run(145) # load run 145
data2 = h.from_run(155) # load run 155
data = xarray.concat([data1, data2], 'trainId') # combine both
"""
if proposal is None:
proposal = self.PROPOSAL
if drop_first:
subset = slice(1, None)
run, data = tb.load(proposal, runNB=runNB, subset=subset,
fields=self.FIELDS + list(extra_fields))
return data
def load_dark(self, runNB, proposal=None):
"""load a dark run
Load the dark run `runNB` from `proposal`. The latter defaults to the
current proposal. The dark is stored in this `hRIXS` object, and
subsequent analyses use it for background subtraction.
Example
-------
h.load_dark(166) # load dark run 166
"""
data = self.from_run(runNB, proposal)
self.dark_image = data[self.DETECTOR].mean(dim='trainId')
self.USE_DARK = True
def find_curvature(self, runNB, proposal=None, plot=True, args=None,
**kwargs):
"""find the curvature correction coefficients
The hRIXS has some aberrations which lead to the spectroscopic lines
being curved on the detector. We approximate these aberrations with
a parabola for later correction.
Load a run and determine the curvature. The curvature is set in `self`,
and returned as a pair of floats.
Parameters
----------
runNB: int
the run number to use
proposal: int
the proposal to use, default to the current proposal
plot: bool
whether to plot the found curvature onto the data
args: pair of float, optional
a starting value to prime the fitting routine
Example
-------
h.find_curvature(155) # use run 155 to fit the curvature
"""
data = self.from_run(runNB, proposal)
image = data[self.DETECTOR].sum(dim='trainId') \
.values[self.X_RANGE, self.Y_RANGE].T
if args is None:
spec = (image - image[:10, :].mean()).mean(axis=1)
mean = np.average(np.arange(len(spec)), weights=spec)
args = (-2e-7, 0.02, mean - 0.02 * image.shape[1] / 2, 3,
spec.max(), image.mean())
args = find_curvature(image, args, plot=plot, **kwargs)
self.CURVE_B, self.CURVE_A, *_ = args
return self.CURVE_A, self.CURVE_B
def centroid_one(self, image):
"""find the position of photons with sub-pixel precision
A photon is supposed to have hit the detector if the intensity within a
2-by-2 square exceeds a threshold. In this case the position of the photon
is calculated as the center-of-mass in a 4-by-4 square.
Return the list of x, y coordinate pairs, corrected by the curvature.
"""
base = image.mean()
corners = image[1:, 1:] + image[:-1, 1:] \
+ image[1:, :-1] + image[:-1, :-1]
if self.THRESHOLD is None:
threshold = corners.mean() + self.STD_THRESHOLD * corners.std()
else:
threshold = self.THRESHOLD
# Threshold for double photons chosen to be the same ratio to single
# photons as found in the ESRF method
SpotHIGH=self.DBL_THRESHOLD*threshold
if self.AVOID_DBL:
SpotHIGH = 100000
middle = corners[1:-1, 1:-1]
candidates = (
(middle > threshold)
* (middle >= corners[:-2, 1:-1]) * (middle > corners[2:, 1:-1])
* (middle >= corners[1:-1, :-2]) * (middle > corners[1:-1, 2:])
* (middle >= corners[:-2, :-2]) * (middle > corners[2:, :-2])
* (middle >= corners[:-2, 2:]) * (middle > corners[2:, 2:]))
cp = np.argwhere(candidates)
if len(cp) > 10000:
raise RuntimeError(
"too many peaks, threshold low or acquisition time too high")
res = []
dres = []
for cy, cx in cp:
spot = image[cy: cy + 4, cx: cx + 4] - base
mx = np.average(np.arange(cx, cx + 4), weights=spot.sum(axis=0))
my = np.average(np.arange(cy, cy + 4), weights=spot.sum(axis=1))
if spot.sum() < SpotHIGH:
res.append((mx, my))
else:
res.append((mx, my))
res.append((mx, my))
dres.append((mx, my))
return res, dres
def centroid_two(self, image, energy):
""" determine position of photon hits on detector
The algorithm is taken from the ESRF RIXS toolbox. The thresholds for
determining photon hits are derived from the incident photon energy.
The function returns arrays containing the single and double hits
as x and y coordinates.
"""
# Prepare
threshold = self.CENTROID_THRESHOLD
# Multiplication factor * ADU/photon
photons = energy/3.6/1.06
SpotLOW = threshold[0] * photons
SpotHIGH = threshold[1] * photons
low_th_px = threshold[0] * photons
high_th_px = threshold[1] * photons
if self.AVOID_DBL:
SpotHIGH = 100000
img = image-image.mean()
gs = 2
# Find potential hits on a clipped image where 2 rows/columns are excluded
# from the edges because centroiding can't be done at the edge
cp = (np.argwhere((img[gs//2 : -gs//2, gs//2 : -gs//2] > low_th_px)*
(img[gs//2 : -gs//2, gs//2 : -gs//2] < high_th_px))+
np.array([gs//2, gs//2]))
if len(cp) > 100000:
raise RuntimeError('Threshold too low or acquisition time too long')
res = []
dres = []
for cy, cx in cp:
spot = img[cy - gs//2 : cy + gs//2 + 1, cx - gs//2 : cx+gs//2 +1]
#spot[spot < 0] = 0
if (spot > img[cy, cx]).sum() == 0:
mx = np.average(np.arange(cx - gs//2, cx + gs//2 + 1),
weights=spot.sum(axis=0))
my = np.average(np.arange(cy - gs//2, cy + gs//2 + 1),
weights=spot.sum(axis=1))
if (spot.sum()>=SpotLOW) and (spot.sum()<SpotHIGH):
res.append((mx, my))
elif (spot.sum()>SpotHIGH):
res.append((mx, my))
res.append((mx, my))
dres.append((mx, my))
return res, dres
def centroid(self, data, bins=None, method='auto'):
"""calculate a spectrum by finding the centroid of individual photons
This takes the `xarray.Dataset` `data` and returns a copy of it, with
a new `xarray.DataArray` named `spectrum` added, which contains the
energy spectrum calculated for each hRIXS image.
The `method` keyword switches between algorithms for deciding whether
a photon hit occurred: "manual" uses the fixed thresholds of
`centroid_one`, while "auto" uses the energy-dependent thresholds of
`centroid_two`.
Example
-------
h.centroid(data) # find photons in all images of the run
data.spectrum[0, :].plot() # plot the spectrum of the first image
"""
if bins is None:
bins = self.BINS
ret = np.zeros((len(data[self.DETECTOR]), bins))
retd = np.zeros((len(data[self.DETECTOR]), bins))
total_hits = np.zeros((len(data[self.DETECTOR])))
dbl_hits = np.zeros((len(data[self.DETECTOR])))
for i, (image, r, rd) in enumerate(zip(data[self.DETECTOR], ret, retd)):
if method=='manual':
c, d = self.centroid_one(
image.values[self.X_RANGE, self.Y_RANGE])
elif method=='auto':
energy = data['nrj'][i].data
c, d = self.centroid_two(
image.values[self.X_RANGE, self.Y_RANGE], energy)
if not len(c):
continue
rc = np.array(c)
r[:], _ = np.histogram(
rc[:, 0] - self.parabola(rc[:, 1]),
bins=bins, range=(0, self.Y_RANGE.stop - self.Y_RANGE.start))
total_hits[i] = rc.shape[0]
rcd = np.array(d)
dbl_hits[i] = rcd.shape[0]
# Account for case where no double hits are found
if rcd.shape[0] == 0:
continue
else:
rd[:], _ = np.histogram(
rcd[:, 0] - self.parabola(rcd[:, 1]),
bins=bins, range=(0, self.Y_RANGE.stop - self.Y_RANGE.start))
data.coords["energy"] = (
np.linspace(self.Y_RANGE.start, self.Y_RANGE.stop, bins)
* self.ENERGY_SLOPE + self.ENERGY_INTERCEPT)
data['spectrum'] = (("trainId", "energy"), ret)
data['dbl_spectrum'] = (("trainId", "energy"), retd)
data['total_hits'] = ("trainId", total_hits)
data['double_hits'] = ("trainId", dbl_hits)
return data
def parabola(self, x):
return (self.CURVE_B * x + self.CURVE_A) * x
def integrate(self, data):
"""calculate a spectrum by integration
This takes the `xarray` `data` and returns a copy of it, with a new
dataarray named `spectrum` added, which contains the energy spectrum
calculated for each hRIXS image.
First the energy that corresponds to each pixel is calculated.
Then all pixels within an energy range are summed, where the intensity
of one pixel is distributed among the two energy ranges the pixel
spans, proportionally to the overlap between the pixel and bin energy
ranges.
The resulting data is normalized to one pixel, i.e. it gives the
average intensity that arrived on one pixel.
Example
-------
h.integrate(data) # create spectrum by summing pixels
data.spectrum[0, :].plot() # plot the spectrum of the first image
"""
bins = self.Y_RANGE.stop - self.Y_RANGE.start
margin = 10
ret = np.zeros((len(data[self.DETECTOR]), bins - 2 * margin))
if self.USE_DARK:
dark_image = self.dark_image.values[self.X_RANGE, self.Y_RANGE]
images = data[self.DETECTOR].values[:, self.X_RANGE, self.Y_RANGE]
x, y = np.ogrid[:images.shape[1], :images.shape[2]]
quo, rem = divmod(y - self.parabola(x), 1)
quo = np.array([quo, quo + 1])
rem = np.array([rem, 1 - rem])
wrong = (quo < margin) | (quo >= bins - margin)
quo[wrong] = margin
rem[wrong] = 0
quo = (quo - margin).astype(int).ravel()
for image, r in zip(images, ret):
if self.USE_DARK:
image = image - dark_image
r[:] = np.bincount(quo, weights=(rem * image).ravel())
ret /= np.bincount(quo, weights=rem.ravel())
data.coords["energy"] = (
np.arange(self.Y_RANGE.start + margin, self.Y_RANGE.stop - margin)
* self.ENERGY_SLOPE + self.ENERGY_INTERCEPT)
data['spectrum'] = (("trainId", "energy"), ret)
return data
aggregators = dict(
hRIXS_det=lambda x, dim: x.sum(dim=dim),
MaranaX=lambda x, dim: x.sum(dim=dim),
Delay=lambda x, dim: x.mean(dim=dim),
hRIXS_delay=lambda x, dim: x.mean(dim=dim),
hRIXS_norm=lambda x, dim: x.sum(dim=dim),
spectrum=lambda x, dim: x.sum(dim=dim),
dbl_spectrum=lambda x, dim: x.sum(dim=dim),
total_hits=lambda x, dim: x.sum(dim=dim),
dbl_hits=lambda x, dim: x.sum(dim=dim),
counts=lambda x, dim: x.sum(dim=dim)
)
def aggregator(self, da, dim):
agg = self.aggregators.get(da.name)
if agg is None:
return None
return agg(da, dim=dim)
def aggregate(self, ds, var=None, dim="trainId"):
"""aggregate (i.e. mostly sum) all data within one dataset
take all images in a dataset and aggregate them and their metadata.
For images, spectra and normalizations that means adding them, for
others (e.g. delays) adding would not make sense, so we treat them
properly. The aggregation functions of each variable are defined
in the aggregators attribute of the class.
If var is specified, group the dataset by var prior to aggregation.
A new variable "counts" gives the number of frames aggregated in
each group.
Parameters
----------
ds: xarray Dataset
the dataset containing RIXS data
var: string
One of the variables in the dataset. If var is specified, the
dataset is grouped by var prior to aggregation. This is useful
for sorting e.g. a dataset that contains multiple delays.
dim: string
the dimension over which to aggregate the data
Example
-------
h.centroid(data) # create spectra from finding photons
agg = h.aggregate(data) # sum all spectra
agg.spectrum.plot() # plot the resulting spectrum
agg2 = h.aggregate(data, 'hRIXS_delay') # group data by delay
agg2.spectrum[0, :].plot() # plot the spectrum for first value
"""
ds["counts"] = xr.ones_like(ds[dim])
if var is not None:
groups = ds.groupby(var)
return groups.map(self.aggregate_ds, dim=dim)
return self.aggregate_ds(ds, dim)
def aggregate_ds(self, ds, dim='trainId'):
ret = ds.map(self.aggregator, dim=dim)
ret = ret.drop_vars([n for n in ret if n not in self.aggregators])
return ret
def normalize(self, data, which="hRIXS_norm"):
""" Adds a 'normalized' variable to the dataset defined as the
ration between 'spectrum' and 'which'
Parameters
----------
data: xarray Dataset
the dataset containing hRIXS data
which: string, default="hRIXS_norm"
one of the variables of the dataset, usually "hRIXS_norm"
or "counts"
"""
return data.assign(normalized=data["spectrum"] / data[which])
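# Example (editor's sketch): a minimal photon-counting chain with the hRIXS
# class above. The proposal/run numbers and Y_RANGE are hypothetical.
#     h = hRIXS(2769, detector='MaranaX')
#     h.Y_RANGE = slice(700, 900)
#     data = h.from_run(145)
#     data = h.centroid(data)                 # spectra from photon hits
#     agg = h.aggregate(data)                 # sum over all trains
#     agg = h.normalize(agg, which='counts')
#     agg.normalized.plot()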
class MaranaX(hRIXS):
"""
A spin-off of the hRIXS class: with parallelized centroiding
"""
NUM_MAX_HITS = 30
psh.set_default_context('processes', num_workers=20)
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self._centroid_result = None
def centroid(self, data, bins=None, **kwargs):
# 0.a Setup constants that we might use
if bins is None:
bins = self.BINS
images = data[self.DETECTOR]
# 0.b. Allocate output arrays (the naming is a bit messy)
num_images, _, Nx = images.shape
self._centroid_result = {
'total_hist': psh.alloc(shape=(num_images, bins)),
'double_hist': psh.alloc(shape=(num_images, bins)),
'total_hits': psh.alloc(shape=(num_images,)),
'double_hits': psh.alloc(shape=(num_images,)),
}
# 0.c Use a xr.DataArray that pasha understands
data_array = images.assign_coords(nrj=data['nrj'])
# 1. Calculate with parallelization
psh.map(self._centroid_tb_map, data_array)
# 2. Finalize: set the results back to the dataset
data.coords["energy"] = (
np.linspace(self.Y_RANGE.start or 0, self.Y_RANGE.stop or Nx, bins)
* self.ENERGY_SLOPE + self.ENERGY_INTERCEPT)
data['spectrum'] = (("trainId", "energy"),
self._centroid_result['total_hist'])
data['dbl_spectrum'] = (("trainId", "energy"),
self._centroid_result['double_hist'])
data['total_hits'] = ("trainId",
self._centroid_result['total_hits'])
data['double_hits'] = ("trainId",
self._centroid_result['double_hits'])
return data
def _centroid_tb_map(self, _, index, data):
self._centroid_map(index,
image=data.values,
energy=data.coords['nrj'].item())
def _centroid_map(self, index, *, image, energy):
total, double = self._centroid_task(index, image, energy)
# Check if there are results: don't do anything otherwise
if not len(total):
return
self._histogram_task(index, total, double,
default_range=(0, image.shape[1]))
def _centroid_task(self, index, image, energy):
# Calculate the centroid and store it on the allocated output array
total, double = self.centroid_two(image[self.X_RANGE, self.Y_RANGE],
energy)
total, double = np.array(total), np.array(double)
total_hits_list = self._centroid_result.get('total_hits_list')
if total_hits_list is not None and len(total):
total_hits_list[index][:min(len(total), self.NUM_MAX_HITS)] \
= total[:self.NUM_MAX_HITS]
double_hits_list = self._centroid_result.get('double_hits_list')
if double_hits_list is not None and len(double):
double_hits_list[index][:min(len(double), self.NUM_MAX_HITS)] \
= double[:self.NUM_MAX_HITS]
return total, double
def _histogram_task(self, index, total, double, default_range):
# Prepare
_, bins = self._centroid_result['total_hist'].shape
hist_range = (self.Y_RANGE.start or default_range[0],
self.Y_RANGE.stop or default_range[1])
# Calculate total spectrum
self._centroid_result['total_hist'][index], _ = np.histogram(
total[:, 0] - self.parabola(total[:, 1]),
bins=bins, range=hist_range)
self._centroid_result['total_hits'][index] = len(total)
# Calculate double spectrum
double_hits = len(double)
if double_hits:
self._centroid_result['double_hist'][index], _ = np.histogram(
double[:, 0] - self.parabola(double[:, 1]),
bins=bins, range=hist_range)
self._centroid_result['double_hits'][index] = double_hits
def centroid_from_run(self, runNB, proposal=None, extra_fields=(),
drop_first=False, subset=None, bins=None,
return_hits=False):
"""
A combined function of `from_run()` and `centroid()`, which
uses `extra_data` and `pasha` to avoid bulk loading of files.
"""
# 0.a Setup constants that we might use
if proposal is None:
proposal = self.PROPOSAL
if drop_first:
subset = slice(1, None)
if bins is None:
bins = self.BINS
run_mnemo = set(self.DETECTOR_FIELDS['MaranaX']) | set(extra_fields)
# 0.b. Open the run with extra-data and select the relevant fields
run = open_run(proposal, runNB)
if subset is not None:
run = run.select_trains(subset)
# Filter out mnemonics that do not exist in the run files
run_mnemo = set([mnemo for mnemo in run_mnemo
if self._is_mnemo_in_run(mnemo, run)])
assert set(self.DETECTOR_FIELDS['MaranaX']).issubset(run_mnemo)
sources = [self._mnemo_to_prop(mnemo)[0] for mnemo in run_mnemo]
selection = run.select([source for source in sources
if source in run.all_sources],
require_all=True)
# 0.c. Allocate output arrays (the naming is a bit messy)
mara_source, mara_key = self._mnemo_to_prop('MaranaX')
num_images, _, Nx = selection[mara_source, mara_key].shape
self._centroid_result = {
'total_hist': psh.alloc(shape=(num_images, bins)),
'double_hist': psh.alloc(shape=(num_images, bins)),
'total_hits': psh.alloc(shape=(num_images,)),
'double_hits': psh.alloc(shape=(num_images,)),
}
if return_hits:
self._centroid_result['total_hits_list'] = psh.alloc(
shape=(num_images, self.NUM_MAX_HITS, 2), fill=np.nan)
self._centroid_result['double_hits_list'] = psh.alloc(
shape=(num_images, self.NUM_MAX_HITS, 2), fill=np.nan)
# 1. Calculate with parallelization
psh.map(self._centroid_ed_map, selection)
# 2. Finalize: generate the rest of the mnemonics to the dataset
# and set the results also.
data = xr.merge(
[selection.get_array(*self._mnemo_to_prop(mnemo), name=mnemo)
for mnemo in run_mnemo - {'MaranaX'}], join='inner')
data.coords["energy"] = (
np.linspace(self.Y_RANGE.start or 0, self.Y_RANGE.stop or Nx, bins)
* self.ENERGY_SLOPE + self.ENERGY_INTERCEPT)
data['spectrum'] = (("trainId", "energy"),
self._centroid_result['total_hist'])
data['dbl_spectrum'] = (("trainId", "energy"),
self._centroid_result['double_hist'])
data['total_hits'] = ("trainId",
self._centroid_result['total_hits'])
data['double_hits'] = ("trainId",
self._centroid_result['double_hits'])
if return_hits:
data['total_hits_list'] = (
('trainId', 'hits', 'coord'),
self._centroid_result['total_hits_list'])
data['double_hits_list'] = (
('trainId', 'hits', 'coord'),
self._centroid_result['double_hits_list'])
# Add attributes
data.attrs.update(
CENTROID_THRESHOLD=self.CENTROID_THRESHOLD,
NUM_BINS=bins,
CURVATURE_CORRECTION=[self.CURVE_A, self.CURVE_B],
ENERGY_CALIBRATION=[self.ENERGY_SLOPE, self.ENERGY_INTERCEPT],
Y_RANGE=[self.Y_RANGE.start or 0,
self.Y_RANGE.stop or Nx]
)
return data
def _centroid_ed_map(self, _, index, trainId, data):
mara_source, mara_key = self._mnemo_to_prop('MaranaX')
nrj_source, nrj_key = self._mnemo_to_prop('nrj')
self._centroid_map(
index,
image=data[mara_source][mara_key],
energy=data[nrj_source][nrj_key])
@staticmethod
def _mnemo_to_prop(mnemo):
prop = tb.constants.mnemonics[mnemo][0]
return prop['source'], prop['key']
def _is_mnemo_in_run(self, mnemo, run):
source, key = self._mnemo_to_prop(mnemo)
if source not in run.all_sources:
return False
return key in run.keys_for_source(source)
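# Example (editor's sketch): parallelized centroiding without bulk-loading
# the detector images, using MaranaX.centroid_from_run(). Numbers are
# hypothetical.
#     m = MaranaX(2769)
#     m.Y_RANGE = slice(700, 900)
#     m.CENTROID_THRESHOLD = [0.2, 1]
#     data = m.centroid_from_run(145, drop_first=True)
#     data.spectrum.sum('trainId').plot()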
""" Beam Arrival Monitor related sub-routines
Copyright (2021) SCS Team.
(contributions preferrably comply with pep8 code structure
guidelines.)
"""
import logging
import numpy as np
import xarray as xr
import extra_data as ed
import re
import os
from pathlib import Path
from multiprocessing import Pool
from extra.components import XrayPulses, AdqRawChannel
from ..misc.bunch_pattern_external import is_sase_3
from ..mnemonics_machinery import (mnemonics_to_process,
mnemonics_for_run)
from extra_data.read_machinery import find_proposal
from ..constants import mnemonics as _mnemonics
__all__ = [
'get_pes_params',
'get_pes_tof',
'save_pes_avg_traces',
'load_pes_avg_traces'
]
log = logging.getLogger(__name__)
def get_pes_tof(proposal, runNB, mnemonic, start=0, origin=None,
width=None, subtract_baseline=False,
baseStart=None, baseWidth=40,merge_with=None):
"""
Extracts time-of-flight spectra from raw digitizer traces. The spectra
are aligned by pulse Id using the SASE 3 bunch pattern. If origin is
not None, a time coordinate in nanoseconds 'time_ns' is computed and
added to the DataArray.
Parameters
----------
proposal:int
The proposal number.
runNB: int
The run number.
mnemonic: str
mnemonic for PES, e.g. "PES_2Araw".
start: int
starting sample of the first spectrum in the raw trace.
origin: int
sample of the trace that corresponds to time-of-flight origin,
also called prompt. Used to compute the 'time_ns'
coordinates. If None, computation of 'time_ns' is skipped.
width: int
number of samples per spectrum. If None, the number of samples
for 4.5 MHz repetition rate is used.
subtract_baseline: bool
If True, subtract the baseline defined by baseStart and baseWidth from each
spectrum.
baseStart: int
starting sample of the baseline.
baseWidth: int
number of samples to average (starting from baseStart) for baseline
calculation.
merge_with: xarray Dataset
If provided, the resulting Dataset will be merged with this
one.
Returns
-------
pes: xarray DataArray
DataArray containing the PES time-of-flight spectra.
Example
-------
>>> import toolbox_scs as tb
>>> import toolbox_scs.detectors as tbdet
>>> proposal, runNB = 900447, 12
>>> pes = tbdet.get_pes_tof(proposal, runNB, 'PES_2Araw',
>>> start=2557, origin=76)
"""
run = ed.open_run(proposal, runNB)
all_mnemonics = mnemonics_for_run(run)
mnemo = all_mnemonics.get(mnemonic)
if mnemo is None:
print(f'Mnemonic {mnemonic} not found in run. Skipping.')
return None
PULSE_PERIOD = 440
pattern = XrayPulses(run)
regular = True
if pattern.is_constant_pattern() is False:
print('The number of pulses changed during the run.')
pulse_ids = np.unique(pattern.pulse_ids(labelled=False, copy=False))
regular = False
else:
pulse_ids = pattern.peek_pulse_ids(labelled=False)
npulses = len(pulse_ids)
period = 1
if npulses > 1:
periods = np.diff(pulse_ids)
if len(np.unique(periods)) > 1:
regular = False
period = min(periods)
npulses = int((max(pulse_ids) - min(pulse_ids)) / period) + 1
period *= PULSE_PERIOD
kd = run[mnemo['source'], mnemo['key']]
nsamples = kd.shape[1]
npulses_trace = int(np.floor((nsamples - start) / period))
if npulses_trace < npulses:
log.warning(f'The digitizer only recorded {npulses_trace} pulses '
f'out of the {npulses} pulses in the train.')
else:
npulses_trace = npulses
indices = np.array([start + i*period for i in range(npulses_trace + 1)])
outp = kd.ndarray()
outp = outp[:, indices[0]:indices[-1]].reshape([outp.shape[0],
npulses_trace, period])
spectra = xr.DataArray(outp, dims=['trainId', 'sa3_pId', 'sampleId'],
coords={'trainId': kd.train_id_coordinates(),
'sa3_pId': pulse_ids[:npulses_trace],
'sampleId': np.arange(period)})
if subtract_baseline:
if baseStart is None:
baseStart = 0
spectra = spectra - spectra.isel(
sampleId=slice(baseStart, baseStart + baseWidth)).mean('sampleId')
if width is None:
width = PULSE_PERIOD
spectra = spectra.isel(sampleId=slice(0, width), drop=True)
spectra.attrs['start'] = start
if origin is not None:
try:
adq = AdqRawChannel(run, mnemonic.split('_')[1].split('raw')[0])
sample_rate = adq.sampling_rate
except Exception as e:
log.warning(e)
sample_rate = 1986111111.1111112
origin -= start
time_ns = (np.arange(spectra.sizes['sampleId'])
- origin)/sample_rate * 1e9
spectra = spectra.assign_coords(time_ns=('sampleId', time_ns))
spectra.attrs['origin'] = origin
spectra = spectra.rename(mnemonic.replace('raw', 'spectrum'))
if merge_with is not None:
return merge_with.merge(spectra.to_dataset(promote_attrs=True),
join='inner')
return spectra.rename(mnemonic.replace('raw', 'spectrum'))
def get_pes_params(run, channel=None):
"""
Extract PES parameters for a given extra_data DataCollection.
Parameters are gas, binding energy, retardation voltages or all
voltages of the MPOD.
Parameters
----------
run: extra_data.DataCollection
DataCollection containing the digitizer data
channel: str
Channel name or PES mnemonic, e.g. '2A' or 'PES_1Craw'.
If None, or if the channel is not found in the data,
the retardation voltage for all channels is retrieved.
Returns
-------
params: dict
dictionary of PES parameters
"""
params = {}
sel = run.select_trains(ed.by_index[:20])
gas_dict = {'N2': 409.9, 'Ne': 870.2, 'Kr': 1921, 'Xe': 1148.7}
for gas in gas_dict.keys():
mnemo = _mnemonics[f'PES_{gas}'][0]
arr = sel.get_run_value(mnemo['source'], mnemo['key'])
if arr == 'ON':
params['gas'] = gas
params['binding_energy'] = gas_dict[gas]
break
if 'gas' not in params:
params['gas'] = 'unknown'
log.warning('Could not find which PES gas was used.')
mpod_mapper = 'SA3_XTD10_PES/MDL/MPOD_MAPPER'
if mpod_mapper in run.all_sources:
if channel is None:
channel = [f'{a//4 + 1}{b}' for a, b in enumerate(
['A', 'B', 'C', 'D']*4)]
else:
if 'raw' in channel:
channel = channel.split('raw')[0].split('_')[1]
channel = [channel]
for c in channel:
rv = get_pes_rv(run, c, mpod_mapper)
if rv is not None:
params[f'{c}_RV'] = rv
elif 'SA3_XTD10_PES/MDL/DAQ_MPOD' in run.all_sources:
voltages = get_pes_voltages(run)
if voltages is not None:
params.update(voltages)
return params
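# Example (editor's sketch): retrieving the PES settings of a run
# (proposal/run numbers are hypothetical):
#     run = ed.open_run(900447, 12)
#     params = get_pes_params(run, channel='2A')
#     print(params.get('gas'), params.get('2A_RV'))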
def get_pes_rv(run, channel, mpod_mapper='SA3_XTD10_PES/MDL/MPOD_MAPPER'):
channel_to_group = {'4D': 1, '4B': 1, '4C': 1, '4A': 1,
'2A': 2, '1C': 2, '2C': 2, '2D': 2,
'3D': 3, '3B': 3, '2B': 3, '3A': 3,
'1A': 4, '3C': 4, '1B': 4, '1D': 4}
group = channel_to_group.get(channel)
if group is None:
log.warning(f'Could not find group for channel {channel}.')
return None
key = f'Group{group}D.channelMeasurementSenseVoltage.value'
voltage = run[mpod_mapper, key].ndarray().mean()
return voltage
def get_pes_voltages(run, device='SA3_XTD10_PES/MDL/DAQ_MPOD'):
"""
Extract PES voltages read by the MDL watchdog of the MPOD device.
Parameters
----------
run: extra_data.DataCollection
DataCollection containing PES data.
device: string
Name of the device containing the voltage data.
Returns
-------
voltages: dict
        dictionary of voltages, or None if the device is not found.
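    Example
    -------
    (Illustrative sketch; proposal and run numbers are placeholders and
    access via the toolbox_scs.detectors namespace is assumed.)
    >>> import toolbox_scs as tb
    >>> import toolbox_scs.detectors as tbdet
    >>> run = tb.open_run(900447, 12)
    >>> voltages = tbdet.get_pes_voltages(run)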
"""
if device not in run.all_sources:
log.warning(f'Could not find {device} in run. Skipping.')
return None
    a = re.compile(r"u\d{3}\.value")
tid, da = run.train_from_index(0, devices=device)
voltages = {}
for k in da[device]:
if len(a.findall(k)) == 1:
voltages[k.split('.')[0]] = da[device][k]
return voltages
def calculate_average(run, name, mnemo):
return run[mnemo['source'], mnemo['key']].xarray(
name=name, extra_dims=mnemo['dim']).mean('trainId')
def save_pes_avg_traces(proposal, runNB, channels=None,
subdir='usr/processed_runs'):
'''
Save average traces of PES into an h5 file.
Parameters
----------
proposal:int
The proposal number.
runNB: int
The run number.
channels: str or list
The PES channels or mnemonics, e.g. '2A', ['2A', '3C'],
['PES_1Araw', 'PES_4Draw', '3B']
subdir: str
subdirectory. The data is stored in
<proposal path>/<subdir>/r{runNB:04d}/f'r{runNB:04d}-pes-data.h5'
Output
------
    xarray Dataset saved in an h5 file containing the PES average traces.
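    Example
    -------
    (Illustrative sketch; proposal and run numbers are placeholders and
    access via the toolbox_scs.detectors namespace is assumed.)
    >>> import toolbox_scs.detectors as tbdet
    >>> tbdet.save_pes_avg_traces(900447, 12, channels=['2A', '3C'])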
'''
root = find_proposal(f'p{proposal:06d}')
path = os.path.join(root, subdir + f'/r{runNB:04d}/')
Path(path).mkdir(parents=True, exist_ok=True)
fname = path + f'r{runNB:04d}-pes-data.h5'
if channels is None:
channels = [f'{a//4 + 1}{b}' for a, b in enumerate(
['A', 'B', 'C', 'D']*4)]
if isinstance(channels, str):
channels = [channels]
run = ed.open_run(proposal, runNB, parallelize=False)
all_mnemos = mnemonics_for_run(run)
# use multiprocessing.Pool
args = []
for c in channels:
m = c
if 'raw' not in c:
m = f'PES_{c}raw'
if m not in all_mnemos:
continue
args.append([run, m.replace('raw', 'avg'), all_mnemos[m]])
if len(args) == 0:
        log.warning('No PES average trace to save. Skipping.')
return
with Pool(len(args)) as pool:
avg_traces = pool.starmap(calculate_average, args)
avg_traces = xr.merge(avg_traces)
ds = xr.Dataset()
if os.path.isfile(fname):
ds = xr.load_dataset(fname)
        ds = ds.drop_vars(list(avg_traces.data_vars), errors='ignore')
ds = ds.merge(avg_traces)
ds.to_netcdf(fname, format='NETCDF4')
return
def load_pes_avg_traces(proposal, runNB, channels=None,
subdir='usr/processed_runs'):
'''
Load existing PES average traces.
Parameters
----------
proposal:int
The proposal number.
runNB: int
The run number.
channels: str or list
The PES channels or mnemonics, e.g. '2A', ['2A', '3C'],
['PES_1Araw', 'PES_4Draw', '3B']
subdir: str
subdirectory. The data is stored in
<proposal path>/<subdir>/r{runNB:04d}/f'r{runNB:04d}-pes-data.h5'
Output
------
ds: xarray Dataset
dataset containing the PES average traces.
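    Example
    -------
    (Illustrative sketch; proposal and run numbers are placeholders and
    access via the toolbox_scs.detectors namespace is assumed.)
    >>> import toolbox_scs.detectors as tbdet
    >>> ds = tbdet.load_pes_avg_traces(900447, 12, channels=['2A', '3C'])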
'''
root = find_proposal(f'p{proposal:06d}')
path = os.path.join(root, subdir + f'/r{runNB:04d}/')
fname = path + f'r{runNB:04d}-pes-data.h5'
if channels is None:
channels = [f'PES_{a//4 + 1}{b}avg' for a, b in
enumerate(['A', 'B', 'C', 'D']*4)]
if isinstance(channels, str):
channels = [channels]
    for i, c in enumerate(channels):
        if 'raw' in c:
            channels[i] = c.replace('raw', 'avg')
        elif 'PES_' not in c and 'avg' not in c:
            channels[i] = f'PES_{c}avg'
if os.path.isfile(fname):
ds = xr.load_dataset(fname)
channels = [c for c in channels if c in ds]
ds = ds[channels]
return ds
else:
log.warning(f'{fname} is not a valid file.')
import numpy as np
import xarray as xr
import toolbox_scs as tb
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from .hrixs import gauss1d
__all__ = ['Viking']
def plot_viking_xas(xas, plot_errors=True, xas_ylim=(-1, 3)):
fig, ax = plt.subplots(3, 1, figsize=(7, 7), sharex=True)
ax[0].plot(xas.newt_x, xas['I0'])
ax[0].grid()
ax[0].set_title('I0 spectra')
ax[1].plot(xas.newt_x, xas['It'])
ax[1].grid()
ax[1].set_title('It spectra')
ax[2].plot(xas.newt_x, xas['absorptionCoef'])
ax[2].set_ylim(*xas_ylim)
ax[2].grid()
ax[2].set_title('XAS')
if plot_errors:
ax[0].fill_between(xas.newt_x,
xas['I0'] - 1.96*xas['I0_stderr'],
xas['I0'] + 1.96*xas['I0_stderr'],
alpha=0.2)
ax[1].fill_between(xas.newt_x,
xas['It'] - 1.96*xas['It_stderr'],
xas['It'] + 1.96*xas['It_stderr'],
alpha=0.2)
ax[2].fill_between(xas.newt_x,
xas['absorptionCoef'] - 1.96*xas['absorptionCoef_stderr'],
xas['absorptionCoef'] + 1.96*xas['absorptionCoef_stderr'],
alpha=0.2)
def plot_viking_calibration(spectra, x_pos, nrj, energy_calib,
gaussian_fits, runNBs):
fig, ax = plt.subplots(2, 1, figsize=(7, 6), sharex=True)
for i in range(len(x_pos)):
x = spectra[i].newt_x.values
ax[0].plot(spectra[i].newt_x, spectra[i], color=f'C{i}',
label=f'run {runNBs[i]}')
ax[0].plot(x, gauss1d(x, *gaussian_fits[i]),
ls='--', color=f'C{i}')
ax[0].legend()
ax[0].set_ylabel('intensity [arb. un.]')
ax[1].scatter(x_pos, nrj, label='data')
ax[1].plot(x_pos, np.polyval(energy_calib, x_pos), color='r',
label=f"{energy_calib[2]:.4f}+{energy_calib[1]:.4f}"
f"*pixel+{energy_calib[0]:.4e}*pixel^2")
ax[1].legend()
ax[1].set_xlabel('pixel')
ax[1].set_ylabel('energy [eV]')
ax[1].grid()
fig.suptitle(f'Viking calibration, runs {runNBs}')
# -----------------------------------------------------------------------------
# Viking class
class Viking:
"""The Viking analysis (spectrometer used in combination with Andor Newton
camera)
The objects of this class contain the meta-information about the settings
of the spectrometer, not the actual data, except possibly a dark image
for background subtraction.
The actual data is loaded into `xarray`s via the method `from_run()`,
and stays there.
Attributes
----------
PROPOSAL: int
the number of the proposal
X_RANGE: slice
the slice to take in the non-dispersive direction, in pixels.
Defaults to the entire width.
Y_RANGE: slice
the slice to take in the energy dispersive direction
USE_DARK: bool
        whether to do dark subtraction. Initially `False`; automatically
        set to `True` once a dark image is loaded via `load_dark()`, but
        may be reset.
ENERGY_CALIB: 1D array (len=3)
The 2nd degree polynomial coefficients for calibration from pixel
to energy. Defaults to [0, 1, 0] (no calibration applied).
BL_POLY_DEG: int
        the degree of the polynomial used for baseline subtraction.
Defaults to 1.
BL_SIGNAL_RANGE: list
the dispersive-axis range, defined by an interval [min, max],
to avoid when fitting a polynomial for baseline subtraction.
Multiple ranges can be provided in the form
[[min1, max1], [min2, max2], ...].
FIELDS: list of str
the fields to be loaded from the data. Add additional fields if so
desired.
Example
-------
proposal = 2953
v = Viking(proposal)
v.X_RANGE = slice(0, 1900)
v.Y_RANGE = slice(38, 80)
v.ENERGY_CALIB = [1.47802667e-06, 2.30600328e-02, 5.15884589e+02]
v.BL_SIGNAL_RANGE = [500, 545]
"""
def __init__(self, proposalNB):
self.PROPOSAL = proposalNB
self.FIELDS = ['newton']
# 2 deg polynomial energy calibration
self.ENERGY_CALIB = [0, 1, 0]
# dark
self.USE_DARK = False
self.dark_image = None
# image range
self.X_RANGE = slice(None, None)
self.Y_RANGE = slice(None, None)
# polynomial degree for background subtraction
self.BL_POLY_DEG = 1
self.BL_SIGNAL_RANGE = []
def set_params(self, **params):
for key, value in params.items():
setattr(self, key.upper(), value)
def get_params(self, *params):
if not params:
params = ('proposal', 'x_range', 'y_range', 'energy_calib',
'use_dark', 'bl_poly_deg', 'bl_signal_range', 'fields')
return {param: getattr(self, param.upper()) for param in params}
def from_run(self, runNB, add_attrs=True, tid_offset=-1):
"""load a run
Load the run `runNB`. A thin wrapper around `toolbox_scs.load`.
Parameters
----------
runNB: int
the run number
add_attrs: bool
if True, adds the camera parameters as attributes to the dataset
(see get_camera_params())
tid_offset: int
train Id offset of Newton camera
Output
------
ds: xarray Dataset
the dataset containing the camera images
Example
-------
data = v.from_run(145) # load run 145
data1 = v.from_run(145) # load run 145
data2 = v.from_run(155) # load run 155
data = xarray.concat([data1, data2], 'trainId') # combine both
"""
#separately load newton image and deal with trainId mismatch
roi = {'newton': {'newton': {'roi': (self.Y_RANGE, self.X_RANGE),
'dim': ['newt_y', 'newt_x']}}}
run, newton = tb.load(self.PROPOSAL, runNB, 'newton', rois=roi)
newton = newton.shift(trainId=tid_offset).astype(float)
#load the rest
fields = [f for f in self.FIELDS if f != 'newton']
if len(fields) == 0:
data = newton
else:
run, data = tb.load(self.PROPOSAL, runNB,
fields=fields)
data = data.merge(newton, join='inner')
data = data.assign_coords(newt_x=np.polyval(self.ENERGY_CALIB,
data['newt_x']))
if add_attrs:
params = self.get_camera_params(run)
for k, v in params.items():
data.attrs[k] = v
return data
def load_dark(self, runNB=None):
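        """
        Load a dark run and store the average of its images in
        `self.dark_image` for subsequent dark subtraction; sets
        USE_DARK to True. If runNB is None, dark subtraction is
        disabled instead.
        """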
if runNB is None:
self.USE_DARK = False
return
data = self.from_run(runNB, add_attrs=False)
self.dark_image = data['newton'].mean(dim='trainId')
self.dark_image.attrs['runNB'] = runNB
self.USE_DARK = True
def integrate(self, data):
'''
This function calculates the mean over the non-dispersive dimension
to create a spectrum. If the camera parameters are known, the
spectrum is multiplied by the number of photoelectrons per ADC count.
A new variable "spectrum" is added to the data.
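        Example
        -------
        (Sketch; the run number is a placeholder and `v` is a configured
        Viking instance, see the class docstring.)
        >>> data = v.from_run(145)
        >>> data = v.integrate(data)
        >>> avg_spectrum = data['spectrum'].mean(dim='trainId')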
'''
imgs = data['newton']
if self.USE_DARK:
imgs = imgs - self.dark_image
spectrum = imgs.mean(dim='newt_y')
if 'photoelectrons_per_count' in data.attrs:
spectrum *= data.attrs['photoelectrons_per_count']
data['spectrum'] = spectrum
return data
def get_camera_gain(self, run):
"""
Get the preamp gain of the camera in the Viking spectrometer for
a specified run.
Parameters
----------
run: extra_data DataCollection
information on the run
Output
------
gain: int
"""
gain = run.get_run_value('SCS_EXP_NEWTON/CAM/CAMERA',
'preampGain.value')
gain_dict = {0: 1, 1: 2, 2: 4}
return gain_dict[gain]
def e_per_counts(self, run, gain=None):
"""
Conversion factor from camera digital counts to photoelectrons
per count. The values can be found in the camera datasheet
(Andor Newton) but they have been slightly corrected for High
Sensitivity mode after analysis of runs 1204, 1207 and 1208,
proposal 2937.
Parameters
----------
run: extra_data DataCollection
information on the run
gain: int
the camera preamp gain
Output
------
ret: float
photoelectrons per count
"""
if gain is None:
gain = self.get_camera_gain(run)
hc = run.get_run_value('SCS_EXP_NEWTON/CAM/CAMERA',
'HighCapacity.value')
if hc == 0: # High Sensitivity
pe_dict = {1: 4., 2: 2.05, 4: 0.97}
elif hc == 1: # High Capacity
pe_dict = {1: 17.9, 2: 9., 4: 4.5}
return pe_dict[gain]
def get_camera_params(self, run):
        dic = {'vbin': 'imageSpecifications.verticalBinning.value',
'hbin': 'imageSpecifications.horizontalBinning.value',
'startX': 'imageSpecifications.startX.value',
'endX': 'imageSpecifications.endX.value',
'startY': 'imageSpecifications.startY.value',
'endY': 'imageSpecifications.endY.value',
'temperature': 'coolerActual.temperature.value',
'high_capacity': 'HighCapacity.value',
'exposure_s': 'exposureTime.value'
}
ret = {}
try:
for k, v in dic.items():
ret[k] = run.get_run_value('SCS_EXP_NEWTON/CAM/CAMERA', v)
except Exception as e:
print(e)
dic['temperature'] = 'CoolerActual.temperature.value'
for k, v in dic.items():
ret[k] = run.get_run_value('SCS_EXP_NEWTON/CAM/CAMERA', v)
ret['gain'] = self.get_camera_gain(run)
ret['photoelectrons_per_count'] = self.e_per_counts(run, ret['gain'])
return ret
def removePolyBaseline(self, data, key='spectrum'):
"""
        Removes a polynomial baseline from a spectrum, assuming a fixed
        position for the signal.
Parameters
----------
        data: xarray Dataset
            The Viking data containing the variable "spectrum"
            or the variable given by argument `key`
        key: str
            name of the variable to baseline-subtract.
            Defaults to "spectrum".
Output
------
data: xarray Dataset
the original dataset with the added variable "spectrum_nobl"
containing the baseline subtracted spectra.
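        Example
        -------
        (Sketch; assumes `v.BL_SIGNAL_RANGE` has been set and `data`
        contains the variable "spectrum", e.g. after calling
        `integrate()`.)
        >>> data = v.integrate(v.from_run(145))
        >>> data = v.removePolyBaseline(data)
        >>> data['spectrum_nobl']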
"""
        if key not in data:
            return data
x = data.newt_x
spectra = data[key]
mask = xr.ones_like(x, dtype=bool)
if len(self.BL_SIGNAL_RANGE) > 0:
if not hasattr(self.BL_SIGNAL_RANGE[0], '__len__'):
ranges = [self.BL_SIGNAL_RANGE]
else:
ranges = self.BL_SIGNAL_RANGE
for xrange in ranges:
mask = mask & ((x < xrange[0]) | (x > xrange[1]))
x_bl = x.where(mask, drop=True).astype(x.dtype)
bl = spectra.sel(newt_x=x_bl)
fit = np.polyfit(x_bl, bl.T, self.BL_POLY_DEG)
if len(spectra.shape) == 1:
return spectra - np.polyval(fit, x)
final_bl = np.empty(spectra.shape)
for t in range(spectra.shape[0]):
final_bl[t] = np.polyval(fit[:, t], x)
data[key+'_nobl'] = spectra - final_bl
return data
def xas(self, sam, ref, thickness=1, dim='newt_x', plot=False,
plot_errors=True, xas_ylim=(-1, 3)):
"""
Given two independent datasets (one with sample and one reference),
this calculates the average XAS spectrum (absorption coefficient),
associated standard deviation and standard error. The absorption
coefficient is defined as -log(It/I0)/thickness.
Parameters
----------
sam: xarray DataArray
the data array containing the spectra with sample
ref: xarray DataArray
the data array containing the spectra without sample
thickness: float
the thickness used for the calculation of the absorption
coefficient
dim: string
the name of the dimension along the dispersion axis
plot: bool
If True, plot the resulting average spectra.
plot_errors: bool
If True, adds the 95% confidence interval on the spectra.
xas_ylim: tuple or list of float
the y limits for the XAS plot.
Output
------
xas: xarray Dataset
the dataset containing the computed XAS quantities:
I0, It, absorptionCoef and their associated errors.
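        Example
        -------
        (Sketch; run numbers are placeholders, `v` is a configured
        Viking instance and both runs were integrated and
        baseline-subtracted beforehand.)
        >>> sam = v.removePolyBaseline(v.integrate(v.from_run(145)))
        >>> ref = v.removePolyBaseline(v.integrate(v.from_run(146)))
        >>> xas = v.xas(sam['spectrum_nobl'], ref['spectrum_nobl'],
        ...             plot=True)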
"""
        if not sam[dim].equals(ref[dim]):
            print(f'The {dim} coordinates of sam and ref differ. Aborting.')
            return
spectrum = sam.mean(dim='trainId')
std = sam.std(dim='trainId')
std_err = std / np.sqrt(sam.sizes['trainId'])
spectrum_ref = ref.mean(dim='trainId')
std_ref = ref.std(dim='trainId')
std_err_ref = std_ref / np.sqrt(ref.sizes['trainId'])
ds = xr.Dataset()
ds['It'] = spectrum
ds['It_std'] = std
ds['It_stderr'] = std_err
ds['I0'] = spectrum_ref
        ds['I0_std'] = std_ref
        ds['I0_stderr'] = std_err_ref
absorption = spectrum_ref / spectrum
# assume that It and I0 are independent variables:
absorption_std = np.abs(absorption) * np.sqrt(
std_ref**2 / spectrum_ref**2 + std**2 / spectrum**2)
absorption_stderr = np.abs(absorption) * np.sqrt(
(std_err_ref / spectrum_ref)**2 + (std_err / spectrum)**2)
ds['absorptionCoef'] = np.log(absorption) / thickness
ds['absorptionCoef_std'] = absorption_std / (thickness *
np.abs(absorption))
ds['absorptionCoef_stderr'] = absorption_stderr / (thickness *
np.abs(absorption))
ds.attrs['n_It'] = sam.sizes['trainId']
ds.attrs['n_I0'] = ref.sizes['trainId']
if plot:
plot_viking_xas(ds, plot_errors, xas_ylim)
return ds
    def calibrate(self, runList, source='nrj', order=2,
                  xrange=slice(None, None), plot=True):
"""
This routine determines the calibration coefficients to translate the
camera pixels into energy in eV. The Viking spectrometer is calibrated
using the beamline monochromator: runs with various monochromatized
photon energy are recorded and their peak position on the detector are
determined by Gaussian fitting. The energy vs. position data is then
fitted to a second degree polynomial.
Parameters
----------
        runList: list of int
            the list of runs containing the monochromatized spectra
        source: str
            the mnemonic of the photon energy readout used for the
            calibration, e.g. 'nrj'. If the mnemonic contains 'UND',
            the values are multiplied by 1e3 (keV to eV conversion).
        order: int
            the degree of the polynomial fit of energy vs. peak position
        xrange: slice
            the X_RANGE slice temporarily applied when loading the
            calibration runs
        plot: bool
            if True, the spectra, their Gaussian fits and the calibration
            curve are plotted.
        Output
        ------
        energy_calib: np.array
            the calibration coefficients (polynomial of degree `order`,
            highest power first)
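        Example
        -------
        (Sketch; the run numbers are placeholders for runs recorded at
        different monochromator photon energies.)
        >>> energy_calib = v.calibrate([195, 196, 197], source='nrj')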
"""
old_calib = self.ENERGY_CALIB
self.ENERGY_CALIB = [0, 1, 0]
x_pos = []
nrj = []
spectra = []
gaussian_fits = []
runNBs = []
for i, runNB in enumerate(runList):
if plot:
print(runNB)
xrange0 = self.X_RANGE
            self.X_RANGE = xrange
ds = self.from_run(runNB)
self.integrate(ds)
ds = self.removePolyBaseline(ds)
avg_spectrum = ds['spectrum_nobl'].mean(dim='trainId')
p0 = [np.max(avg_spectrum)-np.min(avg_spectrum),
avg_spectrum['newt_x'][avg_spectrum.argmax()].values,
10, np.min(avg_spectrum)]
eV_unit = 1e3 if 'UND' in source else 1
try:
popt, pcov = curve_fit(gauss1d, avg_spectrum['newt_x'].values,
avg_spectrum.values, p0=p0)
x_pos.append(popt[1])
gaussian_fits.append(popt)
spectra.append(avg_spectrum)
runNBs.append(runNB)
nrj.append(tb.load_run_values(self.PROPOSAL, runNB)[source]*eV_unit)
except Exception as e:
print(f'error with run {runNB}:', e)
self.ENERGY_CALIB = old_calib
finally:
self.X_RANGE = xrange0
x_pos = np.array(x_pos)
nrj = np.array(nrj)
idx = np.argsort(x_pos)
x_pos = x_pos[idx]
nrj = nrj[idx]
energy_calib = np.polyfit(x_pos, nrj, order)
if plot:
plot_viking_calibration(spectra, x_pos, nrj, energy_calib,
gaussian_fits, runNBs)
self.ENERGY_CALIB = energy_calib
self.GAUSSIAN_FITS = gaussian_fits
return energy_calib
""" XGM related sub-routines
Copyright (2019) SCS Team.
    (contributions preferably comply with pep8 code structure
guidelines.)
"""
import logging
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
from ..misc.bunch_pattern_external import is_sase_1, is_sase_3
from ..misc.bunch_pattern import (npulses_has_changed,
get_unique_sase_pId, load_bpt)
from ..mnemonics_machinery import mnemonics_for_run
from toolbox_scs.load import get_array
__all__ = [
'calibrate_xgm',
'get_xgm',
]
log = logging.getLogger(__name__)
def get_xgm(run, mnemonics=None, merge_with=None,
indices=slice(0, None)):
"""
    Loads and/or computes XGM data. Sources can be loaded on the
fly via the mnemonics argument, or processed from an existing dataset
(merge_with). The bunch pattern table is used to assign the pulse
id coordinates if the number of pulses has changed during the run.
Parameters
----------
run: extra_data.DataCollection
DataCollection containing the xgm data.
mnemonics: str or list of str
mnemonics for XGM, e.g. "SCS_SA3" or ["XTD10_XGM", "SCS_XGM"].
If None, defaults to "SCS_SA3" in case no merge_with dataset
is provided.
merge_with: xarray Dataset
If provided, the resulting Dataset will be merged with this
one. The XGM variables of merge_with (if any) will also be
computed and merged.
indices: slice, list, 1D array
Pulse indices of the XGM array in case bunch pattern is missing.
Returns
-------
xarray Dataset with pulse-resolved XGM variables aligned,
merged with Dataset *merge_with* if provided.
Example
-------
>>> import toolbox_scs as tb
>>> run, ds = tb.load(2212, 213, 'SCS_SA3')
>>> ds['SCS_SA3']
"""
xgm_mnemos = ['XTD10_SA', 'XTD10_XGM', 'SCS_SA', 'SCS_XGM']
m2 = []
if mnemonics is not None:
mnemonics = [mnemonics] if isinstance(mnemonics, str) else mnemonics
for m in mnemonics:
if any([(k in m) for k in xgm_mnemos]):
if merge_with is not None and m in merge_with:
continue
m2.append(m)
if merge_with is not None:
in_mw = []
for m, da in merge_with.items():
if any([(k in m) for k in xgm_mnemos]) and 'XGMbunchId' in da.dims:
in_mw.append(m)
m2 += in_mw
if len(m2) == 0:
log.info('no XGM mnemonics to process. Skipping.')
return merge_with
mnemonics = list(set(m2))
# Prepare the dataset of non-XGM data to merge with
if bool(merge_with):
ds_mw = merge_with.drop(mnemonics, errors='ignore')
else:
ds_mw = xr.Dataset()
# Load bunch pattern table and check pattern changes
bpt = load_bpt(run, ds_mw)
if bpt is not None:
sase1 = sase3 = None
mask_sa1 = mask_sa3 = None
if any([("SA1" in m) for m in mnemonics]) or any(
[("XGM" in m) for m in mnemonics]):
mask = mask_sa1 = is_sase_1(bpt)
sase1 = np.nonzero(mask_sa1[0].values)[0]
if any([("SA3" in m) for m in mnemonics]) or any(
[("XGM" in m) for m in mnemonics]):
mask = mask_sa3 = is_sase_3(bpt)
sase3 = np.nonzero(mask_sa3[0].values)[0]
if any([("XGM" in m) for m in mnemonics]):
mask = np.logical_or(mask_sa1, mask_sa3)
pattern_changed = ~(mask == mask[0]).all().values
ds = xr.Dataset()
for m in mnemonics:
if merge_with is not None and m in merge_with:
da_xgm = merge_with[m]
else:
da_xgm = get_array(run, m)
if bpt is not None:
if not pattern_changed:
ds_xgm = load_xgm_array(run, da_xgm, sase1, sase3)
else:
ds_xgm = align_xgm_array(da_xgm, bpt, mask_sa1, mask_sa3)
else:
xgm_val = da_xgm.values
xgm_val[xgm_val == 1] = np.nan
xgm_val[xgm_val == 0] = np.nan
da_xgm.values = xgm_val
da_xgm = da_xgm.dropna(dim='XGMbunchId', how='all')
ds_xgm = da_xgm.fillna(0).sel(XGMbunchId=indices).to_dataset()
ds = ds.merge(ds_xgm, join='inner')
# merge with non-XGM dataset
ds = ds_mw.merge(ds, join='inner')
return ds
def load_xgm_array(run, xgm, sase1, sase3):
"""
    From a raw XGM array, extracts and assigns pulse Id coordinates
    when the number of pulses does not change during the run.
    If 'XGM' is in the mnemonic, the data is split into two variables
    'SA1' and 'SA3'.
Parameters
----------
run: extra_data.DataCollection
DataCollection containing the xgm data.
xgm: xarray.DataArray
the raw XGM array
sase1: list or 1D array
the sase1 pulse ids
sase3: list or 1D array
the sase3 pulse ids
Returns
-------
ds_xgm: xarray.Dataset
the dataset containing the aligned XGM variable(s).
"""
xgm_val = xgm.values
xgm_val[xgm_val == 1] = np.nan
xgm_val[xgm_val == 0] = np.nan
xgm.values = xgm_val
xgm = xgm.dropna(dim='XGMbunchId', how='all')
xgm = xgm.fillna(0)
if 'XGM' in xgm.name:
sase1_3 = np.sort(np.concatenate([sase1, sase3]))
sase1_idx = [np.argwhere(sase1_3 == i)[0][0] for i in sase1]
sase3_idx = [np.argwhere(sase1_3 == i)[0][0] for i in sase3]
xgm_sa1 = xgm.isel(XGMbunchId=sase1_idx).rename(XGMbunchId='sa1_pId')
xgm_sa1 = xgm_sa1.assign_coords(sa1_pId=sase1)
xgm_sa1 = xgm_sa1.rename(xgm.name.replace('XGM', 'SA1'))
xgm_sa3 = xgm.isel(XGMbunchId=sase3_idx).rename(XGMbunchId='sa3_pId')
xgm_sa3 = xgm_sa3.assign_coords(sa3_pId=sase3)
xgm_sa3 = xgm_sa3.rename(xgm.name.replace('XGM', 'SA3'))
xgm = xr.merge([xgm_sa1, xgm_sa3])
elif 'SA1' in xgm.name:
xgm = xgm.rename(XGMbunchId='sa1_pId')
xgm = xgm.assign_coords(sa1_pId=sase1).rename(xgm.name)
xgm = xgm.to_dataset()
elif 'SA3' in xgm.name:
xgm = xgm.rename(XGMbunchId='sa3_pId')
xgm = xgm.assign_coords(sa3_pId=sase3).rename(xgm.name)
xgm = xgm.to_dataset()
return xgm
'''
def get_xgm_old(run, mnemonics=None, merge_with=None, keepAllSase=False,
indices=slice(0, None)):
"""
Load and/or computes XGM data. Sources can be loaded on the
fly via the key argument, or processed from an existing data set
(merge_with). The bunch pattern table is used to assign the pulse
id coordinates.
Parameters
----------
run: extra_data.DataCollection
DataCollection containing the digitizer data.
mnemonics: str or list of str
mnemonics for XGM, e.g. "SCS_SA3" or ["XTD10_XGM", "SCS_XGM"].
If None, defaults to "SCS_SA3" in case no merge_with dataset
is provided.
merge_with: xarray Dataset
If provided, the resulting Dataset will be merged with this
one. The XGM variables of merge_with (if any) will also be
computed and merged.
keepAllSase: bool
Only relevant in case of sase-dedicated trains. If True, all
trains are kept, else only those of the bunchPattern are kept.
indices: slice, list, 1D array
Pulse indices of the XGM array in case bunch pattern is missing.
Returns
-------
xarray Dataset with pulse-resolved XGM variables aligned,
merged with Dataset *merge_with* if provided.
Example
-------
>>> import toolbox_scs as tb
>>> import toolbox_scs.detectors as tbdet
>>> run, _ = tb.load(2212, 213)
>>> xgm = tbdet.get_xgm(run)
"""
# get the list of mnemonics to process
mnemonics = mnemonics_to_process(mnemonics, merge_with, 'XGM')
if len(mnemonics) == 0:
log.info('No array with unaligned XGM peaks to extract. Skipping.')
return merge_with
else:
log.info(f'Extracting XGM data from {mnemonics}.')
# Prepare the dataset of non-XGM data to merge with
if bool(merge_with):
ds = merge_with.drop(mnemonics, errors='ignore')
else:
ds = xr.Dataset()
run_mnemonics = mnemonics_for_run(run)
# check if bunch pattern table exists
if bool(merge_with) and 'bunchPatternTable' in merge_with:
bpt = merge_with['bunchPatternTable']
log.debug('Using bpt from merge_with dataset.')
elif 'bunchPatternTable' in run_mnemonics:
bpt = run.get_array(*run_mnemonics['bunchPatternTable'].values())
log.debug('Loaded bpt from DataCollection.')
else:
bpt = None
# Load the arrays, assign pulse ID and merge
for m in mnemonics:
if bool(merge_with) and m in merge_with:
arr = merge_with[m]
log.debug(f'Using {m} from merge_with dataset.')
else:
arr = run.get_array(*run_mnemonics[m].values(), name=m)
log.debug(f'Loading {m} from DataCollection.')
if bpt is not None:
arr = align_xgm_array(arr, bpt)
else:
arr = arr.where(arr != 1., drop=True).sel(XGMbunchId=indices)
ds = ds.merge(arr, join='inner')
return ds
'''
def align_xgm_array(xgm_arr, bpt, mask_sa1, mask_sa3):
"""
Assigns pulse ID coordinates to a pulse-resolved XGM array, according to
    the bunch pattern table. If the array contains both SASE 1 and SASE 3
    data, it is split into two arrays.
Parameters
----------
xgm_arr: xarray DataArray
array containing pulse-resolved XGM data, with dims ['trainId',
'XGMbunchId']
bpt: xarray DataArray
bunch pattern table
mask_sa1: xarray DataArray
boolean 2D array (trainId x pulseId) of sase 1 pulses
mask_sa3: xarray DataArray
boolean 2D array (trainId x pulseId) of sase 3 pulses
Returns
-------
xgm: xarray Dataset
dataset with pulse ID coordinates. For SASE 1 data, the coordinates
name is sa1_pId, for SASE 3 data, the coordinates name is sa3_pId.
"""
key = xgm_arr.name
compute_sa1 = False
compute_sa3 = False
valid_tid = np.intersect1d(xgm_arr.trainId, bpt.trainId,
assume_unique=True)
# get the relevant masks for SASE 1 and/or SASE3
if "SA1" in key or "SA3" in key:
if "SA1" in key:
mask = mask_sa1.sel(trainId=valid_tid)
compute_sa1 = True
else:
mask = mask_sa3.sel(trainId=valid_tid)
compute_sa3 = True
tid = mask.where(mask.sum(dim='pulse_slot') > 0, drop=True).trainId
mask = mask.sel(trainId=tid)
mask_sa1 = mask.rename({'pulse_slot': 'sa1_pId'})
mask_sa3 = mask.rename({'pulse_slot': 'sa3_pId'})
if "XGM" in key:
compute_sa1 = True
compute_sa3 = True
mask_sa1 = mask_sa1.sel(trainId=valid_tid)
mask_sa3 = mask_sa3.sel(trainId=valid_tid)
mask = np.logical_or(mask_sa1, mask_sa3)
tid = mask.where(mask.sum(dim='pulse_slot') > 0,
drop=True).trainId
mask_sa1 = mask_sa1.sel(trainId=tid).rename({'pulse_slot': 'sa1_pId'})
mask_sa3 = mask_sa3.sel(trainId=tid).rename({'pulse_slot': 'sa3_pId'})
mask = mask.sel(trainId=tid)
npulses_max = mask.sum(dim='pulse_slot').max().values
bpt_npulses = bpt.sizes['pulse_slot']
xgm_arr = xgm_arr.sel(trainId=tid).isel(
XGMbunchId=slice(0, npulses_max))
# In rare cases, some xgm data is corrupted: trainId is valid but values
# are inf / NaN. We set them to -1 to avoid size mismatch between xgm and
# bpt. Before returning we will drop them.
xgm_arr = xgm_arr.where(np.isfinite(xgm_arr)).fillna(-1.)
# pad the xgm array to match the bpt dims, flatten and
# reorder xgm array to match the indices of the mask
xgm_flat = np.hstack((xgm_arr.fillna(1.),
np.ones((xgm_arr.sizes['trainId'],
bpt_npulses-npulses_max)))).flatten()
xgm_flat_arg = np.argwhere(xgm_flat != 1.)
mask_flat = mask.values.flatten()
mask_flat_arg = np.argwhere(mask_flat)
    if xgm_flat_arg.shape != mask_flat_arg.shape:
log.warning(f'{key}: XGM data and bunch pattern do not match.')
new_xgm_flat = np.ones(xgm_flat.shape)
new_xgm_flat[mask_flat_arg] = xgm_flat[xgm_flat_arg]
new_xgm = new_xgm_flat.reshape((xgm_arr.sizes['trainId'], bpt_npulses))
# create a dataset with new_xgm array masked by SASE 1 or SASE 3
xgm_dict = {}
if compute_sa1:
sa1_xgm = xr.DataArray(new_xgm, dims=['trainId', 'sa1_pId'],
coords={'trainId': xgm_arr.trainId,
'sa1_pId': np.arange(bpt_npulses)},
name=key.replace('XGM', 'SA1'))
sa1_xgm = sa1_xgm.where(mask_sa1, drop=True)
sa1_xgm = sa1_xgm.where(sa1_xgm != -1., drop=True)
# remove potential corrupted data:
xgm_dict[sa1_xgm.name] = sa1_xgm
if compute_sa3:
sa3_xgm = xr.DataArray(new_xgm, dims=['trainId', 'sa3_pId'],
coords={'trainId': xgm_arr.trainId,
'sa3_pId': np.arange(bpt_npulses)},
name=key.replace('XGM', 'SA3'))
sa3_xgm = sa3_xgm.where(mask_sa3, drop=True)
# remove potential corrupted data:
sa3_xgm = sa3_xgm.where(sa3_xgm != -1., drop=True)
xgm_dict[sa3_xgm.name] = sa3_xgm
ds = xr.Dataset(xgm_dict)
return ds
def calibrate_xgm(run, data, xgm='SCS', plot=False):
"""
Calculates the calibration factor F between the photon flux (slow signal)
and the fast signal (pulse-resolved) of the sase 3 pulses. The calibrated
fast signal is equal to the uncalibrated one multiplied by F.
Parameters
----------
run: extra_data.DataCollection
DataCollection containing the digitizer data.
data: xarray Dataset
dataset containing the pulse-resolved sase 3 signal, e.g. 'SCS_SA3'
xgm: str
one in {'XTD10', 'SCS'}
plot: bool
If True, shows a plot of the photon flux, averaged fast signal and
calibrated fast signal.
Returns
-------
F: float
calibration factor F defined as:
calibrated XGM [microJ] = F * fast XGM array ('SCS_SA3' or 'XTD10_SA3')
Example
-------
>>> import toolbox_scs as tb
>>> import toolbox_scs.detectors as tbdet
>>> run, data = tb.load(900074, 69, ['SCS_XGM'])
>>> ds = tbdet.get_xgm(run, merge_with=data)
>>> F = tbdet.calibrate_xgm(run, ds, plot=True)
>>> # Add calibrated XGM to the dataset:
>>> ds['SCS_SA3_uJ'] = F * ds['SCS_SA3']
"""
run_mnemonics = mnemonics_for_run(run)
# check if bunch pattern table exists
if 'bunchPatternTable' in data:
bpt = data['bunchPatternTable']
elif 'bunchPatternTable' in run_mnemonics:
bpt = run.get_array(*run_mnemonics['bunchPatternTable'].values())
elif 'bunchPatternTable_SA3' in run_mnemonics:
bpt = run.get_array(*run_mnemonics['bunchPatternTable_SA3'].values())
else:
raise ValueError('Bunch pattern missing. Cannot calibrate XGM.')
mask_sa3 = is_sase_3(bpt.sel(trainId=data.trainId))
npulses_sa3 = np.unique(mask_sa3.sum(dim='pulse_slot'))
if len(npulses_sa3) == 1:
npulses_sa3 = npulses_sa3[0]
else:
log.warning('change of pulse pattern in sase3 during the run.')
npulses_sa3 = max(npulses_sa3)
mask_sa1 = is_sase_1(bpt.sel(trainId=data.trainId))
npulses_sa1 = np.unique(mask_sa1.sum(dim='pulse_slot'))
if len(npulses_sa1) == 1:
npulses_sa1 = npulses_sa1[0]
else:
log.warning('change of pulse pattern in sase1 during the run.')
npulses_sa1 = max(npulses_sa1)
pflux_key = f'{xgm}_photonFlux'
if pflux_key in data:
pflux = data[pflux_key]
else:
pflux = run.get_array(*run_mnemonics[pflux_key].values())
pflux = pflux.sel(trainId=data.trainId)
pflux_sa3 = (npulses_sa1 + npulses_sa3) * pflux / npulses_sa3
avg_fast = data[f'{xgm}_SA3'].rolling(trainId=200).mean().mean(axis=1)
calib = np.nanmean(pflux_sa3.values / avg_fast.values)
if plot:
plot_xgm_calibration(xgm, pflux, pflux_sa3, avg_fast, calib)
return calib
def plot_xgm_calibration(xgm, pflux, pflux_sa3, avg_fast, calib):
plt.figure(figsize=(8, 4))
plt.plot(pflux, label='photon flux all')
plt.plot(pflux_sa3, label='photon flux SA3')
plt.plot(avg_fast, label='avg pulsed XGM')
plt.plot(avg_fast*calib, label='calibrated avg pulsed XGM')
plt.title(f'calibrated XGM = {xgm}_SA3 * {calib:.3e}')
plt.xlabel('train number')
plt.ylabel(r'Pulse energy [$\mu$J]')
plt.legend()
return
# -*- coding: utf-8 -*-
"""
Toolbox for SCS.
Various utility functions to quickly process data measured at the SCS
instruments.
Copyright (2019) SCS Team.
"""
import logging
import os
import numpy as np
import xarray as xr
import extra_data as ed
from extra_data.read_machinery import find_proposal
from .constants import mnemonics as _mnemonics
from .mnemonics_machinery import mnemonics_for_run
from .util.exceptions import ToolBoxValueError
import toolbox_scs.detectors as tbdet
from .misc.bunch_pattern import (npulses_has_changed,
get_unique_sase_pId, load_bpt)
__all__ = [
'concatenateRuns',
'find_run_path',
'get_array',
'load',
'open_run',
'run_by_path',
'load_run_values',
'check_data_rate',
]
log = logging.getLogger(__name__)
def load(proposalNB=None, runNB=None,
fields=None,
data='all',
display=False,
validate=False,
subset=None,
rois={},
extract_digitizers=True,
extract_xgm=True,
extract_bam=True,
bunchPattern='sase3',
parallelize=True,
):
"""
Load a run and extract the data. Output is an xarray with aligned
trainIds.
Parameters
----------
proposalNB: str, int
proposal number e.g. 'p002252' or 2252
runNB: str, int
run number as integer
fields: str, list of str, list of dict
        list of mnemonics to load specific data such as "fastccd",
        "SCS_XGM", or dictionaries defining a custom mnemonic such as
{"extra": {'source': 'SCS_CDIFFT_MAG/SUPPLY/CURRENT',
'key': 'actual_current.value',
'dim': None}}
data: str or Sequence of str
'raw', 'proc' (processed), or any other location relative to the
        proposal path with data per run to access. May also be 'all'
        (both 'raw' and 'proc') or a sequence of strings to load data
        from several locations, with later locations overwriting sources
        present in earlier ones. The default is 'all'.
display: bool
whether to show the run.info or not
validate: bool
whether to run extra-data-validate or not
subset: slice or extra_data.by_index or numpy.s_
        a subset of trains that can be loaded with extra_data.by_index[:5] for
the first 5 trains. If None, all trains are retrieved.
rois: dict
        a dictionary of mnemonics with a list of rois definition and
the desired names, for example:
{'fastccd': {'ref': {'roi': by_index[730:890, 535:720],
'dim': ['ref_x', 'ref_y']},
'sam': {'roi':by_index[1050:1210, 535:720],
'dim': ['sam_x', 'sam_y']}}}
extract_digitizers: bool
If True, extracts the peaks from digitizer variables and aligns the
pulse Id according to the fadc_bp bunch pattern.
extract_xgm: bool
If True, extracts the values from XGM variables (e.g. 'SCS_SA3',
'XTD10_XGM') and aligns the pulse Id with the sase1 / sase3 bunch
pattern.
extract_bam: bool
If True, extracts the values from BAM variables (e.g. 'BAM1932M')
and aligns the pulse Id with the sase3 bunch pattern.
bunchPattern: str
bunch pattern used to extract the Fast ADC pulses.
A string or a dict as in::
{'FFT_PD2': 'sase3', 'ILH_I0': 'scs_ppl'}
Ignored if extract_digitizers=False.
parallelize: bool
from EXtra-Data: enable or disable opening files in parallel.
Particularly useful if creating child processes is not allowed
(e.g. in a daemonized multiprocessing.Process).
Returns
-------
run, ds: DataCollection, xarray.Dataset
extra_data DataCollection of the proposal and run number and an
xarray Dataset with aligned trainIds and pulseIds
Example
-------
>>> import toolbox_scs as tb
>>> run, data = tb.load(2212, 208, ['SCS_SA3', 'MCP2apd', 'nrj'])
"""
run = ed.open_run(proposalNB, runNB, data=data, parallelize=parallelize)
if subset is not None:
run = run.select_trains(subset)
if fields is None:
return run, xr.Dataset()
if isinstance(fields, str):
fields = [fields]
if validate:
# get_ipython().system('extra-data-validate ' + runFolder)
pass
if display:
run.info()
data_arrays = []
run_mnemonics = mnemonics_for_run(run)
for f in fields:
        if isinstance(f, dict):
            # extracting mnemonic defined on the spot
if len(f.keys()) > 1:
print('Loading only one "on-the-spot" mnemonic at a time, '
'skipping all others !')
k = list(f.keys())[0]
v = f[k]
else:
            # extracting mnemonic from the table
if f in run_mnemonics:
v = run_mnemonics[f]
k = f
else:
if f in _mnemonics:
log.warning(f'Mnemonic "{f}" not found in run. Skipping!')
print(f'Mnemonic "{f}" not found in run. Skipping!')
else:
                    log.warning(f'Unknown mnemonic "{f}". Skipping!')
                    print(f'Unknown mnemonic "{f}". Skipping!')
continue
if k in [d.name for d in data_arrays]:
continue # already loaded, skip
if display:
print(f'Loading {k}')
if v['source'] not in run.all_sources:
log.warning(f'Source {v["source"]} not found in run. Skipping!')
print(f'Source {v["source"]} not found in run. Skipping!')
continue
if k == 'MTE3':
arr = run.get_array(v['source'], v['key'],
extra_dims=v['dim'], name=k)
tpi = run.get_array('SCS_XTD10_TPI/DCTRL/SHUTTER',
'hardwareStatusBitField.value', name=k)
tpi_open = iter(tpi.trainId[tpi & (1 << 12) > 0])
mte3_tids = []
last = 0
current = next(tpi_open, None)
if current is None:
data_arrays.append(arr)
else:
for tid in arr.trainId:
while current < tid:
last = current
current = next(tpi_open, tid)
mte3_tids.append(last)
data_arrays.append(
arr.assign_coords(trainId=np.array(mte3_tids, dtype='u8')))
elif k not in rois:
# no ROIs selection, we read everything
arr = run.get_array(v['source'], v['key'],
extra_dims=v['dim'], name=k)
if len(arr) == 0:
log.warning(f'Empty array for {f}: {v["source"]}, {v["key"]}. '
'Skipping!')
print(f'Empty array for {f}: {v["source"]}, {v["key"]}. '
'Skipping!')
continue
data_arrays.append(arr)
else:
# ROIs selection, for each ROI we select a region of the data and
# save it with new name and dimensions
for nk, nv in rois[k].items():
arr = run.get_array(v['source'], v['key'],
extra_dims=nv['dim'],
roi=nv['roi'],
name=nk)
if len(arr) == 0:
log.warning(f'Empty array for {f}: {v["source"]}, '
f'{v["key"]}. Skipping!')
print(f'Empty array for {f}: {v["source"]}, {v["key"]}. '
'Skipping!')
continue
data_arrays.append(arr)
# Check missing trains
for arr in data_arrays:
if 'hRIXS' in arr.name:
continue
rate = arr.sizes["trainId"] / len(run.train_ids)
if rate < 0.95:
log.warning(f'{arr.name}: only {rate*100:.1f}% of trains '
f'({arr.sizes["trainId"]} out of '
f'{len(run.train_ids)}) contain data.')
ds = xr.merge(data_arrays, join='inner')
ds.attrs['runNB'] = runNB
if isinstance(proposalNB, int):
proposalNB = 'p{:06d}'.format(proposalNB)
ds.attrs['proposal'] = find_proposal(proposalNB)
ds.attrs['data'] = data
if extract_digitizers:
bp = bunchPattern
for k, v in run_mnemonics.items():
if k not in ds or v.get('extract') != 'peaks':
continue
if isinstance(bunchPattern, dict):
bp = bunchPattern.get(k)
if bp is None:
continue
ds = tbdet.get_digitizer_peaks(proposalNB, runNB,
mnemonic=k, merge_with=ds, bunchPattern=bp)
if extract_xgm:
for k, v in run_mnemonics.items():
if k not in ds or v.get('extract') != 'XGM':
continue
ds = tbdet.get_xgm(run, mnemonics=k, merge_with=ds)
if extract_bam:
for k, v in run_mnemonics.items():
if k not in ds or v.get('extract') != 'BAM':
continue
ds = tbdet.get_bam(run, mnemonics=k, merge_with=ds)
return run, ds
def run_by_path(path):
"""
    Return the run at the specified path.
Wraps the extra_data RunDirectory routine, to ease its use for the
scs-toolbox user.
Parameters
----------
path: str
path to the run directory
Returns
-------
run : extra_data.DataCollection
DataCollection object containing information about the specified
run. Data can be loaded using built-in class methods.
"""
return ed.RunDirectory(path)
def find_run_path(proposalNB, runNB, data='raw'):
"""
Return the run path given the specified proposal and run numbers.
Parameters
----------
proposalNB: (str, int)
proposal number e.g. 'p002252' or 2252
runNB: (str, int)
run number as integer
data: str
'raw', 'proc' (processed) or 'all' (both 'raw' and 'proc') to access
data from either or both of those folders. If 'all' is used, sources
present in 'proc' overwrite those in 'raw'. The default is 'raw'.
Returns
-------
path: str
The run path.
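    Example
    -------
    (Illustrative; the returned path depends on the facility file system
    and the proposal and run numbers are placeholders.)
    >>> import toolbox_scs as tb
    >>> path = tb.find_run_path(2212, 208, 'raw')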
"""
if isinstance(runNB, int):
runNB = 'r{:04d}'.format(runNB)
if isinstance(proposalNB, int):
proposalNB = 'p{:06d}'.format(proposalNB)
return os.path.join(find_proposal(proposalNB), data, runNB)
def open_run(proposalNB, runNB, subset=None, **kwargs):
"""
Get extra_data.DataCollection in a given proposal.
Wraps the extra_data open_run routine and adds subset selection, out of
convenience for the toolbox user. More information can be found in the
extra_data documentation.
Parameters
----------
proposalNB: (str, int)
proposal number e.g. 'p002252' or 2252
runNB: (str, int)
run number e.g. 17 or 'r0017'
subset: slice or extra_data.by_index or numpy.s_
        a subset of trains that can be loaded with extra_data.by_index[:5] for
the first 5 trains. If None, all trains are retrieved.
**kwargs
--------
data: str
default -> 'raw'
include: str
default -> '*'
Returns
-------
run : extra_data.DataCollection
DataCollection object containing information about the specified
run. Data can be loaded using built-in class methods.
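    Example
    -------
    (Illustrative sketch; proposal and run numbers are placeholders.)
    >>> import toolbox_scs as tb
    >>> import extra_data as ed
    >>> run = tb.open_run(2212, 208, subset=ed.by_index[:100])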
"""
run = ed.open_run(proposalNB, runNB, **kwargs)
if subset is not None:
run = run.select_trains(subset)
return run
def get_array(run=None, mnemonic=None, stepsize=None,
subset=None, data='raw',
proposalNB=None, runNB=None):
"""
Loads one data array for the specified mnemonic and rounds its values to
integer multiples of stepsize for consistent grouping (except for
stepsize=None).
Returns a 1D array of ones if mnemonic is set to None.
Parameters
----------
run: extra_data.DataCollection
DataCollection containing the data.
Used if proposalNB and runNB are None.
mnemonic: str
Identifier of a single item in the mnemonic collection. None creates a
dummy 1D array of ones with length equal to the number of trains.
stepsize : float
nominal stepsize of the array data - values will be rounded to integer
multiples of this value.
subset: slice or extra_data.by_index or numpy.s_
        a subset of trains that can be loaded with extra_data.by_index[:5] for
the first 5 trains. If None, all trains are retrieved.
data: str or Sequence of str
'raw', 'proc' (processed), or any other location relative to the
        proposal path with data per run to access. May also be 'all'
        (both 'raw' and 'proc') or a sequence of strings to load data
from several locations, with later locations overwriting sources
present in earlier ones. The default is 'raw'.
proposalNB: (str, int)
proposal number e.g. 'p002252' or 2252.
runNB: (str, int)
run number e.g. 17 or 'r0017'.
Returns
-------
data : xarray.DataArray
xarray DataArray containing rounded array values using the trainId as
coordinate.
Raises
------
ToolBoxValueError: Exception
Toolbox specific exception, indicating a non-valid mnemonic entry
Example
-------
>>> import toolbox_scs as tb
>>> run = tb.open_run(2212, 235)
>>> mnemonic = 'PP800_PhaseShifter'
>>> data_PhaseShifter = tb.get_array(run, mnemonic, 0.5)
"""
if run is None:
run = open_run(proposalNB, runNB, subset, data=data)
else:
if not isinstance(run, ed.DataCollection):
raise TypeError(f'run argument has type {type(run)} but '
'expected type is extra_data.DataCollection')
if subset is not None:
run = run.select_trains(subset)
run_mnemonics = mnemonics_for_run(run)
try:
if mnemonic is None:
da = xr.DataArray(
np.ones(len(run.train_ids), dtype=np.int16),
dims=['trainId'], coords={'trainId': run.train_ids})
elif mnemonic in run_mnemonics:
mnem = run_mnemonics[mnemonic]
da = run.get_array(mnem['source'], mnem['key'],
extra_dims=mnem['dim'], name=mnemonic)
else:
raise ToolBoxValueError("Invalid mnemonic", mnemonic)
if stepsize is not None:
da = stepsize * np.round(da / stepsize)
log.debug(f"Got data for {mnemonic}")
except ToolBoxValueError as err:
log.error(f"{err.message}")
raise
return da
def load_run_values(prop_or_run, runNB=None, which='mnemonics'):
"""
    Load the run value for each mnemonic whose source is a CONTROL
source (see extra-data DataCollection.get_run_value() for details)
Parameters
----------
prop_or_run: extra_data DataCollection or int
The run (DataCollection) to check for mnemonics.
Alternatively, the proposal number (int), for which the runNB
is also required.
runNB: int
The run number. Only used if the first argument is the proposal
number.
which: str
'mnemonics' or 'all'. If 'mnemonics', only the run values for the
        ToolBox mnemonics are retrieved. If 'all', a compiled dictionary
of all control sources run values is returned.
Output
------
    run_values: a dictionary containing the mnemonic or all run values.
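    Example
    -------
    (Illustrative sketch; proposal and run numbers are placeholders.)
    >>> import toolbox_scs as tb
    >>> run_values = tb.load_run_values(2212, 208)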
"""
if which not in ['mnemonics', 'all']:
raise ValueError('`which` should be either "mnemonics" or "all"')
run = prop_or_run
if runNB is not None:
run = open_run(prop_or_run, runNB)
if which == 'all':
run_values = {}
for c in run.control_sources:
v = run.get_run_values(c)
run_values[c] = v
return run_values
mnemos = mnemonics_for_run(run)
run_values = {}
for m in mnemos:
val = None
try:
if mnemos[m]['source'] in run.control_sources:
val = run.get_run_value(mnemos[m]['source'],
mnemos[m]['key'])
except Exception as e:
log.info(f'Error while retrieving {m} mnemonic: {e}')
continue
run_values[m] = val
return run_values
def concatenateRuns(runs):
""" Sorts and concatenate a list of runs with identical data variables
along the trainId dimension.
Input:
runs: (list) the xarray Datasets to concatenate
Output:
a concatenated xarray Dataset
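    Example:
        (Illustrative sketch; run numbers are placeholders.)
        >>> import toolbox_scs as tb
        >>> _, ds1 = tb.load(2212, 208, 'SCS_SA3')
        >>> _, ds2 = tb.load(2212, 209, 'SCS_SA3')
        >>> ds = tb.concatenateRuns([ds1, ds2])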
"""
firstTid = {i: int(run.trainId[0].values) for i, run in enumerate(runs)}
orderedDict = dict(sorted(firstTid.items(), key=lambda t: t[1]))
orderedRuns = [runs[i] for i in orderedDict]
keys = orderedRuns[0].keys()
for run in orderedRuns[1:]:
if run.keys() != keys:
print('data fields between different runs are not identical. '
'Cannot combine runs.')
return
result = xr.concat(orderedRuns, dim='trainId')
for k in orderedRuns[0].attrs.keys():
result.attrs[k] = [run.attrs[k] for run in orderedRuns]
return result
def check_data_rate(run, fields=None):
"""
Calculates the fraction of train ids that contain data in a run.
Parameters
----------
run: extra_data DataCollection
the DataCollection associated to the data.
fields: str, list of str or dict
mnemonics to check. If None, all mnemonics in the run are checked.
        A custom mnemonic can be defined with a dictionary: {'extra':
{'source': 'SCS_CDIFFT_MAG/SUPPLY/CURRENT', 'key':
'actual_current.value'}}
Output
------
    ret: dictionary
        dictionary with mnemonics as keys and fraction of train ids
that contain data as values.
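    Example
    -------
    (Illustrative sketch; proposal and run numbers are placeholders.)
    >>> import toolbox_scs as tb
    >>> run = tb.open_run(2212, 208)
    >>> rates = tb.check_data_rate(run, fields=['SCS_XGM', 'nrj'])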
"""
run_mnemonics = mnemonics_for_run(run)
if fields is None:
fields = run_mnemonics
fields = [fields] if isinstance(fields, str) else fields
ret = {}
for f in fields:
if isinstance(f, dict):
name = list(f.keys())[0]
val = f[name]
f = name
elif f not in run_mnemonics:
log.warning(f'mnemonic {f} not found. Skipping!')
continue
else:
val = run_mnemonics[f]
counts = run[val['source']][val['key']].data_counts(False)
npulses = counts.max()
if npulses == 0: # (only missing data)
rate = 0.
else:
counts = counts / npulses # to only count trains and not pulses
rate = counts.sum() / len(run.train_ids)
ret[f] = rate
return ret