Newer
Older
from euxfel_bunch_pattern import indices_at_sase # Installable from PyPI
import numpy as np
import string
from scipy.interpolate import interp1d
from pathlib import Path
detectors = np.genfromtxt(Path(__file__).parent / 'detectors.txt', names=True, dtype=('|U5', '|U4', '|U3', '<f8'))
def correct_adq_common_mode(trace, region, sym):
"""Baseline substraction based on common mode.
Since ADQ digitizers always interleave multiple ADCs per channel to sample
a trace, regular baseline substraction will cause an ADC-dependant common
mode to appear. This correction directly uses a per-ADC baseline instead
to perform include this in the substraction.
Empirical testing has shown that a symmetry of 8 (should be 4 for
non-interleaved) regularly shows better results in suppressing high
frequency signals from the common mode. Going 16 or higher is not recommend
in most cases, as additional artifacts appear.
Args:
trace (array_like): Vector to correct, will be modified in-place.
region (slice): Region to use for computing baseline.
sym (int): Periodic symmetry of ADC common mode.
Returns:
(array_like) Corrected vector, same as trace.
"""
for x in range(sym):
trace[x::sym] -= trace.astype(np.float32)[region][x::sym].mean()
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
return trace
def separate_pulses(trace, pulse_spacing):
"""Separate vector into pieces on an integer boundary.
Args:
trace (array_like): Vector to split.
pulse_spacing (int): Pulse length in samples.
Returns:
(array_like) Separated vector.
"""
return trace[:(len(trace) // pulse_spacing) * pulse_spacing].reshape(-1, pulse_spacing)
def preprocess(
traces, ppt, xgm=None,
sase=3, xgm_normalize=False,
adq_baseline_region=np.s_[:1000], adq_baseline_symmetry=8,
adq_train_region=np.s_[30000:], adq_pulse_region=np.s_[:5000],
adq_interleaved=False
):
"""Pre-process raw data into "cookiebox images".
Raw digitzer traces containing train data are baseline corrected, split into
pulses based on the pulse pattern table and optionally normalized by XGM
fast data.
Important optional parameters:
* adq_baseline_region: This slice *must* refer to a section with no signal.
* adq_train_region: This slice is used for pulse separation and needs to
start with the first pulse. Leaving additional room in the front may
cause empty pulses to be created. As the number of pulses are known,
it may end with the trace.
* adq_pulse_region: This slice is applied within each seperated pulse
spectrum to reduce its size to the actual experimental signal.
* adq_interleaved: Knowledge of whether the digitizer runs in interleaved
mode or not is essential to compute the pulse length.
Args:
traces (Iterable of array_like): Digitizer traces containing TOF spectra.
ppt (array_like): Pulse pattern table from time server device.
xgm (array_like, optional): XGM fast data.
sase (int, optional): Which SASE the experiments run at, 3 by default.
xgm_normalize (bool, optional): Whether data should be normalized by
fast XGM intensity, disabled by default.
adq_baseline_region (slice, optional): Region with no signal to use for
baseline correction, :1000 by default.
adq_baseline_symmetry (int, optional): Common mode symmetry for baseline
correction, 8 by default.
adq_train_region (slice, optional): Region containing actual signal for
the entire train.
adq_pulse_region (slice, optional): Region after pulse separation
containing actual signal for the pulse.
adq_interleaved (bool, optional): Whether digitizer operates in interleaved
mode at 4 GS/s or regular at 2 GS/s.
Returns:
(array_like, array_like) Detector images of shape (pulse, angle, tof) and
vector of correspondig pulse IDs taken from PPT indices.
Raises:
ValueError: When xgm is not passed while xgm_normalize is enabled.
RuntimeError: When the pulse pattern table shows no pulses in the
specified SASE.
AssertionError: When no digitzer trace is obtained.
"""
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
raise ValueError('no xgm data passed for xgm normalization')
# Look for pulses going to the specified SASE in the pulse pattern table.
pulse_ids = indices_at_sase(ppt, sase=sase)
num_pulses = len(pulse_ids)
if num_pulses == 0:
# What are we even doing here?
raise RuntimeError(f'no pulses in SASE{sase}')
elif num_pulses > 1:
# ADQ412s run at a clock factor of 440 relative to the PPT un-interleaved.
pulse_spacing = (pulse_ids[1] - pulse_ids[0]) * (880 if adq_interleaved else 440)
corr_traces = []
# Perform common-mode correction on traces to get rid of ADC-dependent offsets.
for i, trace in enumerate(traces):
# uint16 -> float32 simplifies assumptions about math.
trace = correct_adq_common_mode(trace.astype(np.float32), adq_baseline_region, adq_baseline_symmetry)
if num_pulses > 1:
trace = separate_pulses(trace[adq_train_region])[:, adq_pulse_region]
else:
trace = trace[adq_train_region][adq_pulse_region].reshape(1, -1)
corr_traces.append(trace)
assert len(corr_traces) > 1, 'at least one digitzer trace required'
# Copy it into nice contiguous memory (pulse, angle, tof)
spectra = np.zeros((num_pulses, len(corr_traces), corr_traces[0].shape[1]))
for i, traces_by_pulse in enumerate(corr_traces):
spectra[:, i, :] = traces_by_pulse
if xgm_normalize:
spectra /= xgm[:num_pulses]
return spectra, pulse_ids
def energy_calibration(spectra, pulse_ids, calib_params, samplerate, energy_nodes, valid_energies):
def f(t, a, b, c, d):
return a / t**3 + b / t**2 + c / t + d
def df(t, a, b, c, d):
return -3 * a / t**4 - 2 * b / t**3 - c / t**2
E = f(1e-9 * t[:, None], *calib_params)[valid_energies].T
dE = df(1e-9 * t[:, None], *calib_params)[valid_energies].T
transformed = -spectra[:,:,valid_energies] / dE
calib_spectra = (np.asarray([interp1d(e, t)(energy_nodes) for e, t in zip(E, transformed.transpose(1, 0, 2))])).transpose(1, 0, 2) if energy_nodes is not None else None
return E, transformed, energy_nodes, calib_spectra, t[valid_energies]