Skip to content
Snippets Groups Projects

Includes input energy parameter in the model and adds non-linearities

Merged Danilo Enoque Ferreira de Lima requested to merge with_energy into main
4 files
+ 254
308
Compare changes
  • Side-by-side
  • Inline
Files
4
@@ -134,7 +134,8 @@ def main():
"""
parser = argparse.ArgumentParser(prog="offline_analysis", description="Test pes2spec doing an offline analysis of the data.")
parser.add_argument('-p', '--proposal', type=int, metavar='INT', help='Proposal number', default=2828)
parser.add_argument('-r', '--run', type=int, metavar='INT', help='Run number', default=206)
parser.add_argument('-r', '--run', type=str, metavar='INT,INT,...', help='Run numbers, comma-separated.', default=206)
parser.add_argument('-t', '--test-run', type=int, metavar='INT', help='Run to test', default=None)
parser.add_argument('-m', '--model', type=str, metavar='FILENAME', default="", help='Model to load. If given, do not train a model and just do inference with this one.')
parser.add_argument('-d', '--directory', type=str, metavar='DIRECTORY', default=".", help='Where to save the results.')
parser.add_argument('-S', '--spec', type=str, metavar='NAME', default="SA3_XTD10_SPECT/MDL/SPECTROMETER_SQS_NAVITAR:output", help='SPEC name')
@@ -147,8 +148,20 @@ def main():
args = parser.parse_args()
print("Opening run ...")
runs = args.run.split(',')
runs = [int(r) for r in runs]
# get run
run = open_run(proposal=args.proposal, run=args.run)
run_list = [open_run(proposal=args.proposal, run=r) for r in runs]
run = run_list[0]
if len(run_list) > 1:
run = run.union(*run_list[1:])
run_test = run
other_run_test = False
if "test_run" in args and args.test_run is not None:
other_run_test = True
run_test = open_run(proposal=args.proposal, run=args.test_run)
#run = RunDirectory("/gpfs/exfel/data/scratch/tmichela/data/r0206")
# ----------------Used in the first tests-------------------------
@@ -186,15 +199,25 @@ def main():
# reserve part of it for the test stage
train_tids = tids[:-10]
test_tids = tids[-10:]
if other_run_test:
pes_tidt = run_test[pes_name, "digitizers.trainId"].ndarray()
xgm_tidt = run_test[xgm_name, "data.trainId"].ndarray()
spec_tidt = run_test[spec_name, "data.trainId"].ndarray()
test_tids = matching_ids(spec_tidt, pes_tidt, xgm_tidt)
else:
test_tids = tids
# read the PES data for each channel
channels = [f"channel_{i}_{l}"
for i, l in product(range(1, 5), ["A", "B", "C", "D"])]
pes_raw = {ch: run[pes_name, f"digitizers.{ch}.raw.samples"].select_trains(by_id[tids]).ndarray()
for ch in channels}
pes_raw_t = {ch: run[pes_name, f"digitizers.{ch}.raw.samples"].select_trains(by_id[test_tids]).ndarray()
for ch in channels}
pes_raw_t = {ch: run_test[pes_name, f"digitizers.{ch}.raw.samples"].select_trains(by_id[test_tids]).ndarray()
for ch in channels}
# select test SPEC data
spec_raw_pe_t = run_test[spec_name, "data.photonEnergy"].select_trains(by_id[test_tids - spec_offset]).ndarray()
spec_raw_int_t = run_test[spec_name, "data.intensityDistribution"].select_trains(by_id[test_tids - spec_offset]).ndarray()
print("Data in memory.")
@@ -203,9 +226,9 @@ def main():
#xgm_pe = run['SA3_XTD10_XGM/XGM/DOOCS:output', "data.intensitySa3TD"].select_trains(by_id[tids]).ndarray()
#retvol_raw = run["SA3_XTD10_PES/MDL/DAQ_MPOD", "u212.value"].select_trains(by_id[tids]).ndarray()
#retvol_raw_timestamp = run["SA3_XTD10_PES/MDL/DAQ_MPOD", "u212.timestamp"].select_trains(by_id[tids]).ndarray()
xgm_flux = run['SA3_XTD10_XGM/XGM/DOOCS:output', "data.intensitySa3TD"].select_trains(by_id[tids]).ndarray()[:, 0][:, np.newaxis]
xgm_flux_t = run['SA3_XTD10_XGM/XGM/DOOCS:output', "data.intensitySa3TD"].select_trains(by_id[test_tids]).ndarray()[:, 0][:, np.newaxis]
xgm_flux_t = run_test['SA3_XTD10_XGM/XGM/DOOCS:output', "data.intensitySa3TD"].select_trains(by_id[test_tids]).ndarray()[:, 0][:, np.newaxis]
t = list()
t_names = list()
@@ -213,6 +236,9 @@ def main():
model = Model(poly=args.poly)
train_idx = np.isin(tids, train_tids) & (xgm_flux[:,0] > args.xgm_cut)
# we just need this for training and we need to avoid copying it, which blows up the memoray usage
for k in pes_raw.keys():
pes_raw[k] = pes_raw[k][train_idx]
model.debug_peak_finding(pes_raw, os.path.join(args.directory, "test_peak_finding.png"))
if len(args.model) == 0:
@@ -220,10 +246,11 @@ def main():
start = time_ns()
w = model.uniformize(xgm_flux[train_idx])
print("w", np.amin(w), np.amax(w), np.median(w))
model.fit({k: v[train_idx, :]
for k, v in pes_raw.items()},
spec_raw_int[train_idx, :],
spec_raw_pe[train_idx, :],
model.fit(pes_raw,
#{k: v[train_idx]
#for k, v in pes_raw.items()},
spec_raw_int[train_idx],
spec_raw_pe[train_idx],
w,
pulse_energy=xgm_flux[train_idx],
)
@@ -282,7 +309,7 @@ def main():
# test
print("Predict")
start = time_ns()
spec_pred = model.predict(pes_raw, pulse_energy=xgm_flux)
spec_pred = model.predict(pes_raw_t, pulse_energy=xgm_flux_t)
spec_pred["deconvolved"] = model.deconvolve(spec_pred["expected"])
t += [time_ns() - start]
t_names += ["Predict"]
@@ -296,17 +323,16 @@ def main():
showSpec = False
if len(args.model) == 0:
showSpec = True
spec_smooth = model.preprocess_high_res(spec_raw_int)
spec_smooth = model.preprocess_high_res(spec_raw_int_t)
# chi2 w.r.t XGM intensity
erange = spec_raw_pe[0,-1] - spec_raw_pe[0,0]
de = (spec_raw_pe[0,1] - spec_raw_pe[0,0])
de = (spec_raw_pe_t[0,1] - spec_raw_pe_t[0,0])
chi2 = np.sum((spec_smooth[:, np.newaxis, :] - spec_pred["expected"])**2/(spec_pred["total_unc"]**2), axis=(-1, -2))
ndof = float(spec_smooth.shape[1]) - 1.0
fig = plt.figure(figsize=(12, 8))
gs = GridSpec(1, 1)
ax = fig.add_subplot(gs[0, 0])
ax.scatter(chi2/ndof, xgm_flux[:,0], c='r', s=20)
ax.scatter(chi2/ndof, xgm_flux_t[:,0], c='r', s=20)
ax.set(title=f"", #avg(stat unc) = {unc_stat}, avg(pca unc) = {unc_pca}",
xlabel=r"$\chi^2/$ndof",
ylabel="XGM intensity [uJ]",
@@ -316,7 +342,7 @@ def main():
# Manually set the position and relative size of the inset axes within ax1
ip = InsetPosition(ax, [0.65,0.6,0.35,0.4])
ax2.set_axes_locator(ip)
ax2.scatter(chi2/ndof, xgm_flux[:,0], c='r', s=30)
ax2.scatter(chi2/ndof, xgm_flux_t[:,0], c='r', s=30)
#ax2.scatter(chi2/ndof, np.sum(spec_pred["expected"], axis=1)*de, c='b', s=30)
#ax2.scatter(chi2/ndof, np.sum(spec_raw_int, axis=1)*de, c='g', s=30)
ax2.set(title="",
@@ -353,16 +379,16 @@ def main():
fig = plt.figure(figsize=(12, 8))
gs = GridSpec(1, 1)
ax = fig.add_subplot(gs[0, 0])
sns.kdeplot(x=xgm_flux[:,0], ax=ax)
sns.kdeplot(x=xgm_flux_t[:,0], ax=ax)
ax.set(title=f"",
xlabel="XGM intensity [uJ]",
ylabel="Density [a.u.]",
)
ax.text(0.90, 0.95, fr"$\mu = ${np.mean(xgm_flux[:,0]):.2f}",
ax.text(0.90, 0.95, fr"$\mu = ${np.mean(xgm_flux_t[:,0]):.2f}",
verticalalignment='top', horizontalalignment='right',
transform=ax.transAxes,
color='black', fontsize=15)
ax.text(0.90, 0.90, fr"$\sigma = ${np.std(xgm_flux[:,0]):.2f}",
ax.text(0.90, 0.90, fr"$\sigma = ${np.std(xgm_flux_t[:,0]):.2f}",
verticalalignment='top', horizontalalignment='right',
transform=ax.transAxes,
color='black', fontsize=15)
@@ -374,7 +400,7 @@ def main():
fig = plt.figure(figsize=(12, 8))
gs = GridSpec(1, 1)
ax = fig.add_subplot(gs[0, 0])
ax.scatter(rmse, xgm_flux[:,0], c='r', s=30)
ax.scatter(rmse, xgm_flux_t[:,0], c='r', s=30)
ax = plt.gca()
ax.set(title=f"",
xlabel=r"Root-mean-squared error",
@@ -407,7 +433,7 @@ def main():
fig = plt.figure(figsize=(12, 8))
gs = GridSpec(1, 1)
ax = fig.add_subplot(gs[0, 0])
sns.regplot(x=np.sum(spec_raw_int, axis=1)*de, y=xgm_flux[:,0], color='r', robust=True, ax=ax)
sns.regplot(x=np.sum(spec_raw_int_t, axis=1)*de, y=xgm_flux_t[:,0], color='r', robust=True, ax=ax)
ax.set(title=f"",
xlabel="SPEC (raw) integral",
ylabel="XGM Intensity [uJ]",
@@ -419,7 +445,7 @@ def main():
fig = plt.figure(figsize=(12, 8))
gs = GridSpec(1, 1)
ax = fig.add_subplot(gs[0, 0])
sns.regplot(x=np.sum(spec_raw_int, axis=-1)*de, y=np.sum(spec_pred["expected"], axis=(-1, -2))*de, color='r', robust=True, ax=ax)
sns.regplot(x=np.sum(spec_raw_int_t, axis=-1)*de, y=np.sum(spec_pred["expected"], axis=(-1, -2))*de, color='r', robust=True, ax=ax)
ax.set(title=f"",
xlabel="SPEC (raw) integral",
ylabel="Predicted integral",
@@ -430,7 +456,7 @@ def main():
fig = plt.figure(figsize=(12, 8))
gs = GridSpec(1, 1)
ax = fig.add_subplot(gs[0, 0])
sns.regplot(x=np.sum(spec_pred["expected"], axis=(-1, -2))*de, y=xgm_flux[:,0], color='r', robust=True, ax=ax)
sns.regplot(x=np.sum(spec_pred["expected"], axis=(-1, -2))*de, y=xgm_flux_t[:,0], color='r', robust=True, ax=ax)
ax.set(title=f"",
xlabel="Predicted integral",
ylabel="XGM intensity [uJ]",
@@ -443,23 +469,19 @@ def main():
last -= 10
pes_to_show = 'channel_1_D'
# plot
for tid in test_tids:
idx = np.where(tid==tids)[0][0]
for idx in range(len(spec_raw_int_t) - 10, len(spec_raw_int_t)):
tid = test_tids[idx]
plot_result(os.path.join(args.directory, f"test_{tid}.png"),
{k: item[idx, 0, ...] if k != "pca"
else item[0, ...]
for k, item in spec_pred.items()},
spec_smooth[idx, :] if showSpec else None,
spec_raw_pe[idx, :] if showSpec else None,
spec_raw_int[idx, :] if showSpec else None,
#pes=-pes_raw[pes_to_show][idx, first:last],
#pes_to_show=pes_to_show.replace('_', ' '),
#pes_bin=np.arange(first, last),
#wiener=model.wiener_filter
spec_raw_pe_t[idx, :] if showSpec else None,
spec_raw_int_t[idx, :] if showSpec else None,
)
for ch in channels:
plot_pes(os.path.join(args.directory, f"test_pes_{tid}_{ch}.png"),
pes_raw[ch][idx, first:last], first, last)
pes_raw_t[ch][idx, first:last], first, last)
if __name__ == '__main__':
main()
Loading