Skip to content
Snippets Groups Projects

Improve fit

Merged Danilo Enoque Ferreira de Lima requested to merge parallelization into main
Files
4
@@ -8,8 +8,8 @@ import os
@@ -8,8 +8,8 @@ import os
import argparse
import argparse
import numpy as np
import numpy as np
from extra_data import open_run, by_id
from extra_data import open_run, by_id, RunDirectory
from pes_to_spec.model import Model, matching_two_ids
from pes_to_spec.model import Model, matching_ids
from itertools import product
from itertools import product
@@ -19,6 +19,7 @@ matplotlib.use('Agg')
@@ -19,6 +19,7 @@ matplotlib.use('Agg')
import matplotlib.pyplot as plt
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
from matplotlib.gridspec import GridSpec
from mpl_toolkits.axes_grid.inset_locator import InsetPosition
from mpl_toolkits.axes_grid.inset_locator import InsetPosition
 
import seaborn as sns
from typing import Dict, Optional
from typing import Dict, Optional
@@ -82,8 +83,8 @@ def plot_result(filename: str,
@@ -82,8 +83,8 @@ def plot_result(filename: str,
fig = plt.figure(figsize=(12, 8))
fig = plt.figure(figsize=(12, 8))
gs = GridSpec(1, 1)
gs = GridSpec(1, 1)
ax = fig.add_subplot(gs[0, 0])
ax = fig.add_subplot(gs[0, 0])
unc_stat = np.mean(spec_pred["unc"])
unc_stat = spec_pred["unc"]
unc_pca = np.mean(spec_pred["pca"])
unc_pca = spec_pred["pca"]
unc = np.sqrt(unc_stat**2 + unc_pca**2)
unc = np.sqrt(unc_stat**2 + unc_pca**2)
ax.plot(spec_raw_pe, spec_smooth, c='b', lw=3, label="High-res. measurement (smoothened)")
ax.plot(spec_raw_pe, spec_smooth, c='b', lw=3, label="High-res. measurement (smoothened)")
ax.plot(spec_raw_pe, spec_pred["expected"], c='r', ls='--', lw=3, label="High-res. prediction")
ax.plot(spec_raw_pe, spec_pred["expected"], c='r', ls='--', lw=3, label="High-res. prediction")
@@ -134,11 +135,15 @@ def main():
@@ -134,11 +135,15 @@ def main():
parser.add_argument('-P', '--pes', type=str, metavar='NAME', default="SA3_XTD10_PES/ADC/1:network", help='PES name')
parser.add_argument('-P', '--pes', type=str, metavar='NAME', default="SA3_XTD10_PES/ADC/1:network", help='PES name')
parser.add_argument('-X', '--xgm', type=str, metavar='NAME', default="SA3_XTD10_XGM/XGM/DOOCS:output", help='XGM name')
parser.add_argument('-X', '--xgm', type=str, metavar='NAME', default="SA3_XTD10_XGM/XGM/DOOCS:output", help='XGM name')
parser.add_argument('-o', '--offset', type=int, metavar='INT', default=0, help='Train ID offset')
parser.add_argument('-o', '--offset', type=int, metavar='INT', default=0, help='Train ID offset')
 
parser.add_argument('-c', '--xgm_cut', type=float, metavar='INTENSITY', default=0, help='XGM intensity threshold in uJ.')
 
parser.add_argument('-e', '--poly', action="store_true", default=False, help='Wheteher to expand PES data in higher order polynomials.')
args = parser.parse_args()
args = parser.parse_args()
 
print("Opening run ...")
# get run
# get run
run = open_run(proposal=args.proposal, run=args.run)
run = open_run(proposal=args.proposal, run=args.run)
 
#run = RunDirectory("/gpfs/exfel/data/scratch/tmichela/data/r0206")
# ----------------Used in the first tests-------------------------
# ----------------Used in the first tests-------------------------
# get train IDs and match them, so we are sure to have information from all needed sources
# get train IDs and match them, so we are sure to have information from all needed sources
@@ -155,16 +160,16 @@ def main():
@@ -155,16 +160,16 @@ def main():
spec_offset = args.offset
spec_offset = args.offset
spec_name = args.spec
spec_name = args.spec
pes_name = args.pes
pes_name = args.pes
#xgm_name = args.xgm
xgm_name = args.xgm
pes_tid = run[pes_name, "digitizers.trainId"].ndarray()
pes_tid = run[pes_name, "digitizers.trainId"].ndarray()
#xgm_tid = run[xgm_name, "data.trainId"].ndarray()
xgm_tid = run[xgm_name, "data.trainId"].ndarray()
if len(args.model) == 0:
if len(args.model) == 0:
spec_tid = spec_offset + run[spec_name, "data.trainId"].ndarray()
spec_tid = spec_offset + run[spec_name, "data.trainId"].ndarray()
# these are the train ID intersection
# these are the train ID intersection
# this could have been done by a select call in the RunDirectory, but it would not correct for the spec_offset
# this could have been done by a select call in the RunDirectory, but it would not correct for the spec_offset
tids = matching_two_ids(spec_tid, pes_tid)
tids = matching_ids(spec_tid, pes_tid, xgm_tid)
# read the spec photon energy and intensity
# read the spec photon energy and intensity
spec_raw_pe = run[spec_name, "data.photonEnergy"].select_trains(by_id[tids - spec_offset]).ndarray()
spec_raw_pe = run[spec_name, "data.photonEnergy"].select_trains(by_id[tids - spec_offset]).ndarray()
@@ -185,27 +190,36 @@ def main():
@@ -185,27 +190,36 @@ def main():
pes_raw_t = {ch: run[pes_name, f"digitizers.{ch}.raw.samples"].select_trains(by_id[test_tids]).ndarray()
pes_raw_t = {ch: run[pes_name, f"digitizers.{ch}.raw.samples"].select_trains(by_id[test_tids]).ndarray()
for ch in channels}
for ch in channels}
 
print("Data in memory.")
 
# read the XGM information
# read the XGM information
#xgm_pressure = run['SA3_XTD10_XGM/XGM/DOOCS', "pressure.pressureFiltered.value"].select_trains(by_id[tids]).ndarray()
#xgm_pressure = run['SA3_XTD10_XGM/XGM/DOOCS', "pressure.pressureFiltered.value"].select_trains(by_id[tids]).ndarray()
#xgm_pe = run['SA3_XTD10_XGM/XGM/DOOCS:output', "data.intensitySa3TD"].select_trains(by_id[tids]).ndarray()
#xgm_pe = run['SA3_XTD10_XGM/XGM/DOOCS:output', "data.intensitySa3TD"].select_trains(by_id[tids]).ndarray()
#retvol_raw = run["SA3_XTD10_PES/MDL/DAQ_MPOD", "u212.value"].select_trains(by_id[tids]).ndarray()
#retvol_raw = run["SA3_XTD10_PES/MDL/DAQ_MPOD", "u212.value"].select_trains(by_id[tids]).ndarray()
#retvol_raw_timestamp = run["SA3_XTD10_PES/MDL/DAQ_MPOD", "u212.timestamp"].select_trains(by_id[tids]).ndarray()
#retvol_raw_timestamp = run["SA3_XTD10_PES/MDL/DAQ_MPOD", "u212.timestamp"].select_trains(by_id[tids]).ndarray()
 
 
xgm_flux = run['SA3_XTD10_XGM/XGM/DOOCS:output', "data.intensitySa3TD"].select_trains(by_id[tids]).ndarray()[:, 0][:, np.newaxis]
 
xgm_flux_t = run['SA3_XTD10_XGM/XGM/DOOCS:output', "data.intensitySa3TD"].select_trains(by_id[test_tids]).ndarray()[:, 0][:, np.newaxis]
t = list()
t = list()
t_names = list()
t_names = list()
model = Model()
model = Model(poly=args.poly)
train_idx = np.isin(tids, train_tids)
train_idx = np.isin(tids, train_tids) & (xgm_flux[:,0] > args.xgm_cut)
model.debug_peak_finding(pes_raw, os.path.join(args.directory, "test_peak_finding.png"))
model.debug_peak_finding(pes_raw, os.path.join(args.directory, "test_peak_finding.png"))
if len(args.model) == 0:
if len(args.model) == 0:
print("Fitting")
print("Fitting")
start = time_ns()
start = time_ns()
 
w = model.uniformize(xgm_flux[train_idx])
 
print("w", np.amin(w), np.amax(w), np.median(w))
model.fit({k: v[train_idx, :]
model.fit({k: v[train_idx, :]
for k, v in pes_raw.items()},
for k, v in pes_raw.items()},
spec_raw_int[train_idx, :],
spec_raw_int[train_idx, :],
spec_raw_pe[train_idx, :])
spec_raw_pe[train_idx, :],
 
w
 
)
t += [time_ns() - start]
t += [time_ns() - start]
t_names += ["Fit"]
t_names += ["Fit"]
@@ -227,10 +241,10 @@ def main():
@@ -227,10 +241,10 @@ def main():
print("Check consistency")
print("Check consistency")
start = time_ns()
start = time_ns()
rmse = model.check_compatibility(pes_raw_t)
Z = model.check_compatibility(pes_raw_t)
print("Consistency check RMSE ratios:", rmse)
print("Consistency check:", Z)
rmse = model.check_compatibility_per_channel(pes_raw_t)
Z = model.check_compatibility_per_channel(pes_raw_t)
print("Consistency per channel check RMSE ratios:", rmse)
print("Consistency per channel:", Z)
t += [time_ns() - start]
t += [time_ns() - start]
t_names += ["Consistency"]
t_names += ["Consistency"]
@@ -251,9 +265,150 @@ def main():
@@ -251,9 +265,150 @@ def main():
if len(args.model) == 0:
if len(args.model) == 0:
showSpec = True
showSpec = True
spec_smooth = model.preprocess_high_res(spec_raw_int)
spec_smooth = model.preprocess_high_res(spec_raw_int)
 
 
# chi2 w.r.t XGM intensity
 
erange = spec_raw_pe[0,-1] - spec_raw_pe[0,0]
 
de = (spec_raw_pe[0,1] - spec_raw_pe[0,0])
 
chi2 = np.sum((spec_smooth - spec_pred["expected"])**2/(spec_pred["total_unc"]**2), axis=1)
 
ndof = float(spec_smooth.shape[1]) - 1.0
 
fig = plt.figure(figsize=(12, 8))
 
gs = GridSpec(1, 1)
 
ax = fig.add_subplot(gs[0, 0])
 
ax.scatter(chi2/ndof, xgm_flux[:,0], c='r', s=20)
 
ax.set(title=f"", #avg(stat unc) = {unc_stat}, avg(pca unc) = {unc_pca}",
 
xlabel=r"$\chi^2/$ndof",
 
ylabel="XGM intensity [uJ]",
 
xlim=(0, 5),
 
)
 
ax2 = plt.axes([0,0,1,1])
 
# Manually set the position and relative size of the inset axes within ax1
 
ip = InsetPosition(ax, [0.65,0.6,0.35,0.4])
 
ax2.set_axes_locator(ip)
 
ax2.scatter(chi2/ndof, xgm_flux[:,0], c='r', s=30)
 
#ax2.scatter(chi2/ndof, np.sum(spec_pred["expected"], axis=1)*de, c='b', s=30)
 
#ax2.scatter(chi2/ndof, np.sum(spec_raw_int, axis=1)*de, c='g', s=30)
 
ax2.set(title="",
 
xlabel=r"$\chi^2/$ndof",
 
ylabel=f"XGM intensity [uJ]",
 
)
 
ax2.title.set_size(SMALL_SIZE)
 
ax2.xaxis.label.set_size(SMALL_SIZE)
 
ax2.yaxis.label.set_size(SMALL_SIZE)
 
ax2.tick_params(axis='both', which='major', labelsize=SMALL_SIZE)
 
fig.savefig(os.path.join(args.directory, "intensity_vs_chi2.png"))
 
plt.close(fig)
 
 
fig = plt.figure(figsize=(12, 8))
 
gs = GridSpec(1, 1)
 
ax = fig.add_subplot(gs[0, 0])
 
sns.kdeplot(x=chi2/ndof, ax=ax)
 
ax.set(title=f"",
 
xlabel=r"$\chi^2/$ndof",
 
ylabel="Density [a.u.]",
 
xlim=(0, 5),
 
)
 
ax.text(0.90, 0.95, fr"$\mu = ${np.mean(chi2/ndof):.2f}",
 
verticalalignment='top', horizontalalignment='right',
 
transform=ax.transAxes,
 
color='black', fontsize=15)
 
ax.text(0.90, 0.90, fr"$\sigma = ${np.std(chi2/ndof):.2f}",
 
verticalalignment='top', horizontalalignment='right',
 
transform=ax.transAxes,
 
color='black', fontsize=15)
 
fig.savefig(os.path.join(args.directory, "chi2.png"))
 
plt.close(fig)
 
 
fig = plt.figure(figsize=(12, 8))
 
gs = GridSpec(1, 1)
 
ax = fig.add_subplot(gs[0, 0])
 
sns.kdeplot(x=xgm_flux[:,0], ax=ax)
 
ax.set(title=f"",
 
xlabel="XGM intensity [uJ]",
 
ylabel="Density [a.u.]",
 
)
 
ax.text(0.90, 0.95, fr"$\mu = ${np.mean(xgm_flux[:,0]):.2f}",
 
verticalalignment='top', horizontalalignment='right',
 
transform=ax.transAxes,
 
color='black', fontsize=15)
 
ax.text(0.90, 0.90, fr"$\sigma = ${np.std(xgm_flux[:,0]):.2f}",
 
verticalalignment='top', horizontalalignment='right',
 
transform=ax.transAxes,
 
color='black', fontsize=15)
 
fig.savefig(os.path.join(args.directory, "intensity.png"))
 
plt.close(fig)
 
 
# rmse
 
rmse = np.sqrt(np.mean((spec_smooth - spec_pred["expected"])**2, axis=1))
 
fig = plt.figure(figsize=(12, 8))
 
gs = GridSpec(1, 1)
 
ax = fig.add_subplot(gs[0, 0])
 
ax.scatter(rmse, xgm_flux[:,0], c='r', s=30)
 
ax = plt.gca()
 
ax.set(title=f"",
 
xlabel=r"Root-mean-squared error",
 
ylabel="XGM intensity [uJ]",
 
)
 
fig.savefig(os.path.join(args.directory, "intensity_vs_rmse.png"))
 
plt.close(fig)
 
 
fig = plt.figure(figsize=(12, 8))
 
gs = GridSpec(1, 1)
 
ax = fig.add_subplot(gs[0, 0])
 
sns.kdeplot(x=rmse, ax=ax)
 
ax.set(title=f"",
 
xlabel="Root-mean-squared error",
 
ylabel="Density [a.u.]",
 
xlim=(0, 20),
 
)
 
ax.text(0.90, 0.95, fr"$\mu = ${np.mean(rmse):.2f}",
 
verticalalignment='top', horizontalalignment='right',
 
transform=ax.transAxes,
 
color='black', fontsize=15)
 
ax.text(0.90, 0.90, fr"$\sigma = ${np.std(rmse):.2f}",
 
verticalalignment='top', horizontalalignment='right',
 
transform=ax.transAxes,
 
color='black', fontsize=15)
 
fig.savefig(os.path.join(args.directory, "rmse.png"))
 
plt.close(fig)
 
 
# SPEC integral w.r.t XGM intensity
 
fig = plt.figure(figsize=(12, 8))
 
gs = GridSpec(1, 1)
 
ax = fig.add_subplot(gs[0, 0])
 
sns.regplot(x=np.sum(spec_raw_int, axis=1)*de, y=xgm_flux[:,0], color='r', robust=True, ax=ax)
 
ax.set(title=f"",
 
xlabel="SPEC (raw) integral",
 
ylabel="XGM Intensity [uJ]",
 
)
 
fig.savefig(os.path.join(args.directory, "xgm_vs_intensity.png"))
 
plt.close(fig)
 
 
# SPEC integral w.r.t XGM intensity
 
fig = plt.figure(figsize=(12, 8))
 
gs = GridSpec(1, 1)
 
ax = fig.add_subplot(gs[0, 0])
 
sns.regplot(x=np.sum(spec_raw_int, axis=1)*de, y=np.sum(spec_pred["expected"], axis=1)*de, color='r', robust=True, ax=ax)
 
ax.set(title=f"",
 
xlabel="SPEC (raw) integral",
 
ylabel="Predicted integral",
 
)
 
fig.savefig(os.path.join(args.directory, "expected_vs_intensity.png"))
 
plt.close(fig)
 
 
fig = plt.figure(figsize=(12, 8))
 
gs = GridSpec(1, 1)
 
ax = fig.add_subplot(gs[0, 0])
 
sns.regplot(x=np.sum(spec_pred["expected"], axis=1)*de, y=xgm_flux[:,0], color='r', robust=True, ax=ax)
 
ax.set(title=f"",
 
xlabel="Predicted integral",
 
ylabel="XGM intensity [uJ]",
 
)
 
fig.savefig(os.path.join(args.directory, "xgm_vs_expected.png"))
 
plt.close(fig)
 
first, last = model.get_low_resolution_range()
first, last = model.get_low_resolution_range()
first += 10
first += 10
last -= 100
last -= 10
pes_to_show = 'channel_1_D'
pes_to_show = 'channel_1_D'
# plot
# plot
for tid in test_tids:
for tid in test_tids:
@@ -265,9 +420,9 @@ def main():
@@ -265,9 +420,9 @@ def main():
spec_smooth[idx, :] if showSpec else None,
spec_smooth[idx, :] if showSpec else None,
spec_raw_pe[idx, :] if showSpec else None,
spec_raw_pe[idx, :] if showSpec else None,
spec_raw_int[idx, :] if showSpec else None,
spec_raw_int[idx, :] if showSpec else None,
pes=-pes_raw[pes_to_show][idx, first:last],
#pes=-pes_raw[pes_to_show][idx, first:last],
pes_to_show=pes_to_show.replace('_', ' '),
#pes_to_show=pes_to_show.replace('_', ' '),
pes_bin=np.arange(first, last)
#pes_bin=np.arange(first, last)
)
)
for ch in channels:
for ch in channels:
plot_pes(os.path.join(args.directory, f"test_pes_{tid}_{ch}.png"),
plot_pes(os.path.join(args.directory, f"test_pes_{tid}_{ch}.png"),
Loading