Skip to content
Snippets Groups Projects

Improve fit

Merged Danilo Enoque Ferreira de Lima requested to merge parallelization into main
Files
3
@@ -4,9 +4,12 @@ import sys
sys.path.append('.')
sys.path.append('..')
import os
import argparse
import numpy as np
from extra_data import RunDirectory, by_id
from pes_to_spec.model import Model, matching_ids
from extra_data import open_run, by_id, RunDirectory
from pes_to_spec.model import Model, matching_two_ids
from itertools import product
@@ -15,8 +18,7 @@ matplotlib.use('Agg')
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
from mpl_toolkits.axes_grid.inset_locator import (inset_axes, InsetPosition,
mark_inset)
from mpl_toolkits.axes_grid.inset_locator import InsetPosition
from typing import Dict, Optional
@@ -55,7 +57,14 @@ def plot_pes(filename: str, pes_raw_int: np.ndarray, first: int, last: int):
fig.savefig(filename)
plt.close(fig)
def plot_result(filename: str, spec_pred: Dict[str, np.ndarray], spec_smooth: np.ndarray, spec_raw_pe: np.ndarray, spec_raw_int: Optional[np.ndarray]=None, pes: Optional[np.ndarray]=None, pes_to_show: Optional[str]="", pes_bin: Optional[np.ndarray]=None):
def plot_result(filename: str,
spec_pred: Dict[str, np.ndarray],
spec_smooth: np.ndarray,
spec_raw_pe: np.ndarray,
spec_raw_int: Optional[np.ndarray]=None,
pes: Optional[np.ndarray]=None,
pes_to_show: Optional[str]="",
pes_bin: Optional[np.ndarray]=None):
"""
Plot result with uncertainty band.
@@ -116,41 +125,69 @@ def main():
"""
Main entry point. Reads some data, trains and predicts.
"""
run_dir = "/gpfs/exfel/exp/SA3/202121/p002935/raw"
run_id = "r0015"
parser = argparse.ArgumentParser(prog="offline_analysis", description="Test pes2spec doing an offline analysis of the data.")
parser.add_argument('-p', '--proposal', type=int, metavar='INT', help='Proposal number', default=2828)
parser.add_argument('-r', '--run', type=int, metavar='INT', help='Run number', default=206)
parser.add_argument('-m', '--model', type=str, metavar='FILENAME', default="", help='Model to load. If given, do not train a model and just do inference with this one.')
parser.add_argument('-d', '--directory', type=str, metavar='DIRECTORY', default=".", help='Where to save the results.')
parser.add_argument('-S', '--spec', type=str, metavar='NAME', default="SA3_XTD10_SPECT/MDL/SPECTROMETER_SQS_NAVITAR:output", help='SPEC name')
parser.add_argument('-P', '--pes', type=str, metavar='NAME', default="SA3_XTD10_PES/ADC/1:network", help='PES name')
parser.add_argument('-X', '--xgm', type=str, metavar='NAME', default="SA3_XTD10_XGM/XGM/DOOCS:output", help='XGM name')
parser.add_argument('-o', '--offset', type=int, metavar='INT', default=0, help='Train ID offset')
args = parser.parse_args()
print("Opening run ...")
# get run
run = RunDirectory(f"{run_dir}/{run_id}")
run = open_run(proposal=args.proposal, run=args.run)
#run = RunDirectory("/gpfs/exfel/data/scratch/tmichela/data/r0206")
# ----------------Used in the first tests-------------------------
# get train IDs and match them, so we are sure to have information from all needed sources
# in this example, there is an offset of -2 in the SPEC train ID, so correct for it
spec_offset = -2
spec_tid = spec_offset + run['SA3_XTD10_SPECT/MDL/FEL_BEAM_SPECTROMETER_SQS1:output',
"data.trainId"].ndarray()
pes_tid = run['SA3_XTD10_PES/ADC/1:network',
"digitizers.trainId"].ndarray()
xgm_tid = run['SA3_XTD10_XGM/XGM/DOOCS:output',
"data.trainId"].ndarray()
# these are the train ID intersection
# this could have been done by a select call in the RunDirectory, but it would not correct for the spec_offset
tids = matching_ids(spec_tid, pes_tid, xgm_tid)
spec_name = 'SA3_XTD10_SPECT/MDL/FEL_BEAM_SPECTROMETER_SQS1:output'
pes_name = 'SA3_XTD10_PES/ADC/1:network'
spec_offset = 0
spec_name = 'SA3_XTD10_SPECT/MDL/SPECTROMETER_SQS_NAVITAR:output'
pes_name = 'SA3_XTD10_PES/ADC/1:network'
# -------------------End of test setup ----------------------------
spec_offset = args.offset
spec_name = args.spec
pes_name = args.pes
#xgm_name = args.xgm
pes_tid = run[pes_name, "digitizers.trainId"].ndarray()
#xgm_tid = run[xgm_name, "data.trainId"].ndarray()
if len(args.model) == 0:
spec_tid = spec_offset + run[spec_name, "data.trainId"].ndarray()
# these are the train ID intersection
# this could have been done by a select call in the RunDirectory, but it would not correct for the spec_offset
tids = matching_two_ids(spec_tid, pes_tid)
# read the spec photon energy and intensity
spec_raw_pe = run[spec_name, "data.photonEnergy"].select_trains(by_id[tids - spec_offset]).ndarray()
spec_raw_int = run[spec_name, "data.intensityDistribution"].select_trains(by_id[tids - spec_offset]).ndarray()
else: # when doing inference, no need to load SPEC data
tids = pes_tid
# reserve part of it for the test stage
train_tids = tids[:-10]
test_tids = tids[-10:]
# read the spec photon energy and intensity
spec_raw_pe = run['SA3_XTD10_SPECT/MDL/FEL_BEAM_SPECTROMETER_SQS1:output',
"data.photonEnergy"].select_trains(by_id[tids - spec_offset]).ndarray()
spec_raw_int = run['SA3_XTD10_SPECT/MDL/FEL_BEAM_SPECTROMETER_SQS1:output',
"data.intensityDistribution"].select_trains(by_id[tids - spec_offset]).ndarray()
# read the PES data for each channel
channels = [f"channel_{i}_{l}"
for i, l in product(range(1, 5), ["A", "B", "C", "D"])]
pes_raw = {ch: run['SA3_XTD10_PES/ADC/1:network',
f"digitizers.{ch}.raw.samples"].select_trains(by_id[tids]).ndarray()
pes_raw = {ch: run[pes_name, f"digitizers.{ch}.raw.samples"].select_trains(by_id[tids]).ndarray()
for ch in channels}
pes_raw_t = {ch: run['SA3_XTD10_PES/ADC/1:network',
f"digitizers.{ch}.raw.samples"].select_trains(by_id[test_tids]).ndarray()
pes_raw_t = {ch: run[pes_name, f"digitizers.{ch}.raw.samples"].select_trains(by_id[test_tids]).ndarray()
for ch in channels}
print("Data in memory.")
# read the XGM information
#xgm_pressure = run['SA3_XTD10_XGM/XGM/DOOCS', "pressure.pressureFiltered.value"].select_trains(by_id[tids]).ndarray()
@@ -161,36 +198,34 @@ def main():
t = list()
t_names = list()
# these have been manually selected:
#useful_channels = ["channel_1_D",
# "channel_2_B",
# "channel_3_A",
# "channel_3_B",
# "channel_4_C",
# "channel_4_D"]
model = Model()
train_idx = np.isin(tids, train_tids)
model.debug_peak_finding(pes_raw, "test_peak_finding.png")
print("Fitting")
start = time_ns()
model.fit({k: v[train_idx, :]
for k, v in pes_raw.items()},
spec_raw_int[train_idx, :],
spec_raw_pe[train_idx, :])
t += [time_ns() - start]
t_names += ["Fit"]
#model.debug_peak_finding(pes_raw, os.path.join(args.directory, "test_peak_finding.png"))
if len(args.model) == 0:
print("Fitting")
start = time_ns()
model.fit({k: v[train_idx, :]
for k, v in pes_raw.items()},
spec_raw_int[train_idx, :],
spec_raw_pe[train_idx, :])
t += [time_ns() - start]
t_names += ["Fit"]
print("Saving the model")
start = time_ns()
model.save("model.joblib")
t += [time_ns() - start]
t_names += ["Save"]
print("Saving the model")
start = time_ns()
modelFilename = os.path.join(args.directory, "model.joblib")
model.save(modelFilename)
t += [time_ns() - start]
t_names += ["Save"]
else:
print("Model has been given, so I will just load it.")
modelFilename = args.model
print("Loading the model")
start = time_ns()
model = Model.load("model.joblib")
model = Model.load(modelFilename)
t += [time_ns() - start]
t_names += ["Load"]
@@ -199,7 +234,7 @@ def main():
rmse = model.check_compatibility(pes_raw_t)
print("Consistency check RMSE ratios:", rmse)
rmse = model.check_compatibility_per_channel(pes_raw_t)
print("Consistency per channel check RMSE ratios:", rmse)
print("Consistency per channel check (chi2 - icdf(p=0.05))/ndof:", rmse)
t += [time_ns() - start]
t_names += ["Consistency"]
@@ -216,27 +251,32 @@ def main():
print(df_time)
print("Plotting")
spec_smooth = model.preprocess_high_res(spec_raw_int)
showSpec = False
if len(args.model) == 0:
showSpec = True
spec_smooth = model.preprocess_high_res(spec_raw_int)
first, last = model.get_low_resolution_range()
first += 10
last -= 100
last -= 10
pes_to_show = 'channel_1_D'
# plot
for tid in test_tids:
idx = np.where(tid==tids)[0][0]
plot_result(f"test_{tid}.png",
plot_result(os.path.join(args.directory, f"test_{tid}.png"),
{k: item[idx, ...] if k != "pca"
else item[0, ...]
for k, item in spec_pred.items()},
spec_smooth[idx, :],
spec_raw_pe[idx, :],
spec_raw_int[idx, :],
spec_smooth[idx, :] if showSpec else None,
spec_raw_pe[idx, :] if showSpec else None,
spec_raw_int[idx, :] if showSpec else None,
pes=-pes_raw[pes_to_show][idx, first:last],
pes_to_show=pes_to_show.replace('_', ' '),
pes_bin=np.arange(first, last)
)
for ch in channels:
plot_pes(f"test_pes_{tid}_{ch}.png", pes_raw[ch][idx, first:last], first, last)
plot_pes(os.path.join(args.directory, f"test_pes_{tid}_{ch}.png"),
pes_raw[ch][idx, first:last], first, last)
if __name__ == '__main__':
main()
Loading