Danilo Enoque Ferreira de Lima · e84aae6a · 474512b9 · 326fa661 · 27cbdaad · a7eb3d35
--- a/pes_to_spec/test/offline_analysis.py

+ 97

− 57
+++ b/pes_to_spec/test/offline_analysis.py

+ 97

− 57
 @@ -4,9 +4,12 @@ import sys
 sys.path.append('.')
 sys.path.append('..')

+import os
+import argparse
+
 import numpy as np
-from extra_data import RunDirectory, by_id
-from pes_to_spec.model import Model, matching_ids
+from extra_data import open_run, by_id, RunDirectory
+from pes_to_spec.model import Model, matching_two_ids

 from itertools import product

 @@ -15,8 +18,7 @@ matplotlib.use('Agg')

 import matplotlib.pyplot as plt
 from matplotlib.gridspec import GridSpec
-from mpl_toolkits.axes_grid.inset_locator import (inset_axes, InsetPosition,
-                                                  mark_inset)
+from mpl_toolkits.axes_grid.inset_locator import InsetPosition

 from typing import Dict, Optional

 @@ -55,7 +57,14 @@ def plot_pes(filename: str, pes_raw_int: np.ndarray, first: int, last: int):
    fig.savefig(filename)
    plt.close(fig)

-def plot_result(filename: str, spec_pred: Dict[str, np.ndarray], spec_smooth: np.ndarray, spec_raw_pe: np.ndarray, spec_raw_int: Optional[np.ndarray]=None, pes: Optional[np.ndarray]=None, pes_to_show: Optional[str]="", pes_bin: Optional[np.ndarray]=None):
+def plot_result(filename: str,
+                spec_pred: Dict[str, np.ndarray],
+                spec_smooth: np.ndarray,
+                spec_raw_pe: np.ndarray,
+                spec_raw_int: Optional[np.ndarray]=None,
+                pes: Optional[np.ndarray]=None,
+                pes_to_show: Optional[str]="",
+                pes_bin: Optional[np.ndarray]=None):
    """
    Plot result with uncertainty band.

 @@ -116,41 +125,69 @@ def main():
    """
    Main entry point. Reads some data, trains and predicts.
    """
-    run_dir = "/gpfs/exfel/exp/SA3/202121/p002935/raw"
-    run_id = "r0015"
+    parser = argparse.ArgumentParser(prog="offline_analysis", description="Test pes2spec doing an offline analysis of the data.")
+    parser.add_argument('-p', '--proposal', type=int, metavar='INT', help='Proposal number', default=2828)
+    parser.add_argument('-r', '--run', type=int, metavar='INT', help='Run number', default=206)
+    parser.add_argument('-m', '--model', type=str, metavar='FILENAME', default="", help='Model to load. If given, do not train a model and just do inference with this one.')
+    parser.add_argument('-d', '--directory', type=str, metavar='DIRECTORY', default=".", help='Where to save the results.')
+    parser.add_argument('-S', '--spec', type=str, metavar='NAME', default="SA3_XTD10_SPECT/MDL/SPECTROMETER_SQS_NAVITAR:output", help='SPEC name')
+    parser.add_argument('-P', '--pes', type=str, metavar='NAME', default="SA3_XTD10_PES/ADC/1:network", help='PES name')
+    parser.add_argument('-X', '--xgm', type=str, metavar='NAME', default="SA3_XTD10_XGM/XGM/DOOCS:output", help='XGM name')
+    parser.add_argument('-o', '--offset', type=int, metavar='INT', default=0, help='Train ID offset')
+
+    args = parser.parse_args()
+
+    print("Opening run ...")
    # get run
-    run = RunDirectory(f"{run_dir}/{run_id}")
+    run = open_run(proposal=args.proposal, run=args.run)
+    #run = RunDirectory("/gpfs/exfel/data/scratch/tmichela/data/r0206")

+    # ----------------Used in the first tests-------------------------
    # get train IDs and match them, so we are sure to have information from all needed sources
    # in this example, there is an offset of -2 in the SPEC train ID, so correct for it
    spec_offset = -2
-    spec_tid = spec_offset + run['SA3_XTD10_SPECT/MDL/FEL_BEAM_SPECTROMETER_SQS1:output',
-                                 "data.trainId"].ndarray()
-    pes_tid = run['SA3_XTD10_PES/ADC/1:network',
-                  "digitizers.trainId"].ndarray()
-    xgm_tid = run['SA3_XTD10_XGM/XGM/DOOCS:output',
-                  "data.trainId"].ndarray()
-    # these are the train ID intersection
-    # this could have been done by a select call in the RunDirectory, but it would not correct for the spec_offset
-    tids = matching_ids(spec_tid, pes_tid, xgm_tid)
+    spec_name = 'SA3_XTD10_SPECT/MDL/FEL_BEAM_SPECTROMETER_SQS1:output'
+    pes_name = 'SA3_XTD10_PES/ADC/1:network'
+
+    spec_offset = 0
+    spec_name = 'SA3_XTD10_SPECT/MDL/SPECTROMETER_SQS_NAVITAR:output'
+    pes_name = 'SA3_XTD10_PES/ADC/1:network'
+    # -------------------End of test setup ----------------------------
+
+    spec_offset = args.offset
+    spec_name = args.spec
+    pes_name = args.pes
+    #xgm_name = args.xgm
+
+    pes_tid = run[pes_name, "digitizers.trainId"].ndarray()
+    #xgm_tid = run[xgm_name, "data.trainId"].ndarray()
+
+    if len(args.model) == 0:
+        spec_tid = spec_offset + run[spec_name, "data.trainId"].ndarray()
+        # these are the train ID intersection
+        # this could have been done by a select call in the RunDirectory, but it would not correct for the spec_offset
+        tids = matching_two_ids(spec_tid, pes_tid)
+
+        # read the spec photon energy and intensity
+        spec_raw_pe = run[spec_name, "data.photonEnergy"].select_trains(by_id[tids - spec_offset]).ndarray()
+        spec_raw_int = run[spec_name, "data.intensityDistribution"].select_trains(by_id[tids - spec_offset]).ndarray()
+
+    else: # when doing inference, no need to load SPEC data
+        tids = pes_tid
+
+    # reserve part of it for the test stage
    train_tids = tids[:-10]
    test_tids = tids[-10:]

-    # read the spec photon energy and intensity
-    spec_raw_pe = run['SA3_XTD10_SPECT/MDL/FEL_BEAM_SPECTROMETER_SQS1:output',
-                      "data.photonEnergy"].select_trains(by_id[tids - spec_offset]).ndarray()
-    spec_raw_int = run['SA3_XTD10_SPECT/MDL/FEL_BEAM_SPECTROMETER_SQS1:output',
-                       "data.intensityDistribution"].select_trains(by_id[tids - spec_offset]).ndarray()
-
    # read the PES data for each channel
    channels = [f"channel_{i}_{l}"
                for i, l in product(range(1, 5), ["A", "B", "C", "D"])]
-    pes_raw = {ch: run['SA3_XTD10_PES/ADC/1:network',
-                   f"digitizers.{ch}.raw.samples"].select_trains(by_id[tids]).ndarray()
+    pes_raw = {ch: run[pes_name, f"digitizers.{ch}.raw.samples"].select_trains(by_id[tids]).ndarray()
               for ch in channels}
-    pes_raw_t = {ch: run['SA3_XTD10_PES/ADC/1:network',
-                   f"digitizers.{ch}.raw.samples"].select_trains(by_id[test_tids]).ndarray()
+    pes_raw_t = {ch: run[pes_name, f"digitizers.{ch}.raw.samples"].select_trains(by_id[test_tids]).ndarray()
               for ch in channels}
+    
+    print("Data in memory.")

    # read the XGM information
    #xgm_pressure = run['SA3_XTD10_XGM/XGM/DOOCS', "pressure.pressureFiltered.value"].select_trains(by_id[tids]).ndarray()
 @@ -161,36 +198,34 @@ def main():
    t = list()
    t_names = list()

-    # these have been manually selected:
-    #useful_channels = ["channel_1_D",
-    #                  "channel_2_B",
-    #                  "channel_3_A",
-    #                  "channel_3_B",
-    #                  "channel_4_C",
-    #                  "channel_4_D"]
    model = Model()

    train_idx = np.isin(tids, train_tids)

-    model.debug_peak_finding(pes_raw, "test_peak_finding.png")
-    print("Fitting")
-    start = time_ns()
-    model.fit({k: v[train_idx, :]
-               for k, v in pes_raw.items()},
-              spec_raw_int[train_idx, :],
-              spec_raw_pe[train_idx, :])
-    t += [time_ns() - start]
-    t_names += ["Fit"]
+    #model.debug_peak_finding(pes_raw, os.path.join(args.directory, "test_peak_finding.png"))
+    if len(args.model) == 0:
+        print("Fitting")
+        start = time_ns()
+        model.fit({k: v[train_idx, :]
+                   for k, v in pes_raw.items()},
+                   spec_raw_int[train_idx, :],
+                   spec_raw_pe[train_idx, :])
+        t += [time_ns() - start]
+        t_names += ["Fit"]

-    print("Saving the model")
-    start = time_ns()
-    model.save("model.joblib")
-    t += [time_ns() - start]
-    t_names += ["Save"]
+        print("Saving the model")
+        start = time_ns()
+        modelFilename = os.path.join(args.directory, "model.joblib")
+        model.save(modelFilename)
+        t += [time_ns() - start]
+        t_names += ["Save"]
+    else:
+        print("Model has been given, so I will just load it.")
+        modelFilename = args.model

    print("Loading the model")
    start = time_ns()
-    model = Model.load("model.joblib")
+    model = Model.load(modelFilename)
    t += [time_ns() - start]
    t_names += ["Load"]

 @@ -199,7 +234,7 @@ def main():
    rmse = model.check_compatibility(pes_raw_t)
    print("Consistency check RMSE ratios:", rmse)
    rmse = model.check_compatibility_per_channel(pes_raw_t)
-    print("Consistency per channel check RMSE ratios:", rmse)
+    print("Consistency per channel check (chi2 - icdf(p=0.05))/ndof:", rmse)
    t += [time_ns() - start]
    t_names += ["Consistency"]

 @@ -216,27 +251,32 @@ def main():
    print(df_time)

    print("Plotting")
-    spec_smooth = model.preprocess_high_res(spec_raw_int)
+    showSpec = False
+    if len(args.model) == 0:
+        showSpec = True
+        spec_smooth = model.preprocess_high_res(spec_raw_int)
    first, last = model.get_low_resolution_range()
    first += 10
-    last -= 100
+    last -= 10
    pes_to_show = 'channel_1_D'
    # plot
    for tid in test_tids:
        idx = np.where(tid==tids)[0][0]
-        plot_result(f"test_{tid}.png",
+        plot_result(os.path.join(args.directory, f"test_{tid}.png"),
                   {k: item[idx, ...] if k != "pca"
                       else item[0, ...]
                       for k, item in spec_pred.items()},
-                    spec_smooth[idx, :],
-                    spec_raw_pe[idx, :],
-                    spec_raw_int[idx, :],
+                    spec_smooth[idx, :] if showSpec else None,
+                    spec_raw_pe[idx, :] if showSpec else None,
+                    spec_raw_int[idx, :] if showSpec else None,
                    pes=-pes_raw[pes_to_show][idx, first:last],
                    pes_to_show=pes_to_show.replace('_', ' '),
                    pes_bin=np.arange(first, last)
                    )
        for ch in channels:
-            plot_pes(f"test_pes_{tid}_{ch}.png", pes_raw[ch][idx, first:last], first, last)
+            plot_pes(os.path.join(args.directory, f"test_pes_{tid}_{ch}.png"),
+                     pes_raw[ch][idx, first:last], first, last)

 if __name__ == '__main__':
    main()
+