From ad8c73de316d38771466ff723eadf75d8ee69f5e Mon Sep 17 00:00:00 2001
From: Danilo Ferreira de Lima <danilo.enoque.ferreira.de.lima@xfel.de>
Date: Mon, 2 Oct 2023 10:47:21 +0200
Subject: [PATCH] Properly showing smoothing.

---
 pes_to_spec/test/offline_analysis.py | 28 ++++++++++++++++------------
 pes_to_spec/test/prepare_plots.py    | 10 +++++++---
 2 files changed, 23 insertions(+), 15 deletions(-)

diff --git a/pes_to_spec/test/offline_analysis.py b/pes_to_spec/test/offline_analysis.py
index 0f772e1..1ea015c 100755
--- a/pes_to_spec/test/offline_analysis.py
+++ b/pes_to_spec/test/offline_analysis.py
@@ -334,37 +334,41 @@ def main():
     showSpec = False
     if len(args.model) == 0:
         showSpec = True
-        spec_smooth = model.preprocess_high_res(spec_raw_int_t)
-
-        mse = np.mean((spec_smooth[:, np.newaxis, :] - spec_pred["expected"])**2, axis=(0, 1))
-        var = np.std(spec_smooth, axis=0)**2
+        # always smooth by 0.2, even if no smoothing was requested, for visualization
+        smoother = HighResolutionSmoother(0.2)
+        spec_smooth = smoother.fit_transform(spec_raw_int_t, energy=spec_raw_pe_t)
+        # use whatever smoothing was used in the model for the calculations
+        spec_data = model.preprocess_high_res(spec_raw_int_t)
+
+        mse = np.mean((spec_data[:, np.newaxis, :] - spec_pred["expected"])**2, axis=(0, 1))
+        var = np.std(spec_data, axis=0)**2
         r2 = 1 - mse/var
         print(f"MSE = {np.mean(mse):.2f}, var = {np.mean(var):.2f}, R^2 = {np.mean(r2):.2f}")
 
         # chi2 w.r.t XGM intensity
-        chi2 = np.sum((spec_smooth[:, np.newaxis, :] - spec_pred["expected"])**2/(spec_pred["total_unc"]**2), axis=(-1, -2))
-        ndof = spec_smooth.shape[1]
+        chi2 = np.sum((spec_data[:, np.newaxis, :] - spec_pred["expected"])**2/(spec_pred["total_unc"]**2), axis=(-1, -2))
+        ndof = spec_data.shape[1]
         print(f"Chi2 after PCA: {np.mean(chi2):.2f}, ndof: {ndof}, chi2/ndof: {np.mean(chi2/ndof):.2f}")
 
-        spec_smooth_pca = model.y_model['pca'].transform(spec_smooth)
+        spec_data_pca = model.y_model['pca'].transform(spec_data)
         unc2 = spec_pred["expected_pca_unc"]**2
         pca_var = (spec_pred["expected_pca"].std(axis=0, keepdims=True)**2).reshape(1, 1, -1)
         print("Expected pca std:", pca_var)
-        chi2_prepca = np.sum((spec_smooth_pca[:, np.newaxis, :] - spec_pred["expected_pca"])**2/unc2, axis=(-1, -2))
+        chi2_prepca = np.sum((spec_data_pca[:, np.newaxis, :] - spec_pred["expected_pca"])**2/unc2, axis=(-1, -2))
 
-        ndof_prepca = float(spec_smooth_pca.shape[-1])
+        ndof_prepca = float(spec_data_pca.shape[-1])
         print(f"Chi2 before PCA: {np.mean(chi2_prepca):.2f}, ndof: {ndof_prepca}, chi2/ndof: {np.mean(chi2_prepca/ndof_prepca):.2f} +/- {np.std(chi2_prepca/ndof_prepca):.2f}")
 
-        res_prepca = np.sum((spec_smooth_pca[:, np.newaxis, :] - spec_pred["expected_pca"])/spec_pred["expected_pca_unc"], axis=1)
+        res_prepca = np.sum((spec_data_pca[:, np.newaxis, :] - spec_pred["expected_pca"])/spec_pred["expected_pca_unc"], axis=1)
 
         # rmse
-        rmse = np.sqrt(np.mean((spec_smooth[:, np.newaxis, :] - spec_pred["expected"])**2, axis=(-1, -2)))
+        rmse = np.sqrt(np.mean((spec_data[:, np.newaxis, :] - spec_pred["expected"])**2, axis=(-1, -2)))
         nopca_unc = np.sqrt(np.mean(spec_pred["nopca_unc"]**2, axis=(-1, -2)))
         total_unc = np.sqrt(np.mean(spec_pred["total_unc"]**2, axis=(-1, -2)))
         median_unc = np.median(spec_pred["total_unc"], axis=(-1, -2))
 
         q = dict(chi2_prepca=chi2_prepca,
-                 ndof=spec_smooth_pca.shape[-1]*np.ones_like(chi2_prepca),
+                 ndof=spec_data_pca.shape[-1]*np.ones_like(chi2_prepca),
                  xgm_flux_t=xgm_flux_t[:,0],
                  rmse=rmse,
                  nopca_unc=nopca_unc,
diff --git a/pes_to_spec/test/prepare_plots.py b/pes_to_spec/test/prepare_plots.py
index 1298950..49f8f31 100755
--- a/pes_to_spec/test/prepare_plots.py
+++ b/pes_to_spec/test/prepare_plots.py
@@ -25,11 +25,14 @@ plt.rc('ytick', labelsize=BIGGER_SIZE)   # fontsize of the tick labels
 plt.rc('legend', fontsize=MEDIUM_SIZE)   # legend fontsize
 plt.rc('figure', titlesize=BIGGER_SIZE)  # fontsize of the figure title
 
-def plot_final(df: pd.DataFrame, filename: str):
+def plot_final(df: pd.DataFrame, smooth: bool, filename: str):
     fig = plt.figure(figsize=(12, 8))
     gs = GridSpec(1, 1)
     ax = fig.add_subplot(gs[0, 0])
-    ax.plot(df.energy, df.spec, c='b', lw=3, label="Grating spectrometer")
+    if smooth:
+        ax.plot(df.energy, df.spec_smooth, c='b', lw=3, label="Grating spectrometer (smoothened)")
+    else:
+        ax.plot(df.energy, df.spec, c='b', lw=3, label="Grating spectrometer")
     ax.plot(df.energy, df.prediction, c='r', ls='--', lw=3, label="Prediction")
     ax.fill_between(df.energy, df.prediction - 1*df.unc, df.prediction + 1*df.unc, facecolor='gold', alpha=0.5, label="68% unc. (total)")
     ax.fill_between(df.energy, df.prediction - 1*df.unc_pca, df.prediction + 1*df.unc_pca, facecolor='magenta', alpha=0.5, label="68% unc. (PCA only)")
@@ -416,7 +419,8 @@ if __name__ == '__main__':
     #        plot_pes(pd.read_csv(f'{indir}/{fname}_pes.csv'), channel, f'{fname}_pes.pdf')
 
     for fname in ('test_q100_1724098413', 'test_q100_1724098596', 'test_q50_1724099445'):
-        plot_final(pd.read_csv(f'{indir}/{fname}.csv'), f'{fname}.pdf')
+        plot_final(pd.read_csv(f'{indir}/{fname}.csv'), smooth=True, filename=f'{fname}_smooth.pdf')
+        plot_final(pd.read_csv(f'{indir}/{fname}.csv'), smooth=False, filename=f'{fname}.pdf')
         plot_pes(pd.read_csv(f'{indir}/{fname}_pes.csv'), channel, f'{fname}_pes.pdf',
                  fast_range=fast_range, Ne1s=Ne1s, label=label, refs=refs,
                  counts_to_mv=counts_to_mv)
-- 
GitLab