Removed plotting from offline analysis and moved all plots to prepare_plots.

01ebe6fb · Danilo Ferreira de Lima · 4be78de5 · 01ebe6fb · 01ebe6fb
Commit 01ebe6fb authored 1 year ago by Danilo Ferreira de Lima
--- a/pes_to_spec/test/offline_analysis.py
+++ b/pes_to_spec/test/offline_analysis.py
--- a/pes_to_spec/test/prepare_plots.py
+++ b/pes_to_spec/test/prepare_plots.py
 #!/usr/bin/env python

+import os
+import re
+
+import matplotlib
+matplotlib.use('Agg')
 import pandas as pd
 import numpy as np
 import matplotlib.pyplot as plt
 from matplotlib.gridspec import GridSpec
 import seaborn as sns
+from scipy.interpolate import make_interp_spline, BSpline

 SMALL_SIZE = 12
 MEDIUM_SIZE = 18
-BIGGER_SIZE = 22
+BIGGER_SIZE = 24

 plt.rc('font', size=BIGGER_SIZE)         # controls default text sizes
 plt.rc('axes', titlesize=BIGGER_SIZE)    # fontsize of the axes title
@@ -24,8 +30,8 @@ def plot_final(df: pd.DataFrame, filename: str):
    ax = fig.add_subplot(gs[0, 0])
    ax.plot(df.energy, df.spec, c='b', lw=3, label="Grating spectrometer")
    ax.plot(df.energy, df.prediction, c='r', ls='--', lw=3, label="Prediction")
-    ax.fill_between(df.energy, df.prediction - df.unc, df.prediction + df.unc, facecolor='gold', alpha=0.5, label="68% unc. (total)")
-    ax.fill_between(df.energy, df.prediction - df.unc_pca, df.prediction + df.unc_pca, facecolor='magenta', alpha=0.5, label="68% unc. (PCA only)")
+    ax.fill_between(df.energy, df.prediction - 2*df.unc, df.prediction + 2*df.unc, facecolor='gold', alpha=0.5, label="95% unc. (total)")
+    ax.fill_between(df.energy, df.prediction - 2*df.unc_pca, df.prediction + 2*df.unc_pca, facecolor='magenta', alpha=0.5, label="95% unc. (PCA only)")
    Y = np.amax(df.spec)
    ax.legend(frameon=False, borderaxespad=0, loc='upper left')
    ax.set_title(f"Beam intensity: {df.beam_intensity.iloc[0]:.1f} mJ", loc="left")
@@ -35,6 +41,7 @@ def plot_final(df: pd.DataFrame, filename: str):
           xlabel="Photon energy [eV]",
           ylabel="Intensity [a.u.]",
           ylim=(0, 1.3*Y))
+    plt.tight_layout()
    fig.savefig(filename)
    plt.close(fig)

@@ -76,22 +83,52 @@ def plot_rmse_intensity(df: pd.DataFrame, filename: str):
    fig.savefig(filename)
    plt.close(fig)

+def plot_residue(df: pd.DataFrame, filename: str):
+    """
+    Plot kernel density estimates of the pre-PCA residue columns.
+
+    Every column whose name contains "res_prepca" is selected. The columns
+    are drawn in groups of ten, one panel per complete group; columns left
+    over after the last complete group are not drawn.
+
+    Args:
+      df: Table containing the "res_prepca" columns.
+      filename: Output file name.
+
+    """
+    group_size = 10
+    res_cols = [name for name in df.columns if "res_prepca" in name]
+    residues = df.loc[:, res_cols]
+    n_panels = len(residues.columns)//group_size
+    fig = plt.figure(figsize=(8*n_panels, 8))
+    grid = GridSpec(1, n_panels)
+    for panel in range(n_panels):
+        ax = fig.add_subplot(grid[0, panel])
+        first_dim = panel*group_size
+        # one labelled curve per residue dimension in this group
+        curves = {}
+        for dim in range(first_dim, first_dim + group_size):
+            curves[f"Dim. {dim+1}"] = residues.loc[:, res_cols[dim]]
+        sns.kdeplot(data=curves, linewidth=3, ax=ax)
+        ax.set(title="",
+               xlabel=r"residue/uncertainty [a.u.]",
+               ylabel="Counts [a.u.]",
+               xlim=(-3, 3),
+               )
+        ax.legend(frameon=False)
+    fig.savefig(filename)
+    plt.close(fig)
+
 def plot_chi2_intensity(df: pd.DataFrame, filename: str):
+    """
+    Plot the beam intensity against the pre-PCA chi^2 per degree of freedom.
+
+    A filled two-dimensional kernel density estimate is drawn and the
+    individual points are overlaid as a translucent scatter plot.
+
+    Args:
+      df: Table with the chi2_prepca, ndof and xgm_flux_t columns.
+      filename: Output file name.
+
+    """
    fig = plt.figure(figsize=(12, 8))
    gs = GridSpec(1, 1)
    ax = fig.add_subplot(gs[0, 0])
-    ax.scatter(df.chi2_prepca/df.ndof.iloc[0], df.xgm_flux_t, c='r', s=30)
+    # chi2 is normalised by the ndof of the first row only; xgm_flux_t is
+    # scaled by 1e-3 to match the mJ axis label (presumably stored in uJ).
+    sns.kdeplot(x=df.chi2_prepca/df.ndof.iloc[0], y=df.xgm_flux_t*1e-3,
+                fill=True,
+                ax=ax)
+    sns.scatterplot(x=df.chi2_prepca/df.ndof.iloc[0], y=df.xgm_flux_t*1e-3,
+                    s=200,
+                    alpha=0.1,
+                    #size=df.root_mean_squared_pca_unc,
+                    #sizes=(20, 200),
+                    ax=ax)
+    # NOTE(review): plt.gca() presumably returns the same axes as `ax` above.
    ax = plt.gca()
    ax.set(title=f"",
           xlabel=r"$\chi^2/$ndof",
-           ylabel="Beam intensity [uJ]",
+           ylabel="Beam intensity [mJ]",
           xlim=(0, 5),
-           ylim=(0, df.xgm_flux_t.mean() + 3*df.xgm_flux_t.std())
+           ylim=(0, df.xgm_flux_t.mean()*1e-3 + 3*df.xgm_flux_t.std()*1e-3)
           )
+    ax.spines['top'].set_visible(False)
+    ax.spines['right'].set_visible(False)
+    plt.tight_layout()
    fig.savefig(filename)
    plt.close(fig)

-def pca_variance_plot(df: pd.DataFrame, filename: str):
+def pca_variance_plot(df: pd.DataFrame, filename: str, max_comp_frac: float=0.99):
    """
    Plot variance contribution.

@@ -100,25 +137,88 @@ def pca_variance_plot(df: pd.DataFrame, filename: str):
      variance_ratio: Contribution of each component's variance.

    """
-    fig = plt.figure(figsize=(8, 8))
+    fig = plt.figure(figsize=(12, 8))
    gs = GridSpec(1, 1)
    ax = fig.add_subplot(gs[0, 0])
    c = np.cumsum(df.variance_ratio)
-    n_comp = df.n_comp.iloc[0]
+    n_comp = int(df.n_comp.iloc[0])
    ax.bar(1+np.arange(len(df.variance_ratio)), df.variance_ratio*100, color='tab:red', alpha=0.3, label="Per component")
    ax.plot(1+np.arange(len(df.variance_ratio)), c*100, c='tab:blue', lw=5, label="Cumulative")
    ax.plot([n_comp, n_comp], [0, c[n_comp]*100], lw=3, ls='--', c='m', label="Components kept")
    ax.plot([0, n_comp], [c[n_comp]*100, c[n_comp]*100], lw=3, ls='--', c='m')
    ax.legend(frameon=False)
    print(f"PCA plot: total n. components: {len(df.variance_ratio)}")
-    x_max = np.where(c > 0.99)[0][0]
+    x_max = np.where(c > max_comp_frac)[0][0]
    print(f"Fraction of variance: {c[n_comp]}")
    ax.set_yscale('log')
    ax.set(title=f"",
           xlabel="Component",
           ylabel="Variance [%]",
           xlim=(1, x_max),
-           ylim=(0.1, 100))
+           ylim=(0.01, 100))
+    ax.spines['top'].set_visible(False)
+    ax.spines['right'].set_visible(False)
+    plt.tight_layout()
+    fig.savefig(filename)
+    plt.close(fig)
+
+def moving_average(a, n=3):
+    """
+    Compute the moving average of a over windows of n consecutive samples.
+
+    Only windows fully contained in the input are evaluated, so the result
+    has len(a) - n + 1 elements, or none if the input is shorter than n.
+
+    Args:
+      a: Input sequence of values.
+      n: Number of samples in each averaging window.
+
+    Returns: The mean of each window of n consecutive samples.
+    """
+    running_sum = np.cumsum(a)
+    # the running sum minus itself shifted by n is the sum of the last n samples
+    window_sum = running_sum[n:] - running_sum[:-n]
+    running_sum[n:] = window_sum
+    return running_sum[n - 1:] / n
+
+def plot_impulse(df: pd.DataFrame, filename: str):
+    """
+    Plot the smoothed magnitude of the impulse response.
+
+    The energy axis and the absolute value of the impulse response are both
+    smoothed with a 10-sample moving average. Only points whose smoothed
+    energy lies within [-10, 10] are drawn, on a logarithmic y axis.
+
+    Args:
+      df: Table with the wiener_energy and impulse columns.
+      filename: Output file name.
+
+    """
+    window = 10
+    # NOTE: a cubic spline fit in log10 space (make_interp_spline) was tried
+    # here before; the moving average below is used instead.
+    energy = moving_average(df.wiener_energy.to_numpy(), n=window)
+    response = moving_average(np.absolute(df.impulse.to_numpy()), n=window)
+    in_range = (energy >= -10) & (energy <= 10)
+
+    fig = plt.figure(figsize=(12, 8))
+    ax = fig.add_subplot(GridSpec(1, 1)[0, 0])
+    ax.plot(energy[in_range], response[in_range], c='tab:blue', lw=4)
+    ax.set_yscale('log')
+    ax.set(title="",
+           xlabel="Energy [eV]",
+           ylim=(1e-4, 0.5),
+           ylabel="Response [a.u.]",
+           )
+    for side in ('top', 'right'):
+        ax.spines[side].set_visible(False)
+    plt.tight_layout()
+    fig.savefig(filename)
+    plt.close(fig)
+
+def plot_wiener(df: pd.DataFrame, filename: str):
+    """
+    Plot variance contribution.
+
+    Args:
+      filename: Output file name.
+      variance_ratio: Contribution of each component's variance.
+
+    """
+    fig = plt.figure(figsize=(12, 8))
+    gs = GridSpec(1, 1)
+    ax = fig.add_subplot(gs[0, 0])
+    ax.plot(df.wiener_energy, np.absolute(df.wiener_filter), c='tab:blue', lw=3)
+    ax.set_yscale('log')
+    ax.set(title=f"",
+           xlabel="Energy [eV]",
+           ylim=(1e-3, 1),
+           ylabel="Response [a.u.]",
+           )
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    plt.tight_layout()
@@ -134,36 +234,64 @@ def plot_pes(df: pd.DataFrame, channel:str, filename: str):
      pes_raw_int: Low-resolution spectrum.

    """
-    fig = plt.figure(figsize=(16, 8))
+    fig = plt.figure(figsize=(12, 8))
    gs = GridSpec(1, 1)
    ax = fig.add_subplot(gs[0, 0])
    first, last = df.loc[:, 'first'].iloc[0], df.loc[:, 'last'].iloc[0]
-    ax.plot(df.loc[(df.bin >= first) & (df.bin < last), "bin"], df.loc[(df.bin >= first) & (df.bin < last), channel], c='b', lw=3)
-    #ax.legend()
+    first = first+220
+    last = last-270
+    print("Range:", first, last)
+    sel = (df.bin >= first) & (df.bin < last)
+    x = df.loc[sel, "bin"]
+    if channel == "sum":
+        y = df.loc[sel, [k for k in df.columns if "channel_" in k]].sum(axis=1)
+        ax.plot(x, y, c='b', lw=5)
+    elif isinstance(channel, list):
+        for ch in channel:
+            sch = ch.replace('_', ' ')
+            y = df.loc[sel, ch]
+            ax.plot(x, y, lw=5, label=sch)
+    else:
+        y = df.loc[sel, channel]
+        ax.plot(x, y, c='b', lw=5)
+    ax.legend(frameon=False)
    ax.set(title=f"",
           xlabel="Time-of-flight index",
           ylabel="Counts [a.u.]")
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
+    plt.tight_layout()
    fig.savefig(filename)
    plt.close(fig)

 if __name__ == '__main__':
    indir = 'p900331r69t70'
-    channel = 'channel_4_A'
-    fname = 'test_q100_1724098413'
-    plot_final(pd.read_csv(f'{indir}/{fname}.csv'), f'{fname}.pdf')
-    plot_pes(pd.read_csv(f'{indir}/{fname}_pes.csv'), channel, f'{fname}_{channel}.pdf')
+    channel = ['channel_1_A', 'channel_4_A', 'channel_3_B']
+    #channel = 'sum'
+    #for fname in os.listdir(indir):
+    #    if re.match(r'test_q100_[0-9]*\.csv', fname):
+    #        fname = fname[:-4]
+    #        print(f"Plotting {fname}")
+    #        plot_final(pd.read_csv(f'{indir}/{fname}.csv'), f'{fname}.pdf')
+    #        plot_pes(pd.read_csv(f'{indir}/{fname}_pes.csv'), channel, f'{fname}_pes.pdf')

-    fname = 'test_q100_1724098596'
-    plot_final(pd.read_csv(f'{indir}/{fname}.csv'), f'{fname}.pdf')
-    plot_pes(pd.read_csv(f'{indir}/{fname}_pes.csv'), channel, f'{fname}_{channel}.pdf')
+    for fname in ('test_q100_1724098413', 'test_q100_1724098596', 'test_q50_1724099445'):
+        plot_final(pd.read_csv(f'{indir}/{fname}.csv'), f'{fname}.pdf')
+        plot_pes(pd.read_csv(f'{indir}/{fname}_pes.csv'), channel, f'{fname}_pes.pdf')

    plot_chi2(pd.read_csv(f'{indir}/quality.csv'), f'chi2_prepca.pdf')
    plot_chi2_intensity(pd.read_csv(f'{indir}/quality.csv'), f'intensity_vs_chi2_prepca.pdf')
    plot_rmse(pd.read_csv(f'{indir}/quality.csv'), f'rmse.pdf')
    plot_rmse_intensity(pd.read_csv(f'{indir}/quality.csv'), f'intensity_vs_rmse.pdf')

-    pca_variance_plot(pd.read_csv(f'{indir}/pca_spec.csv'), f'pca_spec.pdf')
-    pca_variance_plot(pd.read_csv(f'{indir}/pca_pes.csv'), f'pca_pes.pdf')
+    plot_residue(pd.read_csv(f'{indir}/quality.csv'), f'residue.pdf')
+
+    df_model = pd.read_csv(f'{indir}/model.csv')
+    # The complex columns are read as strings; 'i' is swapped for 'j' so that
+    # Python's built-in complex() can parse them (presumably the file uses an
+    # 'i' imaginary unit). The builtin is used because np.complex was only a
+    # deprecated alias of it and was removed in NumPy 1.24.
+    df_model.impulse = df_model.impulse.str.replace('i','j').apply(complex)
+    df_model.wiener_filter = df_model.wiener_filter.str.replace('i','j').apply(complex)
+    plot_impulse(df_model, f'impulse.pdf')
+    plot_wiener(df_model, f'wiener.pdf')
+
+    pca_variance_plot(pd.read_csv(f'{indir}/pca_spec.csv'), f'pca_spec.pdf', max_comp_frac=0.99)
+    pca_variance_plot(pd.read_csv(f'{indir}/pca_pes.csv'), f'pca_pes.pdf', max_comp_frac=0.95)