Added parameter to adjust number of peaks for quality check. Prettified the sensitivity check plot.

00c35091 · Danilo Ferreira de Lima · c1523ca0 · 00c35091 · 00c35091 · 00c35091
Commit 00c35091 authored 1 year ago by Danilo Ferreira de Lima
--- a/pes_to_spec/model.py
+++ b/pes_to_spec/model.py
@@ -577,6 +577,7 @@ class Model(TransformerMixin, BaseEstimator):
      validation_size: Fraction (number between 0 and 1) of the data to take for
                       validation and systematic uncertainty estimate.
      model_type: Which model to use. "bnn" for a BNN, "bnn_rvm" for a BNN with RVM, "ridge" for Ridge and "ard" for ARD.
+      n_peaks: Minimum numbr of peaks in the grating spectrometer.

    """
    def __init__(self,
@@ -589,6 +590,7 @@ class Model(TransformerMixin, BaseEstimator):
                 delta_tof: Optional[int]=300,
                 validation_size: float=0.05,
                 model_type: Literal["bnn", "bnn_rvm", "ridge", "ard"]="ard",
+                 n_peaks: int=0,
                ):
        self.high_res_sigma = high_res_sigma
        # models
@@ -629,6 +631,9 @@ class Model(TransformerMixin, BaseEstimator):
        # size of the test subset
        self.validation_size = validation_size

+        # minimum number of peaks
+        self.n_peaks = n_peaks
+
    def n_pars(self) -> float:
        """Get number of parameters."""
        if self.model_type in ("bnn", "bnn_rvm"):
@@ -739,7 +744,7 @@ class Model(TransformerMixin, BaseEstimator):
        """
        print("Checking data quality in high-resolution data.")
        peaks = self.count_peaks(high_res_data, high_res_photon_energy)
-        filter_hr = (peaks > 3)
+        filter_hr = (peaks >= self.n_peaks)

        print("Fitting PCA on low-resolution data.")
        self.x_select.fit(low_res_data)

--- a/pes_to_spec/test/channel_sensitivity.py
+++ b/pes_to_spec/test/channel_sensitivity.py
@@ -235,8 +235,10 @@ def main():
    nch_axis = np.arange(1, len(channels)+1)
    resolution = list()
    rmse = list()
+    delta_rmse = list()
    chi2_prepca = list()
    unc = list()
+    delta_unc = list()
    for nch in nch_axis:
        model = Model(channels=channels[:nch], model_type=args.model_type)

@@ -273,13 +275,17 @@ def main():
        print("Expected pca std:", pca_var)
        chi2_prepca += [np.mean(np.sum((spec_smooth_pca[:, np.newaxis, :] - spec_pred["expected_pca"])**2/unc2, axis=(-1, -2)))/ndof_prepca]
        rmse += [np.mean(np.sqrt(np.mean((spec_smooth[:, np.newaxis, :] - spec_pred["expected"])**2, axis=(-1, -2))))]
-        unc += [np.mean(spec_pred["total_unc"])]
+        delta_rmse += [np.std(np.sqrt(np.mean((spec_smooth[:, np.newaxis, :] - spec_pred["expected"])**2, axis=(-1, -2))))]
+        unc += [np.mean(np.mean(spec_pred["total_unc"], axis=(-1, -2)))]
+        delta_unc += [np.std(np.mean(spec_pred["total_unc"], axis=(-1, -2)))]

    df = pd.DataFrame(dict(number_channels=nch_axis,
                           resolution=resolution,
                           rmse=rmse,
+                           delta_rmse=delta_rmse,
                           chi2_prepca=chi2_prepca,
                           unc=unc,
+                           delta_unc=delta_unc,
                           ))
    df.to_csv(os.path.join(args.directory, "number_channel_effect.csv"))


--- a/pes_to_spec/test/plot_channel_sensitivity.py
+++ b/pes_to_spec/test/plot_channel_sensitivity.py
@@ -44,23 +44,26 @@ def plot_unc(df: pd.DataFrame, filename: str):
    fig = plt.figure(figsize=(12, 8))
    gs = GridSpec(1, 1)
    ax = fig.add_subplot(gs[0, 0])
-    ax.plot(df.number_channels, 2*df.unc, c='tab:blue', lw=3, alpha=0.5)
+    #ax.plot(df.number_channels, 2*df.unc, c='tab:blue', lw=3, alpha=0.7, label="Avg. 95% CL uncertainty band")
+    #ax.fill_between(df.number_channels, 2*df.unc - 2*df.delta_unc, 2*df.unc + 2*df.delta_unc, color='tab:blue', alpha=0.2)
+    ax.errorbar(df.number_channels, 2*df.unc, yerr=2*df.delta_unc, color='tab:blue', alpha=0.5, marker='o', markersize=20, lw=3, linestyle='none', label="95% CL uncertainty band")
    ax.spines['top'].set_visible(False)
+    ax.spines['right'].set_visible(False)
    ax.set(
           xlabel="Number of channels",
-           ylabel="Average uncertainty [a.u.]",
+           ylabel="Grating spectrometer intensity [a.u.]",
           )
-    #maxY = 1.1*df.unc.max()
-    #ax.set_ylim((None, maxY))
-    rax = ax.twinx()
-    rax.plot(df.number_channels, df.rmse, c='tab:red', lw=3, alpha=0.5)
-    rax.spines['right'].set_color('tab:red')
-    rax.spines['top'].set_visible(False)
-    rax.tick_params(axis='y', colors='tab:red')
-    rax.set_ylabel("Root-mean-squared error [a.u.]")
-    rax.yaxis.label.set_color("tab:red")
-    #maxY = 1.1*df.rmse.max()
-    #rax.set_ylim((None, maxY))
+    #rax = ax.twinx()
+    rax = ax
+    #rax.plot(df.number_channels, df.rmse, c='tab:red', lw=3, alpha=0.7, label="Avg. root-mean-squared error")
+    #rax.fill_between(df.number_channels, df.rmse - df.delta_rmse, df.rmse + df.delta_rmse, color='tab:red', alpha=0.2)
+    rax.errorbar(df.number_channels, df.rmse, yerr=df.delta_rmse, color='tab:red', alpha=0.5, marker='^', markersize=20, lw=3, linestyle='none', label="Root-mean-squared error")
+    #rax.spines['right'].set_color('tab:red')
+    #rax.spines['top'].set_visible(False)
+    #rax.tick_params(axis='y', colors='tab:red')
+    #rax.set_ylabel("Root-mean-squared error [a.u.]")
+    #rax.yaxis.label.set_color("tab:red")
+    ax.legend(frameon=False)
    plt.tight_layout()
    fig.savefig(filename)
    plt.close(fig)

--- a/run_tests.sh
+++ b/run_tests.sh
+#!/bin/bash
+#SBATCH --partition=exfel
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=1
+#SBATCH --time=8:00:00
+#SBATCH --job-name=pes2spec
+#SBATCH -o slurm.%x.err.txt
+#SBATCH -e slurm.%x.err.txt
+#SBATCH --reservation=exfel_ml
+
+optstring="d:"
+
+DIR=results
+while getopts ${optstring} arg; do
+    case ${arg} in
+        d)
+            DIR=${OPTARG}
+            S=$OPTIND
+            ;;
+        *)
+            S=$((OPTIND))
+            break
+            ;;
+    esac
+done
+OPTS="${@:$S}"
+echo "Options: $OPTS"
+
+source /usr/share/Modules/init/sh
+
+module load exfel exfel_anaconda3
+
+cd $HOME/scratch/karabo/devices/pes_to_spec
+source env/bin/activate
+pwd
+export PYTHONPATH=$PYTHONPATH:$PWD
+
+mkdir $DIR
+
+do_it() {
+    p=$1
+    r=$2
+    rt=$3
+    output=$DIR/p${p}r${r}t${rt}
+    mkdir -p $output
+    echo "Proposal $p, run $r, test at run $rt"
+    CMD=(./pes_to_spec/test/offline_analysis.py -p $p -r $r -t $rt -d $output ${@:4})
+    echo "${CMD[*]}"
+    ${CMD[*]} 2>&1 | tee $output/log.txt
+}
+
+do_it 900331  69 70 $OPTS
+
+# train in run 2 and test in run 3
+#do_it 3384  2  3 $OPTS
+
+# new runs:
+#for run in 2 4
+#do
+#    do_it 3384 $run $run $OPTS
+#done
+
+# train in run 4 and test in run 3
+#do_it 3384  4  3 $OPTS
+
+# old run
+#do_it 2828 206 206 $OPTS
+#do_it 2828 206 207 $OPTS
+
+#do_it 2828 207 207 $OPTS
+