From 00c35091e0dfb4bf3d697f08c33ea35120ab3d01 Mon Sep 17 00:00:00 2001
From: Danilo Ferreira de Lima <danilo.enoque.ferreira.de.lima@xfel.de>
Date: Tue, 12 Sep 2023 09:21:39 +0200
Subject: [PATCH] Added parameter to adjust number of peaks for quality check.
 Prettified the sensitivity check plot.

---
 pes_to_spec/model.py                         |  7 +-
 pes_to_spec/test/channel_sensitivity.py      |  8 ++-
 pes_to_spec/test/plot_channel_sensitivity.py | 29 ++++----
 run_tests.sh                                 | 71 ++++++++++++++++++++
 4 files changed, 100 insertions(+), 15 deletions(-)
 create mode 100644 run_tests.sh

diff --git a/pes_to_spec/model.py b/pes_to_spec/model.py
index e198b82..1af3a3b 100644
--- a/pes_to_spec/model.py
+++ b/pes_to_spec/model.py
@@ -577,6 +577,7 @@ class Model(TransformerMixin, BaseEstimator):
       validation_size: Fraction (number between 0 and 1) of the data to take for
                        validation and systematic uncertainty estimate.
       model_type: Which model to use. "bnn" for a BNN, "bnn_rvm" for a BNN with RVM, "ridge" for Ridge and "ard" for ARD.
+      n_peaks: Minimum numbr of peaks in the grating spectrometer.
 
     """
     def __init__(self,
@@ -589,6 +590,7 @@ class Model(TransformerMixin, BaseEstimator):
                  delta_tof: Optional[int]=300,
                  validation_size: float=0.05,
                  model_type: Literal["bnn", "bnn_rvm", "ridge", "ard"]="ard",
+                 n_peaks: int=0,
                 ):
         self.high_res_sigma = high_res_sigma
         # models
@@ -629,6 +631,9 @@ class Model(TransformerMixin, BaseEstimator):
         # size of the test subset
         self.validation_size = validation_size
 
+        # minimum number of peaks
+        self.n_peaks = n_peaks
+
     def n_pars(self) -> float:
         """Get number of parameters."""
         if self.model_type in ("bnn", "bnn_rvm"):
@@ -739,7 +744,7 @@ class Model(TransformerMixin, BaseEstimator):
         """
         print("Checking data quality in high-resolution data.")
         peaks = self.count_peaks(high_res_data, high_res_photon_energy)
-        filter_hr = (peaks > 3)
+        filter_hr = (peaks >= self.n_peaks)
 
         print("Fitting PCA on low-resolution data.")
         self.x_select.fit(low_res_data)
diff --git a/pes_to_spec/test/channel_sensitivity.py b/pes_to_spec/test/channel_sensitivity.py
index 18cba12..2ff4372 100755
--- a/pes_to_spec/test/channel_sensitivity.py
+++ b/pes_to_spec/test/channel_sensitivity.py
@@ -235,8 +235,10 @@ def main():
     nch_axis = np.arange(1, len(channels)+1)
     resolution = list()
     rmse = list()
+    delta_rmse = list()
     chi2_prepca = list()
     unc = list()
+    delta_unc = list()
     for nch in nch_axis:
         model = Model(channels=channels[:nch], model_type=args.model_type)
 
@@ -273,13 +275,17 @@ def main():
         print("Expected pca std:", pca_var)
         chi2_prepca += [np.mean(np.sum((spec_smooth_pca[:, np.newaxis, :] - spec_pred["expected_pca"])**2/unc2, axis=(-1, -2)))/ndof_prepca]
         rmse += [np.mean(np.sqrt(np.mean((spec_smooth[:, np.newaxis, :] - spec_pred["expected"])**2, axis=(-1, -2))))]
-        unc += [np.mean(spec_pred["total_unc"])]
+        delta_rmse += [np.std(np.sqrt(np.mean((spec_smooth[:, np.newaxis, :] - spec_pred["expected"])**2, axis=(-1, -2))))]
+        unc += [np.mean(np.mean(spec_pred["total_unc"], axis=(-1, -2)))]
+        delta_unc += [np.std(np.mean(spec_pred["total_unc"], axis=(-1, -2)))]
 
     df = pd.DataFrame(dict(number_channels=nch_axis,
                            resolution=resolution,
                            rmse=rmse,
+                           delta_rmse=delta_rmse,
                            chi2_prepca=chi2_prepca,
                            unc=unc,
+                           delta_unc=delta_unc,
                            ))
     df.to_csv(os.path.join(args.directory, "number_channel_effect.csv"))
 
diff --git a/pes_to_spec/test/plot_channel_sensitivity.py b/pes_to_spec/test/plot_channel_sensitivity.py
index d2a1282..813b4ec 100755
--- a/pes_to_spec/test/plot_channel_sensitivity.py
+++ b/pes_to_spec/test/plot_channel_sensitivity.py
@@ -44,23 +44,26 @@ def plot_unc(df: pd.DataFrame, filename: str):
     fig = plt.figure(figsize=(12, 8))
     gs = GridSpec(1, 1)
     ax = fig.add_subplot(gs[0, 0])
-    ax.plot(df.number_channels, 2*df.unc, c='tab:blue', lw=3, alpha=0.5)
+    #ax.plot(df.number_channels, 2*df.unc, c='tab:blue', lw=3, alpha=0.7, label="Avg. 95% CL uncertainty band")
+    #ax.fill_between(df.number_channels, 2*df.unc - 2*df.delta_unc, 2*df.unc + 2*df.delta_unc, color='tab:blue', alpha=0.2)
+    ax.errorbar(df.number_channels, 2*df.unc, yerr=2*df.delta_unc, color='tab:blue', alpha=0.5, marker='o', markersize=20, lw=3, linestyle='none', label="95% CL uncertainty band")
     ax.spines['top'].set_visible(False)
+    ax.spines['right'].set_visible(False)
     ax.set(
            xlabel="Number of channels",
-           ylabel="Average uncertainty [a.u.]",
+           ylabel="Grating spectrometer intensity [a.u.]",
            )
-    #maxY = 1.1*df.unc.max()
-    #ax.set_ylim((None, maxY))
-    rax = ax.twinx()
-    rax.plot(df.number_channels, df.rmse, c='tab:red', lw=3, alpha=0.5)
-    rax.spines['right'].set_color('tab:red')
-    rax.spines['top'].set_visible(False)
-    rax.tick_params(axis='y', colors='tab:red')
-    rax.set_ylabel("Root-mean-squared error [a.u.]")
-    rax.yaxis.label.set_color("tab:red")
-    #maxY = 1.1*df.rmse.max()
-    #rax.set_ylim((None, maxY))
+    #rax = ax.twinx()
+    rax = ax
+    #rax.plot(df.number_channels, df.rmse, c='tab:red', lw=3, alpha=0.7, label="Avg. root-mean-squared error")
+    #rax.fill_between(df.number_channels, df.rmse - df.delta_rmse, df.rmse + df.delta_rmse, color='tab:red', alpha=0.2)
+    rax.errorbar(df.number_channels, df.rmse, yerr=df.delta_rmse, color='tab:red', alpha=0.5, marker='^', markersize=20, lw=3, linestyle='none', label="Root-mean-squared error")
+    #rax.spines['right'].set_color('tab:red')
+    #rax.spines['top'].set_visible(False)
+    #rax.tick_params(axis='y', colors='tab:red')
+    #rax.set_ylabel("Root-mean-squared error [a.u.]")
+    #rax.yaxis.label.set_color("tab:red")
+    ax.legend(frameon=False)
     plt.tight_layout()
     fig.savefig(filename)
     plt.close(fig)
diff --git a/run_tests.sh b/run_tests.sh
new file mode 100644
index 0000000..41fb026
--- /dev/null
+++ b/run_tests.sh
@@ -0,0 +1,71 @@
+#!/bin/bash
+#SBATCH --partition=exfel
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=1
+#SBATCH --time=8:00:00
+#SBATCH --job-name=pes2spec
+#SBATCH -o slurm.%x.err.txt
+#SBATCH -e slurm.%x.err.txt
+#SBATCH --reservation=exfel_ml
+
+optstring="d:"
+
+DIR=results
+while getopts ${optstring} arg; do
+    case ${arg} in
+        d)
+            DIR=${OPTARG}
+            S=$OPTIND
+            ;;
+        *)
+            S=$((OPTIND))
+            break
+            ;;
+    esac
+done
+OPTS="${@:$S}"
+echo "Options: $OPTS"
+
+source /usr/share/Modules/init/sh
+
+module load exfel exfel_anaconda3
+
+cd $HOME/scratch/karabo/devices/pes_to_spec
+source env/bin/activate
+pwd
+export PYTHONPATH=$PYTHONPATH:$PWD
+
+mkdir $DIR
+
+do_it() {
+    p=$1
+    r=$2
+    rt=$3
+    output=$DIR/p${p}r${r}t${rt}
+    mkdir -p $output
+    echo "Proposal $p, run $r, test at run $rt"
+    CMD=(./pes_to_spec/test/offline_analysis.py -p $p -r $r -t $rt -d $output ${@:4})
+    echo "${CMD[*]}"
+    ${CMD[*]} 2>&1 | tee $output/log.txt
+}
+
+do_it 900331  69 70 $OPTS
+
+# train in run 2 and test in run 3
+#do_it 3384  2  3 $OPTS
+
+# new runs:
+#for run in 2 4
+#do
+#    do_it 3384 $run $run $OPTS
+#done
+
+# train in run 4 and test in run 3
+#do_it 3384  4  3 $OPTS
+
+# old run
+#do_it 2828 206 206 $OPTS
+#do_it 2828 206 207 $OPTS
+
+#do_it 2828 207 207 $OPTS
+
-- 
GitLab