diff --git a/pes_to_spec/model.py b/pes_to_spec/model.py index e198b82acc550d38e2c0ee4351aaec45133934fd..1af3a3bf849cd06f2b47d64e5a8a99bc4d254abb 100644 --- a/pes_to_spec/model.py +++ b/pes_to_spec/model.py @@ -577,6 +577,7 @@ class Model(TransformerMixin, BaseEstimator): validation_size: Fraction (number between 0 and 1) of the data to take for validation and systematic uncertainty estimate. model_type: Which model to use. "bnn" for a BNN, "bnn_rvm" for a BNN with RVM, "ridge" for Ridge and "ard" for ARD. + n_peaks: Minimum numbr of peaks in the grating spectrometer. """ def __init__(self, @@ -589,6 +590,7 @@ class Model(TransformerMixin, BaseEstimator): delta_tof: Optional[int]=300, validation_size: float=0.05, model_type: Literal["bnn", "bnn_rvm", "ridge", "ard"]="ard", + n_peaks: int=0, ): self.high_res_sigma = high_res_sigma # models @@ -629,6 +631,9 @@ class Model(TransformerMixin, BaseEstimator): # size of the test subset self.validation_size = validation_size + # minimum number of peaks + self.n_peaks = n_peaks + def n_pars(self) -> float: """Get number of parameters.""" if self.model_type in ("bnn", "bnn_rvm"): @@ -739,7 +744,7 @@ class Model(TransformerMixin, BaseEstimator): """ print("Checking data quality in high-resolution data.") peaks = self.count_peaks(high_res_data, high_res_photon_energy) - filter_hr = (peaks > 3) + filter_hr = (peaks >= self.n_peaks) print("Fitting PCA on low-resolution data.") self.x_select.fit(low_res_data) diff --git a/pes_to_spec/test/channel_sensitivity.py b/pes_to_spec/test/channel_sensitivity.py index 18cba123c1818e52731fc71b437eae5035a21928..2ff437238966b8a8a2f2665245bb2a956841a16d 100755 --- a/pes_to_spec/test/channel_sensitivity.py +++ b/pes_to_spec/test/channel_sensitivity.py @@ -235,8 +235,10 @@ def main(): nch_axis = np.arange(1, len(channels)+1) resolution = list() rmse = list() + delta_rmse = list() chi2_prepca = list() unc = list() + delta_unc = list() for nch in nch_axis: model = Model(channels=channels[:nch], model_type=args.model_type) @@ -273,13 +275,17 @@ def main(): print("Expected pca std:", pca_var) chi2_prepca += [np.mean(np.sum((spec_smooth_pca[:, np.newaxis, :] - spec_pred["expected_pca"])**2/unc2, axis=(-1, -2)))/ndof_prepca] rmse += [np.mean(np.sqrt(np.mean((spec_smooth[:, np.newaxis, :] - spec_pred["expected"])**2, axis=(-1, -2))))] - unc += [np.mean(spec_pred["total_unc"])] + delta_rmse += [np.std(np.sqrt(np.mean((spec_smooth[:, np.newaxis, :] - spec_pred["expected"])**2, axis=(-1, -2))))] + unc += [np.mean(np.mean(spec_pred["total_unc"], axis=(-1, -2)))] + delta_unc += [np.std(np.mean(spec_pred["total_unc"], axis=(-1, -2)))] df = pd.DataFrame(dict(number_channels=nch_axis, resolution=resolution, rmse=rmse, + delta_rmse=delta_rmse, chi2_prepca=chi2_prepca, unc=unc, + delta_unc=delta_unc, )) df.to_csv(os.path.join(args.directory, "number_channel_effect.csv")) diff --git a/pes_to_spec/test/plot_channel_sensitivity.py b/pes_to_spec/test/plot_channel_sensitivity.py index d2a128265c895c6d9ac93e49ad067f8a0b9dc8bc..813b4ecb5f4785e47b79a5b710ad286bbb28e22e 100755 --- a/pes_to_spec/test/plot_channel_sensitivity.py +++ b/pes_to_spec/test/plot_channel_sensitivity.py @@ -44,23 +44,26 @@ def plot_unc(df: pd.DataFrame, filename: str): fig = plt.figure(figsize=(12, 8)) gs = GridSpec(1, 1) ax = fig.add_subplot(gs[0, 0]) - ax.plot(df.number_channels, 2*df.unc, c='tab:blue', lw=3, alpha=0.5) + #ax.plot(df.number_channels, 2*df.unc, c='tab:blue', lw=3, alpha=0.7, label="Avg. 95% CL uncertainty band") + #ax.fill_between(df.number_channels, 2*df.unc - 2*df.delta_unc, 2*df.unc + 2*df.delta_unc, color='tab:blue', alpha=0.2) + ax.errorbar(df.number_channels, 2*df.unc, yerr=2*df.delta_unc, color='tab:blue', alpha=0.5, marker='o', markersize=20, lw=3, linestyle='none', label="95% CL uncertainty band") ax.spines['top'].set_visible(False) + ax.spines['right'].set_visible(False) ax.set( xlabel="Number of channels", - ylabel="Average uncertainty [a.u.]", + ylabel="Grating spectrometer intensity [a.u.]", ) - #maxY = 1.1*df.unc.max() - #ax.set_ylim((None, maxY)) - rax = ax.twinx() - rax.plot(df.number_channels, df.rmse, c='tab:red', lw=3, alpha=0.5) - rax.spines['right'].set_color('tab:red') - rax.spines['top'].set_visible(False) - rax.tick_params(axis='y', colors='tab:red') - rax.set_ylabel("Root-mean-squared error [a.u.]") - rax.yaxis.label.set_color("tab:red") - #maxY = 1.1*df.rmse.max() - #rax.set_ylim((None, maxY)) + #rax = ax.twinx() + rax = ax + #rax.plot(df.number_channels, df.rmse, c='tab:red', lw=3, alpha=0.7, label="Avg. root-mean-squared error") + #rax.fill_between(df.number_channels, df.rmse - df.delta_rmse, df.rmse + df.delta_rmse, color='tab:red', alpha=0.2) + rax.errorbar(df.number_channels, df.rmse, yerr=df.delta_rmse, color='tab:red', alpha=0.5, marker='^', markersize=20, lw=3, linestyle='none', label="Root-mean-squared error") + #rax.spines['right'].set_color('tab:red') + #rax.spines['top'].set_visible(False) + #rax.tick_params(axis='y', colors='tab:red') + #rax.set_ylabel("Root-mean-squared error [a.u.]") + #rax.yaxis.label.set_color("tab:red") + ax.legend(frameon=False) plt.tight_layout() fig.savefig(filename) plt.close(fig) diff --git a/run_tests.sh b/run_tests.sh new file mode 100644 index 0000000000000000000000000000000000000000..41fb02678541b9814f347b9666bc11c9658bf56a --- /dev/null +++ b/run_tests.sh @@ -0,0 +1,71 @@ +#!/bin/bash +#SBATCH --partition=exfel +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=1 +#SBATCH --time=8:00:00 +#SBATCH --job-name=pes2spec +#SBATCH -o slurm.%x.err.txt +#SBATCH -e slurm.%x.err.txt +#SBATCH --reservation=exfel_ml + +optstring="d:" + +DIR=results +while getopts ${optstring} arg; do + case ${arg} in + d) + DIR=${OPTARG} + S=$OPTIND + ;; + *) + S=$((OPTIND)) + break + ;; + esac +done +OPTS="${@:$S}" +echo "Options: $OPTS" + +source /usr/share/Modules/init/sh + +module load exfel exfel_anaconda3 + +cd $HOME/scratch/karabo/devices/pes_to_spec +source env/bin/activate +pwd +export PYTHONPATH=$PYTHONPATH:$PWD + +mkdir $DIR + +do_it() { + p=$1 + r=$2 + rt=$3 + output=$DIR/p${p}r${r}t${rt} + mkdir -p $output + echo "Proposal $p, run $r, test at run $rt" + CMD=(./pes_to_spec/test/offline_analysis.py -p $p -r $r -t $rt -d $output ${@:4}) + echo "${CMD[*]}" + ${CMD[*]} 2>&1 | tee $output/log.txt +} + +do_it 900331 69 70 $OPTS + +# train in run 2 and test in run 3 +#do_it 3384 2 3 $OPTS + +# new runs: +#for run in 2 4 +#do +# do_it 3384 $run $run $OPTS +#done + +# train in run 4 and test in run 3 +#do_it 3384 4 3 $OPTS + +# old run +#do_it 2828 206 206 $OPTS +#do_it 2828 206 207 $OPTS + +#do_it 2828 207 207 $OPTS +