From 4febf3b9613eff30ad4bcb4ad81c3594bbab7b44 Mon Sep 17 00:00:00 2001 From: arman <arman.davtyan@xfel.eu> Date: Wed, 7 Dec 2022 09:25:51 +0100 Subject: [PATCH] first commit --- README.md | 11 + data_drift_check.py | 34 + inv_eval.py | 1627 ++++++++++++++++++ inv_inference.py | 69 + inv_train.py | 103 ++ src/data/__init__.py | 0 src/data/data.py | 326 ++++ src/data/data_preproc.py | 99 ++ src/models/find_components/Find_Component.py | 194 +++ src/models/find_components/__init__.py | 0 src/models/fit_methods/Fit_Methods.py | 302 ++++ src/models/fit_methods/__init__.py | 0 src/models/fit_methods/model.py | 164 ++ src/utils/__init__.py | 0 src/utils/utils.py | 238 +++ 15 files changed, 3167 insertions(+) create mode 100644 data_drift_check.py create mode 100644 inv_eval.py create mode 100644 inv_inference.py create mode 100644 inv_train.py create mode 100644 src/data/__init__.py create mode 100644 src/data/data.py create mode 100644 src/data/data_preproc.py create mode 100644 src/models/find_components/Find_Component.py create mode 100644 src/models/find_components/__init__.py create mode 100644 src/models/fit_methods/Fit_Methods.py create mode 100644 src/models/fit_methods/__init__.py create mode 100644 src/models/fit_methods/model.py create mode 100644 src/utils/__init__.py create mode 100644 src/utils/utils.py diff --git a/README.md b/README.md index 89637b7..f76d3e1 100644 --- a/README.md +++ b/README.md @@ -2,3 +2,14 @@ Aim of the project to transform the data from PES to SPEC. +# Usage + +1. inv_train.py -> Train a model on the specific RUN. +Thish will save the pca model and fit model in experiments/YOUR_DIR/checkpoints and the data in +experiments/YOUR_DIR/data. + +2. inv_eval.py -> Use to evaluate the trained model + +3. inv_inference -> Use trained model to do inference on new data point + +4. data_drift_check.py -> Check data drift between two datasets \ No newline at end of file diff --git a/data_drift_check.py b/data_drift_check.py new file mode 100644 index 0000000..6bef495 --- /dev/null +++ b/data_drift_check.py @@ -0,0 +1,34 @@ +import sys +sys.path.append("./") +sys.path.append("..") + +import matplotlib +matplotlib.use('Agg') +import matplotlib.pyplot as plt + +from src.data.data import ReadPesSpec, PesChannelSelector +from src.data.data_preproc import SpecPreprocessing + +from src.models.find_components.Find_Component import FindPCAcomps +from src.models.fit_methods.Fit_Methods import FitBFGS, FitBFGSPCA +from src.models.fit_methods.model import Model + +from src.utils.utils import load_train_test_h5, load_rec_data_h5, load_rec_data_h5_bfgs_pca, load_checkpoint +from src.utils.utils import create_experiment_dirs + +from sklearn.model_selection import train_test_split +import numpy as np + + +# Load trained model in the path exp_dir +exp_dir = "test3_pulseen_short_test_eps_r0015" +Y_train_model, Y_test_model, spec_train_model, spec_test_model, spec_raw_pe, X_train_model, X_test_model, pes_train_model, pes_test_model, att_dict, xgm_pulseen_train, xgm_pulseen_test = load_train_test_h5(exp_dir) + +att_dict["pes_pca_preprocessing"] = False # set pca non trainable +att_dict["spec_pca_preprocessing"] = False # set pca non trainable + +model_instance = Model(model_type="bfgs_pca_eps", data_info=att_dict) + +inference_model, pes_pca_model, spec_pca_model = model_instance.load_model() # Move to incference sctipt TODO! + +print("Finish") \ No newline at end of file diff --git a/inv_eval.py b/inv_eval.py new file mode 100644 index 0000000..8985125 --- /dev/null +++ b/inv_eval.py @@ -0,0 +1,1627 @@ + +import sys +sys.path.append("./") +sys.path.append("..") + +import matplotlib +matplotlib.use('Agg') +import matplotlib.pyplot as plt + +from src.utils.utils import load_train_test_h5, load_rec_data_h5, load_rec_data_h5_bfgs_pca, load_checkpoint, load_rec_data_h5_bfgs_pca_eps +import numpy as np +import h5py + +exp_dir = "test3_pulseen_short_test_eps_r0015" # Evaluate the trained model + +# Load Data +Y_train, Y_test, spec_train, spec_test, spec_raw_pe, X_train, X_test, pes_train, pes_test, att_dict, xgm_pulseen_trains, xgm_pulseen_tests = load_train_test_h5(exp_dir) + +xgm_pulseen_test = [] +for xgm_i in range(len(xgm_pulseen_tests)): + xgm_pulseen_test.append(xgm_pulseen_tests[xgm_i][0]) + +n_pca_comps = att_dict["n_pca_comps_pes"] + +if att_dict["model_type"]=="bfgs": + + + # Load Rec + Y_rec, Y_rec_unc = load_rec_data_h5(exp_dir) + print(Y_test.shape, Y_rec.shape) + + fig, axes_2 = plt.subplots(10, 2, figsize=(15, 35)) + axes_2 = axes_2.flatten() + + for i_bfgs in range(15): + + ax = axes_2[i_bfgs] + ax.plot(np.arange(0,Y_rec.shape[1],1), Y_rec[i_bfgs,:], "r", label=f"SPEC rec {i_bfgs}") + ax.plot(np.arange(0,Y_rec.shape[1],1), Y_test[i_bfgs,:], label=f"SPEC gt {i_bfgs}") + ax.fill_between( + np.arange(0,Y_rec.shape[1],1), Y_rec[i_bfgs,:] - Y_rec_unc, Y_rec[i_bfgs,:] + Y_rec_unc, color="pink", alpha=0.5, label="BFGS std" + ) + ax.set_xlabel('XXX ', fontsize=22) + ax.set_ylabel('int a.u.', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.legend(fontsize=10) + ax.set_title(f"{exp_dir}", y=1.0, pad=-10) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/reconstruction.png", bbox_inches='tight') + plt.close(fig) + + +if att_dict["model_type"]=="bfgs_pca": + + # Load Rec + Y_rec, Y_rec_unc_bfgs, Y_rec_unc_specpca, Y_rec_unc = load_rec_data_h5_bfgs_pca(exp_dir) + print(Y_test.shape, Y_rec.shape) + + Y_eps = 0*Y_rec_unc_bfgs # TODO! fix this remove eps version from here + eps_mean = np.mean(Y_eps, axis=1) + print("len eps mean", len(eps_mean)) + + # compute RMSE + def int_sum_1(A): + return np.sum(A, axis=1) + pes_sum = int_sum_1(pes_test) + spec_sum = int_sum_1(spec_test) + + cor_mat = np.corrcoef(pes_test) + cor_mat_int = np.sum(cor_mat, axis=1) + #where_1 = np.where(cor_mat == 1) + #cor_mat[where_1] = 0 + + + + # quick fft eval + FC_START = 0 + FC_END = 80 + N_ROI = 40 + # Get the Fourier Components + n_Y_comps = spec_test.shape[1] + Y_test_fft_raw = np.abs(np.fft.fft(spec_test))[:, :round(n_Y_comps/2)] + Y_rec_fft_raw = np.abs(np.fft.fft(Y_rec))[:, :round(n_Y_comps/2)] + + # Cut the Fourier components and take only those who are relevant and not close to 0 + Y_test_fft = Y_test_fft_raw[:, FC_START:FC_END] + Y_rec_fft = Y_rec_fft_raw[:, FC_START:FC_END] + + # Split relevant fourier components into N regions of interests (roi) + split_size = round((FC_END - FC_START)/N_ROI) + + def split_to_rois(a, splitedSize = split_size): + a_splited = [a[:,x:x+splitedSize] for x in range(0, a.shape[1], splitedSize)] + return a_splited + + Y_test_fft_rois = split_to_rois(Y_test_fft) + Y_rec_fft_rois = split_to_rois(Y_rec_fft) + + # Define the X axis, pixels + pixel_rois = split_to_rois(np.arange(FC_START, FC_END, 1).reshape(1,-1)) + + # Compute the absolute difference of gt and rec fourier components per region of interest per train ID + delta_Y_fft_rois = [np.abs(a-b) for a, b in zip(Y_test_fft_rois, Y_rec_fft_rois)] + # Sum along all Fourier components in roi per train id + delta_Y_fft_rois_sum = [np.sum(a, axis=1) for a in delta_Y_fft_rois] + print("len(delta_Y_fft_rois_sum)", len(delta_Y_fft_rois)) + # Sum along all train IDs per roi + delta_Y_fft_rois_sum_mean = [x.mean() for x in delta_Y_fft_rois_sum] + + + # find corr coef + def corr_coef(a, b): + cor_coef = np.corrcoef(a, b)[0][1] + #print("The corr coef is :", np.round(cor_coef, 2)) + return np.round(cor_coef, 2) + cc = [] + for i_bfgs in range(Y_rec.shape[0]): + #corr_coef(sc_res[i_bfgs,:], Y_test[i_bfgs,:]) + cc.append(corr_coef(Y_rec[i_bfgs,:], spec_test[i_bfgs,:]) ) + + + + # Function to calculate Chi-distance + def chi2_distance(A, B): + # compute the chi-squared distance using above formula + chi = 0.5 * np.sum([((a - b) ** 2) / (a + b) + for (a, b) in zip(A, B)]) + + #print("The Chi-square distance is :", chi) + return chi + chi_2 = [] + for i_bfgs in range(Y_rec.shape[0]): + chi_2.append(chi2_distance(Y_rec[i_bfgs,:], spec_test[i_bfgs,:])) + + + # compute RMSE + def rmse_value(A, B): + return np.sqrt(np.mean((A-B)**2)) + rmse = [] + for i_bfgs in range(Y_rec.shape[0]): + rmse.append(rmse_value(Y_rec[i_bfgs,:], spec_test[i_bfgs,:])) + rmse_quickfft = [] + for i_bfgs in range(Y_rec_fft.shape[0]): + rmse_quickfft.append(rmse_value(Y_rec_fft[i_bfgs,:], Y_test_fft[i_bfgs,:])) + + + + + + + # Save Images + + # Save cor mat test set + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + im = ax.imshow(cor_mat) + ax.set_xlabel('train IDs.', fontsize=22) + ax.set_ylabel('train IDs.', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + ax.set_title(f"cor mat: PCA(PES)", y=1.0, pad=-20, fontsize=22) + + cax = fig.add_axes([0.27, 0.95, 0.5, 0.05]) + fig.colorbar(im, cax=cax, orientation='horizontal') + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/cor_mat_pca_pes.png", bbox_inches='tight') + plt.close(fig) + + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + ax.plot(cor_mat[42,:], label=f"42") + ax.plot(cor_mat[43,:], label=f"43") + ax.plot(cor_mat[171,:], label=f"171") + #ax.plot(cor_mat_int, label=f"int") + ax.legend(fontsize=20) + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/cor_mat_pca_pes_line.png", bbox_inches='tight') + plt.close(fig) + + + # Save xgm_pulse energy + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + ax.plot(np.arange(len(xgm_pulseen_test)), xgm_pulseen_test, label=f"xgm pulse energy: PES") + + ax.set_xlabel('train IDs.', fontsize=22) + ax.set_ylabel('int a.u.', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + ax.set_title(f"xgm test pulse energy: PES", y=1.0, pad=-20, fontsize=22) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/xgm_pulse_energy.png", bbox_inches='tight') + plt.close(fig) + + + # Save xgm_pulse energy vs PES sum + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + ax.scatter(eps_mean, xgm_pulseen_test, label=f"eps_mean xgm pulse energy") + + ax.set_xlabel('eps_mean', fontsize=22) + ax.set_ylabel('pulse energy', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + ax.set_title(f"pes_sum vs pulse energy", y=1.0, pad=-20, fontsize=22) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/pulse_energy_vs_eps_mean.png", bbox_inches='tight') + plt.close(fig) + + + # Save xgm_pulse energy vs QuickFFT + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + ax.scatter(eps_mean, rmse_quickfft, label=f"eps_mean QuickFFT") + + ax.set_xlabel('eps_mean', fontsize=22) + ax.set_ylabel('QuickFFT', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + ax.set_title(f"eps_mean QuickFFT", y=1.0, pad=-20, fontsize=22) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/eps_mean_vs_quick_fft.png", bbox_inches='tight') + plt.close(fig) + + + # Save xgm_pulse energy vs QuickFFT + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + ax.scatter(xgm_pulseen_test, rmse_quickfft, label=f"xgm_pulseen_test QuickFFT") + + ax.set_xlabel('xgm_pulseen_test', fontsize=22) + ax.set_ylabel('QuickFFT', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + ax.set_title(f"pulse energy QuickFFT", y=1.0, pad=-20, fontsize=22) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/xgm_pulseen_test_vs_quick_fft.png", bbox_inches='tight') + plt.close(fig) + + + # Save CC vs QuickFFT + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + ax.scatter(cc, rmse_quickfft, label=f"cc vs QuickFFT") + + ax.set_xlabel('cc', fontsize=22) + ax.set_ylabel('QuickFFT', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + ax.set_title(f"cc vs QuickFFT", y=1.0, pad=-20, fontsize=22) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/cc_vs_quick_fft.png", bbox_inches='tight') + plt.close(fig) + + + # Save xgm_pulse energy vs Spec sum + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + ax.scatter(spec_sum, xgm_pulseen_test, label=f"xgm pulse energy vs spec int") + + ax.set_xlabel('spec_sum', fontsize=22) + ax.set_ylabel('pulse energy', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + ax.set_title(f"spec_sum vs pulse energy", y=1.0, pad=-20, fontsize=22) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/pulse_energy_vs_spec_int.png", bbox_inches='tight') + plt.close(fig) + + + # Save xgm_pulse energy vs corr matrix sum + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + ax.scatter(cor_mat_int, xgm_pulseen_test, label=f"xgm pulse energy vs cor_mat_int") + + ax.set_xlabel('cor_mat_int', fontsize=22) + ax.set_ylabel('pulse energy', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + ax.set_title(f"spec_sum vs pulse energy", y=1.0, pad=-20, fontsize=22) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/pulse_energy_vs_cor_mat_int.png", bbox_inches='tight') + plt.close(fig) + + + # pes_sum vs Spec sum + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + ax.plot(spec_sum, pes_sum, label=f" spec int vs pes_sum") + + ax.set_xlabel('spec_sum', fontsize=22) + ax.set_ylabel('pulse energy', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + ax.set_title(f"spec_sum vs pes_sum", y=1.0, pad=-20, fontsize=22) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/spec_int_vs_pes_int.png", bbox_inches='tight') + plt.close(fig) + + + # Save pes sum test data + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + ax.plot(np.arange(len(pes_sum)), pes_sum, label=f"integrated int: PES") + + ax.set_xlabel('train IDs.', fontsize=22) + ax.set_ylabel('int a.u.', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + ax.set_title(f"integrated int: PES", y=1.0, pad=-20, fontsize=22) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/pes_int_sum.png", bbox_inches='tight') + plt.close(fig) + + + # Save pes sum test data + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + ax.plot(np.arange(len(spec_sum)), spec_sum, label=f"integrated int: SPEC") + + ax.set_xlabel('train IDs.', fontsize=22) + ax.set_ylabel('int a.u.', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + ax.set_title(f"integrated int: SPEC", y=1.0, pad=-20, fontsize=22) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/spec_int_sum.png", bbox_inches='tight') + plt.close(fig) + + + # Save pes_sum vs pulse energy + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + #ax.plot(np.arange(len(cc)), cc/max(cc), label=f"cross correlation") + #ax.plot(np.arange(len(cc)), pes_sum/max(np.array(pes_sum)), label=f"pes_sum") + + ax.plot(pes_sum/max(np.array(pes_sum)), cc/max(cc), label=f"correlation coef.") + + ax.set_xlabel('pes_sum', fontsize=22) + ax.set_ylabel('cor. coef.', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + ax.set_title(f"pes_sum vs cor. coef.", y=1.0, pad=-20, fontsize=22) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/pes_int_sum_vs_cor_coef.png", bbox_inches='tight') + plt.close(fig) + + # Save spec_sum vs pulse energy + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + #ax.plot(np.arange(len(cc)), cc/max(cc), label=f"cross correlation") + #ax.plot(np.arange(len(cc)), spec_sum/max(np.array(spec_sum)), label=f"spec_sum") + + ax.plot(spec_sum/max(np.array(spec_sum)), cc/max(cc), label=f"spec_sum correlation coef") + + ax.set_xlabel('spec_sum', fontsize=22) + ax.set_ylabel('cor. coef.', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + ax.set_title(f"spec_sum vs cor. coef.", y=1.0, pad=-20, fontsize=22) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/spec_int_sum_vs_corr_coef.png", bbox_inches='tight') + plt.close(fig) + + + # Save quick fft eval + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + ax.plot(np.arange(N_ROI), delta_Y_fft_rois_sum_mean, label=f"FFT eval {i_bfgs}") + + ax.set_xlabel('FFT bin comps.', fontsize=22) + ax.set_ylabel('int a.u.', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + ax.set_title(f" qucik fft eval", y=1.0, pad=-20, fontsize=22) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/quick_fft_eval.png", bbox_inches='tight') + plt.close(fig) + + + # Save cross correlation + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + ax.plot(np.arange(len(cc)), cc, label=f"correlation coef") + + ax.set_xlabel('train IDs.', fontsize=22) + ax.set_ylabel('int a.u.', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + ax.set_title(f"cor. coef.", y=1.0, pad=-20, fontsize=22) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/corr_coef.png", bbox_inches='tight') + plt.close(fig) + + + # Save cross correlation vs pulse energy + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + #ax.plot(np.arange(len(cc)), cc/max(cc), label=f"cross correlation") + ax.scatter( cc/max(cc), eps_mean/spec_sum, label=f"cc vs eps_mean") + + ax.set_xlabel('cor coef', fontsize=22) + ax.set_ylabel('eps mean', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + ax.set_title(f"cc vs eps_mean", y=1.0, pad=-20, fontsize=22) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/cor_coef_vs_eps_mean.png", bbox_inches='tight') + plt.close(fig) + + + # Save cross correlation vs pulse energy + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + #ax.plot(np.arange(len(cc)), cc/max(cc), label=f"cross correlation") + ax.scatter( cc/max(cc), xgm_pulseen_test/max(np.array(xgm_pulseen_test)), label=f"xgm_pulseen_test") + + ax.set_xlabel('cor coef', fontsize=22) + ax.set_ylabel('pulse energy', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + ax.set_title(f"cor. coef. vs pulse energy", y=1.0, pad=-20, fontsize=22) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/cor_coef_vs_pulse_energy.png", bbox_inches='tight') + plt.close(fig) + + + # Save cross correlation best rec spec + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + i_bfgs = np.argmax(cc) + + ax.plot(spec_raw_pe, Y_rec[i_bfgs,:], "r", label=f"SPEC rec {i_bfgs}") + ax.plot(spec_raw_pe, spec_test[i_bfgs,:], label=f"SPEC gt {i_bfgs}") + ax.fill_between( + spec_raw_pe, Y_rec[i_bfgs,:] - Y_rec_unc_bfgs[0,:], Y_rec[i_bfgs,:] + Y_rec_unc_bfgs[0,:], color="r", alpha=0.5, label="u_bfgs" + ) + ax.fill_between( + spec_raw_pe, Y_rec[i_bfgs,:] - Y_rec_unc_specpca, Y_rec[i_bfgs,:] + Y_rec_unc_specpca, color="g", alpha=0.5, label="u_specpca" + ) + ax.set_xlabel('Energy (eV) ', fontsize=22) + ax.set_ylabel('int a.u.', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + ax.set_title(f"cc best rec", y=1.0, pad=-20) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/cc_best_rec.png", bbox_inches='tight') + plt.close(fig) + + + # Save cross correlation best rec spec + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + i_bfgs = np.argmin(cc) + + ax.plot(spec_raw_pe, Y_rec[i_bfgs,:], "r", label=f"SPEC rec {i_bfgs}") + ax.plot(spec_raw_pe, spec_test[i_bfgs,:], label=f"SPEC gt {i_bfgs}") + ax.fill_between( + spec_raw_pe, Y_rec[i_bfgs,:] - Y_rec_unc_bfgs[0,:], Y_rec[i_bfgs,:] + Y_rec_unc_bfgs[0,:], color="r", alpha=0.5, label="u_bfgs" + ) + ax.fill_between( + spec_raw_pe, Y_rec[i_bfgs,:] - Y_rec_unc_specpca, Y_rec[i_bfgs,:] + Y_rec_unc_specpca, color="g", alpha=0.5, label="u_specpca" + ) + ax.set_xlabel('Energy (eV) ', fontsize=22) + ax.set_ylabel('int a.u.', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + ax.set_title(f"cc worse rec", y=1.0, pad=-20) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/cc_worse_rec.png", bbox_inches='tight') + plt.close(fig) + + + # Save cross correlation best rec pes + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + i_bfgs = np.argmax(cc) + + ax.plot(pes_test[i_bfgs,:], "r", label=f"PES test {i_bfgs}") + + ax.set_xlabel(' x comps ', fontsize=22) + ax.set_ylabel('int a.u.', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + ax.set_title(f"pes raw (cc best rec)", y=1.0, pad=-20) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/pes_raw_best_rec.png", bbox_inches='tight') + plt.close(fig) + + + # Save cross correlation worst rec pes + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + i_bfgs = np.argmin(cc) + + ax.plot(pes_test[i_bfgs,:], "r", label=f"PES test {i_bfgs}") + + ax.set_xlabel(' x comps ', fontsize=22) + ax.set_ylabel('int a.u.', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + ax.set_title(f"pes raw (cc worst rec)", y=1.0, pad=-20) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/pes_raw_worst_rec.png", bbox_inches='tight') + plt.close(fig) + + + # Save chi_2 + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + ax.plot(np.arange(len(chi_2)), chi_2, label=f"chi_2") + + ax.set_xlabel('train IDs.', fontsize=22) + ax.set_ylabel('int a.u.', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + ax.set_title(f"chi_2", y=1.0, pad=-20, fontsize=22) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/chi_2.png", bbox_inches='tight') + plt.close(fig) + + + # Save rmse + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + ax.plot(np.arange(len(rmse)), rmse, label=f"rmse") + + ax.set_xlabel('train IDs.', fontsize=22) + ax.set_ylabel('int a.u.', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + ax.set_title(f"rmse", y=1.0, pad=-20, fontsize=22) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/rmse.png", bbox_inches='tight') + plt.close(fig) + + + # Save rmse vs eps_mean + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + ax.scatter(eps_mean, rmse, label=f"rmse_vs_eps_mean") + + ax.set_xlabel('eps mean', fontsize=22) + ax.set_ylabel('rmse', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + ax.set_title(f"rmse_vs_eps_mean", y=1.0, pad=-20, fontsize=22) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/rmse_vs_eps_mean.png", bbox_inches='tight') + plt.close(fig) + + + # reconstruction with total unc + fig, axes_2 = plt.subplots(10, 2, figsize=(15, 35)) + axes_2 = axes_2.flatten() + for i_bfgs in range(20): + + ax = axes_2[i_bfgs] + ax.plot(spec_raw_pe, Y_rec[i_bfgs,:], "r", label=f"SPEC rec {i_bfgs}") + ax.plot(spec_raw_pe, spec_test[i_bfgs,:], label=f"SPEC gt {i_bfgs}") + ax.fill_between( + spec_raw_pe, Y_rec[i_bfgs,:] - Y_rec_unc[0,:], Y_rec[i_bfgs,:] + Y_rec_unc[0,:], color="pink", alpha=0.5, label="u_total" + ) + ax.set_xlabel('Energy (eV) ', fontsize=22) + ax.set_ylabel('int a.u.', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.legend(fontsize=10) + ax.set_title(f"{exp_dir} eps={eps_mean[i_bfgs]}", y=1.0, pad=-10) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/reconstruction.png", bbox_inches='tight') + plt.close(fig) + + + # reconstruction with total unc + eps + fig, axes_2 = plt.subplots(10, 2, figsize=(15, 35)) + axes_2 = axes_2.flatten() + for i_bfgs in range(20): + + ax = axes_2[i_bfgs] + ax.plot(spec_raw_pe, Y_rec[i_bfgs,:], "r", label=f"SPEC rec {i_bfgs}") + ax.plot(spec_raw_pe, spec_test[i_bfgs,:], label=f"SPEC gt {i_bfgs}") + ax.fill_between( + spec_raw_pe, Y_rec[i_bfgs,:] - Y_rec_unc_specpca, Y_rec[i_bfgs,:] + Y_rec_unc_specpca, color="g", alpha=0.5, label="u_specpca" + ) + ax.fill_between( + spec_raw_pe, Y_rec[i_bfgs,:] - Y_eps[0,:], Y_rec[i_bfgs,:] + Y_eps[0,:], color="blue", alpha=0.5, label="eps" + ) + ax.set_xlabel('Energy (eV) ', fontsize=22) + ax.set_ylabel('int a.u.', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.legend(fontsize=10) + ax.set_title(f"{exp_dir} eps={eps_mean[i_bfgs]}", y=1.0, pad=-10) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/reconstruction_eps.png", bbox_inches='tight') + plt.close(fig) + + + print(Y_rec_unc_bfgs[0,:]) + # Reconstruction with 2 uncs + fig, axes_2 = plt.subplots(10, 2, figsize=(15, 35)) + axes_2 = axes_2.flatten() + for i_bfgs in range(20): + + ax = axes_2[i_bfgs] + ax.plot(spec_raw_pe, Y_rec[i_bfgs,:], "r", label=f"SPEC rec {i_bfgs}") + ax.plot(spec_raw_pe, spec_test[i_bfgs,:], label=f"SPEC gt {i_bfgs}") + ax.fill_between( + spec_raw_pe, Y_rec[i_bfgs,:] - Y_rec_unc_bfgs[0,:], Y_rec[i_bfgs,:] + Y_rec_unc_bfgs[0,:], color="r", alpha=0.5, label="u_bfgs" + ) + ax.fill_between( + spec_raw_pe, Y_rec[i_bfgs,:] - Y_rec_unc_specpca, Y_rec[i_bfgs,:] + Y_rec_unc_specpca, color="g", alpha=0.5, label="u_specpca" + ) + ax.set_xlabel('Energy (eV) ', fontsize=22) + ax.set_ylabel('int a.u.', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.legend(fontsize=10) + ax.set_title(f"{exp_dir}", y=1.0, pad=-10) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/reconstruction_2unc.png", bbox_inches='tight') + plt.close(fig) + + + # Single ID reconstruction with 2 unc + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + ax.plot(spec_raw_pe, Y_rec[i_bfgs,:], "r", label=f"SPEC rec {i_bfgs}") + ax.plot(spec_raw_pe, spec_test[i_bfgs,:], label=f"SPEC gt {i_bfgs}") + ax.fill_between( + spec_raw_pe, Y_rec[i_bfgs,:] - Y_rec_unc_bfgs[0,:], Y_rec[i_bfgs,:] + Y_rec_unc_bfgs[0,:], color="r", alpha=0.5, label="u_bfgs" + ) + ax.fill_between( + spec_raw_pe, Y_rec[i_bfgs,:] - Y_rec_unc_specpca, Y_rec[i_bfgs,:] + Y_rec_unc_specpca, color="g", alpha=0.5, label="u_specpca" + ) + ax.set_xlabel('Energy (eV) ', fontsize=22) + ax.set_ylabel('int a.u.', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + #ax.set_title(f"{exp_dir}", y=1.0, pad=-20) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/reconstruction_2unc_id1.png", bbox_inches='tight') + plt.close(fig) + + + # Single ID reconstruction with TOTAL unc + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + ax.plot(spec_raw_pe, Y_rec[i_bfgs,:], "r", label=f"SPEC rec {i_bfgs}") + ax.plot(spec_raw_pe, spec_test[i_bfgs,:], label=f"SPEC gt {i_bfgs}") + ax.fill_between( + spec_raw_pe, Y_rec[i_bfgs,:] - Y_rec_unc[0,:], Y_rec[i_bfgs,:] + Y_rec_unc[0,:], color="r", alpha=0.5, label="u_total" + ) + + ax.set_xlabel('Energy (eV) ', fontsize=22) + ax.set_ylabel('int a.u.', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + #ax.set_title(f"{exp_dir}", y=1.0, pad=-20) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/reconstruction_unc_id1.png", bbox_inches='tight') + plt.close(fig) + + + # Example single spec + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + ax.plot(spec_raw_pe, spec_test[i_bfgs,:], label=f"SPEC gt {i_bfgs}") + + ax.set_xlabel('Energy (eV) ', fontsize=22) + ax.set_ylabel('int a.u.', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + #ax.set_title(f"{exp_dir}", y=1.0, pad=-20) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/single_spec.png", bbox_inches='tight') + plt.close(fig) + + + # Example single Y + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + ax.plot(Y_test[i_bfgs,:], label=f"SPEC gt {i_bfgs}") + + ax.set_xlabel('PCA comps.', fontsize=22) + ax.set_ylabel('int a.u.', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + #ax.set_title(f"{exp_dir}", y=1.0, pad=-20) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/single_Y.png", bbox_inches='tight') + plt.close(fig) + + + # Example single PES + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + ax.plot(pes_train[i_bfgs,:], label=f"SPEC gt {i_bfgs}") + + ax.set_xlabel('Channel comps.', fontsize=22) + ax.set_ylabel('int a.u.', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + #ax.set_title(f"{exp_dir}", y=1.0, pad=-20) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/single_pes.png", bbox_inches='tight') + plt.close(fig) + + + # # Example single X + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + ax.plot(X_train[i_bfgs,:], label=f"SPEC gt {i_bfgs}") + + ax.set_xlabel('PCA comps.', fontsize=22) + ax.set_ylabel('int a.u.', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + #ax.set_title(f"{exp_dir}", y=1.0, pad=-20) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/single_X.png", bbox_inches='tight') + plt.close(fig) + + + args_cor_mat_int = np.argsort(eps_mean) + args_cor_mat_int = args_cor_mat_int[230:] + # ORDERED reconstruction with total unc + eps + fig, axes_2 = plt.subplots(10, 2, figsize=(15, 35)) + axes_2 = axes_2.flatten() + for i_bfgs in range(20): + i_bfgs = i_bfgs + ax = axes_2[i_bfgs] + ax.plot(spec_raw_pe, Y_rec[args_cor_mat_int[i_bfgs],:], "r", label=f"SPEC rec {i_bfgs, args_cor_mat_int[i_bfgs]}") + ax.plot(spec_raw_pe, spec_test[args_cor_mat_int[i_bfgs],:], label=f"SPEC gt {i_bfgs, args_cor_mat_int[i_bfgs]}") + ax.fill_between( + spec_raw_pe, Y_rec[args_cor_mat_int[i_bfgs],:] - Y_rec_unc_specpca, Y_rec[args_cor_mat_int[i_bfgs],:] + Y_rec_unc_specpca, color="g", alpha=0.5, label="u_specpca" + ) + ax.fill_between( + spec_raw_pe, Y_rec[args_cor_mat_int[i_bfgs],:] - Y_eps[0,:], Y_rec[args_cor_mat_int[i_bfgs],:] + Y_eps[0,:], color="blue", alpha=0.5, label="eps" + ) + ax.set_xlabel('Energy (eV) ', fontsize=22) + ax.set_ylabel('int a.u.', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.legend(fontsize=10) + ax.set_title(f"eps={eps_mean[args_cor_mat_int[i_bfgs]]}", y=1.0, pad=-10) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/reconstruction_eps_SORTED.png", bbox_inches='tight') + plt.close(fig) + + + args_eps = np.argsort(eps_mean) + # ORDERED reconstruction with total unc + eps + fig, axes_2 = plt.subplots(10, 2, figsize=(15, 35)) + axes_2 = axes_2.flatten() + for i_bfgs in range(20): + + ax = axes_2[i_bfgs] + ax.plot(spec_raw_pe, Y_rec[args_eps[i_bfgs],:], "r", label=f"SPEC rec {i_bfgs, args_eps[i_bfgs]}") + ax.plot(spec_raw_pe, spec_test[args_eps[i_bfgs],:], label=f"SPEC gt {i_bfgs, args_eps[i_bfgs]}") + ax.fill_between( + spec_raw_pe, Y_rec[args_eps[i_bfgs],:] - Y_rec_unc_specpca, Y_rec[args_eps[i_bfgs],:] + Y_rec_unc_specpca, color="g", alpha=0.5, label="u_specpca" + ) + ax.fill_between( + spec_raw_pe, Y_rec[args_eps[i_bfgs],:] - Y_eps[0,:], Y_rec[args_eps[i_bfgs],:] + Y_eps[0,:], color="blue", alpha=0.5, label="eps" + ) + ax.set_xlabel('Energy (eV) ', fontsize=22) + ax.set_ylabel('int a.u.', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.legend(fontsize=10) + ax.set_title(f"eps={eps_mean[args_eps[i_bfgs]]}", y=1.0, pad=-10) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/reconstruction_eps_SORTED_descending.png", bbox_inches='tight') + plt.close(fig) + + + + + + print("finished eval") + + +if att_dict["model_type"]=="bfgs_pca_eps": + + # Load Rec + Y_rec, Y_rec_unc_bfgs, Y_rec_unc_specpca, Y_rec_unc, Y_eps = load_rec_data_h5_bfgs_pca_eps(exp_dir) + print(Y_test.shape, Y_rec.shape) + + eps_mean = np.mean(Y_eps, axis=1) + eps_mean = np.round(eps_mean, 2) # round eps mean + print("len eps mean", len(eps_mean)) + + # compute RMSE + def int_sum_1(A): + return np.sum(A, axis=1) + pes_sum = int_sum_1(pes_test) + spec_sum = int_sum_1(spec_test) + + cor_mat = np.corrcoef(pes_test) + cor_mat_int = np.sum(cor_mat, axis=1) + #where_1 = np.where(cor_mat == 1) + #cor_mat[where_1] = 0 + + + + # quick fft eval + FC_START = 0 + FC_END = 80 + N_ROI = 40 + # Get the Fourier Components + n_Y_comps = spec_test.shape[1] + Y_test_fft_raw = np.abs(np.fft.fft(spec_test))[:, :round(n_Y_comps/2)] + Y_rec_fft_raw = np.abs(np.fft.fft(Y_rec))[:, :round(n_Y_comps/2)] + + # Cut the Fourier components and take only those who are relevant and not close to 0 + Y_test_fft = Y_test_fft_raw[:, FC_START:FC_END] + Y_rec_fft = Y_rec_fft_raw[:, FC_START:FC_END] + + # Split relevant fourier components into N regions of interests (roi) + split_size = round((FC_END - FC_START)/N_ROI) + + def split_to_rois(a, splitedSize = split_size): + a_splited = [a[:,x:x+splitedSize] for x in range(0, a.shape[1], splitedSize)] + return a_splited + + Y_test_fft_rois = split_to_rois(Y_test_fft) + Y_rec_fft_rois = split_to_rois(Y_rec_fft) + + # Define the X axis, pixels + pixel_rois = split_to_rois(np.arange(FC_START, FC_END, 1).reshape(1,-1)) + + # Compute the absolute difference of gt and rec fourier components per region of interest per train ID + delta_Y_fft_rois = [np.abs(a-b) for a, b in zip(Y_test_fft_rois, Y_rec_fft_rois)] + # Sum along all Fourier components in roi per train id + delta_Y_fft_rois_sum = [np.sum(a, axis=1) for a in delta_Y_fft_rois] + print("len(delta_Y_fft_rois_sum)", len(delta_Y_fft_rois)) + # Sum along all train IDs per roi + delta_Y_fft_rois_sum_mean = [x.mean() for x in delta_Y_fft_rois_sum] + + + # find corr coef + def corr_coef(a, b): + cor_coef = np.corrcoef(a, b)[0][1] + #print("The corr coef is :", np.round(cor_coef, 2)) + return np.round(cor_coef, 2) + cc = [] + for i_bfgs in range(Y_rec.shape[0]): + #corr_coef(sc_res[i_bfgs,:], Y_test[i_bfgs,:]) + cc.append(corr_coef(Y_rec[i_bfgs,:], spec_test[i_bfgs,:]) ) + + + + # Function to calculate Chi-distance + def chi2_distance(A, B): + # compute the chi-squared distance using above formula + chi = 0.5 * np.sum([((a - b) ** 2) / (a + b) + for (a, b) in zip(A, B)]) + + #print("The Chi-square distance is :", chi) + return chi + chi_2 = [] + for i_bfgs in range(Y_rec.shape[0]): + chi_2.append(chi2_distance(Y_rec[i_bfgs,:], spec_test[i_bfgs,:])) + + + # compute RMSE + def rmse_value(A, B): + return np.sqrt(np.mean((A-B)**2)) + rmse = [] + for i_bfgs in range(Y_rec.shape[0]): + rmse.append(rmse_value(Y_rec[i_bfgs,:], spec_test[i_bfgs,:])) + rmse_quickfft = [] + for i_bfgs in range(Y_rec_fft.shape[0]): + rmse_quickfft.append(rmse_value(Y_rec_fft[i_bfgs,:], Y_test_fft[i_bfgs,:])) + + + + + + + # Save Images + + # Save cor mat test set + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + im = ax.imshow(cor_mat) + ax.set_xlabel('train IDs.', fontsize=22) + ax.set_ylabel('train IDs.', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + ax.set_title(f"cor mat: PCA(PES)", y=1.0, pad=-20, fontsize=22) + + cax = fig.add_axes([0.27, 0.95, 0.5, 0.05]) + fig.colorbar(im, cax=cax, orientation='horizontal') + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/cor_mat_pca_pes.png", bbox_inches='tight') + plt.close(fig) + + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + ax.plot(cor_mat[42,:], label=f"42") + ax.plot(cor_mat[43,:], label=f"43") + ax.plot(cor_mat[171,:], label=f"171") + #ax.plot(cor_mat_int, label=f"int") + ax.legend(fontsize=20) + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/cor_mat_pca_pes_line.png", bbox_inches='tight') + plt.close(fig) + + + # Save xgm_pulse energy + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + ax.plot(np.arange(len(xgm_pulseen_test)), xgm_pulseen_test, label=f"xgm pulse energy: PES") + + ax.set_xlabel('train IDs.', fontsize=22) + ax.set_ylabel('int a.u.', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + ax.set_title(f"xgm test pulse energy: PES", y=1.0, pad=-20, fontsize=22) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/xgm_pulse_energy.png", bbox_inches='tight') + plt.close(fig) + + + # Save xgm_pulse energy vs PES sum + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + ax.scatter(eps_mean, xgm_pulseen_test, label=f"eps_mean xgm pulse energy") + + ax.set_xlabel('eps_mean', fontsize=22) + ax.set_ylabel('pulse energy', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + ax.set_title(f"pes_sum vs pulse energy", y=1.0, pad=-20, fontsize=22) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/pulse_energy_vs_eps_mean.png", bbox_inches='tight') + plt.close(fig) + + + # Save xgm_pulse energy vs QuickFFT + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + ax.scatter(eps_mean, rmse_quickfft, label=f"eps_mean QuickFFT") + + ax.set_xlabel('eps_mean', fontsize=22) + ax.set_ylabel('QuickFFT', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + ax.set_title(f"eps_mean QuickFFT", y=1.0, pad=-20, fontsize=22) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/eps_mean_vs_quick_fft.png", bbox_inches='tight') + plt.close(fig) + + + # Save xgm_pulse energy vs QuickFFT + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + ax.scatter(xgm_pulseen_test, rmse_quickfft, label=f"xgm_pulseen_test QuickFFT") + + ax.set_xlabel('xgm_pulseen_test', fontsize=22) + ax.set_ylabel('QuickFFT', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + ax.set_title(f"pulse energy QuickFFT", y=1.0, pad=-20, fontsize=22) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/xgm_pulseen_test_vs_quick_fft.png", bbox_inches='tight') + plt.close(fig) + + + # Save CC vs QuickFFT + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + ax.scatter(cc, rmse_quickfft, label=f"cc vs QuickFFT") + + ax.set_xlabel('cc', fontsize=22) + ax.set_ylabel('QuickFFT', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + ax.set_title(f"cc vs QuickFFT", y=1.0, pad=-20, fontsize=22) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/cc_vs_quick_fft.png", bbox_inches='tight') + plt.close(fig) + + + # Save xgm_pulse energy vs Spec sum + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + ax.scatter(spec_sum, xgm_pulseen_test, label=f"xgm pulse energy vs spec int") + + ax.set_xlabel('spec_sum', fontsize=22) + ax.set_ylabel('pulse energy', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + ax.set_title(f"spec_sum vs pulse energy", y=1.0, pad=-20, fontsize=22) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/pulse_energy_vs_spec_int.png", bbox_inches='tight') + plt.close(fig) + + + # Save xgm_pulse energy vs corr matrix sum + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + ax.scatter(cor_mat_int, xgm_pulseen_test, label=f"xgm pulse energy vs cor_mat_int") + + ax.set_xlabel('cor_mat_int', fontsize=22) + ax.set_ylabel('pulse energy', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + ax.set_title(f"spec_sum vs pulse energy", y=1.0, pad=-20, fontsize=22) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/pulse_energy_vs_cor_mat_int.png", bbox_inches='tight') + plt.close(fig) + + + # pes_sum vs Spec sum + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + ax.plot(spec_sum, pes_sum, label=f" spec int vs pes_sum") + + ax.set_xlabel('spec_sum', fontsize=22) + ax.set_ylabel('pulse energy', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + ax.set_title(f"spec_sum vs pes_sum", y=1.0, pad=-20, fontsize=22) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/spec_int_vs_pes_int.png", bbox_inches='tight') + plt.close(fig) + + + # Save pes sum test data + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + ax.plot(np.arange(len(pes_sum)), pes_sum, label=f"integrated int: PES") + + ax.set_xlabel('train IDs.', fontsize=22) + ax.set_ylabel('int a.u.', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + ax.set_title(f"integrated int: PES", y=1.0, pad=-20, fontsize=22) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/pes_int_sum.png", bbox_inches='tight') + plt.close(fig) + + + # Save pes sum test data + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + ax.plot(np.arange(len(spec_sum)), spec_sum, label=f"integrated int: SPEC") + + ax.set_xlabel('train IDs.', fontsize=22) + ax.set_ylabel('int a.u.', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + ax.set_title(f"integrated int: SPEC", y=1.0, pad=-20, fontsize=22) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/spec_int_sum.png", bbox_inches='tight') + plt.close(fig) + + + # Save pes_sum vs pulse energy + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + #ax.plot(np.arange(len(cc)), cc/max(cc), label=f"cross correlation") + #ax.plot(np.arange(len(cc)), pes_sum/max(np.array(pes_sum)), label=f"pes_sum") + + ax.plot(pes_sum/max(np.array(pes_sum)), cc/max(cc), label=f"correlation coef.") + + ax.set_xlabel('pes_sum', fontsize=22) + ax.set_ylabel('cor. coef.', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + ax.set_title(f"pes_sum vs cor. coef.", y=1.0, pad=-20, fontsize=22) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/pes_int_sum_vs_cor_coef.png", bbox_inches='tight') + plt.close(fig) + + # Save spec_sum vs pulse energy + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + #ax.plot(np.arange(len(cc)), cc/max(cc), label=f"cross correlation") + #ax.plot(np.arange(len(cc)), spec_sum/max(np.array(spec_sum)), label=f"spec_sum") + + ax.plot(spec_sum/max(np.array(spec_sum)), cc/max(cc), label=f"spec_sum correlation coef") + + ax.set_xlabel('spec_sum', fontsize=22) + ax.set_ylabel('cor. coef.', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + ax.set_title(f"spec_sum vs cor. coef.", y=1.0, pad=-20, fontsize=22) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/spec_int_sum_vs_corr_coef.png", bbox_inches='tight') + plt.close(fig) + + + # Save quick fft eval + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + ax.plot(np.arange(N_ROI), delta_Y_fft_rois_sum_mean, label=f"FFT eval {i_bfgs}") + + ax.set_xlabel('FFT bin comps.', fontsize=22) + ax.set_ylabel('int a.u.', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + ax.set_title(f" qucik fft eval", y=1.0, pad=-20, fontsize=22) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/quick_fft_eval.png", bbox_inches='tight') + plt.close(fig) + + + # Save cross correlation + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + ax.plot(np.arange(len(cc)), cc, label=f"correlation coef") + + ax.set_xlabel('train IDs.', fontsize=22) + ax.set_ylabel('int a.u.', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + ax.set_title(f"cor. coef.", y=1.0, pad=-20, fontsize=22) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/corr_coef.png", bbox_inches='tight') + plt.close(fig) + + + # Save cross correlation vs pulse energy + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + #ax.plot(np.arange(len(cc)), cc/max(cc), label=f"cross correlation") + ax.scatter( cc/max(cc), eps_mean/spec_sum, label=f"cc vs eps_mean") + + ax.set_xlabel('cor coef', fontsize=22) + ax.set_ylabel('eps mean', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + ax.set_title(f"cc vs eps_mean", y=1.0, pad=-20, fontsize=22) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/cor_coef_vs_eps_mean.png", bbox_inches='tight') + plt.close(fig) + + + # Save cross correlation vs pulse energy + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + #ax.plot(np.arange(len(cc)), cc/max(cc), label=f"cross correlation") + ax.scatter( cc/max(cc), xgm_pulseen_test/max(np.array(xgm_pulseen_test)), label=f"xgm_pulseen_test") + + ax.set_xlabel('cor coef', fontsize=22) + ax.set_ylabel('pulse energy', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + ax.set_title(f"cor. coef. vs pulse energy", y=1.0, pad=-20, fontsize=22) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/cor_coef_vs_pulse_energy.png", bbox_inches='tight') + plt.close(fig) + + + # Save cross correlation best rec spec + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + i_bfgs = np.argmax(cc) + + ax.plot(spec_raw_pe, Y_rec[i_bfgs,:], "r", label=f"SPEC rec {i_bfgs}") + ax.plot(spec_raw_pe, spec_test[i_bfgs,:], label=f"SPEC gt {i_bfgs}") + ax.fill_between( + spec_raw_pe, Y_rec[i_bfgs,:] - Y_rec_unc_bfgs[0,:], Y_rec[i_bfgs,:] + Y_rec_unc_bfgs[0,:], color="r", alpha=0.5, label="u_bfgs" + ) + ax.fill_between( + spec_raw_pe, Y_rec[i_bfgs,:] - Y_rec_unc_specpca, Y_rec[i_bfgs,:] + Y_rec_unc_specpca, color="g", alpha=0.5, label="u_specpca" + ) + ax.set_xlabel('Energy (eV) ', fontsize=22) + ax.set_ylabel('int a.u.', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + ax.set_title(f"cc best rec", y=1.0, pad=-20) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/cc_best_rec.png", bbox_inches='tight') + plt.close(fig) + + + # Save cross correlation best rec spec + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + i_bfgs = np.argmin(cc) + + ax.plot(spec_raw_pe, Y_rec[i_bfgs,:], "r", label=f"SPEC rec {i_bfgs}") + ax.plot(spec_raw_pe, spec_test[i_bfgs,:], label=f"SPEC gt {i_bfgs}") + ax.fill_between( + spec_raw_pe, Y_rec[i_bfgs,:] - Y_rec_unc_bfgs[0,:], Y_rec[i_bfgs,:] + Y_rec_unc_bfgs[0,:], color="r", alpha=0.5, label="u_bfgs" + ) + ax.fill_between( + spec_raw_pe, Y_rec[i_bfgs,:] - Y_rec_unc_specpca, Y_rec[i_bfgs,:] + Y_rec_unc_specpca, color="g", alpha=0.5, label="u_specpca" + ) + ax.set_xlabel('Energy (eV) ', fontsize=22) + ax.set_ylabel('int a.u.', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + ax.set_title(f"cc worse rec", y=1.0, pad=-20) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/cc_worse_rec.png", bbox_inches='tight') + plt.close(fig) + + + # Save cross correlation best rec pes + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + i_bfgs = np.argmax(cc) + + ax.plot(pes_test[i_bfgs,:], "r", label=f"PES test {i_bfgs}") + + ax.set_xlabel(' x comps ', fontsize=22) + ax.set_ylabel('int a.u.', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + ax.set_title(f"pes raw (cc best rec)", y=1.0, pad=-20) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/pes_raw_best_rec.png", bbox_inches='tight') + plt.close(fig) + + + # Save cross correlation worst rec pes + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + i_bfgs = np.argmin(cc) + + ax.plot(pes_test[i_bfgs,:], "r", label=f"PES test {i_bfgs}") + + ax.set_xlabel(' x comps ', fontsize=22) + ax.set_ylabel('int a.u.', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + ax.set_title(f"pes raw (cc worst rec)", y=1.0, pad=-20) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/pes_raw_worst_rec.png", bbox_inches='tight') + plt.close(fig) + + + # Save chi_2 + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + ax.plot(np.arange(len(chi_2)), chi_2, label=f"chi_2") + + ax.set_xlabel('train IDs.', fontsize=22) + ax.set_ylabel('int a.u.', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + ax.set_title(f"chi_2", y=1.0, pad=-20, fontsize=22) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/chi_2.png", bbox_inches='tight') + plt.close(fig) + + + # Save rmse + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + ax.plot(np.arange(len(rmse)), rmse, label=f"rmse") + + ax.set_xlabel('train IDs.', fontsize=22) + ax.set_ylabel('int a.u.', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + ax.set_title(f"rmse", y=1.0, pad=-20, fontsize=22) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/rmse.png", bbox_inches='tight') + plt.close(fig) + + + # Save rmse vs eps_mean + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + ax.scatter(eps_mean, rmse, label=f"rmse_vs_eps_mean") + + ax.set_xlabel('eps mean', fontsize=22) + ax.set_ylabel('rmse', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + ax.set_title(f"rmse_vs_eps_mean", y=1.0, pad=-20, fontsize=22) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/rmse_vs_eps_mean.png", bbox_inches='tight') + plt.close(fig) + + + # reconstruction with total unc + fig, axes_2 = plt.subplots(10, 2, figsize=(15, 35)) + axes_2 = axes_2.flatten() + for i_bfgs in range(20): + + ax = axes_2[i_bfgs] + ax.plot(spec_raw_pe, Y_rec[i_bfgs,:], "r", label=f"SPEC rec {i_bfgs}") + ax.plot(spec_raw_pe, spec_test[i_bfgs,:], label=f"SPEC gt {i_bfgs}") + ax.fill_between( + spec_raw_pe, Y_rec[i_bfgs,:] - Y_rec_unc[0,:], Y_rec[i_bfgs,:] + Y_rec_unc[0,:], color="pink", alpha=0.5, label="u_total" + ) + ax.set_xlabel('Energy (eV) ', fontsize=22) + ax.set_ylabel('int a.u.', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.legend(fontsize=10) + ax.set_title(f"{exp_dir} eps={eps_mean[i_bfgs]}", y=1.0, pad=-10) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/reconstruction.png", bbox_inches='tight') + plt.close(fig) + + + # reconstruction with total unc + eps + fig, axes_2 = plt.subplots(10, 2, figsize=(15, 35)) + axes_2 = axes_2.flatten() + for i_bfgs in range(20): + + ax = axes_2[i_bfgs] + ax.plot(spec_raw_pe, Y_rec[i_bfgs,:], "r", label=f"SPEC rec {i_bfgs}") + ax.plot(spec_raw_pe, spec_test[i_bfgs,:], label=f"SPEC gt {i_bfgs}") + ax.fill_between( + spec_raw_pe, Y_rec[i_bfgs,:] - Y_rec_unc_specpca, Y_rec[i_bfgs,:] + Y_rec_unc_specpca, color="g", alpha=0.5, label="u_specpca" + ) + ax.fill_between( + spec_raw_pe, Y_rec[i_bfgs,:] - Y_eps[0,:], Y_rec[i_bfgs,:] + Y_eps[0,:], color="blue", alpha=0.5, label="eps" + ) + ax.set_xlabel('Energy (eV) ', fontsize=22) + ax.set_ylabel('int a.u.', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.legend(fontsize=10) + ax.set_title(f"{exp_dir} eps={eps_mean[i_bfgs]}", y=1.0, pad=-10) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/reconstruction_eps.png", bbox_inches='tight') + plt.close(fig) + + + print(Y_rec_unc_bfgs[0,:]) + # Reconstruction with 2 uncs + fig, axes_2 = plt.subplots(10, 2, figsize=(15, 35)) + axes_2 = axes_2.flatten() + for i_bfgs in range(20): + + ax = axes_2[i_bfgs] + ax.plot(spec_raw_pe, Y_rec[i_bfgs,:], "r", label=f"SPEC rec {i_bfgs}") + ax.plot(spec_raw_pe, spec_test[i_bfgs,:], label=f"SPEC gt {i_bfgs}") + ax.fill_between( + spec_raw_pe, Y_rec[i_bfgs,:] - Y_rec_unc_bfgs[0,:], Y_rec[i_bfgs,:] + Y_rec_unc_bfgs[0,:], color="r", alpha=0.5, label="u_bfgs" + ) + ax.fill_between( + spec_raw_pe, Y_rec[i_bfgs,:] - Y_rec_unc_specpca, Y_rec[i_bfgs,:] + Y_rec_unc_specpca, color="g", alpha=0.5, label="u_specpca" + ) + ax.set_xlabel('Energy (eV) ', fontsize=22) + ax.set_ylabel('int a.u.', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.legend(fontsize=10) + ax.set_title(f"{exp_dir}", y=1.0, pad=-10) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/reconstruction_2unc.png", bbox_inches='tight') + plt.close(fig) + + + # Single ID reconstruction with 2 unc + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + ax.plot(spec_raw_pe, Y_rec[i_bfgs,:], "r", label=f"SPEC rec {i_bfgs}") + ax.plot(spec_raw_pe, spec_test[i_bfgs,:], label=f"SPEC gt {i_bfgs}") + ax.fill_between( + spec_raw_pe, Y_rec[i_bfgs,:] - Y_rec_unc_bfgs[0,:], Y_rec[i_bfgs,:] + Y_rec_unc_bfgs[0,:], color="r", alpha=0.5, label="u_bfgs" + ) + ax.fill_between( + spec_raw_pe, Y_rec[i_bfgs,:] - Y_rec_unc_specpca, Y_rec[i_bfgs,:] + Y_rec_unc_specpca, color="g", alpha=0.5, label="u_specpca" + ) + ax.set_xlabel('Energy (eV) ', fontsize=22) + ax.set_ylabel('int a.u.', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + #ax.set_title(f"{exp_dir}", y=1.0, pad=-20) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/reconstruction_2unc_id1.png", bbox_inches='tight') + plt.close(fig) + + + # Single ID reconstruction with TOTAL unc + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + ax.plot(spec_raw_pe, Y_rec[i_bfgs,:], "r", label=f"SPEC rec {i_bfgs}") + ax.plot(spec_raw_pe, spec_test[i_bfgs,:], label=f"SPEC gt {i_bfgs}") + ax.fill_between( + spec_raw_pe, Y_rec[i_bfgs,:] - Y_rec_unc[0,:], Y_rec[i_bfgs,:] + Y_rec_unc[0,:], color="r", alpha=0.5, label="u_total" + ) + + ax.set_xlabel('Energy (eV) ', fontsize=22) + ax.set_ylabel('int a.u.', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + #ax.set_title(f"{exp_dir}", y=1.0, pad=-20) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/reconstruction_unc_id1.png", bbox_inches='tight') + plt.close(fig) + + + # Example single spec + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + ax.plot(spec_raw_pe, spec_test[i_bfgs,:], label=f"SPEC gt {i_bfgs}") + + ax.set_xlabel('Energy (eV) ', fontsize=22) + ax.set_ylabel('int a.u.', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + #ax.set_title(f"{exp_dir}", y=1.0, pad=-20) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/single_spec.png", bbox_inches='tight') + plt.close(fig) + + + # Example single Y + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + ax.plot(Y_test[i_bfgs,:], label=f"SPEC gt {i_bfgs}") + + ax.set_xlabel('PCA comps.', fontsize=22) + ax.set_ylabel('int a.u.', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + #ax.set_title(f"{exp_dir}", y=1.0, pad=-20) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/single_Y.png", bbox_inches='tight') + plt.close(fig) + + + # Example single PES + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + ax.plot(pes_train[i_bfgs,:], label=f"SPEC gt {i_bfgs}") + + ax.set_xlabel('Channel comps.', fontsize=22) + ax.set_ylabel('int a.u.', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + #ax.set_title(f"{exp_dir}", y=1.0, pad=-20) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/single_pes.png", bbox_inches='tight') + plt.close(fig) + + + # # Example single X + fig, ax = plt.subplots(1, 1, figsize=(20, 10)) + for i_bfgs in range(1): + + ax.plot(X_train[i_bfgs,:], label=f"SPEC gt {i_bfgs}") + + ax.set_xlabel('PCA comps.', fontsize=22) + ax.set_ylabel('int a.u.', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.tick_params(axis="y", labelsize=20) + ax.legend(fontsize=20) + #ax.set_title(f"{exp_dir}", y=1.0, pad=-20) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/single_X.png", bbox_inches='tight') + plt.close(fig) + + + args_cor_mat_int = np.argsort(eps_mean) + args_cor_mat_int = args_cor_mat_int[230:] + # ORDERED reconstruction with total unc + eps + fig, axes_2 = plt.subplots(10, 2, figsize=(15, 35)) + axes_2 = axes_2.flatten() + for i_bfgs in range(20): + i_bfgs = i_bfgs + ax = axes_2[i_bfgs] + ax.plot(spec_raw_pe, Y_rec[args_cor_mat_int[i_bfgs],:], "r", label=f"SPEC rec {i_bfgs, args_cor_mat_int[i_bfgs]}") + ax.plot(spec_raw_pe, spec_test[args_cor_mat_int[i_bfgs],:], label=f"SPEC gt {i_bfgs, args_cor_mat_int[i_bfgs]}") + ax.fill_between( + spec_raw_pe, Y_rec[args_cor_mat_int[i_bfgs],:] - Y_rec_unc_specpca, Y_rec[args_cor_mat_int[i_bfgs],:] + Y_rec_unc_specpca, color="g", alpha=0.5, label="u_specpca" + ) + ax.fill_between( + spec_raw_pe, Y_rec[args_cor_mat_int[i_bfgs],:] - Y_eps[0,:], Y_rec[args_cor_mat_int[i_bfgs],:] + Y_eps[0,:], color="blue", alpha=0.5, label="eps" + ) + ax.set_xlabel('Energy (eV) ', fontsize=22) + ax.set_ylabel('int a.u.', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.legend(fontsize=10) + ax.set_title(f"eps={eps_mean[args_cor_mat_int[i_bfgs]]}", y=1.0, pad=-10) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/reconstruction_eps_SORTED.png", bbox_inches='tight') + plt.close(fig) + + + args_eps = np.argsort(eps_mean) + # ORDERED reconstruction with total unc + eps + fig, axes_2 = plt.subplots(10, 2, figsize=(15, 35)) + axes_2 = axes_2.flatten() + for i_bfgs in range(20): + + ax = axes_2[i_bfgs] + ax.plot(spec_raw_pe, Y_rec[args_eps[i_bfgs],:], "r", label=f"SPEC rec {i_bfgs, args_eps[i_bfgs]}") + ax.plot(spec_raw_pe, spec_test[args_eps[i_bfgs],:], label=f"SPEC gt {i_bfgs, args_eps[i_bfgs]}") + ax.fill_between( + spec_raw_pe, Y_rec[args_eps[i_bfgs],:] - Y_rec_unc_specpca, Y_rec[args_eps[i_bfgs],:] + Y_rec_unc_specpca, color="g", alpha=0.5, label="u_specpca" + ) + ax.fill_between( + spec_raw_pe, Y_rec[args_eps[i_bfgs],:] - Y_eps[0,:], Y_rec[args_eps[i_bfgs],:] + Y_eps[0,:], color="blue", alpha=0.5, label="eps" + ) + ax.set_xlabel('Energy (eV) ', fontsize=22) + ax.set_ylabel('int a.u.', fontsize=22) + ax.tick_params(axis="x", labelsize=20) + ax.legend(fontsize=10) + ax.set_title(f"eps={eps_mean[args_eps[i_bfgs]]}", y=1.0, pad=-10) + + fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/reconstruction_eps_SORTED_descending.png", bbox_inches='tight') + plt.close(fig) + + + + + + print("finished eval") + + + + + +#import joblib +#spec_pca_model = joblib.load(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/checkpoints/spec_pca_model.joblib") + + diff --git a/inv_inference.py b/inv_inference.py new file mode 100644 index 0000000..6ad195c --- /dev/null +++ b/inv_inference.py @@ -0,0 +1,69 @@ +import sys +sys.path.append("./") +sys.path.append("..") + +import matplotlib +matplotlib.use('Agg') +import matplotlib.pyplot as plt + +from src.data.data import ReadPesSpec, PesChannelSelector +from src.data.data_preproc import SpecPreprocessing + +from src.models.find_components.Find_Component import FindPCAcomps +from src.models.fit_methods.Fit_Methods import FitBFGS, FitBFGSPCA +from src.models.fit_methods.model import Model + +from src.utils.utils import load_train_test_h5, load_rec_data_h5, load_rec_data_h5_bfgs_pca, load_checkpoint +from src.utils.utils import create_experiment_dirs + +from sklearn.model_selection import train_test_split +import numpy as np + + +# Load trained model in the path exp_dir +exp_dir = "test3_pulseen_short_test_eps_r0015" +Y_train_model, Y_test_model, spec_train_model, spec_test_model, spec_raw_pe, X_train_model, X_test_model, pes_train_model, pes_test_model, att_dict, xgm_pulseen_train, xgm_pulseen_test = load_train_test_h5(exp_dir) + +att_dict["pes_pca_preprocessing"] = False # set pca non trainable +att_dict["spec_pca_preprocessing"] = False # set pca non trainable + + +# Change the exp_dir to new dir where inference data is located +#att_dict["exp_dir"] = "test3_inference" +att_dict["run_number"] = "r0014" +att_dict["spec_ofset"] = -2 + + +RPS = ReadPesSpec(att_dict) # read data +data_dict = RPS.get_data() + + +model_instance = Model(model_type="bfgs_pca_eps", data_info=att_dict) +pes_train, pes_test, spec_train, spec_test = model_instance.preprocess(data_dict, att_dict) # Model Preprocess + + +inference_model, pes_pca_model, spec_pca_model = model_instance.load_model() # Move to incference sctipt TODO! + + +# Update the Train and Test data for inference model +model_instance.X_train, model_instance.X_test = pes_pca_model.transform(pes_train), pes_pca_model.transform(pes_test) +model_instance.Y_train, model_instance.Y_test = spec_pca_model.transform(spec_train), spec_pca_model.transform(spec_test) + + +inf_dir = exp_dir + "_inference_TestDATA" + att_dict["run_number"] +model_instance.data_info["exp_dir"] = inf_dir + + +create_experiment_dirs(inf_dir) +model_instance.save_preproc() + + +result = model_instance.predict(input_value=model_instance.X_test, + target_value=model_instance.Y_test, + spec_target=spec_test_model) + + +model_instance.save_prediction() + + +print("Finished Inference") \ No newline at end of file diff --git a/inv_train.py b/inv_train.py new file mode 100644 index 0000000..17c693d --- /dev/null +++ b/inv_train.py @@ -0,0 +1,103 @@ +import sys +sys.path.append("./") +sys.path.append("..") + +import matplotlib +matplotlib.use('Agg') +import matplotlib.pyplot as plt + +from src.data.data import ReadPesSpec, PesChannelSelector +from src.data.data_preproc import SpecPreprocessing + +from src.models.find_components.Find_Component import FindPCAcomps + +from src.models.fit_methods.model import Model + +from src.utils.utils import create_experiment_dirs, save_train_test_h5, save_rec_data_h5, save_checkpoint, save_checkpoint_fft + +from sklearn.model_selection import train_test_split +import numpy as np + +run_directory = "/gpfs/exfel/exp/SA3/202121/p002935/raw" +run_number = "r0015" +spec_ofset = -2 +n_pca_comps_pes = 400 +n_pca_comps_spec = 20 +pes_pca_preprocessing = True +spec_pca_preprocessing = True +spec_gc = True +spec_fft = False +use_data_subset = True +data_subset_start, data_subset_end = 1000, 6000 # Min Max +channel_names = "all_chs" # "default", "all_chs", or list with channel names ex ["channel_1_D", "channel_2_B"] +test_size = 0.05 +model_type = "bfgs_pca_eps" # "bfgs", "bfgs_pca", "bfgs_pca_eps" + +exp_dir = "test3_pulseen_short_test_eps_" + run_number +general_comment = exp_dir + +data_info = {"run_directory": run_directory, + "run_number": run_number, + "spec_ofset": spec_ofset, + "exp_dir": exp_dir, + "n_pca_comps_pes": n_pca_comps_pes, + "n_pca_comps_spec": n_pca_comps_spec, + "pes_pca_preprocessing": pes_pca_preprocessing, + "spec_pca_preprocessing": spec_pca_preprocessing, + "spec_gc": spec_gc, + "spec_fft": spec_fft, + "use_data_subset": use_data_subset, + "data_subset_start": data_subset_start, + "data_subset_end": data_subset_end, + "channel_names": channel_names, + "test_size": test_size, + "model_type": model_type, + "general_comment": general_comment + + } + + +experiment_dir, summary_dir, checkpoint_dir, output_dir, test_dir = create_experiment_dirs(exp_dir) + +import datetime +now = datetime.datetime.now() +print ("Start Read Pes Spec Data : ") +print (now.strftime("%Y-%m-%d %H:%M:%S")) + +# Read Pes Spec Data +RPS = ReadPesSpec(data_info) +data_dict = RPS.get_data() + +now = datetime.datetime.now() +print ("End Read Pes Spec Data : ") +print (now.strftime("%Y-%m-%d %H:%M:%S")) + +# Model +model_instance = Model(model_type=model_type, data_info=data_info) +X_train, X_test, Y_train, Y_test = model_instance.preprocess(data_dict, data_info) # Model Preprocess + +now = datetime.datetime.now() +print ("End Model Preprocessor : ") +print (now.strftime("%Y-%m-%d %H:%M:%S")) + +model_instance.save_preproc() +print("################") +print(X_train.shape) +print("################") +model_instance.fit_eval(X_train, Y_train, X_test, Y_test) # Model FIT + +now = datetime.datetime.now() +print ("End Model Train : ") +print (now.strftime("%Y-%m-%d %H:%M:%S")) + +model_instance.save_latest_ckp() + +result = model_instance.predict(input_value=X_test) + +model_instance.save_prediction() + + + + +print("FINISHED ALL") + diff --git a/src/data/__init__.py b/src/data/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/data/data.py b/src/data/data.py new file mode 100644 index 0000000..cd9517e --- /dev/null +++ b/src/data/data.py @@ -0,0 +1,326 @@ +""" + Data loaders for SPEC and PES. +""" + +from extra_data import open_run, RunDirectory +import numpy as np + +from typing import Any, Union, List, Dict + +import os +import h5py +import numpy as np + +import itertools + +import scipy +import scipy.interpolate +from random import choices + +from skimage.feature import canny +import torch +import torchvision.transforms as transforms +from collections import namedtuple +import torch.nn.functional as F + +import cv2 + +import sys +sys.path.append("./") +sys.path.append("..") + + +from sklearn.datasets import fetch_olivetti_faces +from numpy.random import RandomState + + +class ReadPesSpec: + """ + Read PES and SPEC data, select by unique IDs + """ + + def __init__(self, data_info): + + self.data_info = data_info + + self.rundir = self.data_info["run_directory"] + self.runid = self.data_info["run_number"] + self.spec_ofset = self.data_info["spec_ofset"] + + self.run = RunDirectory(f"{self.rundir}/{self.runid}") + + self.spec_inds = None + self.pes_inds = None + self.xgm_inds = None + + self.spec_tid_all = None + self.spec_tid_all_offset = None + self.pes_tid_all = None + self.xgm_tid_all = None + + self.spec_tids = None + self.pes_tids = None + self.xgm_tids = None + + self.retvol_raw = None + self.retvol_raw_timestamp = None + self.min_ret_vol = None + + + self.spec_raw_pe = None + self.spec_raw_int = None + self.pes_raw_dict = None + self.pes_data = None + self.xgm_pressureFiltered = None + self.xgm_pulseen = None + self.retvol_raw = None + self.raw_timestamp = None + self.min_ret_vol = None + + + # Output + self.data = {"spec_raw_pe":None, + "spec_raw_int":None, + "pes_raw_dict":None, + "pes_data":None, + "xgm_pressureFiltered":None, + "xgm_pulseen":None, + "retvol_raw":None, + "retvol_raw_timestamp":None, + "min_ret_vol":None} + + + + def get_unq_inds(self, a, b, c): + uniq_vals = list(set(a).intersection(b).intersection(c)) + return [a.index(x) for x in uniq_vals], [b.index(x) for x in uniq_vals], [c.index(x) for x in uniq_vals] + + # create channel names + def gen_channel_names(self): + channels = [] + for i in range(4): + channels.append(f"channel_{i+1}_A") + channels.append(f"channel_{i+1}_B") + channels.append(f"channel_{i+1}_C") + channels.append(f"channel_{i+1}_D") + + chs_short_name = [] + for i in range(4): + chs_short_name.append(f"ch_{i+1}_A") + chs_short_name.append(f"ch_{i+1}_B") + chs_short_name.append(f"ch_{i+1}_C") + chs_short_name.append(f"ch_{i+1}_D") + + return channels, chs_short_name + + + def read_train_ids(self, debug=False): + + # Read raw SPEC Data train IDs + self.spec_tid_all = self.run['SA3_XTD10_SPECT/MDL/FEL_BEAM_SPECTROMETER_SQS1:output', f"data.trainId"].ndarray() + self.spec_tid_all_offset = self.spec_tid_all + self.spec_ofset ##### Offset correction ##### + #print(f" spec_tid_all_offset = {len(spec_tid_all_offset)}") + + # Read raw PES Data train IDs + self.pes_tid_all = self.run['SA3_XTD10_PES/ADC/1:network', f"digitizers.trainId"].ndarray() + #print(f" pes_tid_all = {len(pes_tid_all)}") + + + # Read raw XGM Data train IDs + self.xgm_tid_all = self.run['SA3_XTD10_XGM/XGM/DOOCS:output', f"data.trainId"].ndarray() + #print(f" xgm_tid_all = {len(xgm_tid_all)}") + + print(f" Introduced SPEC_OFFSET = {self.spec_ofset} \n ") + + if debug: + print(f"\n Len of train IDs: \n \ + spec_tid_all_offset {len(self.spec_tid_all_offset)} \n \ + pes_tid_all {len(self.pes_tid_all)} \n \ + xgm_tid_all {len(self.xgm_tid_all)} \n \ + ") + + + print(f"\n First 5 train IDs from all sources: \n \ + spec_tid_all {self.spec_tid_all[:5]} \n \ + spec_tid_all_offset {self.spec_tid_all_offset[:5]} \n \ + pes_tid_all {self.pes_tid_all[:5]} \n \ + xgm_tid_all {self.xgm_tid_all[:5]} \n \ + ") + + + def merge_train_ids(self, debug=True): + # Merge Train IDs + self.spec_inds, self.pes_inds, self.xgm_inds = self.get_unq_inds(list(self.spec_tid_all_offset), list(self.pes_tid_all), list(self.xgm_tid_all)) + + + print(" Len of indices for common elements in spec, pes, xgm :", len(self.spec_inds), len(self.pes_inds), len(self.xgm_inds)) + + # Find common train ids + self.spec_tids = self.spec_tid_all_offset[self.spec_inds] + self.pes_tids = self.pes_tid_all[self.pes_inds] + self.xgm_tids = self.xgm_tid_all[self.xgm_inds] + + if debug: + print(f"\n First 5 indices from all sources: \n \ + spec_inds {self.spec_inds[:5]} \n \ + pes_inds {self.pes_inds[:5]} \n \ + xgm_inds {self.xgm_inds[:5]} \n \ + ") + + print(f"\n First 5 train IDs from all sources: \n \ + spec_tids {self.spec_tids[:5]} \n \ + pes_tids {self.pes_tids[:5]} \n \ + xgm_tids {self.xgm_tids[:5]} \n \ + ") + return self.spec_inds, self.pes_inds, self.xgm_inds + + + def retVol(self): + # Read PES Data RetVol + self.retvol_raw = self.run["SA3_XTD10_PES/MDL/DAQ_MPOD", "u212.value"].ndarray()[list(self.pes_inds)] + self.min_ret_vol = np.abs(self.retvol_raw.min()) + self.retvol_raw_timestamp = self.run["SA3_XTD10_PES/MDL/DAQ_MPOD", "u212.timestamp"].ndarray()[list(self.pes_inds)] + + + def read_data(self, debug=True): + + # Read raw SPEC Data + self.spec_raw_pe= self.run['SA3_XTD10_SPECT/MDL/FEL_BEAM_SPECTROMETER_SQS1:output', f"data.photonEnergy"].ndarray().astype(float)[list(self.spec_inds),:] + #spec_raw_all = run_spec['SA3_XTD10_SPECT/MDL/FEL_BEAM_SPECTROMETER_SQS1:output', f"data.intensityDistribution"].ndarray() + self.spec_raw_int = self.run['SA3_XTD10_SPECT/MDL/FEL_BEAM_SPECTROMETER_SQS1:output', f"data.intensityDistribution"].ndarray().astype(float)[list(self.spec_inds),:] + + # Read PES Data + channels, _ = self.gen_channel_names() # create channel names + self.pes_raw_dict = {} + pes_raw_dict_all = {} + for ch in channels: + #pes_raw = run['SA3_XTD10_PES/ADC/1:network', f"digitizers.{ch}.raw.samples"].ndarray() + #pes_raw_dict_all[ch] = run_pes['SA3_XTD10_PES/ADC/1:network', f"digitizers.{ch}.raw.samples"].ndarray() + self.pes_raw_dict[ch] = self.run['SA3_XTD10_PES/ADC/1:network', f"digitizers.{ch}.raw.samples"].ndarray().astype(float)[list(self.pes_inds),:] + + # Read XGM gas pressure + self.xgm_pressureFiltered= self.run['SA3_XTD10_XGM/XGM/DOOCS', f"pressure.pressureFiltered.value"].ndarray().astype(float)[list(self.xgm_inds)] + + # Read PES Data RetVol + self.retVol() + + # Read XGM Pusle Energy + self.xgm_pulseen = self.run['SA3_XTD10_XGM/XGM/DOOCS:output', f"data.intensitySa3TD"].ndarray().astype(float)[list(self.xgm_inds)] + + if debug: + #print(f"\n pes data channels :: \n {self.pes_raw_dict.keys()}") + print(f"spec_raw_pe.shape{self.spec_raw_pe.shape}, \n spec_raw_int.shape{self.spec_raw_int.shape}") + print(f"\n pes channel shape channel_1_A:: {self.pes_raw_dict['channel_1_A'].shape}") + print("\n shape of xgm_pressureFiltered", self.xgm_pressureFiltered.shape) + print(f"\n min_ret_vol = {self.min_ret_vol}") + + + assert self.spec_raw_pe.shape, self.spec_raw_int.shape + assert self.pes_raw_dict['channel_1_A'].shape[0], self.spec_raw_int.shape[0] + + + + def get_data(self): + + # Read Train Ids + self.read_train_ids(self.run) + + # Merge Train Ids + self.merge_train_ids() + + # Read Data + self.read_data() + + self.data["spec_raw_pe"] = self.spec_raw_pe + self.data["spec_raw_int"] = self.spec_raw_int + self.data["pes_raw_dict"] = self.pes_raw_dict + self.data["xgm_pressureFiltered"] = self.xgm_pressureFiltered + self.data["xgm_pulseen"] = self.xgm_pulseen + self.data["retvol_raw"] = self.retvol_raw + self.data["retvol_raw_timestamp"] = self.retvol_raw_timestamp + self.data["min_ret_vol"] = self.min_ret_vol + + print("Data loaded in dataframe") + + PCS = PesChannelSelector(pes_dict=self.data, ch_names=self.data_info["channel_names"]) # Select Pes Channels + self.pes_data = PCS.sel_pes_chs() + self.data["pes_data"] = self.pes_data + + print("pes_data.shape = ", self.pes_data.shape) # (14497, 1200) + print("spec_data.shape = ", self.spec_raw_int.shape) # (14497, 1900) + + + return self.data + + +class PesChannelSelector: + """_Use this class to take all or part of PES channels for data preprocessing_ + """ + def __init__(self, pes_dict, ch_names="default"): + + self.pes_dict = pes_dict + self.default_channels = ["channel_1_D", "channel_2_B", "channel_3_A", "channel_3_B", "channel_4_C", "channel_4_D"] + + + if ch_names is "default": + self.channels = self.default_channels + elif ch_names == "all_chs": + self.channels, _ = self.gen_channel_names() + else: + self.channels = ch_names + + # Output + self.pes_data = np.array([]) + + + + # create channel names + def gen_channel_names(self): + """ + Generate channel names for quick reference + + Returns: + [channels], [chs_short_name]: raw channel names, channel short names + """ + channels = [] + for i in range(4): + channels.append(f"channel_{i+1}_A") + channels.append(f"channel_{i+1}_B") + channels.append(f"channel_{i+1}_C") + channels.append(f"channel_{i+1}_D") + + chs_short_name = [] + for i in range(4): + chs_short_name.append(f"ch_{i+1}_A") + chs_short_name.append(f"ch_{i+1}_B") + chs_short_name.append(f"ch_{i+1}_C") + chs_short_name.append(f"ch_{i+1}_D") + + return channels, chs_short_name + + + def get_array_from_dict(self, ch_name): + """ Select ROI where TOF is present + + Args: + ch_name (str): channel name + + Returns: + Croped channel : crop channel around the TOF measurement + """ + TOF_start = 31445 + 0 + TOF_end = TOF_start + 200 + return self.pes_dict["pes_raw_dict"][ch_name][:,TOF_start:TOF_end] + + + def sel_pes_chs(self): + """ Select PES channel + + Returns: + pes_data: PES data concat together all channels + """ + + self.pes_data = np.concatenate([self.get_array_from_dict(ch_name) for ch_name in self.channels], axis=1) + + return self.pes_data + diff --git a/src/data/data_preproc.py b/src/data/data_preproc.py new file mode 100644 index 0000000..fba4bb0 --- /dev/null +++ b/src/data/data_preproc.py @@ -0,0 +1,99 @@ +""" + Data Preprocessing +""" + +import scipy +import scipy.signal +import numpy as np +from sklearn import preprocessing + +import sys +sys.path.append("./") +sys.path.append("..") + +from src.models.find_components.Find_Component import FindPCAcomps + + +# GC Spec data +class SpecPreprocessing(): + def __init__(self, spec_data, spec_raw_pe, gauss_sigma=0.2): + + self.gauss_sigma = gauss_sigma + self.spec_data = spec_data + self.spec_raw_pe = spec_raw_pe + + # Output + self.gaussian = None + self.spec_data_gc = None + self.spec_data_gc_fft = None + + def gaussian_convolve(self): + gx = self.spec_raw_pe[:,:] + mu = self.spec_raw_pe[0,gx.shape[1]//2] + + self.gaussian = np.exp(-((gx-mu)/self.gauss_sigma)**2/2)/np.sqrt(2*np.pi*self.gauss_sigma**2) + print(self.spec_data.shape, self.spec_raw_pe.shape, self.gaussian.shape) + #result = np.convolve(spec_data[0,:], gaussian, mode="full") + + self.spec_data_gc = scipy.signal.fftconvolve(self.spec_data, self.gaussian, mode="same",axes=1)/80 + print(self.spec_data_gc.shape) + + return self.spec_data_gc, self.gaussian + + def make_fft(self): + + self.spec_data_gc_fft = np.fft.fft(self.spec_data_gc) + + return self.spec_data_gc_fft + + + +class PesPreprocessing(): + def __init__(self, data_train, data_test, n_pca=50): + + self.data_train = data_train + self.data_test = data_test + self.n_pca = n_pca + + # Output + self.pca_model = None + self.scaler = None + self.new_train = None + self.new_test = None + + + + def find_pca(self): + pes_PCA = FindPCAcomps( + data_train=self.data_train, + data_test=self.data_test, + n_pca_comps=self.n_pca + ) + + self.pca_model, self.new_train, self.new_test = pes_PCA.get_pca() + return self.pca_model, self.new_train, self.new_test + + def stand_scaler(self): + + self.scaler = preprocessing.StandardScaler().fit(self.data_train) + + self.new_train = self.scaler.transform(self.data_train) + self.new_test = self.scaler.transform( self.data_test) + + return self.scaler, self.new_train, self.new_test + + + def get(self, data_info): + + if data_info["pes_pca_preprocessing"]: + self.pca_model, self.new_train, self.new_test = self.find_pca() + return self.pca_model, self.new_train, self.new_test + + if data_info["standard_scaler"]: + self.new_train, self.new_test = self.stand_scaler() + return self.scaler, self.new_train, self.new_test + + + + + diff --git a/src/models/find_components/Find_Component.py b/src/models/find_components/Find_Component.py new file mode 100644 index 0000000..54c93b0 --- /dev/null +++ b/src/models/find_components/Find_Component.py @@ -0,0 +1,194 @@ +import sys +sys.path.append("..") + + +from sklearn.datasets import load_digits +from sklearn.datasets import fetch_olivetti_faces +from sklearn.decomposition import FastICA +from sklearn import decomposition +from sklearn.cluster import MiniBatchKMeans +from sklearn import decomposition +import matplotlib.pyplot as plt +import skimage + +import numpy as np +from numpy.random import RandomState + +import logging +from time import time + +from sklearn import linear_model +import torch + +from scipy import signal +from scipy.signal import find_peaks +from sklearn.decomposition import PCA + +class FindComponents(object): + """This class find the components in data. Those components could be independent or principal. It also returns the transformer. + + Args: + object (object): Object of components and transformer + """ + + + def __init__(self): + """_summary_ + + Args: + train_data (np.array): _description_ + val_data (np.array): _description_ + image_shape (tuple, optional): _description_. Defaults to (64, 64). + is_image (bool, optional): _description_. Defaults to True. + """ + + super(FindComponents, self).__init__() + + self.transformer = None + self.ica_comps = None + self.ica_comps_pix = None + + def clear_input(self): + self.train_data = None + self.val_data = None + self.image_shape = None + self.is_image = None + + + def get_transformer_ica(self, train_data, val_data, image_shape=(64, 64), is_image=True, N_COMP_ICA=5, ICA_Method='logcosh'): + """Takes number of data points and calculates the N Independent components based on sklearn.decomposition.FastICA. + Returns the transformer and corresponding ICA components in 2 verions (N ICA Components, and N ICA components per pixel. + + Args: + train_data (np.array): _description_ + val_data (np.array): _description_ + image_shape (tuple, optional): _description_. Defaults to (64, 64). + is_image (bool, optional): _description_. Defaults to True. + N_COMP_ICA (int, optional): _description_. Defaults to 5. + + Returns: + [transformer (object)]: transformer object for FastICA + [ica_comps (list)]: FastICA N componenrs ordered for all pixels in single data + [ica_comps_pix (list)]: FastICA N components ordered per pixel for all components in single data + """ + + self.train_data = train_data + self.val_data = val_data + self.image_shape = image_shape + self.is_image = is_image + + self.train_data_points = self.train_data.shape[0] + if len(val_data.shape)==1: + self.val_data_points = 1 + elif len(val_data.shape)!=1: + self.val_data_points = self.val_data.shape[0] + + assert(N_COMP_ICA<=self.train_data_points) + + transformer = FastICA(n_components=N_COMP_ICA, max_iter=5000, random_state=0, whiten=True, fun=ICA_Method) + print("shape train data", self.train_data.shape) + print("ICA_Method", ICA_Method) + X_transformed = transformer.fit_transform(self.train_data) + print("X_transformed.shape=", X_transformed.shape) + + n_img, l_img = transformer.mixing_.T.shape + print("transformer.mixing_.T.shape", n_img, l_img) + ica_comps = [transformer.mixing_[:, i] for i in range(N_COMP_ICA)] + #print("ica_comps", ica_comps) + + + # Get pixel wise N components by + ica_comps_pix = [transformer.mixing_[i, :] for i in range(l_img)] + #print("ica_comps_pix", ica_comps_pix) + + self.transformer = transformer + self.ica_comps = ica_comps + self.ica_comps_pix = ica_comps_pix + + return self.transformer, self.ica_comps, self.ica_comps_pix, X_transformed + + +class FindNumberOfComponents(object): + + def __init__(self): + self.comps = None + self.PSDs = None + self.PSDs_means = None + self.user_n_components = None + self.X_centered, self.y_centered = None, None + + self.n_good_comps = None + self.good_comps = None + + + def find_components(self): + # Find components + number_of_components = self.user_n_components + fit_method = "BayesianRidge" + + FC = FindComponents() + transformer, comps, comps_pix, X_transformed = FC.get_transformer_ica(train_data=self.X_centered, val_data=self.y_centered, N_COMP_ICA=self.user_n_components) # finds components + self.comps = comps + + def find_PSDs(self): + #self.find_components() + self.PSDs = [signal.welch(ac)[1] for ac in self.comps] + return self.PSDs + + def find_peaks_PSD(self): + self.PSDs_means = np.mean(self.PSDs, axis=1) + peaks, _ = find_peaks(self.PSDs_means, threshold=self.PSDs_means.min()*3) + self.n_good_comps = len(peaks) + self.good_comps = peaks + print("DONE !!!!!!!!!!!!!!!!!!") + return self.n_good_comps, self.good_comps + + def n_good_components(self, + X_centered, + y_centered, + number_of_components=20): + + self.user_n_components = number_of_components + self.X_centered, self.y_centered = X_centered, y_centered + + self.find_components() + self.find_PSDs() + self.find_peaks_PSD() + + #plt.plot(self.PSDs_means) + #plt.plot(self.good_comps, self.PSDs_means[self.good_comps], "*") + #plt.plot(np.zeros_like(self.PSDs_means), "--", color="gray") + #plt.show() + #print(f"n_good_components = {self.n_good_comps}") + #print(f"Good components = {self.good_comps}") + + + return self.n_good_comps, self.good_comps + + +class FindPCAcomps(object): + """_summary_ + + Args: + object (_type_): _description_ + """ + + def __init__(self, data_train, data_test, n_pca_comps=50): + self.pca_model = None + self.data_train = data_train + self.data_test = data_test + self.n_pca_comps = n_pca_comps + + def clear_output(self): + self.pca_model = None + self.pca_train = None + self.pca_test = None + + + def get_pca(self): + self.clear_output() + self.pca_model = PCA(n_components=self.n_pca_comps, whiten=True) + self.pca_train = self.pca_model.fit_transform(self.data_train) + self.pca_test = self.pca_model.transform(self.data_test) + + return self.pca_model, self.pca_train, self.pca_test \ No newline at end of file diff --git a/src/models/find_components/__init__.py b/src/models/find_components/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/models/fit_methods/Fit_Methods.py b/src/models/fit_methods/Fit_Methods.py new file mode 100644 index 0000000..e7baf72 --- /dev/null +++ b/src/models/fit_methods/Fit_Methods.py @@ -0,0 +1,302 @@ +from sklearn import linear_model +import torch +import scipy +import h5py +import numpy as np +from autograd import numpy as anp # Thinly-wrapped version of Numpy +from autograd import grad + + + + +class FitBFGSPCA_EPS(object): + + def __init__(self): + + self.X_train = None + self.Y_train = None + + self.X_test = None + self.Y_test = None + + self.Y_train_norm = None + self.Y_test_norm = None + + self.spec_test = None + + self.Nx = None + self.Ny = None + self.A0 = None + self.Aeps = None + + self.b0 = None + self.u0 = None + self.x0 = None + + self.sc_op = None + self.A_inf = None + self.b_inf = None + self.u_inf = None + + self.model = {"A_inf":self.A_inf, + "b_inf":self.b_inf, + "u_inf":self.u_inf + + } + + self.pca_model_spec = None + + self.res = None + self.res_unc = None + self.res_pca_unc = None + self.res_unc_specpca = None + + self.result = {"res":self.res, + "res_unc":self.res_unc, + "res_pca_unc": self.res_pca_unc, + "res_unc_specpca": self.res_unc_specpca + } + + self.input_data = None + self.loss_train = None + self.loss_test = None + + + def loss_mae(self, x): + # diag( (in @ x - out) @ (in @ x - out)^T ) + A = x[:self.Nx*self.Ny].reshape((self.Nx, self.Ny)) + b = x[self.Nx*self.Ny:(self.Nx*self.Ny+self.Ny)].reshape((1, self.Ny)) + log_unc = anp.clip(x[(self.Nx*self.Ny+self.Ny):].reshape((1, self.Ny)), -3, 8) + #log_unc = x[(Nx*Ny+Ny):].reshape((1, Ny)) + iunc = anp.exp(-log_unc) + #print("iunc2", iunc2) + #print("log_unc", log_unc) + + return anp.mean((anp.fabs(self.X_train @ A + b - self.Y_train)*iunc + log_unc).sum(axis=1), axis=0 ) # Put RELU on (XX@x) and introduce new matrix W + + def loss_regul(self, x): + # diag( (in @ x - out) @ (in @ x - out)^T ) + A = x[:self.Nx*self.Ny].reshape((self.Nx, self.Ny)) + b = x[self.Nx*self.Ny:(self.Nx*self.Ny+self.Ny)].reshape((1, self.Ny)) + #log_unc = anp.clip(x[(Nx*Ny+Ny):].reshape((1, Ny)), -3, 8) + log_unc = x[(self.Nx*self.Ny+self.Ny):].reshape((1, self.Ny)) + iunc2 = anp.exp(-2*log_unc) + unc2 = anp.exp(2*log_unc) + #print("iunc2", iunc2) + #print("log_unc", log_unc) + width = 100.0 + w = 0.5/width**2 + #prior = w*((A**2).reshape((1, -1)).mean(axis=1)) + + + # New from Danilo-> p(unc|a, b) = ba/gamma(a) * unc(a-1) * np.exp(-b * unc) + a = 3 + b = 6 + # prior -> - log p(unc|a, b) = - (a-1)*anp.log(unc) + b*unc + prior = - (a-1) * log_unc + b * anp.exp(log_unc) + + + return anp.mean( (0.5*((self.X_train @ A + b - self.Y_train_norm)**2)*iunc2 + log_unc).sum(axis=1) + prior.sum(axis=1), axis=0 ) # Put RELU on (XX@x) and introduce new matrix W + + def fill_init(self): + + + self.Nx = self.X_train.shape[1] + self.Ny = self.Y_train.shape[1] + self.A0 = np.eye(self.Nx, self.Ny).reshape(self.Nx*self.Ny) + self.Aeps = np.zeros(self.Nx) + + self.b0 = np.zeros(self.Ny) + self.u0 = np.zeros(self.Ny) + self.x0 = np.concatenate((self.A0,self.b0,self.u0,self.Aeps)) + + + + + def fit_eval(self, X_train, Y_train, X_test, Y_test): + + self.X_train = X_train + self.Y_train = Y_train + + self.X_test = X_test + self.Y_test = Y_test + + self.fill_init() + + + def loss(x, X, Y): + # diag( (in @ x - out) @ (in @ x - out)^T ) + A = x[:self.Nx*self.Ny].reshape((self.Nx, self.Ny)) + b = x[self.Nx*self.Ny:(self.Nx*self.Ny+self.Ny)].reshape((1, self.Ny)) + + b_eps = x[(self.Nx*self.Ny+self.Ny):(self.Nx*self.Ny+self.Ny+self.Ny)].reshape((1, self.Ny)) + + A_eps = x[(self.Nx*self.Ny+self.Ny+self.Ny):].reshape((self.Nx, 1)) + log_unc = X @ A_eps + b_eps + + #log_unc = anp.log(anp.exp(log_unc) + anp.exp(log_eps)) + iunc2 = anp.exp(-2*log_unc) + + #print("iunc2", iunc2) + #print("log_unc", log_unc) + + return anp.mean( (0.5*((X@ A + b - Y)**2)*iunc2 + log_unc).sum(axis=1), axis=0 ) # Put RELU on (XX@x) and introduce new matrix W + + self.loss_train = [] + self.loss_test = [] + + def loss_hist(x): + l_train = loss(x, X_train, self.Y_train) + l_test = loss(x, X_test, self.Y_test) + + self.loss_train.append(l_train) + self.loss_test.append(l_test) + return l_train + + def loss_hist_2(x): + l_train = loss(x, self.X_train, self.Y_train) + return l_train + + grad_loss = grad(loss_hist_2) + #self.grad_loss_regul = grad(loss_regul) + self.sc_op = scipy.optimize.fmin_l_bfgs_b(loss_hist, self.x0, grad_loss, disp=True, factr=1e7, maxiter=100, iprint = 0) + + # Inference + #print(sc_res.shape) + self.A_inf = self.sc_op[0][:self.Nx*self.Ny].reshape(self.Nx, self.Ny) + self.b_inf = self.sc_op[0][self.Nx*self.Ny:(self.Nx*self.Ny+self.Ny)].reshape(1, self.Ny) + self.u_inf = self.sc_op[0][(self.Nx*self.Ny+self.Ny):(self.Nx*self.Ny+self.Ny+self.Ny)].reshape(1, self.Ny) # removed np.exp + self.A_eps = self.sc_op[0][(self.Nx*self.Ny+self.Ny+self.Ny):].reshape(self.Nx, 1) + + self.model = {"A_inf":self.A_inf, + "b_inf":self.b_inf, + "u_inf":self.u_inf, + "A_eps":self.A_eps, + "loss_train": self.loss_train, + "loss_test": self.loss_test, + } + + return self.model + + def save_preproc(self, + pes_train, pes_test, + spec_train, spec_test, + X_train, X_test, + Y_train, Y_test, + spec_raw_pe, + data_info, + xgm_pulseen_train, + xgm_pulseen_test + ): + + exp_dir = data_info["exp_dir"] + + hf = h5py.File(f"experiments/{exp_dir}/output/data.h5", 'w') + hf.create_dataset('pes_train', data=pes_train) + hf.create_dataset('pes_test', data=pes_test) + hf.create_dataset('spec_train', data=spec_train) + hf.create_dataset('spec_test', data=spec_test) + hf.create_dataset('X_train', data=X_train) + hf.create_dataset('X_test', data=X_test) + hf.create_dataset('Y_train', data=Y_train) + hf.create_dataset('Y_test', data=Y_test) + hf.create_dataset('spec_raw_pe', data=spec_raw_pe[0,:]) + hf.create_dataset('xgm_pulseen_train', data=xgm_pulseen_train) + hf.create_dataset('xgm_pulseen_test', data=xgm_pulseen_test) + + for key, val in data_info.items(): + hf.attrs[key] = val + + def save_latest_chp(self, exp_dir): + filemodel = f"experiments/{exp_dir}/checkpoints/chp.h5" + hf = h5py.File(filemodel, 'w') + + for k, v in self.model.items(): + print("saving:", k) + hf.create_dataset(k, data=v) + hf.close() + print(f"model saved in -> {filemodel}") + + def load_model(self, exp_dir): + + # Load Checkpoint Data + print("loading model", exp_dir) + filename = f"experiments/{exp_dir}/checkpoints/chp.h5" + file = h5py.File(filename, 'r') + A_inf = file["A_inf"][()] # returns as a numpy array + b_inf = file["b_inf"][()] # returns as a numpy array + u_inf = file["u_inf"][()] # returns as a numpy array + A_eps = file["A_eps"][()] # returns as a numpy array + + + import joblib + pes_pca_model=joblib.load(f"experiments/{exp_dir}/checkpoints/pes_pca_model.joblib") + spec_pca_model=joblib.load(f"experiments/{exp_dir}/checkpoints/spec_pca_model.joblib") + + self.model = {"A_inf":A_inf, + "b_inf":b_inf, + "u_inf":u_inf, + "A_eps":A_eps, + "pes_pca_model":pes_pca_model, + "spec_pca_model":spec_pca_model + + } + return self.model, pes_pca_model, spec_pca_model + + def predict(self, input_data, input_target, spec_target): + + if self.pca_model_spec is not None: + self.model["spec_pca_model"] = self.pca_model_spec # TODO! check for training + + if input_target is None: + self.model["Y_test"] = self.Y_test + else: + self.model["Y_test"] = input_target + + if spec_target is None: + self.model["spec_target"] = self.spec_test + else: + self.model["spec_target"] = spec_target + + + + + + self.result["res_pca"] = (input_data @ self.model["A_inf"] + self.model["b_inf"]) # inference reconstruction in PCA space + self.result["res_pca_unc"] = self.model["u_inf"][0,:] # unc in pca space + self.result["res_pca_eps"] = np.exp(input_data @ self.model["A_eps"] + self.result["res_pca_unc"]) # eps in pca space + + self.result["res"] = self.model["spec_pca_model"].inverse_transform(self.result["res_pca"]) # transform PCA space to real space + + self.result["res_unc"] = self.model["spec_pca_model"].inverse_transform(self.result["res_pca"] + self.model["u_inf"]) - self.model["spec_pca_model"].inverse_transform(self.result["res_pca"] ) + self.result["res_unc"] = np.fabs(self.result["res_unc"]) + + self.result["res_eps"] = self.model["spec_pca_model"].inverse_transform(self.result["res_pca"] + self.result["res_pca_eps"]) - self.model["spec_pca_model"].inverse_transform(self.result["res_pca"] ) + self.result["res_eps"] = np.fabs(self.result["res_eps"]) + + + self.Yhat_pca = self.model["spec_pca_model"].inverse_transform(self.model["Y_test"]) + self.result["res_unc_specpca"] = np.sqrt(((self.Yhat_pca - self.model["spec_target"])**2).mean(axis=0)) + + self.result["res_unc_total"] = np.sqrt(self.result["res_eps"]**2 + self.result["res_unc_specpca"]**2) + return self.result + + + def save_prediction(self, exp_dir): + + fileprediction = f"experiments/{exp_dir}/output/pred_data.h5" + hf = h5py.File(fileprediction, 'w') + hf.create_dataset('res_pca', data=self.result["res_pca"]) + hf.create_dataset('res_pca_unc', data=self.result["res_pca_unc"]) + hf.create_dataset('res', data=self.result["res"]) + hf.create_dataset('res_unc', data=self.result["res_unc"]) + hf.create_dataset('res_unc_specpca', data=self.result["res_unc_specpca"]) + hf.create_dataset('res_unc_total', data=self.result["res_unc_total"]) + hf.create_dataset('res_pca_eps', data=self.result["res_pca_eps"]) + hf.create_dataset('res_eps', data=self.result["res_eps"]) + + hf.close() + + print(f"Prediction result saved in -> {fileprediction}") + + diff --git a/src/models/fit_methods/__init__.py b/src/models/fit_methods/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/models/fit_methods/model.py b/src/models/fit_methods/model.py new file mode 100644 index 0000000..db101ab --- /dev/null +++ b/src/models/fit_methods/model.py @@ -0,0 +1,164 @@ +from sklearn.ensemble import RandomForestRegressor +from sklearn.linear_model import LinearRegression +from sklearn.model_selection import train_test_split +import numpy as np + +import pandas as pd + +from src.models.fit_methods.Fit_Methods import FitBFGSPCA_EPS +from src.data.data_preproc import SpecPreprocessing +from src.models.find_components.Find_Component import FindPCAcomps + +from src.utils.utils import save_chp_bfgs + + +class Model: + def __init__(self, model_type = None, data_info = None): + + + self.model_type = model_type + self.data_info = data_info + + self.data_dict = None + + self.pes_train = None + self.pes_test = None + self.spec_train = None + self.spec_test = None + + self.xgm_pulseen = None + self.xgm_pulseen_train = None + self.xgm_pulseen_test = None + + self.X_train = None + self.X_test = None + self.y_train = None + self.y_test = None + + self.pes_pca_model = None + self.spec_pca_model = None + + + if self.model_type == 'bfgs': + self.user_defined_model = FitBFGS() + #self.user_defined_preproc = SpecPreprocessing() + elif self.model_type == 'bfgs_pca': + self.user_defined_model = FitBFGSPCA() + elif self.model_type == 'bfgs_pca_eps': + self.user_defined_model = FitBFGSPCA_EPS() + + + + def split(self, test_size): + X = np.array(self.df[['Humidity', 'Pressure (millibars)']]) + y = np.array(self.df['Temperature (C)']) + self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(X, y, test_size = test_size, random_state = 42) + + def preprocess(self, data_dict, data_info): + self.data_dict = data_dict + self.data_info = data_info + + self.xgm_pulseen = data_dict["xgm_pulseen"] + + self.spec_data = self.data_dict["spec_raw_int"].copy() # Spec Data Intensity + self.spec_data_pe = self.data_dict["spec_raw_pe"].copy() # Spec Data Photon energy + self.pes_data = self.data_dict["pes_data"].copy() # Pes Data Intensity + + + # Data Preprocessing + SP = SpecPreprocessing(self.spec_data, self.spec_data_pe) + self.spec_data_gc, _ = SP.gaussian_convolve() + self.spec_data = self.spec_data_gc #SP.make_fft() + + # Split Data into TEST TRAIN + if self.data_info["use_data_subset"]: + self.pes_train, self.pes_test, self.spec_train, self.spec_test, self.xgm_pulseen_train, self.xgm_pulseen_test = train_test_split(self.pes_data[self.data_info["data_subset_start"]:self.data_info["data_subset_end"]], + self.spec_data[self.data_info["data_subset_start"]:self.data_info["data_subset_end"]], + self.xgm_pulseen[self.data_info["data_subset_start"]:self.data_info["data_subset_end"]], + test_size=self.data_info["test_size"], + random_state=42 + ) + else: + self.pes_train, self.pes_test, self.spec_train, self.spec_test, self.xgm_pulseen_train, self.xgm_pulseen_test = train_test_split(self.pes_data, + self.spec_data, + self.xgm_pulseen, + test_size=self.data_info["test_size"], + random_state=42 + ) + + + if self.data_info["pes_pca_preprocessing"]: + pes_PCA = FindPCAcomps(data_train=self.pes_train, data_test=self.pes_test, n_pca_comps=self.data_info["n_pca_comps_pes"]) + self.pes_pca_model, self.X_train, self.X_test = pes_PCA.get_pca() + else: + self.X_train, self.X_test = self.pes_train, self.pes_test + + if self.data_info["spec_pca_preprocessing"]: + spec_PCA = FindPCAcomps(data_train=self.spec_train, data_test=self.spec_test, n_pca_comps=self.data_info["n_pca_comps_spec"]) + self.spec_pca_model, self.Y_train, self.Y_test = spec_PCA.get_pca() + self.user_defined_model.pca_model_spec = self.spec_pca_model + self.user_defined_model.spec_test = self.spec_test + + else: + self.Y_train, self.Y_test = self.spec_train, self.spec_test + + print("print Y shape train and test in preprocessing last step :::: ",self.Y_train.shape, self.Y_test.shape) + #save_train_test_h5(pes_train, pes_test, X_train, X_test, Y_train, Y_test, spec_data_pe, self.data_info, data_info["exp_dir"]) + return self.X_train, self.X_test, self.Y_train, self.Y_test + + def save_preproc(self): + if self.model_type == 'bfgs': + + self.user_defined_model.save_preproc(self.pes_train, self.pes_test, + self.spec_train, self.spec_test, + self.X_train, self.X_test, + self.Y_train, self.Y_test, + self.spec_data_pe, + self.data_info, + ) + + import joblib + joblib.dump(self.pes_pca_model, f"experiments/{self.data_info['exp_dir']}/checkpoints/pes_pca_model.joblib") + joblib.dump(self.spec_pca_model, f"experiments/{self.data_info['exp_dir']}/checkpoints/spec_pca_model.joblib") + elif self.model_type == 'bfgs_pca' or self.model_type == 'bfgs_pca_eps': + self.user_defined_model.save_preproc(self.pes_train, self.pes_test, + self.spec_train, self.spec_test, + self.X_train, self.X_test, + self.Y_train, self.Y_test, + self.spec_data_pe, + self.data_info, + self.xgm_pulseen_train, self.xgm_pulseen_test + ) + + import joblib + joblib.dump(self.pes_pca_model, f"experiments/{self.data_info['exp_dir']}/checkpoints/pes_pca_model.joblib") + joblib.dump(self.spec_pca_model, f"experiments/{self.data_info['exp_dir']}/checkpoints/spec_pca_model.joblib") + + + def fit_eval(self, X_train, y_train, X_test, Y_test): + #A_inf, b_inf, u_inf, Y_norm = fig_bgfs.train_bgfs() + self.model = self.user_defined_model.fit_eval(X_train, y_train, X_test, Y_test) # Change the name of self.model to something else ? + + + def save_latest_ckp(self): + self.user_defined_model.save_latest_chp(self.data_info["exp_dir"]) + + def load_model(self): + self.model, self.pes_pca_model, self.spec_pca_model = self.user_defined_model.load_model(self.data_info["exp_dir"]) + return self.model, self.pes_pca_model, self.spec_pca_model + + def predict(self, input_value, target_value=None, spec_target=None): + result = self.user_defined_model.predict(input_value, target_value, spec_target) + return result + + def save_prediction(self): + self.user_defined_model.save_prediction(self.data_info["exp_dir"]) + +if __name__ == '__main__': + print("Creating Model Instance") + #model_instance = Model(model_type=None) + #model_instance.preprocess() + #model_instance.split(0.2) + #model_instance.fit() + #print(model_instance.predict([.9, 1000])) + #print("Accuracy: ", model_instance.model.score(model_instance.X_test, model_instance.y_test)) \ No newline at end of file diff --git a/src/utils/__init__.py b/src/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/utils/utils.py b/src/utils/utils.py new file mode 100644 index 0000000..0db96e6 --- /dev/null +++ b/src/utils/utils.py @@ -0,0 +1,238 @@ +import numpy as np +import scipy +import matplotlib.pyplot as plt +from skimage import filters as skfilters +import plotly +import plotly.graph_objs as go + +import os + + + +# plot lines +def plot_gallery_line(title, data, n_col, n_row): + plt.figure(figsize=(2.0 * n_col, 2.26 * n_row)) + plt.suptitle(title, size=16) + for i, comp in enumerate(data): + plt.subplot(n_row, n_col, i + 1) + vmax = max(comp.max(), -comp.min()) + plt.plot( + comp + ) + + if i + 1 not in [k*n_col+1 for k in range(n_row)]: + plt.xticks(()) + plt.yticks(()) + + plt.subplots_adjust(0.01, 0.05, 0.99, 0.93, 0.04, 0.0) + + + +def plot_unc_band(lp_hr, lp_rec, lp_rec_unc, fit_method): + + x_pltly = np.arange(len(lp_hr)) + + + fig = go.Figure([ + go.Scatter( + name='HR (gt)', + x=x_pltly, + y=lp_hr, + mode='lines', + line=dict(color='rgb(0, 150, 255)'), + ), + go.Scatter( + name=f"{fit_method}", + x=x_pltly, + y=lp_rec, + mode='lines', + line=dict(color='rgb(255,99,71)'), + ), + go.Scatter( + name='Upper Bound', + x=x_pltly, + y=lp_rec + 2*lp_rec_unc, + mode='lines', + marker=dict(color="#DC143C"), + line=dict(width=0), + showlegend=False + ), + go.Scatter( + name='Lower Bound', + x=x_pltly, + y=lp_rec - 2*lp_rec_unc, + marker=dict(color="#DC143C"), + line=dict(width=0), + mode='lines', + fillcolor='rgba(255,99,71, 0.3)', + fill='tonexty', + showlegend=False + ) + ]) + fig.update_layout( + yaxis_title='log intensity (a.u.)', + xaxis_title='pixel/coord number (a.u.)', + title=f"{fit_method} reconstruction with 95% confidence on uncertainities", + hovermode="x" + ) + fig.show() + + + +def create_experiment_dirs(exp_dir): + """ + Create Directories of a regular tensorflow experiment directory + :param exp_dir: + :return summary_dir, checkpoint_dir: + """ + experiment_dir = "experiments/" + exp_dir + "/" + summary_dir = experiment_dir + 'summaries/' + checkpoint_dir = experiment_dir + 'checkpoints/' + output_dir = experiment_dir + 'output/' + test_dir = experiment_dir + 'test/' + dirs = [summary_dir, checkpoint_dir, output_dir, test_dir] + try: + for dir_ in dirs: + if not os.path.exists(dir_): + os.makedirs(dir_) + print("Experiment directories created") + return experiment_dir, summary_dir, checkpoint_dir, output_dir, test_dir + except Exception as err: + print("Creating directories error: {0}".format(err)) + exit(-1) + + +def save_train_test_h5(pes_train, pes_test, X_train, X_test, Y_train, Y_test, spec_raw_pe, data_info, exp_dir): + import h5py + + hf = h5py.File(f"experiments/{exp_dir}/output/data.h5", 'w') + hf.create_dataset('pes_train', data=pes_train) + hf.create_dataset('pes_test', data=pes_test) + hf.create_dataset('X_train', data=X_train) + hf.create_dataset('X_test', data=X_test) + hf.create_dataset('Y_train', data=Y_train) + hf.create_dataset('Y_test', data=Y_test) + hf.create_dataset('spec_raw_pe', data=spec_raw_pe[0,:]) + for key, val in data_info.items(): + hf.attrs[key] = val + + + hf.close() + +def save_rec_data_h5(sc_res, sc_res_unc, exp_dir): + import h5py + hf = h5py.File(f"experiments/{exp_dir}/output/rec_data.h5", 'w') + hf.create_dataset('sc_res', data=sc_res) + hf.create_dataset('sc_res_unc', data=sc_res_unc) + hf.close() + + +def save_checkpoint(A_inf, b_inf, u_inf, Y_norm, exp_dir): + + import h5py + hf = h5py.File(f"experiments/{exp_dir}/checkpoints/chp.h5", 'w') + hf.create_dataset('A_inf', data=A_inf) + hf.create_dataset('b_inf', data=b_inf) + hf.create_dataset('u_inf', data=u_inf) + hf.create_dataset('Y_norm', data=Y_norm) + hf.close() + +def save_chp_bfgs(model, exp_dir): + import h5py + hf = h5py.File(f"experiments/{exp_dir}/checkpoints/chp.h5", 'w') + + for k, v in model.items(): + hf.create_dataset(k, data=v) + hf.close() + + + +def save_checkpoint_fft(A_inf_1, b_inf_1, + A_inf_2, b_inf_2, + u_inf_1, u_inf_2, + Y_norm, exp_dir): + + import h5py + hf = h5py.File(f"experiments/{exp_dir}/checkpoints/chp.h5", 'w') + hf.create_dataset('A_inf_1', data=A_inf_1) + hf.create_dataset('b_inf_1', data=b_inf_1) + hf.create_dataset('A_inf_2', data=A_inf_2) + hf.create_dataset('b_inf_2', data=b_inf_2) + hf.create_dataset('u_inf_1', data=u_inf_1) + hf.create_dataset('u_inf_2', data=u_inf_2) + hf.create_dataset('Y_norm', data=Y_norm) + hf.close() + + + +def load_train_test_h5(exp_dir): + import h5py + # Load Data + filename = f"experiments/{exp_dir}/output/data.h5" + file = h5py.File(filename, 'r') + pes_train = file["pes_train"][()] # returns as a numpy array + pes_test = file["pes_test"][()] # returns as a numpy array + spec_train = file["spec_train"][()] # returns as a numpy array + spec_test = file["spec_test"][()] # returns as a numpy array + X_train = file["X_train"][()] # returns as a numpy array + X_test = file["X_test"][()] # returns as a numpy array + Y_train = file["Y_train"][()] # returns as a numpy array + Y_test = file["Y_test"][()] # returns as a numpy array + spec_raw_pe = file["spec_raw_pe"][()] # returns as a numpy + xgm_pulseen_train = file["xgm_pulseen_train"][()] + xgm_pulseen_test = file["xgm_pulseen_test"][()] + + att_names = file.attrs.keys() + att_dict = {} + for att_name in att_names: + att_dict[att_name] = file.attrs[att_name] + + return Y_train, Y_test, spec_train, spec_test, spec_raw_pe, X_train, X_test, pes_train, pes_test, att_dict, xgm_pulseen_train, xgm_pulseen_test + + +def load_rec_data_h5(exp_dir): + import h5py + filename = f"experiments/{exp_dir}/output/pred_data.h5" + file = h5py.File(filename, 'r') + sc_res = file["res"][()] # returns as a numpy array + sc_res_unc = file["res_unc"][()] # returns as a numpy array + + return sc_res, sc_res_unc + +def load_rec_data_h5_bfgs_pca(exp_dir): + import h5py + filename = f"experiments/{exp_dir}/output/pred_data.h5" + file = h5py.File(filename, 'r') + sc_res = file["res"][()] # returns as a numpy array + sc_res_unc = file["res_unc"][()] # returns as a numpy array + sc_res_unc_specpca = file["res_unc_specpca"][()] # returns as a numpy array + sc_res_unc_total = file["res_unc_total"][()] # returns as a numpy array + + + return sc_res, sc_res_unc, sc_res_unc_specpca, sc_res_unc_total + +def load_rec_data_h5_bfgs_pca_eps(exp_dir): + import h5py + filename = f"experiments/{exp_dir}/output/pred_data.h5" + file = h5py.File(filename, 'r') + sc_res = file["res"][()] # returns as a numpy array + sc_res_unc = file["res_unc"][()] # returns as a numpy array + sc_res_unc_specpca = file["res_unc_specpca"][()] # returns as a numpy array + sc_res_unc_total = file["res_unc_total"][()] # returns as a numpy array + sc_res_eps = file["res_eps"][()] # returns as a numpy array + + return sc_res, sc_res_unc, sc_res_unc_specpca, sc_res_unc_total, sc_res_eps + +def load_checkpoint(exp_dir): + import h5py + # Load Checkpoint Data + filename = f"experiments/{exp_dir}/checkpoints/chp.h5" + file = h5py.File(filename, 'r') + A_inf = file["A_inf"][()] # returns as a numpy array + b_inf = file["b_inf"][()] # returns as a numpy array + u_inf = file["u_inf"][()] # returns as a numpy array + Y_norm = file["Y_norm"][()] # returns as a numpy array + + return A_inf, b_inf, u_inf, Y_norm + + -- GitLab