diff --git a/README.md b/README.md index bc8544882d9dc1db6687820ee67b63c46ef30961..68be2858fc7aa42e698d3aa9deb5da2825551103 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# pes_to_spec +# Virtual spectrometer The aim of this project is to read low-resolution photon spectrometer measurements and predict, with an uncertainty band, a high-resolution invasive photon spectrometer result, in such a way that one may continue to collect low-resolution data without stopping the beam, but obtaining nevertheless, high-resolution @@ -10,17 +10,32 @@ The concept is to collect both results simultaneously during a training phase an One may install it simply with `pip install pes_to_spec`. +While the dependencies should be automatically used, using the Intel-optimized `numpy` and `scipy` packages is recommended, as they are much faster. +This has been tested with `scipy==1.7.3`, but it should work with any version of scipy and numpy. +``` +# install the optimized numpy and scipy versions +# skip dependencies +pip install --force-reinstall -i https://pypi.anaconda.org/intel/simple --no-dependencies numpy scipy +# now install dependencies +pip install numpy scipy +# install PyTorch +pip install torch --index-url https://download.pytorch.org/whl/cpu +``` + ## Usage The API may be used as follows: ``` from pes_to_spec.model import Model +from itertools import product # this is the main object holding all # information needed for training and prediction # the default parameters should be sufficient in most times -model = Model() +# if some channels are dead, you may want to specify the active channels +channels = [f"channel_{i}_{l}" for i, l in product(range(1, 5), ["A", "B", "C", "D"])] +model = Model(channels=channels) # this trains the model # low_resolution_raw_data is a dictionary with keys "channel_[1-4]_[A-D]" and values set to 2D-shaped @@ -29,7 +44,8 @@ model = Model() # high_resolution_intensity and high_resolution_photon_energy are estimates from the high-resolution invasive spectrometer model.fit(low_resolution_raw_data, high_resolution_intensity, - high_resolution_photon_energy) + high_resolution_photon_energy, + pulse_energy=beam_intensity) # save it for later usage: model.save("model.joblib") @@ -42,7 +58,7 @@ model = Model.load("model.joblib") # as before, the low_resolution_raw_data refers to a dictionary mapping the channel name # in the format "channel_[1-4]_[A-D]" to the 2D numpy array with shape (number_of_train_IDs, features) # all names and shapes must match the format in training, except for the number_of_train_IDs, which may vary -interpolated_high_resolution_data = model.predict(low_resolution_raw_data) +interpolated_high_resolution_data = model.predict(low_resolution_raw_data, pulse_energy=beam_intensity) # extra useful functions for debugging: @@ -100,16 +116,3 @@ low_resolution_raw_data = {ch: run['SA3_XTD10_PES/ADC/1:network', After installation, the command-line tool `offline_analysis` may be used to test the tool in recorded datasets. -## Exploration initial tests - -A first draft and explorative code can be seen in the `exploration` directory. - -1. inv_train.py -> Train a model on the specific RUN. -Thish will save the pca model and fit model in experiments/YOUR_DIR/checkpoints and the data in -experiments/YOUR_DIR/data. - -2. inv_eval.py -> Use to evaluate the trained model - -3. inv_inference -> Use trained model to do inference on new data point - -4. data_drift_check.py -> Check data drift between two datasets diff --git a/Update_in_DA_server.md b/Update_in_DA_server.md new file mode 100644 index 0000000000000000000000000000000000000000..77909ef3e9fbcca3c057b1381a416e0c459ffdda --- /dev/null +++ b/Update_in_DA_server.md @@ -0,0 +1,32 @@ +# Building wheel + +First, set the current version number in `pes_to_spec/__init__.py`. + +Use the following to build a wheel: +``` +python -m build --sdist --wheel . +``` + +The generated file would be named: `dist/pes_to_spec-*-py3-none-any.whl` + +# Updating wheel in DA server + +This tool is packaged and made available through the DA server. +The following commands can be used to update the tool in the DA server: + +``` +# if not done before: +devpi use https://devpi.exfldadev01.desy.de/ +devpi login [username] +devpi upload [wheel_file_name_generated_above.whl] +devpi push pes_to_spec==X.Y.Z euxfel/internal +``` + +# Installing it from the DA server + +To install this tool from the DA server, use the following index URL: + +``` +pip install --index-url "https://devpi.exfldadev01.desy.de/euxfel/internal" pes_to_spec==X.Y.Z +``` + diff --git a/exploration/data_drift_check.py b/exploration/data_drift_check.py deleted file mode 100644 index 6bef4952adf4b0e48bedf2d7d12b06eb472dc93a..0000000000000000000000000000000000000000 --- a/exploration/data_drift_check.py +++ /dev/null @@ -1,34 +0,0 @@ -import sys -sys.path.append("./") -sys.path.append("..") - -import matplotlib -matplotlib.use('Agg') -import matplotlib.pyplot as plt - -from src.data.data import ReadPesSpec, PesChannelSelector -from src.data.data_preproc import SpecPreprocessing - -from src.models.find_components.Find_Component import FindPCAcomps -from src.models.fit_methods.Fit_Methods import FitBFGS, FitBFGSPCA -from src.models.fit_methods.model import Model - -from src.utils.utils import load_train_test_h5, load_rec_data_h5, load_rec_data_h5_bfgs_pca, load_checkpoint -from src.utils.utils import create_experiment_dirs - -from sklearn.model_selection import train_test_split -import numpy as np - - -# Load trained model in the path exp_dir -exp_dir = "test3_pulseen_short_test_eps_r0015" -Y_train_model, Y_test_model, spec_train_model, spec_test_model, spec_raw_pe, X_train_model, X_test_model, pes_train_model, pes_test_model, att_dict, xgm_pulseen_train, xgm_pulseen_test = load_train_test_h5(exp_dir) - -att_dict["pes_pca_preprocessing"] = False # set pca non trainable -att_dict["spec_pca_preprocessing"] = False # set pca non trainable - -model_instance = Model(model_type="bfgs_pca_eps", data_info=att_dict) - -inference_model, pes_pca_model, spec_pca_model = model_instance.load_model() # Move to incference sctipt TODO! - -print("Finish") \ No newline at end of file diff --git a/exploration/inv_eval.py b/exploration/inv_eval.py deleted file mode 100644 index 8985125bd4f404355a0250f0a6833670508090bd..0000000000000000000000000000000000000000 --- a/exploration/inv_eval.py +++ /dev/null @@ -1,1627 +0,0 @@ - -import sys -sys.path.append("./") -sys.path.append("..") - -import matplotlib -matplotlib.use('Agg') -import matplotlib.pyplot as plt - -from src.utils.utils import load_train_test_h5, load_rec_data_h5, load_rec_data_h5_bfgs_pca, load_checkpoint, load_rec_data_h5_bfgs_pca_eps -import numpy as np -import h5py - -exp_dir = "test3_pulseen_short_test_eps_r0015" # Evaluate the trained model - -# Load Data -Y_train, Y_test, spec_train, spec_test, spec_raw_pe, X_train, X_test, pes_train, pes_test, att_dict, xgm_pulseen_trains, xgm_pulseen_tests = load_train_test_h5(exp_dir) - -xgm_pulseen_test = [] -for xgm_i in range(len(xgm_pulseen_tests)): - xgm_pulseen_test.append(xgm_pulseen_tests[xgm_i][0]) - -n_pca_comps = att_dict["n_pca_comps_pes"] - -if att_dict["model_type"]=="bfgs": - - - # Load Rec - Y_rec, Y_rec_unc = load_rec_data_h5(exp_dir) - print(Y_test.shape, Y_rec.shape) - - fig, axes_2 = plt.subplots(10, 2, figsize=(15, 35)) - axes_2 = axes_2.flatten() - - for i_bfgs in range(15): - - ax = axes_2[i_bfgs] - ax.plot(np.arange(0,Y_rec.shape[1],1), Y_rec[i_bfgs,:], "r", label=f"SPEC rec {i_bfgs}") - ax.plot(np.arange(0,Y_rec.shape[1],1), Y_test[i_bfgs,:], label=f"SPEC gt {i_bfgs}") - ax.fill_between( - np.arange(0,Y_rec.shape[1],1), Y_rec[i_bfgs,:] - Y_rec_unc, Y_rec[i_bfgs,:] + Y_rec_unc, color="pink", alpha=0.5, label="BFGS std" - ) - ax.set_xlabel('XXX ', fontsize=22) - ax.set_ylabel('int a.u.', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.legend(fontsize=10) - ax.set_title(f"{exp_dir}", y=1.0, pad=-10) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/reconstruction.png", bbox_inches='tight') - plt.close(fig) - - -if att_dict["model_type"]=="bfgs_pca": - - # Load Rec - Y_rec, Y_rec_unc_bfgs, Y_rec_unc_specpca, Y_rec_unc = load_rec_data_h5_bfgs_pca(exp_dir) - print(Y_test.shape, Y_rec.shape) - - Y_eps = 0*Y_rec_unc_bfgs # TODO! fix this remove eps version from here - eps_mean = np.mean(Y_eps, axis=1) - print("len eps mean", len(eps_mean)) - - # compute RMSE - def int_sum_1(A): - return np.sum(A, axis=1) - pes_sum = int_sum_1(pes_test) - spec_sum = int_sum_1(spec_test) - - cor_mat = np.corrcoef(pes_test) - cor_mat_int = np.sum(cor_mat, axis=1) - #where_1 = np.where(cor_mat == 1) - #cor_mat[where_1] = 0 - - - - # quick fft eval - FC_START = 0 - FC_END = 80 - N_ROI = 40 - # Get the Fourier Components - n_Y_comps = spec_test.shape[1] - Y_test_fft_raw = np.abs(np.fft.fft(spec_test))[:, :round(n_Y_comps/2)] - Y_rec_fft_raw = np.abs(np.fft.fft(Y_rec))[:, :round(n_Y_comps/2)] - - # Cut the Fourier components and take only those who are relevant and not close to 0 - Y_test_fft = Y_test_fft_raw[:, FC_START:FC_END] - Y_rec_fft = Y_rec_fft_raw[:, FC_START:FC_END] - - # Split relevant fourier components into N regions of interests (roi) - split_size = round((FC_END - FC_START)/N_ROI) - - def split_to_rois(a, splitedSize = split_size): - a_splited = [a[:,x:x+splitedSize] for x in range(0, a.shape[1], splitedSize)] - return a_splited - - Y_test_fft_rois = split_to_rois(Y_test_fft) - Y_rec_fft_rois = split_to_rois(Y_rec_fft) - - # Define the X axis, pixels - pixel_rois = split_to_rois(np.arange(FC_START, FC_END, 1).reshape(1,-1)) - - # Compute the absolute difference of gt and rec fourier components per region of interest per train ID - delta_Y_fft_rois = [np.abs(a-b) for a, b in zip(Y_test_fft_rois, Y_rec_fft_rois)] - # Sum along all Fourier components in roi per train id - delta_Y_fft_rois_sum = [np.sum(a, axis=1) for a in delta_Y_fft_rois] - print("len(delta_Y_fft_rois_sum)", len(delta_Y_fft_rois)) - # Sum along all train IDs per roi - delta_Y_fft_rois_sum_mean = [x.mean() for x in delta_Y_fft_rois_sum] - - - # find corr coef - def corr_coef(a, b): - cor_coef = np.corrcoef(a, b)[0][1] - #print("The corr coef is :", np.round(cor_coef, 2)) - return np.round(cor_coef, 2) - cc = [] - for i_bfgs in range(Y_rec.shape[0]): - #corr_coef(sc_res[i_bfgs,:], Y_test[i_bfgs,:]) - cc.append(corr_coef(Y_rec[i_bfgs,:], spec_test[i_bfgs,:]) ) - - - - # Function to calculate Chi-distance - def chi2_distance(A, B): - # compute the chi-squared distance using above formula - chi = 0.5 * np.sum([((a - b) ** 2) / (a + b) - for (a, b) in zip(A, B)]) - - #print("The Chi-square distance is :", chi) - return chi - chi_2 = [] - for i_bfgs in range(Y_rec.shape[0]): - chi_2.append(chi2_distance(Y_rec[i_bfgs,:], spec_test[i_bfgs,:])) - - - # compute RMSE - def rmse_value(A, B): - return np.sqrt(np.mean((A-B)**2)) - rmse = [] - for i_bfgs in range(Y_rec.shape[0]): - rmse.append(rmse_value(Y_rec[i_bfgs,:], spec_test[i_bfgs,:])) - rmse_quickfft = [] - for i_bfgs in range(Y_rec_fft.shape[0]): - rmse_quickfft.append(rmse_value(Y_rec_fft[i_bfgs,:], Y_test_fft[i_bfgs,:])) - - - - - - - # Save Images - - # Save cor mat test set - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - im = ax.imshow(cor_mat) - ax.set_xlabel('train IDs.', fontsize=22) - ax.set_ylabel('train IDs.', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - ax.set_title(f"cor mat: PCA(PES)", y=1.0, pad=-20, fontsize=22) - - cax = fig.add_axes([0.27, 0.95, 0.5, 0.05]) - fig.colorbar(im, cax=cax, orientation='horizontal') - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/cor_mat_pca_pes.png", bbox_inches='tight') - plt.close(fig) - - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - ax.plot(cor_mat[42,:], label=f"42") - ax.plot(cor_mat[43,:], label=f"43") - ax.plot(cor_mat[171,:], label=f"171") - #ax.plot(cor_mat_int, label=f"int") - ax.legend(fontsize=20) - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/cor_mat_pca_pes_line.png", bbox_inches='tight') - plt.close(fig) - - - # Save xgm_pulse energy - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - ax.plot(np.arange(len(xgm_pulseen_test)), xgm_pulseen_test, label=f"xgm pulse energy: PES") - - ax.set_xlabel('train IDs.', fontsize=22) - ax.set_ylabel('int a.u.', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - ax.set_title(f"xgm test pulse energy: PES", y=1.0, pad=-20, fontsize=22) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/xgm_pulse_energy.png", bbox_inches='tight') - plt.close(fig) - - - # Save xgm_pulse energy vs PES sum - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - ax.scatter(eps_mean, xgm_pulseen_test, label=f"eps_mean xgm pulse energy") - - ax.set_xlabel('eps_mean', fontsize=22) - ax.set_ylabel('pulse energy', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - ax.set_title(f"pes_sum vs pulse energy", y=1.0, pad=-20, fontsize=22) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/pulse_energy_vs_eps_mean.png", bbox_inches='tight') - plt.close(fig) - - - # Save xgm_pulse energy vs QuickFFT - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - ax.scatter(eps_mean, rmse_quickfft, label=f"eps_mean QuickFFT") - - ax.set_xlabel('eps_mean', fontsize=22) - ax.set_ylabel('QuickFFT', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - ax.set_title(f"eps_mean QuickFFT", y=1.0, pad=-20, fontsize=22) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/eps_mean_vs_quick_fft.png", bbox_inches='tight') - plt.close(fig) - - - # Save xgm_pulse energy vs QuickFFT - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - ax.scatter(xgm_pulseen_test, rmse_quickfft, label=f"xgm_pulseen_test QuickFFT") - - ax.set_xlabel('xgm_pulseen_test', fontsize=22) - ax.set_ylabel('QuickFFT', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - ax.set_title(f"pulse energy QuickFFT", y=1.0, pad=-20, fontsize=22) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/xgm_pulseen_test_vs_quick_fft.png", bbox_inches='tight') - plt.close(fig) - - - # Save CC vs QuickFFT - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - ax.scatter(cc, rmse_quickfft, label=f"cc vs QuickFFT") - - ax.set_xlabel('cc', fontsize=22) - ax.set_ylabel('QuickFFT', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - ax.set_title(f"cc vs QuickFFT", y=1.0, pad=-20, fontsize=22) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/cc_vs_quick_fft.png", bbox_inches='tight') - plt.close(fig) - - - # Save xgm_pulse energy vs Spec sum - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - ax.scatter(spec_sum, xgm_pulseen_test, label=f"xgm pulse energy vs spec int") - - ax.set_xlabel('spec_sum', fontsize=22) - ax.set_ylabel('pulse energy', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - ax.set_title(f"spec_sum vs pulse energy", y=1.0, pad=-20, fontsize=22) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/pulse_energy_vs_spec_int.png", bbox_inches='tight') - plt.close(fig) - - - # Save xgm_pulse energy vs corr matrix sum - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - ax.scatter(cor_mat_int, xgm_pulseen_test, label=f"xgm pulse energy vs cor_mat_int") - - ax.set_xlabel('cor_mat_int', fontsize=22) - ax.set_ylabel('pulse energy', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - ax.set_title(f"spec_sum vs pulse energy", y=1.0, pad=-20, fontsize=22) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/pulse_energy_vs_cor_mat_int.png", bbox_inches='tight') - plt.close(fig) - - - # pes_sum vs Spec sum - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - ax.plot(spec_sum, pes_sum, label=f" spec int vs pes_sum") - - ax.set_xlabel('spec_sum', fontsize=22) - ax.set_ylabel('pulse energy', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - ax.set_title(f"spec_sum vs pes_sum", y=1.0, pad=-20, fontsize=22) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/spec_int_vs_pes_int.png", bbox_inches='tight') - plt.close(fig) - - - # Save pes sum test data - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - ax.plot(np.arange(len(pes_sum)), pes_sum, label=f"integrated int: PES") - - ax.set_xlabel('train IDs.', fontsize=22) - ax.set_ylabel('int a.u.', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - ax.set_title(f"integrated int: PES", y=1.0, pad=-20, fontsize=22) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/pes_int_sum.png", bbox_inches='tight') - plt.close(fig) - - - # Save pes sum test data - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - ax.plot(np.arange(len(spec_sum)), spec_sum, label=f"integrated int: SPEC") - - ax.set_xlabel('train IDs.', fontsize=22) - ax.set_ylabel('int a.u.', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - ax.set_title(f"integrated int: SPEC", y=1.0, pad=-20, fontsize=22) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/spec_int_sum.png", bbox_inches='tight') - plt.close(fig) - - - # Save pes_sum vs pulse energy - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - #ax.plot(np.arange(len(cc)), cc/max(cc), label=f"cross correlation") - #ax.plot(np.arange(len(cc)), pes_sum/max(np.array(pes_sum)), label=f"pes_sum") - - ax.plot(pes_sum/max(np.array(pes_sum)), cc/max(cc), label=f"correlation coef.") - - ax.set_xlabel('pes_sum', fontsize=22) - ax.set_ylabel('cor. coef.', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - ax.set_title(f"pes_sum vs cor. coef.", y=1.0, pad=-20, fontsize=22) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/pes_int_sum_vs_cor_coef.png", bbox_inches='tight') - plt.close(fig) - - # Save spec_sum vs pulse energy - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - #ax.plot(np.arange(len(cc)), cc/max(cc), label=f"cross correlation") - #ax.plot(np.arange(len(cc)), spec_sum/max(np.array(spec_sum)), label=f"spec_sum") - - ax.plot(spec_sum/max(np.array(spec_sum)), cc/max(cc), label=f"spec_sum correlation coef") - - ax.set_xlabel('spec_sum', fontsize=22) - ax.set_ylabel('cor. coef.', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - ax.set_title(f"spec_sum vs cor. coef.", y=1.0, pad=-20, fontsize=22) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/spec_int_sum_vs_corr_coef.png", bbox_inches='tight') - plt.close(fig) - - - # Save quick fft eval - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - ax.plot(np.arange(N_ROI), delta_Y_fft_rois_sum_mean, label=f"FFT eval {i_bfgs}") - - ax.set_xlabel('FFT bin comps.', fontsize=22) - ax.set_ylabel('int a.u.', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - ax.set_title(f" qucik fft eval", y=1.0, pad=-20, fontsize=22) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/quick_fft_eval.png", bbox_inches='tight') - plt.close(fig) - - - # Save cross correlation - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - ax.plot(np.arange(len(cc)), cc, label=f"correlation coef") - - ax.set_xlabel('train IDs.', fontsize=22) - ax.set_ylabel('int a.u.', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - ax.set_title(f"cor. coef.", y=1.0, pad=-20, fontsize=22) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/corr_coef.png", bbox_inches='tight') - plt.close(fig) - - - # Save cross correlation vs pulse energy - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - #ax.plot(np.arange(len(cc)), cc/max(cc), label=f"cross correlation") - ax.scatter( cc/max(cc), eps_mean/spec_sum, label=f"cc vs eps_mean") - - ax.set_xlabel('cor coef', fontsize=22) - ax.set_ylabel('eps mean', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - ax.set_title(f"cc vs eps_mean", y=1.0, pad=-20, fontsize=22) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/cor_coef_vs_eps_mean.png", bbox_inches='tight') - plt.close(fig) - - - # Save cross correlation vs pulse energy - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - #ax.plot(np.arange(len(cc)), cc/max(cc), label=f"cross correlation") - ax.scatter( cc/max(cc), xgm_pulseen_test/max(np.array(xgm_pulseen_test)), label=f"xgm_pulseen_test") - - ax.set_xlabel('cor coef', fontsize=22) - ax.set_ylabel('pulse energy', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - ax.set_title(f"cor. coef. vs pulse energy", y=1.0, pad=-20, fontsize=22) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/cor_coef_vs_pulse_energy.png", bbox_inches='tight') - plt.close(fig) - - - # Save cross correlation best rec spec - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - i_bfgs = np.argmax(cc) - - ax.plot(spec_raw_pe, Y_rec[i_bfgs,:], "r", label=f"SPEC rec {i_bfgs}") - ax.plot(spec_raw_pe, spec_test[i_bfgs,:], label=f"SPEC gt {i_bfgs}") - ax.fill_between( - spec_raw_pe, Y_rec[i_bfgs,:] - Y_rec_unc_bfgs[0,:], Y_rec[i_bfgs,:] + Y_rec_unc_bfgs[0,:], color="r", alpha=0.5, label="u_bfgs" - ) - ax.fill_between( - spec_raw_pe, Y_rec[i_bfgs,:] - Y_rec_unc_specpca, Y_rec[i_bfgs,:] + Y_rec_unc_specpca, color="g", alpha=0.5, label="u_specpca" - ) - ax.set_xlabel('Energy (eV) ', fontsize=22) - ax.set_ylabel('int a.u.', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - ax.set_title(f"cc best rec", y=1.0, pad=-20) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/cc_best_rec.png", bbox_inches='tight') - plt.close(fig) - - - # Save cross correlation best rec spec - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - i_bfgs = np.argmin(cc) - - ax.plot(spec_raw_pe, Y_rec[i_bfgs,:], "r", label=f"SPEC rec {i_bfgs}") - ax.plot(spec_raw_pe, spec_test[i_bfgs,:], label=f"SPEC gt {i_bfgs}") - ax.fill_between( - spec_raw_pe, Y_rec[i_bfgs,:] - Y_rec_unc_bfgs[0,:], Y_rec[i_bfgs,:] + Y_rec_unc_bfgs[0,:], color="r", alpha=0.5, label="u_bfgs" - ) - ax.fill_between( - spec_raw_pe, Y_rec[i_bfgs,:] - Y_rec_unc_specpca, Y_rec[i_bfgs,:] + Y_rec_unc_specpca, color="g", alpha=0.5, label="u_specpca" - ) - ax.set_xlabel('Energy (eV) ', fontsize=22) - ax.set_ylabel('int a.u.', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - ax.set_title(f"cc worse rec", y=1.0, pad=-20) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/cc_worse_rec.png", bbox_inches='tight') - plt.close(fig) - - - # Save cross correlation best rec pes - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - i_bfgs = np.argmax(cc) - - ax.plot(pes_test[i_bfgs,:], "r", label=f"PES test {i_bfgs}") - - ax.set_xlabel(' x comps ', fontsize=22) - ax.set_ylabel('int a.u.', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - ax.set_title(f"pes raw (cc best rec)", y=1.0, pad=-20) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/pes_raw_best_rec.png", bbox_inches='tight') - plt.close(fig) - - - # Save cross correlation worst rec pes - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - i_bfgs = np.argmin(cc) - - ax.plot(pes_test[i_bfgs,:], "r", label=f"PES test {i_bfgs}") - - ax.set_xlabel(' x comps ', fontsize=22) - ax.set_ylabel('int a.u.', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - ax.set_title(f"pes raw (cc worst rec)", y=1.0, pad=-20) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/pes_raw_worst_rec.png", bbox_inches='tight') - plt.close(fig) - - - # Save chi_2 - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - ax.plot(np.arange(len(chi_2)), chi_2, label=f"chi_2") - - ax.set_xlabel('train IDs.', fontsize=22) - ax.set_ylabel('int a.u.', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - ax.set_title(f"chi_2", y=1.0, pad=-20, fontsize=22) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/chi_2.png", bbox_inches='tight') - plt.close(fig) - - - # Save rmse - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - ax.plot(np.arange(len(rmse)), rmse, label=f"rmse") - - ax.set_xlabel('train IDs.', fontsize=22) - ax.set_ylabel('int a.u.', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - ax.set_title(f"rmse", y=1.0, pad=-20, fontsize=22) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/rmse.png", bbox_inches='tight') - plt.close(fig) - - - # Save rmse vs eps_mean - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - ax.scatter(eps_mean, rmse, label=f"rmse_vs_eps_mean") - - ax.set_xlabel('eps mean', fontsize=22) - ax.set_ylabel('rmse', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - ax.set_title(f"rmse_vs_eps_mean", y=1.0, pad=-20, fontsize=22) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/rmse_vs_eps_mean.png", bbox_inches='tight') - plt.close(fig) - - - # reconstruction with total unc - fig, axes_2 = plt.subplots(10, 2, figsize=(15, 35)) - axes_2 = axes_2.flatten() - for i_bfgs in range(20): - - ax = axes_2[i_bfgs] - ax.plot(spec_raw_pe, Y_rec[i_bfgs,:], "r", label=f"SPEC rec {i_bfgs}") - ax.plot(spec_raw_pe, spec_test[i_bfgs,:], label=f"SPEC gt {i_bfgs}") - ax.fill_between( - spec_raw_pe, Y_rec[i_bfgs,:] - Y_rec_unc[0,:], Y_rec[i_bfgs,:] + Y_rec_unc[0,:], color="pink", alpha=0.5, label="u_total" - ) - ax.set_xlabel('Energy (eV) ', fontsize=22) - ax.set_ylabel('int a.u.', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.legend(fontsize=10) - ax.set_title(f"{exp_dir} eps={eps_mean[i_bfgs]}", y=1.0, pad=-10) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/reconstruction.png", bbox_inches='tight') - plt.close(fig) - - - # reconstruction with total unc + eps - fig, axes_2 = plt.subplots(10, 2, figsize=(15, 35)) - axes_2 = axes_2.flatten() - for i_bfgs in range(20): - - ax = axes_2[i_bfgs] - ax.plot(spec_raw_pe, Y_rec[i_bfgs,:], "r", label=f"SPEC rec {i_bfgs}") - ax.plot(spec_raw_pe, spec_test[i_bfgs,:], label=f"SPEC gt {i_bfgs}") - ax.fill_between( - spec_raw_pe, Y_rec[i_bfgs,:] - Y_rec_unc_specpca, Y_rec[i_bfgs,:] + Y_rec_unc_specpca, color="g", alpha=0.5, label="u_specpca" - ) - ax.fill_between( - spec_raw_pe, Y_rec[i_bfgs,:] - Y_eps[0,:], Y_rec[i_bfgs,:] + Y_eps[0,:], color="blue", alpha=0.5, label="eps" - ) - ax.set_xlabel('Energy (eV) ', fontsize=22) - ax.set_ylabel('int a.u.', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.legend(fontsize=10) - ax.set_title(f"{exp_dir} eps={eps_mean[i_bfgs]}", y=1.0, pad=-10) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/reconstruction_eps.png", bbox_inches='tight') - plt.close(fig) - - - print(Y_rec_unc_bfgs[0,:]) - # Reconstruction with 2 uncs - fig, axes_2 = plt.subplots(10, 2, figsize=(15, 35)) - axes_2 = axes_2.flatten() - for i_bfgs in range(20): - - ax = axes_2[i_bfgs] - ax.plot(spec_raw_pe, Y_rec[i_bfgs,:], "r", label=f"SPEC rec {i_bfgs}") - ax.plot(spec_raw_pe, spec_test[i_bfgs,:], label=f"SPEC gt {i_bfgs}") - ax.fill_between( - spec_raw_pe, Y_rec[i_bfgs,:] - Y_rec_unc_bfgs[0,:], Y_rec[i_bfgs,:] + Y_rec_unc_bfgs[0,:], color="r", alpha=0.5, label="u_bfgs" - ) - ax.fill_between( - spec_raw_pe, Y_rec[i_bfgs,:] - Y_rec_unc_specpca, Y_rec[i_bfgs,:] + Y_rec_unc_specpca, color="g", alpha=0.5, label="u_specpca" - ) - ax.set_xlabel('Energy (eV) ', fontsize=22) - ax.set_ylabel('int a.u.', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.legend(fontsize=10) - ax.set_title(f"{exp_dir}", y=1.0, pad=-10) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/reconstruction_2unc.png", bbox_inches='tight') - plt.close(fig) - - - # Single ID reconstruction with 2 unc - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - ax.plot(spec_raw_pe, Y_rec[i_bfgs,:], "r", label=f"SPEC rec {i_bfgs}") - ax.plot(spec_raw_pe, spec_test[i_bfgs,:], label=f"SPEC gt {i_bfgs}") - ax.fill_between( - spec_raw_pe, Y_rec[i_bfgs,:] - Y_rec_unc_bfgs[0,:], Y_rec[i_bfgs,:] + Y_rec_unc_bfgs[0,:], color="r", alpha=0.5, label="u_bfgs" - ) - ax.fill_between( - spec_raw_pe, Y_rec[i_bfgs,:] - Y_rec_unc_specpca, Y_rec[i_bfgs,:] + Y_rec_unc_specpca, color="g", alpha=0.5, label="u_specpca" - ) - ax.set_xlabel('Energy (eV) ', fontsize=22) - ax.set_ylabel('int a.u.', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - #ax.set_title(f"{exp_dir}", y=1.0, pad=-20) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/reconstruction_2unc_id1.png", bbox_inches='tight') - plt.close(fig) - - - # Single ID reconstruction with TOTAL unc - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - ax.plot(spec_raw_pe, Y_rec[i_bfgs,:], "r", label=f"SPEC rec {i_bfgs}") - ax.plot(spec_raw_pe, spec_test[i_bfgs,:], label=f"SPEC gt {i_bfgs}") - ax.fill_between( - spec_raw_pe, Y_rec[i_bfgs,:] - Y_rec_unc[0,:], Y_rec[i_bfgs,:] + Y_rec_unc[0,:], color="r", alpha=0.5, label="u_total" - ) - - ax.set_xlabel('Energy (eV) ', fontsize=22) - ax.set_ylabel('int a.u.', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - #ax.set_title(f"{exp_dir}", y=1.0, pad=-20) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/reconstruction_unc_id1.png", bbox_inches='tight') - plt.close(fig) - - - # Example single spec - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - ax.plot(spec_raw_pe, spec_test[i_bfgs,:], label=f"SPEC gt {i_bfgs}") - - ax.set_xlabel('Energy (eV) ', fontsize=22) - ax.set_ylabel('int a.u.', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - #ax.set_title(f"{exp_dir}", y=1.0, pad=-20) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/single_spec.png", bbox_inches='tight') - plt.close(fig) - - - # Example single Y - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - ax.plot(Y_test[i_bfgs,:], label=f"SPEC gt {i_bfgs}") - - ax.set_xlabel('PCA comps.', fontsize=22) - ax.set_ylabel('int a.u.', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - #ax.set_title(f"{exp_dir}", y=1.0, pad=-20) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/single_Y.png", bbox_inches='tight') - plt.close(fig) - - - # Example single PES - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - ax.plot(pes_train[i_bfgs,:], label=f"SPEC gt {i_bfgs}") - - ax.set_xlabel('Channel comps.', fontsize=22) - ax.set_ylabel('int a.u.', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - #ax.set_title(f"{exp_dir}", y=1.0, pad=-20) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/single_pes.png", bbox_inches='tight') - plt.close(fig) - - - # # Example single X - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - ax.plot(X_train[i_bfgs,:], label=f"SPEC gt {i_bfgs}") - - ax.set_xlabel('PCA comps.', fontsize=22) - ax.set_ylabel('int a.u.', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - #ax.set_title(f"{exp_dir}", y=1.0, pad=-20) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/single_X.png", bbox_inches='tight') - plt.close(fig) - - - args_cor_mat_int = np.argsort(eps_mean) - args_cor_mat_int = args_cor_mat_int[230:] - # ORDERED reconstruction with total unc + eps - fig, axes_2 = plt.subplots(10, 2, figsize=(15, 35)) - axes_2 = axes_2.flatten() - for i_bfgs in range(20): - i_bfgs = i_bfgs - ax = axes_2[i_bfgs] - ax.plot(spec_raw_pe, Y_rec[args_cor_mat_int[i_bfgs],:], "r", label=f"SPEC rec {i_bfgs, args_cor_mat_int[i_bfgs]}") - ax.plot(spec_raw_pe, spec_test[args_cor_mat_int[i_bfgs],:], label=f"SPEC gt {i_bfgs, args_cor_mat_int[i_bfgs]}") - ax.fill_between( - spec_raw_pe, Y_rec[args_cor_mat_int[i_bfgs],:] - Y_rec_unc_specpca, Y_rec[args_cor_mat_int[i_bfgs],:] + Y_rec_unc_specpca, color="g", alpha=0.5, label="u_specpca" - ) - ax.fill_between( - spec_raw_pe, Y_rec[args_cor_mat_int[i_bfgs],:] - Y_eps[0,:], Y_rec[args_cor_mat_int[i_bfgs],:] + Y_eps[0,:], color="blue", alpha=0.5, label="eps" - ) - ax.set_xlabel('Energy (eV) ', fontsize=22) - ax.set_ylabel('int a.u.', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.legend(fontsize=10) - ax.set_title(f"eps={eps_mean[args_cor_mat_int[i_bfgs]]}", y=1.0, pad=-10) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/reconstruction_eps_SORTED.png", bbox_inches='tight') - plt.close(fig) - - - args_eps = np.argsort(eps_mean) - # ORDERED reconstruction with total unc + eps - fig, axes_2 = plt.subplots(10, 2, figsize=(15, 35)) - axes_2 = axes_2.flatten() - for i_bfgs in range(20): - - ax = axes_2[i_bfgs] - ax.plot(spec_raw_pe, Y_rec[args_eps[i_bfgs],:], "r", label=f"SPEC rec {i_bfgs, args_eps[i_bfgs]}") - ax.plot(spec_raw_pe, spec_test[args_eps[i_bfgs],:], label=f"SPEC gt {i_bfgs, args_eps[i_bfgs]}") - ax.fill_between( - spec_raw_pe, Y_rec[args_eps[i_bfgs],:] - Y_rec_unc_specpca, Y_rec[args_eps[i_bfgs],:] + Y_rec_unc_specpca, color="g", alpha=0.5, label="u_specpca" - ) - ax.fill_between( - spec_raw_pe, Y_rec[args_eps[i_bfgs],:] - Y_eps[0,:], Y_rec[args_eps[i_bfgs],:] + Y_eps[0,:], color="blue", alpha=0.5, label="eps" - ) - ax.set_xlabel('Energy (eV) ', fontsize=22) - ax.set_ylabel('int a.u.', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.legend(fontsize=10) - ax.set_title(f"eps={eps_mean[args_eps[i_bfgs]]}", y=1.0, pad=-10) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/reconstruction_eps_SORTED_descending.png", bbox_inches='tight') - plt.close(fig) - - - - - - print("finished eval") - - -if att_dict["model_type"]=="bfgs_pca_eps": - - # Load Rec - Y_rec, Y_rec_unc_bfgs, Y_rec_unc_specpca, Y_rec_unc, Y_eps = load_rec_data_h5_bfgs_pca_eps(exp_dir) - print(Y_test.shape, Y_rec.shape) - - eps_mean = np.mean(Y_eps, axis=1) - eps_mean = np.round(eps_mean, 2) # round eps mean - print("len eps mean", len(eps_mean)) - - # compute RMSE - def int_sum_1(A): - return np.sum(A, axis=1) - pes_sum = int_sum_1(pes_test) - spec_sum = int_sum_1(spec_test) - - cor_mat = np.corrcoef(pes_test) - cor_mat_int = np.sum(cor_mat, axis=1) - #where_1 = np.where(cor_mat == 1) - #cor_mat[where_1] = 0 - - - - # quick fft eval - FC_START = 0 - FC_END = 80 - N_ROI = 40 - # Get the Fourier Components - n_Y_comps = spec_test.shape[1] - Y_test_fft_raw = np.abs(np.fft.fft(spec_test))[:, :round(n_Y_comps/2)] - Y_rec_fft_raw = np.abs(np.fft.fft(Y_rec))[:, :round(n_Y_comps/2)] - - # Cut the Fourier components and take only those who are relevant and not close to 0 - Y_test_fft = Y_test_fft_raw[:, FC_START:FC_END] - Y_rec_fft = Y_rec_fft_raw[:, FC_START:FC_END] - - # Split relevant fourier components into N regions of interests (roi) - split_size = round((FC_END - FC_START)/N_ROI) - - def split_to_rois(a, splitedSize = split_size): - a_splited = [a[:,x:x+splitedSize] for x in range(0, a.shape[1], splitedSize)] - return a_splited - - Y_test_fft_rois = split_to_rois(Y_test_fft) - Y_rec_fft_rois = split_to_rois(Y_rec_fft) - - # Define the X axis, pixels - pixel_rois = split_to_rois(np.arange(FC_START, FC_END, 1).reshape(1,-1)) - - # Compute the absolute difference of gt and rec fourier components per region of interest per train ID - delta_Y_fft_rois = [np.abs(a-b) for a, b in zip(Y_test_fft_rois, Y_rec_fft_rois)] - # Sum along all Fourier components in roi per train id - delta_Y_fft_rois_sum = [np.sum(a, axis=1) for a in delta_Y_fft_rois] - print("len(delta_Y_fft_rois_sum)", len(delta_Y_fft_rois)) - # Sum along all train IDs per roi - delta_Y_fft_rois_sum_mean = [x.mean() for x in delta_Y_fft_rois_sum] - - - # find corr coef - def corr_coef(a, b): - cor_coef = np.corrcoef(a, b)[0][1] - #print("The corr coef is :", np.round(cor_coef, 2)) - return np.round(cor_coef, 2) - cc = [] - for i_bfgs in range(Y_rec.shape[0]): - #corr_coef(sc_res[i_bfgs,:], Y_test[i_bfgs,:]) - cc.append(corr_coef(Y_rec[i_bfgs,:], spec_test[i_bfgs,:]) ) - - - - # Function to calculate Chi-distance - def chi2_distance(A, B): - # compute the chi-squared distance using above formula - chi = 0.5 * np.sum([((a - b) ** 2) / (a + b) - for (a, b) in zip(A, B)]) - - #print("The Chi-square distance is :", chi) - return chi - chi_2 = [] - for i_bfgs in range(Y_rec.shape[0]): - chi_2.append(chi2_distance(Y_rec[i_bfgs,:], spec_test[i_bfgs,:])) - - - # compute RMSE - def rmse_value(A, B): - return np.sqrt(np.mean((A-B)**2)) - rmse = [] - for i_bfgs in range(Y_rec.shape[0]): - rmse.append(rmse_value(Y_rec[i_bfgs,:], spec_test[i_bfgs,:])) - rmse_quickfft = [] - for i_bfgs in range(Y_rec_fft.shape[0]): - rmse_quickfft.append(rmse_value(Y_rec_fft[i_bfgs,:], Y_test_fft[i_bfgs,:])) - - - - - - - # Save Images - - # Save cor mat test set - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - im = ax.imshow(cor_mat) - ax.set_xlabel('train IDs.', fontsize=22) - ax.set_ylabel('train IDs.', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - ax.set_title(f"cor mat: PCA(PES)", y=1.0, pad=-20, fontsize=22) - - cax = fig.add_axes([0.27, 0.95, 0.5, 0.05]) - fig.colorbar(im, cax=cax, orientation='horizontal') - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/cor_mat_pca_pes.png", bbox_inches='tight') - plt.close(fig) - - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - ax.plot(cor_mat[42,:], label=f"42") - ax.plot(cor_mat[43,:], label=f"43") - ax.plot(cor_mat[171,:], label=f"171") - #ax.plot(cor_mat_int, label=f"int") - ax.legend(fontsize=20) - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/cor_mat_pca_pes_line.png", bbox_inches='tight') - plt.close(fig) - - - # Save xgm_pulse energy - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - ax.plot(np.arange(len(xgm_pulseen_test)), xgm_pulseen_test, label=f"xgm pulse energy: PES") - - ax.set_xlabel('train IDs.', fontsize=22) - ax.set_ylabel('int a.u.', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - ax.set_title(f"xgm test pulse energy: PES", y=1.0, pad=-20, fontsize=22) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/xgm_pulse_energy.png", bbox_inches='tight') - plt.close(fig) - - - # Save xgm_pulse energy vs PES sum - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - ax.scatter(eps_mean, xgm_pulseen_test, label=f"eps_mean xgm pulse energy") - - ax.set_xlabel('eps_mean', fontsize=22) - ax.set_ylabel('pulse energy', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - ax.set_title(f"pes_sum vs pulse energy", y=1.0, pad=-20, fontsize=22) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/pulse_energy_vs_eps_mean.png", bbox_inches='tight') - plt.close(fig) - - - # Save xgm_pulse energy vs QuickFFT - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - ax.scatter(eps_mean, rmse_quickfft, label=f"eps_mean QuickFFT") - - ax.set_xlabel('eps_mean', fontsize=22) - ax.set_ylabel('QuickFFT', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - ax.set_title(f"eps_mean QuickFFT", y=1.0, pad=-20, fontsize=22) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/eps_mean_vs_quick_fft.png", bbox_inches='tight') - plt.close(fig) - - - # Save xgm_pulse energy vs QuickFFT - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - ax.scatter(xgm_pulseen_test, rmse_quickfft, label=f"xgm_pulseen_test QuickFFT") - - ax.set_xlabel('xgm_pulseen_test', fontsize=22) - ax.set_ylabel('QuickFFT', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - ax.set_title(f"pulse energy QuickFFT", y=1.0, pad=-20, fontsize=22) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/xgm_pulseen_test_vs_quick_fft.png", bbox_inches='tight') - plt.close(fig) - - - # Save CC vs QuickFFT - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - ax.scatter(cc, rmse_quickfft, label=f"cc vs QuickFFT") - - ax.set_xlabel('cc', fontsize=22) - ax.set_ylabel('QuickFFT', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - ax.set_title(f"cc vs QuickFFT", y=1.0, pad=-20, fontsize=22) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/cc_vs_quick_fft.png", bbox_inches='tight') - plt.close(fig) - - - # Save xgm_pulse energy vs Spec sum - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - ax.scatter(spec_sum, xgm_pulseen_test, label=f"xgm pulse energy vs spec int") - - ax.set_xlabel('spec_sum', fontsize=22) - ax.set_ylabel('pulse energy', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - ax.set_title(f"spec_sum vs pulse energy", y=1.0, pad=-20, fontsize=22) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/pulse_energy_vs_spec_int.png", bbox_inches='tight') - plt.close(fig) - - - # Save xgm_pulse energy vs corr matrix sum - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - ax.scatter(cor_mat_int, xgm_pulseen_test, label=f"xgm pulse energy vs cor_mat_int") - - ax.set_xlabel('cor_mat_int', fontsize=22) - ax.set_ylabel('pulse energy', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - ax.set_title(f"spec_sum vs pulse energy", y=1.0, pad=-20, fontsize=22) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/pulse_energy_vs_cor_mat_int.png", bbox_inches='tight') - plt.close(fig) - - - # pes_sum vs Spec sum - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - ax.plot(spec_sum, pes_sum, label=f" spec int vs pes_sum") - - ax.set_xlabel('spec_sum', fontsize=22) - ax.set_ylabel('pulse energy', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - ax.set_title(f"spec_sum vs pes_sum", y=1.0, pad=-20, fontsize=22) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/spec_int_vs_pes_int.png", bbox_inches='tight') - plt.close(fig) - - - # Save pes sum test data - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - ax.plot(np.arange(len(pes_sum)), pes_sum, label=f"integrated int: PES") - - ax.set_xlabel('train IDs.', fontsize=22) - ax.set_ylabel('int a.u.', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - ax.set_title(f"integrated int: PES", y=1.0, pad=-20, fontsize=22) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/pes_int_sum.png", bbox_inches='tight') - plt.close(fig) - - - # Save pes sum test data - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - ax.plot(np.arange(len(spec_sum)), spec_sum, label=f"integrated int: SPEC") - - ax.set_xlabel('train IDs.', fontsize=22) - ax.set_ylabel('int a.u.', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - ax.set_title(f"integrated int: SPEC", y=1.0, pad=-20, fontsize=22) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/spec_int_sum.png", bbox_inches='tight') - plt.close(fig) - - - # Save pes_sum vs pulse energy - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - #ax.plot(np.arange(len(cc)), cc/max(cc), label=f"cross correlation") - #ax.plot(np.arange(len(cc)), pes_sum/max(np.array(pes_sum)), label=f"pes_sum") - - ax.plot(pes_sum/max(np.array(pes_sum)), cc/max(cc), label=f"correlation coef.") - - ax.set_xlabel('pes_sum', fontsize=22) - ax.set_ylabel('cor. coef.', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - ax.set_title(f"pes_sum vs cor. coef.", y=1.0, pad=-20, fontsize=22) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/pes_int_sum_vs_cor_coef.png", bbox_inches='tight') - plt.close(fig) - - # Save spec_sum vs pulse energy - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - #ax.plot(np.arange(len(cc)), cc/max(cc), label=f"cross correlation") - #ax.plot(np.arange(len(cc)), spec_sum/max(np.array(spec_sum)), label=f"spec_sum") - - ax.plot(spec_sum/max(np.array(spec_sum)), cc/max(cc), label=f"spec_sum correlation coef") - - ax.set_xlabel('spec_sum', fontsize=22) - ax.set_ylabel('cor. coef.', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - ax.set_title(f"spec_sum vs cor. coef.", y=1.0, pad=-20, fontsize=22) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/spec_int_sum_vs_corr_coef.png", bbox_inches='tight') - plt.close(fig) - - - # Save quick fft eval - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - ax.plot(np.arange(N_ROI), delta_Y_fft_rois_sum_mean, label=f"FFT eval {i_bfgs}") - - ax.set_xlabel('FFT bin comps.', fontsize=22) - ax.set_ylabel('int a.u.', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - ax.set_title(f" qucik fft eval", y=1.0, pad=-20, fontsize=22) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/quick_fft_eval.png", bbox_inches='tight') - plt.close(fig) - - - # Save cross correlation - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - ax.plot(np.arange(len(cc)), cc, label=f"correlation coef") - - ax.set_xlabel('train IDs.', fontsize=22) - ax.set_ylabel('int a.u.', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - ax.set_title(f"cor. coef.", y=1.0, pad=-20, fontsize=22) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/corr_coef.png", bbox_inches='tight') - plt.close(fig) - - - # Save cross correlation vs pulse energy - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - #ax.plot(np.arange(len(cc)), cc/max(cc), label=f"cross correlation") - ax.scatter( cc/max(cc), eps_mean/spec_sum, label=f"cc vs eps_mean") - - ax.set_xlabel('cor coef', fontsize=22) - ax.set_ylabel('eps mean', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - ax.set_title(f"cc vs eps_mean", y=1.0, pad=-20, fontsize=22) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/cor_coef_vs_eps_mean.png", bbox_inches='tight') - plt.close(fig) - - - # Save cross correlation vs pulse energy - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - #ax.plot(np.arange(len(cc)), cc/max(cc), label=f"cross correlation") - ax.scatter( cc/max(cc), xgm_pulseen_test/max(np.array(xgm_pulseen_test)), label=f"xgm_pulseen_test") - - ax.set_xlabel('cor coef', fontsize=22) - ax.set_ylabel('pulse energy', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - ax.set_title(f"cor. coef. vs pulse energy", y=1.0, pad=-20, fontsize=22) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/cor_coef_vs_pulse_energy.png", bbox_inches='tight') - plt.close(fig) - - - # Save cross correlation best rec spec - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - i_bfgs = np.argmax(cc) - - ax.plot(spec_raw_pe, Y_rec[i_bfgs,:], "r", label=f"SPEC rec {i_bfgs}") - ax.plot(spec_raw_pe, spec_test[i_bfgs,:], label=f"SPEC gt {i_bfgs}") - ax.fill_between( - spec_raw_pe, Y_rec[i_bfgs,:] - Y_rec_unc_bfgs[0,:], Y_rec[i_bfgs,:] + Y_rec_unc_bfgs[0,:], color="r", alpha=0.5, label="u_bfgs" - ) - ax.fill_between( - spec_raw_pe, Y_rec[i_bfgs,:] - Y_rec_unc_specpca, Y_rec[i_bfgs,:] + Y_rec_unc_specpca, color="g", alpha=0.5, label="u_specpca" - ) - ax.set_xlabel('Energy (eV) ', fontsize=22) - ax.set_ylabel('int a.u.', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - ax.set_title(f"cc best rec", y=1.0, pad=-20) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/cc_best_rec.png", bbox_inches='tight') - plt.close(fig) - - - # Save cross correlation best rec spec - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - i_bfgs = np.argmin(cc) - - ax.plot(spec_raw_pe, Y_rec[i_bfgs,:], "r", label=f"SPEC rec {i_bfgs}") - ax.plot(spec_raw_pe, spec_test[i_bfgs,:], label=f"SPEC gt {i_bfgs}") - ax.fill_between( - spec_raw_pe, Y_rec[i_bfgs,:] - Y_rec_unc_bfgs[0,:], Y_rec[i_bfgs,:] + Y_rec_unc_bfgs[0,:], color="r", alpha=0.5, label="u_bfgs" - ) - ax.fill_between( - spec_raw_pe, Y_rec[i_bfgs,:] - Y_rec_unc_specpca, Y_rec[i_bfgs,:] + Y_rec_unc_specpca, color="g", alpha=0.5, label="u_specpca" - ) - ax.set_xlabel('Energy (eV) ', fontsize=22) - ax.set_ylabel('int a.u.', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - ax.set_title(f"cc worse rec", y=1.0, pad=-20) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/cc_worse_rec.png", bbox_inches='tight') - plt.close(fig) - - - # Save cross correlation best rec pes - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - i_bfgs = np.argmax(cc) - - ax.plot(pes_test[i_bfgs,:], "r", label=f"PES test {i_bfgs}") - - ax.set_xlabel(' x comps ', fontsize=22) - ax.set_ylabel('int a.u.', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - ax.set_title(f"pes raw (cc best rec)", y=1.0, pad=-20) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/pes_raw_best_rec.png", bbox_inches='tight') - plt.close(fig) - - - # Save cross correlation worst rec pes - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - i_bfgs = np.argmin(cc) - - ax.plot(pes_test[i_bfgs,:], "r", label=f"PES test {i_bfgs}") - - ax.set_xlabel(' x comps ', fontsize=22) - ax.set_ylabel('int a.u.', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - ax.set_title(f"pes raw (cc worst rec)", y=1.0, pad=-20) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/pes_raw_worst_rec.png", bbox_inches='tight') - plt.close(fig) - - - # Save chi_2 - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - ax.plot(np.arange(len(chi_2)), chi_2, label=f"chi_2") - - ax.set_xlabel('train IDs.', fontsize=22) - ax.set_ylabel('int a.u.', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - ax.set_title(f"chi_2", y=1.0, pad=-20, fontsize=22) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/chi_2.png", bbox_inches='tight') - plt.close(fig) - - - # Save rmse - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - ax.plot(np.arange(len(rmse)), rmse, label=f"rmse") - - ax.set_xlabel('train IDs.', fontsize=22) - ax.set_ylabel('int a.u.', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - ax.set_title(f"rmse", y=1.0, pad=-20, fontsize=22) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/rmse.png", bbox_inches='tight') - plt.close(fig) - - - # Save rmse vs eps_mean - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - ax.scatter(eps_mean, rmse, label=f"rmse_vs_eps_mean") - - ax.set_xlabel('eps mean', fontsize=22) - ax.set_ylabel('rmse', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - ax.set_title(f"rmse_vs_eps_mean", y=1.0, pad=-20, fontsize=22) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/rmse_vs_eps_mean.png", bbox_inches='tight') - plt.close(fig) - - - # reconstruction with total unc - fig, axes_2 = plt.subplots(10, 2, figsize=(15, 35)) - axes_2 = axes_2.flatten() - for i_bfgs in range(20): - - ax = axes_2[i_bfgs] - ax.plot(spec_raw_pe, Y_rec[i_bfgs,:], "r", label=f"SPEC rec {i_bfgs}") - ax.plot(spec_raw_pe, spec_test[i_bfgs,:], label=f"SPEC gt {i_bfgs}") - ax.fill_between( - spec_raw_pe, Y_rec[i_bfgs,:] - Y_rec_unc[0,:], Y_rec[i_bfgs,:] + Y_rec_unc[0,:], color="pink", alpha=0.5, label="u_total" - ) - ax.set_xlabel('Energy (eV) ', fontsize=22) - ax.set_ylabel('int a.u.', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.legend(fontsize=10) - ax.set_title(f"{exp_dir} eps={eps_mean[i_bfgs]}", y=1.0, pad=-10) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/reconstruction.png", bbox_inches='tight') - plt.close(fig) - - - # reconstruction with total unc + eps - fig, axes_2 = plt.subplots(10, 2, figsize=(15, 35)) - axes_2 = axes_2.flatten() - for i_bfgs in range(20): - - ax = axes_2[i_bfgs] - ax.plot(spec_raw_pe, Y_rec[i_bfgs,:], "r", label=f"SPEC rec {i_bfgs}") - ax.plot(spec_raw_pe, spec_test[i_bfgs,:], label=f"SPEC gt {i_bfgs}") - ax.fill_between( - spec_raw_pe, Y_rec[i_bfgs,:] - Y_rec_unc_specpca, Y_rec[i_bfgs,:] + Y_rec_unc_specpca, color="g", alpha=0.5, label="u_specpca" - ) - ax.fill_between( - spec_raw_pe, Y_rec[i_bfgs,:] - Y_eps[0,:], Y_rec[i_bfgs,:] + Y_eps[0,:], color="blue", alpha=0.5, label="eps" - ) - ax.set_xlabel('Energy (eV) ', fontsize=22) - ax.set_ylabel('int a.u.', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.legend(fontsize=10) - ax.set_title(f"{exp_dir} eps={eps_mean[i_bfgs]}", y=1.0, pad=-10) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/reconstruction_eps.png", bbox_inches='tight') - plt.close(fig) - - - print(Y_rec_unc_bfgs[0,:]) - # Reconstruction with 2 uncs - fig, axes_2 = plt.subplots(10, 2, figsize=(15, 35)) - axes_2 = axes_2.flatten() - for i_bfgs in range(20): - - ax = axes_2[i_bfgs] - ax.plot(spec_raw_pe, Y_rec[i_bfgs,:], "r", label=f"SPEC rec {i_bfgs}") - ax.plot(spec_raw_pe, spec_test[i_bfgs,:], label=f"SPEC gt {i_bfgs}") - ax.fill_between( - spec_raw_pe, Y_rec[i_bfgs,:] - Y_rec_unc_bfgs[0,:], Y_rec[i_bfgs,:] + Y_rec_unc_bfgs[0,:], color="r", alpha=0.5, label="u_bfgs" - ) - ax.fill_between( - spec_raw_pe, Y_rec[i_bfgs,:] - Y_rec_unc_specpca, Y_rec[i_bfgs,:] + Y_rec_unc_specpca, color="g", alpha=0.5, label="u_specpca" - ) - ax.set_xlabel('Energy (eV) ', fontsize=22) - ax.set_ylabel('int a.u.', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.legend(fontsize=10) - ax.set_title(f"{exp_dir}", y=1.0, pad=-10) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/reconstruction_2unc.png", bbox_inches='tight') - plt.close(fig) - - - # Single ID reconstruction with 2 unc - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - ax.plot(spec_raw_pe, Y_rec[i_bfgs,:], "r", label=f"SPEC rec {i_bfgs}") - ax.plot(spec_raw_pe, spec_test[i_bfgs,:], label=f"SPEC gt {i_bfgs}") - ax.fill_between( - spec_raw_pe, Y_rec[i_bfgs,:] - Y_rec_unc_bfgs[0,:], Y_rec[i_bfgs,:] + Y_rec_unc_bfgs[0,:], color="r", alpha=0.5, label="u_bfgs" - ) - ax.fill_between( - spec_raw_pe, Y_rec[i_bfgs,:] - Y_rec_unc_specpca, Y_rec[i_bfgs,:] + Y_rec_unc_specpca, color="g", alpha=0.5, label="u_specpca" - ) - ax.set_xlabel('Energy (eV) ', fontsize=22) - ax.set_ylabel('int a.u.', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - #ax.set_title(f"{exp_dir}", y=1.0, pad=-20) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/reconstruction_2unc_id1.png", bbox_inches='tight') - plt.close(fig) - - - # Single ID reconstruction with TOTAL unc - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - ax.plot(spec_raw_pe, Y_rec[i_bfgs,:], "r", label=f"SPEC rec {i_bfgs}") - ax.plot(spec_raw_pe, spec_test[i_bfgs,:], label=f"SPEC gt {i_bfgs}") - ax.fill_between( - spec_raw_pe, Y_rec[i_bfgs,:] - Y_rec_unc[0,:], Y_rec[i_bfgs,:] + Y_rec_unc[0,:], color="r", alpha=0.5, label="u_total" - ) - - ax.set_xlabel('Energy (eV) ', fontsize=22) - ax.set_ylabel('int a.u.', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - #ax.set_title(f"{exp_dir}", y=1.0, pad=-20) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/reconstruction_unc_id1.png", bbox_inches='tight') - plt.close(fig) - - - # Example single spec - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - ax.plot(spec_raw_pe, spec_test[i_bfgs,:], label=f"SPEC gt {i_bfgs}") - - ax.set_xlabel('Energy (eV) ', fontsize=22) - ax.set_ylabel('int a.u.', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - #ax.set_title(f"{exp_dir}", y=1.0, pad=-20) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/single_spec.png", bbox_inches='tight') - plt.close(fig) - - - # Example single Y - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - ax.plot(Y_test[i_bfgs,:], label=f"SPEC gt {i_bfgs}") - - ax.set_xlabel('PCA comps.', fontsize=22) - ax.set_ylabel('int a.u.', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - #ax.set_title(f"{exp_dir}", y=1.0, pad=-20) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/single_Y.png", bbox_inches='tight') - plt.close(fig) - - - # Example single PES - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - ax.plot(pes_train[i_bfgs,:], label=f"SPEC gt {i_bfgs}") - - ax.set_xlabel('Channel comps.', fontsize=22) - ax.set_ylabel('int a.u.', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - #ax.set_title(f"{exp_dir}", y=1.0, pad=-20) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/single_pes.png", bbox_inches='tight') - plt.close(fig) - - - # # Example single X - fig, ax = plt.subplots(1, 1, figsize=(20, 10)) - for i_bfgs in range(1): - - ax.plot(X_train[i_bfgs,:], label=f"SPEC gt {i_bfgs}") - - ax.set_xlabel('PCA comps.', fontsize=22) - ax.set_ylabel('int a.u.', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.tick_params(axis="y", labelsize=20) - ax.legend(fontsize=20) - #ax.set_title(f"{exp_dir}", y=1.0, pad=-20) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/single_X.png", bbox_inches='tight') - plt.close(fig) - - - args_cor_mat_int = np.argsort(eps_mean) - args_cor_mat_int = args_cor_mat_int[230:] - # ORDERED reconstruction with total unc + eps - fig, axes_2 = plt.subplots(10, 2, figsize=(15, 35)) - axes_2 = axes_2.flatten() - for i_bfgs in range(20): - i_bfgs = i_bfgs - ax = axes_2[i_bfgs] - ax.plot(spec_raw_pe, Y_rec[args_cor_mat_int[i_bfgs],:], "r", label=f"SPEC rec {i_bfgs, args_cor_mat_int[i_bfgs]}") - ax.plot(spec_raw_pe, spec_test[args_cor_mat_int[i_bfgs],:], label=f"SPEC gt {i_bfgs, args_cor_mat_int[i_bfgs]}") - ax.fill_between( - spec_raw_pe, Y_rec[args_cor_mat_int[i_bfgs],:] - Y_rec_unc_specpca, Y_rec[args_cor_mat_int[i_bfgs],:] + Y_rec_unc_specpca, color="g", alpha=0.5, label="u_specpca" - ) - ax.fill_between( - spec_raw_pe, Y_rec[args_cor_mat_int[i_bfgs],:] - Y_eps[0,:], Y_rec[args_cor_mat_int[i_bfgs],:] + Y_eps[0,:], color="blue", alpha=0.5, label="eps" - ) - ax.set_xlabel('Energy (eV) ', fontsize=22) - ax.set_ylabel('int a.u.', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.legend(fontsize=10) - ax.set_title(f"eps={eps_mean[args_cor_mat_int[i_bfgs]]}", y=1.0, pad=-10) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/reconstruction_eps_SORTED.png", bbox_inches='tight') - plt.close(fig) - - - args_eps = np.argsort(eps_mean) - # ORDERED reconstruction with total unc + eps - fig, axes_2 = plt.subplots(10, 2, figsize=(15, 35)) - axes_2 = axes_2.flatten() - for i_bfgs in range(20): - - ax = axes_2[i_bfgs] - ax.plot(spec_raw_pe, Y_rec[args_eps[i_bfgs],:], "r", label=f"SPEC rec {i_bfgs, args_eps[i_bfgs]}") - ax.plot(spec_raw_pe, spec_test[args_eps[i_bfgs],:], label=f"SPEC gt {i_bfgs, args_eps[i_bfgs]}") - ax.fill_between( - spec_raw_pe, Y_rec[args_eps[i_bfgs],:] - Y_rec_unc_specpca, Y_rec[args_eps[i_bfgs],:] + Y_rec_unc_specpca, color="g", alpha=0.5, label="u_specpca" - ) - ax.fill_between( - spec_raw_pe, Y_rec[args_eps[i_bfgs],:] - Y_eps[0,:], Y_rec[args_eps[i_bfgs],:] + Y_eps[0,:], color="blue", alpha=0.5, label="eps" - ) - ax.set_xlabel('Energy (eV) ', fontsize=22) - ax.set_ylabel('int a.u.', fontsize=22) - ax.tick_params(axis="x", labelsize=20) - ax.legend(fontsize=10) - ax.set_title(f"eps={eps_mean[args_eps[i_bfgs]]}", y=1.0, pad=-10) - - fig.savefig(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/summaries/reconstruction_eps_SORTED_descending.png", bbox_inches='tight') - plt.close(fig) - - - - - - print("finished eval") - - - - - -#import joblib -#spec_pca_model = joblib.load(f"/home/adavtyan/my_repos/invasive/experiments/{exp_dir}/checkpoints/spec_pca_model.joblib") - - diff --git a/exploration/inv_inference.py b/exploration/inv_inference.py deleted file mode 100644 index 6ad195cc998fb118a656d059c59d85c56916b411..0000000000000000000000000000000000000000 --- a/exploration/inv_inference.py +++ /dev/null @@ -1,69 +0,0 @@ -import sys -sys.path.append("./") -sys.path.append("..") - -import matplotlib -matplotlib.use('Agg') -import matplotlib.pyplot as plt - -from src.data.data import ReadPesSpec, PesChannelSelector -from src.data.data_preproc import SpecPreprocessing - -from src.models.find_components.Find_Component import FindPCAcomps -from src.models.fit_methods.Fit_Methods import FitBFGS, FitBFGSPCA -from src.models.fit_methods.model import Model - -from src.utils.utils import load_train_test_h5, load_rec_data_h5, load_rec_data_h5_bfgs_pca, load_checkpoint -from src.utils.utils import create_experiment_dirs - -from sklearn.model_selection import train_test_split -import numpy as np - - -# Load trained model in the path exp_dir -exp_dir = "test3_pulseen_short_test_eps_r0015" -Y_train_model, Y_test_model, spec_train_model, spec_test_model, spec_raw_pe, X_train_model, X_test_model, pes_train_model, pes_test_model, att_dict, xgm_pulseen_train, xgm_pulseen_test = load_train_test_h5(exp_dir) - -att_dict["pes_pca_preprocessing"] = False # set pca non trainable -att_dict["spec_pca_preprocessing"] = False # set pca non trainable - - -# Change the exp_dir to new dir where inference data is located -#att_dict["exp_dir"] = "test3_inference" -att_dict["run_number"] = "r0014" -att_dict["spec_ofset"] = -2 - - -RPS = ReadPesSpec(att_dict) # read data -data_dict = RPS.get_data() - - -model_instance = Model(model_type="bfgs_pca_eps", data_info=att_dict) -pes_train, pes_test, spec_train, spec_test = model_instance.preprocess(data_dict, att_dict) # Model Preprocess - - -inference_model, pes_pca_model, spec_pca_model = model_instance.load_model() # Move to incference sctipt TODO! - - -# Update the Train and Test data for inference model -model_instance.X_train, model_instance.X_test = pes_pca_model.transform(pes_train), pes_pca_model.transform(pes_test) -model_instance.Y_train, model_instance.Y_test = spec_pca_model.transform(spec_train), spec_pca_model.transform(spec_test) - - -inf_dir = exp_dir + "_inference_TestDATA" + att_dict["run_number"] -model_instance.data_info["exp_dir"] = inf_dir - - -create_experiment_dirs(inf_dir) -model_instance.save_preproc() - - -result = model_instance.predict(input_value=model_instance.X_test, - target_value=model_instance.Y_test, - spec_target=spec_test_model) - - -model_instance.save_prediction() - - -print("Finished Inference") \ No newline at end of file diff --git a/exploration/inv_train.py b/exploration/inv_train.py deleted file mode 100644 index 17c693d3bae3544b0edcf45dc60c189001dd5711..0000000000000000000000000000000000000000 --- a/exploration/inv_train.py +++ /dev/null @@ -1,103 +0,0 @@ -import sys -sys.path.append("./") -sys.path.append("..") - -import matplotlib -matplotlib.use('Agg') -import matplotlib.pyplot as plt - -from src.data.data import ReadPesSpec, PesChannelSelector -from src.data.data_preproc import SpecPreprocessing - -from src.models.find_components.Find_Component import FindPCAcomps - -from src.models.fit_methods.model import Model - -from src.utils.utils import create_experiment_dirs, save_train_test_h5, save_rec_data_h5, save_checkpoint, save_checkpoint_fft - -from sklearn.model_selection import train_test_split -import numpy as np - -run_directory = "/gpfs/exfel/exp/SA3/202121/p002935/raw" -run_number = "r0015" -spec_ofset = -2 -n_pca_comps_pes = 400 -n_pca_comps_spec = 20 -pes_pca_preprocessing = True -spec_pca_preprocessing = True -spec_gc = True -spec_fft = False -use_data_subset = True -data_subset_start, data_subset_end = 1000, 6000 # Min Max -channel_names = "all_chs" # "default", "all_chs", or list with channel names ex ["channel_1_D", "channel_2_B"] -test_size = 0.05 -model_type = "bfgs_pca_eps" # "bfgs", "bfgs_pca", "bfgs_pca_eps" - -exp_dir = "test3_pulseen_short_test_eps_" + run_number -general_comment = exp_dir - -data_info = {"run_directory": run_directory, - "run_number": run_number, - "spec_ofset": spec_ofset, - "exp_dir": exp_dir, - "n_pca_comps_pes": n_pca_comps_pes, - "n_pca_comps_spec": n_pca_comps_spec, - "pes_pca_preprocessing": pes_pca_preprocessing, - "spec_pca_preprocessing": spec_pca_preprocessing, - "spec_gc": spec_gc, - "spec_fft": spec_fft, - "use_data_subset": use_data_subset, - "data_subset_start": data_subset_start, - "data_subset_end": data_subset_end, - "channel_names": channel_names, - "test_size": test_size, - "model_type": model_type, - "general_comment": general_comment - - } - - -experiment_dir, summary_dir, checkpoint_dir, output_dir, test_dir = create_experiment_dirs(exp_dir) - -import datetime -now = datetime.datetime.now() -print ("Start Read Pes Spec Data : ") -print (now.strftime("%Y-%m-%d %H:%M:%S")) - -# Read Pes Spec Data -RPS = ReadPesSpec(data_info) -data_dict = RPS.get_data() - -now = datetime.datetime.now() -print ("End Read Pes Spec Data : ") -print (now.strftime("%Y-%m-%d %H:%M:%S")) - -# Model -model_instance = Model(model_type=model_type, data_info=data_info) -X_train, X_test, Y_train, Y_test = model_instance.preprocess(data_dict, data_info) # Model Preprocess - -now = datetime.datetime.now() -print ("End Model Preprocessor : ") -print (now.strftime("%Y-%m-%d %H:%M:%S")) - -model_instance.save_preproc() -print("################") -print(X_train.shape) -print("################") -model_instance.fit_eval(X_train, Y_train, X_test, Y_test) # Model FIT - -now = datetime.datetime.now() -print ("End Model Train : ") -print (now.strftime("%Y-%m-%d %H:%M:%S")) - -model_instance.save_latest_ckp() - -result = model_instance.predict(input_value=X_test) - -model_instance.save_prediction() - - - - -print("FINISHED ALL") - diff --git a/exploration/requirements.txt b/exploration/requirements.txt deleted file mode 100644 index 891565fa240eb738b7d6413819a83245467c983f..0000000000000000000000000000000000000000 --- a/exploration/requirements.txt +++ /dev/null @@ -1,8 +0,0 @@ -numpy -scipy -scikit-learn -extra_data -autograd -h5py -joblib -matplotlib diff --git a/exploration/src/data/__init__.py b/exploration/src/data/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/exploration/src/data/data.py b/exploration/src/data/data.py deleted file mode 100644 index cd9517e8a72369e574d2e767cdd3a2bb1e9db310..0000000000000000000000000000000000000000 --- a/exploration/src/data/data.py +++ /dev/null @@ -1,326 +0,0 @@ -""" - Data loaders for SPEC and PES. -""" - -from extra_data import open_run, RunDirectory -import numpy as np - -from typing import Any, Union, List, Dict - -import os -import h5py -import numpy as np - -import itertools - -import scipy -import scipy.interpolate -from random import choices - -from skimage.feature import canny -import torch -import torchvision.transforms as transforms -from collections import namedtuple -import torch.nn.functional as F - -import cv2 - -import sys -sys.path.append("./") -sys.path.append("..") - - -from sklearn.datasets import fetch_olivetti_faces -from numpy.random import RandomState - - -class ReadPesSpec: - """ - Read PES and SPEC data, select by unique IDs - """ - - def __init__(self, data_info): - - self.data_info = data_info - - self.rundir = self.data_info["run_directory"] - self.runid = self.data_info["run_number"] - self.spec_ofset = self.data_info["spec_ofset"] - - self.run = RunDirectory(f"{self.rundir}/{self.runid}") - - self.spec_inds = None - self.pes_inds = None - self.xgm_inds = None - - self.spec_tid_all = None - self.spec_tid_all_offset = None - self.pes_tid_all = None - self.xgm_tid_all = None - - self.spec_tids = None - self.pes_tids = None - self.xgm_tids = None - - self.retvol_raw = None - self.retvol_raw_timestamp = None - self.min_ret_vol = None - - - self.spec_raw_pe = None - self.spec_raw_int = None - self.pes_raw_dict = None - self.pes_data = None - self.xgm_pressureFiltered = None - self.xgm_pulseen = None - self.retvol_raw = None - self.raw_timestamp = None - self.min_ret_vol = None - - - # Output - self.data = {"spec_raw_pe":None, - "spec_raw_int":None, - "pes_raw_dict":None, - "pes_data":None, - "xgm_pressureFiltered":None, - "xgm_pulseen":None, - "retvol_raw":None, - "retvol_raw_timestamp":None, - "min_ret_vol":None} - - - - def get_unq_inds(self, a, b, c): - uniq_vals = list(set(a).intersection(b).intersection(c)) - return [a.index(x) for x in uniq_vals], [b.index(x) for x in uniq_vals], [c.index(x) for x in uniq_vals] - - # create channel names - def gen_channel_names(self): - channels = [] - for i in range(4): - channels.append(f"channel_{i+1}_A") - channels.append(f"channel_{i+1}_B") - channels.append(f"channel_{i+1}_C") - channels.append(f"channel_{i+1}_D") - - chs_short_name = [] - for i in range(4): - chs_short_name.append(f"ch_{i+1}_A") - chs_short_name.append(f"ch_{i+1}_B") - chs_short_name.append(f"ch_{i+1}_C") - chs_short_name.append(f"ch_{i+1}_D") - - return channels, chs_short_name - - - def read_train_ids(self, debug=False): - - # Read raw SPEC Data train IDs - self.spec_tid_all = self.run['SA3_XTD10_SPECT/MDL/FEL_BEAM_SPECTROMETER_SQS1:output', f"data.trainId"].ndarray() - self.spec_tid_all_offset = self.spec_tid_all + self.spec_ofset ##### Offset correction ##### - #print(f" spec_tid_all_offset = {len(spec_tid_all_offset)}") - - # Read raw PES Data train IDs - self.pes_tid_all = self.run['SA3_XTD10_PES/ADC/1:network', f"digitizers.trainId"].ndarray() - #print(f" pes_tid_all = {len(pes_tid_all)}") - - - # Read raw XGM Data train IDs - self.xgm_tid_all = self.run['SA3_XTD10_XGM/XGM/DOOCS:output', f"data.trainId"].ndarray() - #print(f" xgm_tid_all = {len(xgm_tid_all)}") - - print(f" Introduced SPEC_OFFSET = {self.spec_ofset} \n ") - - if debug: - print(f"\n Len of train IDs: \n \ - spec_tid_all_offset {len(self.spec_tid_all_offset)} \n \ - pes_tid_all {len(self.pes_tid_all)} \n \ - xgm_tid_all {len(self.xgm_tid_all)} \n \ - ") - - - print(f"\n First 5 train IDs from all sources: \n \ - spec_tid_all {self.spec_tid_all[:5]} \n \ - spec_tid_all_offset {self.spec_tid_all_offset[:5]} \n \ - pes_tid_all {self.pes_tid_all[:5]} \n \ - xgm_tid_all {self.xgm_tid_all[:5]} \n \ - ") - - - def merge_train_ids(self, debug=True): - # Merge Train IDs - self.spec_inds, self.pes_inds, self.xgm_inds = self.get_unq_inds(list(self.spec_tid_all_offset), list(self.pes_tid_all), list(self.xgm_tid_all)) - - - print(" Len of indices for common elements in spec, pes, xgm :", len(self.spec_inds), len(self.pes_inds), len(self.xgm_inds)) - - # Find common train ids - self.spec_tids = self.spec_tid_all_offset[self.spec_inds] - self.pes_tids = self.pes_tid_all[self.pes_inds] - self.xgm_tids = self.xgm_tid_all[self.xgm_inds] - - if debug: - print(f"\n First 5 indices from all sources: \n \ - spec_inds {self.spec_inds[:5]} \n \ - pes_inds {self.pes_inds[:5]} \n \ - xgm_inds {self.xgm_inds[:5]} \n \ - ") - - print(f"\n First 5 train IDs from all sources: \n \ - spec_tids {self.spec_tids[:5]} \n \ - pes_tids {self.pes_tids[:5]} \n \ - xgm_tids {self.xgm_tids[:5]} \n \ - ") - return self.spec_inds, self.pes_inds, self.xgm_inds - - - def retVol(self): - # Read PES Data RetVol - self.retvol_raw = self.run["SA3_XTD10_PES/MDL/DAQ_MPOD", "u212.value"].ndarray()[list(self.pes_inds)] - self.min_ret_vol = np.abs(self.retvol_raw.min()) - self.retvol_raw_timestamp = self.run["SA3_XTD10_PES/MDL/DAQ_MPOD", "u212.timestamp"].ndarray()[list(self.pes_inds)] - - - def read_data(self, debug=True): - - # Read raw SPEC Data - self.spec_raw_pe= self.run['SA3_XTD10_SPECT/MDL/FEL_BEAM_SPECTROMETER_SQS1:output', f"data.photonEnergy"].ndarray().astype(float)[list(self.spec_inds),:] - #spec_raw_all = run_spec['SA3_XTD10_SPECT/MDL/FEL_BEAM_SPECTROMETER_SQS1:output', f"data.intensityDistribution"].ndarray() - self.spec_raw_int = self.run['SA3_XTD10_SPECT/MDL/FEL_BEAM_SPECTROMETER_SQS1:output', f"data.intensityDistribution"].ndarray().astype(float)[list(self.spec_inds),:] - - # Read PES Data - channels, _ = self.gen_channel_names() # create channel names - self.pes_raw_dict = {} - pes_raw_dict_all = {} - for ch in channels: - #pes_raw = run['SA3_XTD10_PES/ADC/1:network', f"digitizers.{ch}.raw.samples"].ndarray() - #pes_raw_dict_all[ch] = run_pes['SA3_XTD10_PES/ADC/1:network', f"digitizers.{ch}.raw.samples"].ndarray() - self.pes_raw_dict[ch] = self.run['SA3_XTD10_PES/ADC/1:network', f"digitizers.{ch}.raw.samples"].ndarray().astype(float)[list(self.pes_inds),:] - - # Read XGM gas pressure - self.xgm_pressureFiltered= self.run['SA3_XTD10_XGM/XGM/DOOCS', f"pressure.pressureFiltered.value"].ndarray().astype(float)[list(self.xgm_inds)] - - # Read PES Data RetVol - self.retVol() - - # Read XGM Pusle Energy - self.xgm_pulseen = self.run['SA3_XTD10_XGM/XGM/DOOCS:output', f"data.intensitySa3TD"].ndarray().astype(float)[list(self.xgm_inds)] - - if debug: - #print(f"\n pes data channels :: \n {self.pes_raw_dict.keys()}") - print(f"spec_raw_pe.shape{self.spec_raw_pe.shape}, \n spec_raw_int.shape{self.spec_raw_int.shape}") - print(f"\n pes channel shape channel_1_A:: {self.pes_raw_dict['channel_1_A'].shape}") - print("\n shape of xgm_pressureFiltered", self.xgm_pressureFiltered.shape) - print(f"\n min_ret_vol = {self.min_ret_vol}") - - - assert self.spec_raw_pe.shape, self.spec_raw_int.shape - assert self.pes_raw_dict['channel_1_A'].shape[0], self.spec_raw_int.shape[0] - - - - def get_data(self): - - # Read Train Ids - self.read_train_ids(self.run) - - # Merge Train Ids - self.merge_train_ids() - - # Read Data - self.read_data() - - self.data["spec_raw_pe"] = self.spec_raw_pe - self.data["spec_raw_int"] = self.spec_raw_int - self.data["pes_raw_dict"] = self.pes_raw_dict - self.data["xgm_pressureFiltered"] = self.xgm_pressureFiltered - self.data["xgm_pulseen"] = self.xgm_pulseen - self.data["retvol_raw"] = self.retvol_raw - self.data["retvol_raw_timestamp"] = self.retvol_raw_timestamp - self.data["min_ret_vol"] = self.min_ret_vol - - print("Data loaded in dataframe") - - PCS = PesChannelSelector(pes_dict=self.data, ch_names=self.data_info["channel_names"]) # Select Pes Channels - self.pes_data = PCS.sel_pes_chs() - self.data["pes_data"] = self.pes_data - - print("pes_data.shape = ", self.pes_data.shape) # (14497, 1200) - print("spec_data.shape = ", self.spec_raw_int.shape) # (14497, 1900) - - - return self.data - - -class PesChannelSelector: - """_Use this class to take all or part of PES channels for data preprocessing_ - """ - def __init__(self, pes_dict, ch_names="default"): - - self.pes_dict = pes_dict - self.default_channels = ["channel_1_D", "channel_2_B", "channel_3_A", "channel_3_B", "channel_4_C", "channel_4_D"] - - - if ch_names is "default": - self.channels = self.default_channels - elif ch_names == "all_chs": - self.channels, _ = self.gen_channel_names() - else: - self.channels = ch_names - - # Output - self.pes_data = np.array([]) - - - - # create channel names - def gen_channel_names(self): - """ - Generate channel names for quick reference - - Returns: - [channels], [chs_short_name]: raw channel names, channel short names - """ - channels = [] - for i in range(4): - channels.append(f"channel_{i+1}_A") - channels.append(f"channel_{i+1}_B") - channels.append(f"channel_{i+1}_C") - channels.append(f"channel_{i+1}_D") - - chs_short_name = [] - for i in range(4): - chs_short_name.append(f"ch_{i+1}_A") - chs_short_name.append(f"ch_{i+1}_B") - chs_short_name.append(f"ch_{i+1}_C") - chs_short_name.append(f"ch_{i+1}_D") - - return channels, chs_short_name - - - def get_array_from_dict(self, ch_name): - """ Select ROI where TOF is present - - Args: - ch_name (str): channel name - - Returns: - Croped channel : crop channel around the TOF measurement - """ - TOF_start = 31445 + 0 - TOF_end = TOF_start + 200 - return self.pes_dict["pes_raw_dict"][ch_name][:,TOF_start:TOF_end] - - - def sel_pes_chs(self): - """ Select PES channel - - Returns: - pes_data: PES data concat together all channels - """ - - self.pes_data = np.concatenate([self.get_array_from_dict(ch_name) for ch_name in self.channels], axis=1) - - return self.pes_data - diff --git a/exploration/src/data/data_preproc.py b/exploration/src/data/data_preproc.py deleted file mode 100644 index fba4bb0f6c6bac763618b392855f193bf3d9a705..0000000000000000000000000000000000000000 --- a/exploration/src/data/data_preproc.py +++ /dev/null @@ -1,99 +0,0 @@ -""" - Data Preprocessing -""" - -import scipy -import scipy.signal -import numpy as np -from sklearn import preprocessing - -import sys -sys.path.append("./") -sys.path.append("..") - -from src.models.find_components.Find_Component import FindPCAcomps - - -# GC Spec data -class SpecPreprocessing(): - def __init__(self, spec_data, spec_raw_pe, gauss_sigma=0.2): - - self.gauss_sigma = gauss_sigma - self.spec_data = spec_data - self.spec_raw_pe = spec_raw_pe - - # Output - self.gaussian = None - self.spec_data_gc = None - self.spec_data_gc_fft = None - - def gaussian_convolve(self): - gx = self.spec_raw_pe[:,:] - mu = self.spec_raw_pe[0,gx.shape[1]//2] - - self.gaussian = np.exp(-((gx-mu)/self.gauss_sigma)**2/2)/np.sqrt(2*np.pi*self.gauss_sigma**2) - print(self.spec_data.shape, self.spec_raw_pe.shape, self.gaussian.shape) - #result = np.convolve(spec_data[0,:], gaussian, mode="full") - - self.spec_data_gc = scipy.signal.fftconvolve(self.spec_data, self.gaussian, mode="same",axes=1)/80 - print(self.spec_data_gc.shape) - - return self.spec_data_gc, self.gaussian - - def make_fft(self): - - self.spec_data_gc_fft = np.fft.fft(self.spec_data_gc) - - return self.spec_data_gc_fft - - - -class PesPreprocessing(): - def __init__(self, data_train, data_test, n_pca=50): - - self.data_train = data_train - self.data_test = data_test - self.n_pca = n_pca - - # Output - self.pca_model = None - self.scaler = None - self.new_train = None - self.new_test = None - - - - def find_pca(self): - pes_PCA = FindPCAcomps( - data_train=self.data_train, - data_test=self.data_test, - n_pca_comps=self.n_pca - ) - - self.pca_model, self.new_train, self.new_test = pes_PCA.get_pca() - return self.pca_model, self.new_train, self.new_test - - def stand_scaler(self): - - self.scaler = preprocessing.StandardScaler().fit(self.data_train) - - self.new_train = self.scaler.transform(self.data_train) - self.new_test = self.scaler.transform( self.data_test) - - return self.scaler, self.new_train, self.new_test - - - def get(self, data_info): - - if data_info["pes_pca_preprocessing"]: - self.pca_model, self.new_train, self.new_test = self.find_pca() - return self.pca_model, self.new_train, self.new_test - - if data_info["standard_scaler"]: - self.new_train, self.new_test = self.stand_scaler() - return self.scaler, self.new_train, self.new_test - - - - - diff --git a/exploration/src/models/find_components/Find_Component.py b/exploration/src/models/find_components/Find_Component.py deleted file mode 100644 index 54c93b0532bb7c3996cdb89f2da06b0e4c291f63..0000000000000000000000000000000000000000 --- a/exploration/src/models/find_components/Find_Component.py +++ /dev/null @@ -1,194 +0,0 @@ -import sys -sys.path.append("..") - - -from sklearn.datasets import load_digits -from sklearn.datasets import fetch_olivetti_faces -from sklearn.decomposition import FastICA -from sklearn import decomposition -from sklearn.cluster import MiniBatchKMeans -from sklearn import decomposition -import matplotlib.pyplot as plt -import skimage - -import numpy as np -from numpy.random import RandomState - -import logging -from time import time - -from sklearn import linear_model -import torch - -from scipy import signal -from scipy.signal import find_peaks -from sklearn.decomposition import PCA - -class FindComponents(object): - """This class find the components in data. Those components could be independent or principal. It also returns the transformer. - - Args: - object (object): Object of components and transformer - """ - - - def __init__(self): - """_summary_ - - Args: - train_data (np.array): _description_ - val_data (np.array): _description_ - image_shape (tuple, optional): _description_. Defaults to (64, 64). - is_image (bool, optional): _description_. Defaults to True. - """ - - super(FindComponents, self).__init__() - - self.transformer = None - self.ica_comps = None - self.ica_comps_pix = None - - def clear_input(self): - self.train_data = None - self.val_data = None - self.image_shape = None - self.is_image = None - - - def get_transformer_ica(self, train_data, val_data, image_shape=(64, 64), is_image=True, N_COMP_ICA=5, ICA_Method='logcosh'): - """Takes number of data points and calculates the N Independent components based on sklearn.decomposition.FastICA. - Returns the transformer and corresponding ICA components in 2 verions (N ICA Components, and N ICA components per pixel. - - Args: - train_data (np.array): _description_ - val_data (np.array): _description_ - image_shape (tuple, optional): _description_. Defaults to (64, 64). - is_image (bool, optional): _description_. Defaults to True. - N_COMP_ICA (int, optional): _description_. Defaults to 5. - - Returns: - [transformer (object)]: transformer object for FastICA - [ica_comps (list)]: FastICA N componenrs ordered for all pixels in single data - [ica_comps_pix (list)]: FastICA N components ordered per pixel for all components in single data - """ - - self.train_data = train_data - self.val_data = val_data - self.image_shape = image_shape - self.is_image = is_image - - self.train_data_points = self.train_data.shape[0] - if len(val_data.shape)==1: - self.val_data_points = 1 - elif len(val_data.shape)!=1: - self.val_data_points = self.val_data.shape[0] - - assert(N_COMP_ICA<=self.train_data_points) - - transformer = FastICA(n_components=N_COMP_ICA, max_iter=5000, random_state=0, whiten=True, fun=ICA_Method) - print("shape train data", self.train_data.shape) - print("ICA_Method", ICA_Method) - X_transformed = transformer.fit_transform(self.train_data) - print("X_transformed.shape=", X_transformed.shape) - - n_img, l_img = transformer.mixing_.T.shape - print("transformer.mixing_.T.shape", n_img, l_img) - ica_comps = [transformer.mixing_[:, i] for i in range(N_COMP_ICA)] - #print("ica_comps", ica_comps) - - - # Get pixel wise N components by - ica_comps_pix = [transformer.mixing_[i, :] for i in range(l_img)] - #print("ica_comps_pix", ica_comps_pix) - - self.transformer = transformer - self.ica_comps = ica_comps - self.ica_comps_pix = ica_comps_pix - - return self.transformer, self.ica_comps, self.ica_comps_pix, X_transformed - - -class FindNumberOfComponents(object): - - def __init__(self): - self.comps = None - self.PSDs = None - self.PSDs_means = None - self.user_n_components = None - self.X_centered, self.y_centered = None, None - - self.n_good_comps = None - self.good_comps = None - - - def find_components(self): - # Find components - number_of_components = self.user_n_components - fit_method = "BayesianRidge" - - FC = FindComponents() - transformer, comps, comps_pix, X_transformed = FC.get_transformer_ica(train_data=self.X_centered, val_data=self.y_centered, N_COMP_ICA=self.user_n_components) # finds components - self.comps = comps - - def find_PSDs(self): - #self.find_components() - self.PSDs = [signal.welch(ac)[1] for ac in self.comps] - return self.PSDs - - def find_peaks_PSD(self): - self.PSDs_means = np.mean(self.PSDs, axis=1) - peaks, _ = find_peaks(self.PSDs_means, threshold=self.PSDs_means.min()*3) - self.n_good_comps = len(peaks) - self.good_comps = peaks - print("DONE !!!!!!!!!!!!!!!!!!") - return self.n_good_comps, self.good_comps - - def n_good_components(self, - X_centered, - y_centered, - number_of_components=20): - - self.user_n_components = number_of_components - self.X_centered, self.y_centered = X_centered, y_centered - - self.find_components() - self.find_PSDs() - self.find_peaks_PSD() - - #plt.plot(self.PSDs_means) - #plt.plot(self.good_comps, self.PSDs_means[self.good_comps], "*") - #plt.plot(np.zeros_like(self.PSDs_means), "--", color="gray") - #plt.show() - #print(f"n_good_components = {self.n_good_comps}") - #print(f"Good components = {self.good_comps}") - - - return self.n_good_comps, self.good_comps - - -class FindPCAcomps(object): - """_summary_ - - Args: - object (_type_): _description_ - """ - - def __init__(self, data_train, data_test, n_pca_comps=50): - self.pca_model = None - self.data_train = data_train - self.data_test = data_test - self.n_pca_comps = n_pca_comps - - def clear_output(self): - self.pca_model = None - self.pca_train = None - self.pca_test = None - - - def get_pca(self): - self.clear_output() - self.pca_model = PCA(n_components=self.n_pca_comps, whiten=True) - self.pca_train = self.pca_model.fit_transform(self.data_train) - self.pca_test = self.pca_model.transform(self.data_test) - - return self.pca_model, self.pca_train, self.pca_test \ No newline at end of file diff --git a/exploration/src/models/find_components/__init__.py b/exploration/src/models/find_components/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/exploration/src/models/fit_methods/Fit_Methods.py b/exploration/src/models/fit_methods/Fit_Methods.py deleted file mode 100644 index e7baf726266148f62bc6cbcb75bed48760cd742e..0000000000000000000000000000000000000000 --- a/exploration/src/models/fit_methods/Fit_Methods.py +++ /dev/null @@ -1,302 +0,0 @@ -from sklearn import linear_model -import torch -import scipy -import h5py -import numpy as np -from autograd import numpy as anp # Thinly-wrapped version of Numpy -from autograd import grad - - - - -class FitBFGSPCA_EPS(object): - - def __init__(self): - - self.X_train = None - self.Y_train = None - - self.X_test = None - self.Y_test = None - - self.Y_train_norm = None - self.Y_test_norm = None - - self.spec_test = None - - self.Nx = None - self.Ny = None - self.A0 = None - self.Aeps = None - - self.b0 = None - self.u0 = None - self.x0 = None - - self.sc_op = None - self.A_inf = None - self.b_inf = None - self.u_inf = None - - self.model = {"A_inf":self.A_inf, - "b_inf":self.b_inf, - "u_inf":self.u_inf - - } - - self.pca_model_spec = None - - self.res = None - self.res_unc = None - self.res_pca_unc = None - self.res_unc_specpca = None - - self.result = {"res":self.res, - "res_unc":self.res_unc, - "res_pca_unc": self.res_pca_unc, - "res_unc_specpca": self.res_unc_specpca - } - - self.input_data = None - self.loss_train = None - self.loss_test = None - - - def loss_mae(self, x): - # diag( (in @ x - out) @ (in @ x - out)^T ) - A = x[:self.Nx*self.Ny].reshape((self.Nx, self.Ny)) - b = x[self.Nx*self.Ny:(self.Nx*self.Ny+self.Ny)].reshape((1, self.Ny)) - log_unc = anp.clip(x[(self.Nx*self.Ny+self.Ny):].reshape((1, self.Ny)), -3, 8) - #log_unc = x[(Nx*Ny+Ny):].reshape((1, Ny)) - iunc = anp.exp(-log_unc) - #print("iunc2", iunc2) - #print("log_unc", log_unc) - - return anp.mean((anp.fabs(self.X_train @ A + b - self.Y_train)*iunc + log_unc).sum(axis=1), axis=0 ) # Put RELU on (XX@x) and introduce new matrix W - - def loss_regul(self, x): - # diag( (in @ x - out) @ (in @ x - out)^T ) - A = x[:self.Nx*self.Ny].reshape((self.Nx, self.Ny)) - b = x[self.Nx*self.Ny:(self.Nx*self.Ny+self.Ny)].reshape((1, self.Ny)) - #log_unc = anp.clip(x[(Nx*Ny+Ny):].reshape((1, Ny)), -3, 8) - log_unc = x[(self.Nx*self.Ny+self.Ny):].reshape((1, self.Ny)) - iunc2 = anp.exp(-2*log_unc) - unc2 = anp.exp(2*log_unc) - #print("iunc2", iunc2) - #print("log_unc", log_unc) - width = 100.0 - w = 0.5/width**2 - #prior = w*((A**2).reshape((1, -1)).mean(axis=1)) - - - # New from Danilo-> p(unc|a, b) = ba/gamma(a) * unc(a-1) * np.exp(-b * unc) - a = 3 - b = 6 - # prior -> - log p(unc|a, b) = - (a-1)*anp.log(unc) + b*unc - prior = - (a-1) * log_unc + b * anp.exp(log_unc) - - - return anp.mean( (0.5*((self.X_train @ A + b - self.Y_train_norm)**2)*iunc2 + log_unc).sum(axis=1) + prior.sum(axis=1), axis=0 ) # Put RELU on (XX@x) and introduce new matrix W - - def fill_init(self): - - - self.Nx = self.X_train.shape[1] - self.Ny = self.Y_train.shape[1] - self.A0 = np.eye(self.Nx, self.Ny).reshape(self.Nx*self.Ny) - self.Aeps = np.zeros(self.Nx) - - self.b0 = np.zeros(self.Ny) - self.u0 = np.zeros(self.Ny) - self.x0 = np.concatenate((self.A0,self.b0,self.u0,self.Aeps)) - - - - - def fit_eval(self, X_train, Y_train, X_test, Y_test): - - self.X_train = X_train - self.Y_train = Y_train - - self.X_test = X_test - self.Y_test = Y_test - - self.fill_init() - - - def loss(x, X, Y): - # diag( (in @ x - out) @ (in @ x - out)^T ) - A = x[:self.Nx*self.Ny].reshape((self.Nx, self.Ny)) - b = x[self.Nx*self.Ny:(self.Nx*self.Ny+self.Ny)].reshape((1, self.Ny)) - - b_eps = x[(self.Nx*self.Ny+self.Ny):(self.Nx*self.Ny+self.Ny+self.Ny)].reshape((1, self.Ny)) - - A_eps = x[(self.Nx*self.Ny+self.Ny+self.Ny):].reshape((self.Nx, 1)) - log_unc = X @ A_eps + b_eps - - #log_unc = anp.log(anp.exp(log_unc) + anp.exp(log_eps)) - iunc2 = anp.exp(-2*log_unc) - - #print("iunc2", iunc2) - #print("log_unc", log_unc) - - return anp.mean( (0.5*((X@ A + b - Y)**2)*iunc2 + log_unc).sum(axis=1), axis=0 ) # Put RELU on (XX@x) and introduce new matrix W - - self.loss_train = [] - self.loss_test = [] - - def loss_hist(x): - l_train = loss(x, X_train, self.Y_train) - l_test = loss(x, X_test, self.Y_test) - - self.loss_train.append(l_train) - self.loss_test.append(l_test) - return l_train - - def loss_hist_2(x): - l_train = loss(x, self.X_train, self.Y_train) - return l_train - - grad_loss = grad(loss_hist_2) - #self.grad_loss_regul = grad(loss_regul) - self.sc_op = scipy.optimize.fmin_l_bfgs_b(loss_hist, self.x0, grad_loss, disp=True, factr=1e7, maxiter=100, iprint = 0) - - # Inference - #print(sc_res.shape) - self.A_inf = self.sc_op[0][:self.Nx*self.Ny].reshape(self.Nx, self.Ny) - self.b_inf = self.sc_op[0][self.Nx*self.Ny:(self.Nx*self.Ny+self.Ny)].reshape(1, self.Ny) - self.u_inf = self.sc_op[0][(self.Nx*self.Ny+self.Ny):(self.Nx*self.Ny+self.Ny+self.Ny)].reshape(1, self.Ny) # removed np.exp - self.A_eps = self.sc_op[0][(self.Nx*self.Ny+self.Ny+self.Ny):].reshape(self.Nx, 1) - - self.model = {"A_inf":self.A_inf, - "b_inf":self.b_inf, - "u_inf":self.u_inf, - "A_eps":self.A_eps, - "loss_train": self.loss_train, - "loss_test": self.loss_test, - } - - return self.model - - def save_preproc(self, - pes_train, pes_test, - spec_train, spec_test, - X_train, X_test, - Y_train, Y_test, - spec_raw_pe, - data_info, - xgm_pulseen_train, - xgm_pulseen_test - ): - - exp_dir = data_info["exp_dir"] - - hf = h5py.File(f"experiments/{exp_dir}/output/data.h5", 'w') - hf.create_dataset('pes_train', data=pes_train) - hf.create_dataset('pes_test', data=pes_test) - hf.create_dataset('spec_train', data=spec_train) - hf.create_dataset('spec_test', data=spec_test) - hf.create_dataset('X_train', data=X_train) - hf.create_dataset('X_test', data=X_test) - hf.create_dataset('Y_train', data=Y_train) - hf.create_dataset('Y_test', data=Y_test) - hf.create_dataset('spec_raw_pe', data=spec_raw_pe[0,:]) - hf.create_dataset('xgm_pulseen_train', data=xgm_pulseen_train) - hf.create_dataset('xgm_pulseen_test', data=xgm_pulseen_test) - - for key, val in data_info.items(): - hf.attrs[key] = val - - def save_latest_chp(self, exp_dir): - filemodel = f"experiments/{exp_dir}/checkpoints/chp.h5" - hf = h5py.File(filemodel, 'w') - - for k, v in self.model.items(): - print("saving:", k) - hf.create_dataset(k, data=v) - hf.close() - print(f"model saved in -> {filemodel}") - - def load_model(self, exp_dir): - - # Load Checkpoint Data - print("loading model", exp_dir) - filename = f"experiments/{exp_dir}/checkpoints/chp.h5" - file = h5py.File(filename, 'r') - A_inf = file["A_inf"][()] # returns as a numpy array - b_inf = file["b_inf"][()] # returns as a numpy array - u_inf = file["u_inf"][()] # returns as a numpy array - A_eps = file["A_eps"][()] # returns as a numpy array - - - import joblib - pes_pca_model=joblib.load(f"experiments/{exp_dir}/checkpoints/pes_pca_model.joblib") - spec_pca_model=joblib.load(f"experiments/{exp_dir}/checkpoints/spec_pca_model.joblib") - - self.model = {"A_inf":A_inf, - "b_inf":b_inf, - "u_inf":u_inf, - "A_eps":A_eps, - "pes_pca_model":pes_pca_model, - "spec_pca_model":spec_pca_model - - } - return self.model, pes_pca_model, spec_pca_model - - def predict(self, input_data, input_target, spec_target): - - if self.pca_model_spec is not None: - self.model["spec_pca_model"] = self.pca_model_spec # TODO! check for training - - if input_target is None: - self.model["Y_test"] = self.Y_test - else: - self.model["Y_test"] = input_target - - if spec_target is None: - self.model["spec_target"] = self.spec_test - else: - self.model["spec_target"] = spec_target - - - - - - self.result["res_pca"] = (input_data @ self.model["A_inf"] + self.model["b_inf"]) # inference reconstruction in PCA space - self.result["res_pca_unc"] = self.model["u_inf"][0,:] # unc in pca space - self.result["res_pca_eps"] = np.exp(input_data @ self.model["A_eps"] + self.result["res_pca_unc"]) # eps in pca space - - self.result["res"] = self.model["spec_pca_model"].inverse_transform(self.result["res_pca"]) # transform PCA space to real space - - self.result["res_unc"] = self.model["spec_pca_model"].inverse_transform(self.result["res_pca"] + self.model["u_inf"]) - self.model["spec_pca_model"].inverse_transform(self.result["res_pca"] ) - self.result["res_unc"] = np.fabs(self.result["res_unc"]) - - self.result["res_eps"] = self.model["spec_pca_model"].inverse_transform(self.result["res_pca"] + self.result["res_pca_eps"]) - self.model["spec_pca_model"].inverse_transform(self.result["res_pca"] ) - self.result["res_eps"] = np.fabs(self.result["res_eps"]) - - - self.Yhat_pca = self.model["spec_pca_model"].inverse_transform(self.model["Y_test"]) - self.result["res_unc_specpca"] = np.sqrt(((self.Yhat_pca - self.model["spec_target"])**2).mean(axis=0)) - - self.result["res_unc_total"] = np.sqrt(self.result["res_eps"]**2 + self.result["res_unc_specpca"]**2) - return self.result - - - def save_prediction(self, exp_dir): - - fileprediction = f"experiments/{exp_dir}/output/pred_data.h5" - hf = h5py.File(fileprediction, 'w') - hf.create_dataset('res_pca', data=self.result["res_pca"]) - hf.create_dataset('res_pca_unc', data=self.result["res_pca_unc"]) - hf.create_dataset('res', data=self.result["res"]) - hf.create_dataset('res_unc', data=self.result["res_unc"]) - hf.create_dataset('res_unc_specpca', data=self.result["res_unc_specpca"]) - hf.create_dataset('res_unc_total', data=self.result["res_unc_total"]) - hf.create_dataset('res_pca_eps', data=self.result["res_pca_eps"]) - hf.create_dataset('res_eps', data=self.result["res_eps"]) - - hf.close() - - print(f"Prediction result saved in -> {fileprediction}") - - diff --git a/exploration/src/models/fit_methods/__init__.py b/exploration/src/models/fit_methods/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/exploration/src/models/fit_methods/model.py b/exploration/src/models/fit_methods/model.py deleted file mode 100644 index ef7e8d4d66bbd9e56c0b3ae607d4bee986fa0f17..0000000000000000000000000000000000000000 --- a/exploration/src/models/fit_methods/model.py +++ /dev/null @@ -1,164 +0,0 @@ -from sklearn.ensemble import RandomForestRegressor -from sklearn.linear_model import LinearRegression -from sklearn.model_selection import train_test_split -import numpy as np - -import pandas as pd - -from src.models.fit_methods.Fit_Methods import FitBFGSPCA_EPS -from src.data.data_preproc import SpecPreprocessing -from src.models.find_components.Find_Component import FindPCAcomps - -from src.utils.utils import save_chp_bfgs - - -class Model: - def __init__(self, model_type = None, data_info = None): - - - self.model_type = model_type - self.data_info = data_info - - self.data_dict = None - - self.pes_train = None - self.pes_test = None - self.spec_train = None - self.spec_test = None - - self.xgm_pulseen = None - self.xgm_pulseen_train = None - self.xgm_pulseen_test = None - - self.X_train = None - self.X_test = None - self.y_train = None - self.y_test = None - - self.pes_pca_model = None - self.spec_pca_model = None - - - if self.model_type == 'bfgs': - self.user_defined_model = FitBFGS() - #self.user_defined_preproc = SpecPreprocessing() - elif self.model_type == 'bfgs_pca': - self.user_defined_model = FitBFGSPCA() - elif self.model_type == 'bfgs_pca_eps': - self.user_defined_model = FitBFGSPCA_EPS() - - - - def split(self, test_size): - X = np.array(self.df[['Humidity', 'Pressure (millibars)']]) - y = np.array(self.df['Temperature (C)']) - self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(X, y, test_size = test_size, random_state = 42) - - def preprocess(self, data_dict, data_info): - self.data_dict = data_dict - self.data_info = data_info - - self.xgm_pulseen = data_dict["xgm_pulseen"] - - self.spec_data = self.data_dict["spec_raw_int"].copy() # Spec Data Intensity - self.spec_data_pe = self.data_dict["spec_raw_pe"].copy() # Spec Data Photon energy - self.pes_data = self.data_dict["pes_data"].copy() # Pes Data Intensity - - - # Data Preprocessing - SP = SpecPreprocessing(self.spec_data, self.spec_data_pe) - self.spec_data_gc, _ = SP.gaussian_convolve() - self.spec_data = self.spec_data_gc #SP.make_fft() - - # Split Data into TEST TRAIN - if self.data_info["use_data_subset"]: - self.pes_train, self.pes_test, self.spec_train, self.spec_test, self.xgm_pulseen_train, self.xgm_pulseen_test = train_test_split(self.pes_data[self.data_info["data_subset_start"]:self.data_info["data_subset_end"]], - self.spec_data[self.data_info["data_subset_start"]:self.data_info["data_subset_end"]], - self.xgm_pulseen[self.data_info["data_subset_start"]:self.data_info["data_subset_end"]], - test_size=self.data_info["test_size"], - random_state=42 - ) - else: - self.pes_train, self.pes_test, self.spec_train, self.spec_test, self.xgm_pulseen_train, self.xgm_pulseen_test = train_test_split(self.pes_data, - self.spec_data, - self.xgm_pulseen, - test_size=self.data_info["test_size"], - random_state=42 - ) - - - if self.data_info["pes_pca_preprocessing"]: - pes_PCA = FindPCAcomps(data_train=self.pes_train, data_test=self.pes_test, n_pca_comps=self.data_info["n_pca_comps_pes"]) - self.pes_pca_model, self.X_train, self.X_test = pes_PCA.get_pca() - else: - self.X_train, self.X_test = self.pes_train, self.pes_test - - if self.data_info["spec_pca_preprocessing"]: - spec_PCA = FindPCAcomps(data_train=self.spec_train, data_test=self.spec_test, n_pca_comps=self.data_info["n_pca_comps_spec"]) - self.spec_pca_model, self.Y_train, self.Y_test = spec_PCA.get_pca() - self.user_defined_model.pca_model_spec = self.spec_pca_model - self.user_defined_model.spec_test = self.spec_test - - else: - self.Y_train, self.Y_test = self.spec_train, self.spec_test - - print("print Y shape train and test in preprocessing last step :::: ",self.Y_train.shape, self.Y_test.shape) - #save_train_test_h5(pes_train, pes_test, X_train, X_test, Y_train, Y_test, spec_data_pe, self.data_info, data_info["exp_dir"]) - return self.X_train, self.X_test, self.Y_train, self.Y_test - - def save_preproc(self): - if self.model_type == 'bfgs': - - self.user_defined_model.save_preproc(self.pes_train, self.pes_test, - self.spec_train, self.spec_test, - self.X_train, self.X_test, - self.Y_train, self.Y_test, - self.spec_data_pe, - self.data_info, - ) - - import joblib - joblib.dump(self.pes_pca_model, f"experiments/{self.data_info['exp_dir']}/checkpoints/pes_pca_model.joblib") - joblib.dump(self.spec_pca_model, f"experiments/{self.data_info['exp_dir']}/checkpoints/spec_pca_model.joblib") - elif self.model_type == 'bfgs_pca' or self.model_type == 'bfgs_pca_eps': - self.user_defined_model.save_preproc(self.pes_train, self.pes_test, - self.spec_train, self.spec_test, - self.X_train, self.X_test, - self.Y_train, self.Y_test, - self.spec_data_pe, - self.data_info, - self.xgm_pulseen_train, self.xgm_pulseen_test - ) - - import joblib - joblib.dump(self.pes_pca_model, f"experiments/{self.data_info['exp_dir']}/checkpoints/pes_pca_model.joblib") - joblib.dump(self.spec_pca_model, f"experiments/{self.data_info['exp_dir']}/checkpoints/spec_pca_model.joblib") - - - def fit_eval(self, X_train, y_train, X_test, Y_test): - #A_inf, b_inf, u_inf, Y_norm = fig_bgfs.train_bgfs() - self.model = self.user_defined_model.fit_eval(X_train, y_train, X_test, Y_test) # Change the name of self.model to something else ? - - - def save_latest_ckp(self): - self.user_defined_model.save_latest_chp(self.data_info["exp_dir"]) - - def load_model(self): - self.model, self.pes_pca_model, self.spec_pca_model = self.user_defined_model.load_model(self.data_info["exp_dir"]) - return self.model, self.pes_pca_model, self.spec_pca_model - - def predict(self, input_value, target_value=None, spec_target=None): - result = self.user_defined_model.predict(input_value, target_value, spec_target) - return result - - def save_prediction(self): - self.user_defined_model.save_prediction(self.data_info["exp_dir"]) - -if __name__ == '__main__': - print("Creating Model Instance") - #model_instance = Model(model_type=None) - #model_instance.preprocess() - #model_instance.split(0.2) - #model_instance.fit() - #print(model_instance.predict([.9, 1000])) - #print("Accuracy: ", model_instance.model.score(model_instance.X_test, model_instance.y_test)) diff --git a/exploration/src/utils/__init__.py b/exploration/src/utils/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/exploration/src/utils/utils.py b/exploration/src/utils/utils.py deleted file mode 100644 index 0db96e6ddc397c7bfaf771da3aa97a6f3d4c20cf..0000000000000000000000000000000000000000 --- a/exploration/src/utils/utils.py +++ /dev/null @@ -1,238 +0,0 @@ -import numpy as np -import scipy -import matplotlib.pyplot as plt -from skimage import filters as skfilters -import plotly -import plotly.graph_objs as go - -import os - - - -# plot lines -def plot_gallery_line(title, data, n_col, n_row): - plt.figure(figsize=(2.0 * n_col, 2.26 * n_row)) - plt.suptitle(title, size=16) - for i, comp in enumerate(data): - plt.subplot(n_row, n_col, i + 1) - vmax = max(comp.max(), -comp.min()) - plt.plot( - comp - ) - - if i + 1 not in [k*n_col+1 for k in range(n_row)]: - plt.xticks(()) - plt.yticks(()) - - plt.subplots_adjust(0.01, 0.05, 0.99, 0.93, 0.04, 0.0) - - - -def plot_unc_band(lp_hr, lp_rec, lp_rec_unc, fit_method): - - x_pltly = np.arange(len(lp_hr)) - - - fig = go.Figure([ - go.Scatter( - name='HR (gt)', - x=x_pltly, - y=lp_hr, - mode='lines', - line=dict(color='rgb(0, 150, 255)'), - ), - go.Scatter( - name=f"{fit_method}", - x=x_pltly, - y=lp_rec, - mode='lines', - line=dict(color='rgb(255,99,71)'), - ), - go.Scatter( - name='Upper Bound', - x=x_pltly, - y=lp_rec + 2*lp_rec_unc, - mode='lines', - marker=dict(color="#DC143C"), - line=dict(width=0), - showlegend=False - ), - go.Scatter( - name='Lower Bound', - x=x_pltly, - y=lp_rec - 2*lp_rec_unc, - marker=dict(color="#DC143C"), - line=dict(width=0), - mode='lines', - fillcolor='rgba(255,99,71, 0.3)', - fill='tonexty', - showlegend=False - ) - ]) - fig.update_layout( - yaxis_title='log intensity (a.u.)', - xaxis_title='pixel/coord number (a.u.)', - title=f"{fit_method} reconstruction with 95% confidence on uncertainities", - hovermode="x" - ) - fig.show() - - - -def create_experiment_dirs(exp_dir): - """ - Create Directories of a regular tensorflow experiment directory - :param exp_dir: - :return summary_dir, checkpoint_dir: - """ - experiment_dir = "experiments/" + exp_dir + "/" - summary_dir = experiment_dir + 'summaries/' - checkpoint_dir = experiment_dir + 'checkpoints/' - output_dir = experiment_dir + 'output/' - test_dir = experiment_dir + 'test/' - dirs = [summary_dir, checkpoint_dir, output_dir, test_dir] - try: - for dir_ in dirs: - if not os.path.exists(dir_): - os.makedirs(dir_) - print("Experiment directories created") - return experiment_dir, summary_dir, checkpoint_dir, output_dir, test_dir - except Exception as err: - print("Creating directories error: {0}".format(err)) - exit(-1) - - -def save_train_test_h5(pes_train, pes_test, X_train, X_test, Y_train, Y_test, spec_raw_pe, data_info, exp_dir): - import h5py - - hf = h5py.File(f"experiments/{exp_dir}/output/data.h5", 'w') - hf.create_dataset('pes_train', data=pes_train) - hf.create_dataset('pes_test', data=pes_test) - hf.create_dataset('X_train', data=X_train) - hf.create_dataset('X_test', data=X_test) - hf.create_dataset('Y_train', data=Y_train) - hf.create_dataset('Y_test', data=Y_test) - hf.create_dataset('spec_raw_pe', data=spec_raw_pe[0,:]) - for key, val in data_info.items(): - hf.attrs[key] = val - - - hf.close() - -def save_rec_data_h5(sc_res, sc_res_unc, exp_dir): - import h5py - hf = h5py.File(f"experiments/{exp_dir}/output/rec_data.h5", 'w') - hf.create_dataset('sc_res', data=sc_res) - hf.create_dataset('sc_res_unc', data=sc_res_unc) - hf.close() - - -def save_checkpoint(A_inf, b_inf, u_inf, Y_norm, exp_dir): - - import h5py - hf = h5py.File(f"experiments/{exp_dir}/checkpoints/chp.h5", 'w') - hf.create_dataset('A_inf', data=A_inf) - hf.create_dataset('b_inf', data=b_inf) - hf.create_dataset('u_inf', data=u_inf) - hf.create_dataset('Y_norm', data=Y_norm) - hf.close() - -def save_chp_bfgs(model, exp_dir): - import h5py - hf = h5py.File(f"experiments/{exp_dir}/checkpoints/chp.h5", 'w') - - for k, v in model.items(): - hf.create_dataset(k, data=v) - hf.close() - - - -def save_checkpoint_fft(A_inf_1, b_inf_1, - A_inf_2, b_inf_2, - u_inf_1, u_inf_2, - Y_norm, exp_dir): - - import h5py - hf = h5py.File(f"experiments/{exp_dir}/checkpoints/chp.h5", 'w') - hf.create_dataset('A_inf_1', data=A_inf_1) - hf.create_dataset('b_inf_1', data=b_inf_1) - hf.create_dataset('A_inf_2', data=A_inf_2) - hf.create_dataset('b_inf_2', data=b_inf_2) - hf.create_dataset('u_inf_1', data=u_inf_1) - hf.create_dataset('u_inf_2', data=u_inf_2) - hf.create_dataset('Y_norm', data=Y_norm) - hf.close() - - - -def load_train_test_h5(exp_dir): - import h5py - # Load Data - filename = f"experiments/{exp_dir}/output/data.h5" - file = h5py.File(filename, 'r') - pes_train = file["pes_train"][()] # returns as a numpy array - pes_test = file["pes_test"][()] # returns as a numpy array - spec_train = file["spec_train"][()] # returns as a numpy array - spec_test = file["spec_test"][()] # returns as a numpy array - X_train = file["X_train"][()] # returns as a numpy array - X_test = file["X_test"][()] # returns as a numpy array - Y_train = file["Y_train"][()] # returns as a numpy array - Y_test = file["Y_test"][()] # returns as a numpy array - spec_raw_pe = file["spec_raw_pe"][()] # returns as a numpy - xgm_pulseen_train = file["xgm_pulseen_train"][()] - xgm_pulseen_test = file["xgm_pulseen_test"][()] - - att_names = file.attrs.keys() - att_dict = {} - for att_name in att_names: - att_dict[att_name] = file.attrs[att_name] - - return Y_train, Y_test, spec_train, spec_test, spec_raw_pe, X_train, X_test, pes_train, pes_test, att_dict, xgm_pulseen_train, xgm_pulseen_test - - -def load_rec_data_h5(exp_dir): - import h5py - filename = f"experiments/{exp_dir}/output/pred_data.h5" - file = h5py.File(filename, 'r') - sc_res = file["res"][()] # returns as a numpy array - sc_res_unc = file["res_unc"][()] # returns as a numpy array - - return sc_res, sc_res_unc - -def load_rec_data_h5_bfgs_pca(exp_dir): - import h5py - filename = f"experiments/{exp_dir}/output/pred_data.h5" - file = h5py.File(filename, 'r') - sc_res = file["res"][()] # returns as a numpy array - sc_res_unc = file["res_unc"][()] # returns as a numpy array - sc_res_unc_specpca = file["res_unc_specpca"][()] # returns as a numpy array - sc_res_unc_total = file["res_unc_total"][()] # returns as a numpy array - - - return sc_res, sc_res_unc, sc_res_unc_specpca, sc_res_unc_total - -def load_rec_data_h5_bfgs_pca_eps(exp_dir): - import h5py - filename = f"experiments/{exp_dir}/output/pred_data.h5" - file = h5py.File(filename, 'r') - sc_res = file["res"][()] # returns as a numpy array - sc_res_unc = file["res_unc"][()] # returns as a numpy array - sc_res_unc_specpca = file["res_unc_specpca"][()] # returns as a numpy array - sc_res_unc_total = file["res_unc_total"][()] # returns as a numpy array - sc_res_eps = file["res_eps"][()] # returns as a numpy array - - return sc_res, sc_res_unc, sc_res_unc_specpca, sc_res_unc_total, sc_res_eps - -def load_checkpoint(exp_dir): - import h5py - # Load Checkpoint Data - filename = f"experiments/{exp_dir}/checkpoints/chp.h5" - file = h5py.File(filename, 'r') - A_inf = file["A_inf"][()] # returns as a numpy array - b_inf = file["b_inf"][()] # returns as a numpy array - u_inf = file["u_inf"][()] # returns as a numpy array - Y_norm = file["Y_norm"][()] # returns as a numpy array - - return A_inf, b_inf, u_inf, Y_norm - - diff --git a/pes_to_spec/__init__.py b/pes_to_spec/__init__.py index 97708c0e9346331019608654f315c43309499e37..7f354eff8b04d602fce25290864cd9441780ea3b 100644 --- a/pes_to_spec/__init__.py +++ b/pes_to_spec/__init__.py @@ -2,4 +2,4 @@ Estimate high-resolution photon spectrometer data from low-resolution non-invasive measurements. """ -VERSION = "0.2.7" +VERSION = "0.2.8"