Skip to content
Snippets Groups Projects
Commit c198865e authored by Danilo Ferreira de Lima's avatar Danilo Ferreira de Lima
Browse files

Started simplified setup and clean up.

parent 04d7d131
No related branches found
No related tags found
No related merge requests found
from typing import Dict, List, Optional

import numpy as np
from scipy.signal import fftconvolve
from sklearn.decomposition import PCA
def matching_ids(a: np.ndarray, b: np.ndarray, c: np.ndarray) -> np.ndarray:
    """
    Return the train IDs common to `a`, `b` and `c`.

    Args:
      a: First collection of train IDs.
      b: Second collection of train IDs.
      c: Third collection of train IDs.

    Returns: Sorted 1D array of the unique IDs present in all three inputs.
      The dtype follows the inputs, also when the intersection is empty.
    """
    # np.intersect1d returns sorted unique values, so the result is
    # deterministic instead of following set iteration order.
    return np.intersect1d(np.intersect1d(a, b), c)
class Model:
    """
    Object representing a previous fit of the model to be used to predict high-resolution
    spectrum from a low-resolution one.

    The low- and high-resolution data are each compressed with a whitened PCA.
    `fit` additionally stores a linear least-squares map between the two PCA
    spaces, which `predict` applies before inverting the high-resolution PCA.

    Args:
      channels: Selected channels to use as an input for the low resolution data.
                If None, `DEFAULT_CHANNELS` is used.
      n_pca_lr: Number of low-resolution data PCA components.
      n_pca_hr: Number of high-resolution data PCA components.
    """

    # Channels used when the caller does not select any. Kept as an immutable
    # tuple so that the default cannot be modified through an instance.
    DEFAULT_CHANNELS = ("channel_1_D",
                        "channel_2_B",
                        "channel_3_A",
                        "channel_3_B",
                        "channel_4_C",
                        "channel_4_D")

    def __init__(self,
                 channels: Optional[List[str]] = None,
                 n_pca_lr: int = 400,
                 n_pca_hr: int = 20):
        # a fresh list per instance avoids sharing one mutable default
        self.channels = list(self.DEFAULT_CHANNELS) if channels is None else channels
        self.n_pca_lr = n_pca_lr
        self.n_pca_hr = n_pca_hr

        # PCA models
        self.lr_pca = PCA(n_pca_lr, whiten=True)
        self.hr_pca = PCA(n_pca_hr, whiten=True)

        # linear map from low-res PCA space to high-res PCA space, set by fit()
        self.lr_to_hr = None

        # where to cut on the ToF PES data
        self.tof_start = 31445
        self.delta_tof = 200
        self.tof_end = self.tof_start + self.delta_tof

        # smoothing of the SPEC data in eV
        self.high_res_sigma = 0.2

    def preprocess_low_res(self, low_res_data: Dict[str, np.ndarray]) -> np.ndarray:
        """
        Get a dictionary with the channel names for the input low resolution data and output
        only the relevant input data in an array.

        Args:
          low_res_data: Dictionary with keys named channel_{i}_{k}, where i is a number between 1 and 4 and k is a letter between A and D.

        Returns: Concatenated and pre-processed low-resolution data of shape (train_id, features).
        """
        # keep only the [tof_start, tof_end) window of each selected channel
        # and place the channels side by side along the feature axis
        cat = np.concatenate([low_res_data[k][:, self.tof_start:self.tof_end] for k in self.channels], axis=1)
        return cat

    def preprocess_high_res(self, high_res_data: np.ndarray) -> np.ndarray:
        """
        Get the high resolution data and preprocess it.

        The data is convolved along axis 1 with a Gaussian of width
        `high_res_sigma` and divided by 80.

        Args:
          high_res_data: High resolution data with shape (train_id, features).

        Returns: Pre-processed high-resolution data with the same shape as the input.
        """
        # Apply smoothing
        # TODO: Why?!
        # NOTE(review): the Gaussian below is evaluated on the spectrum values
        # themselves (high_res_data - mu) and not on an energy axis, although
        # high_res_sigma is described as being in eV. This looks unintended;
        # confirm, and build the kernel from the energy axis if so.
        # The behaviour is kept unchanged here.
        mu = high_res_data[0, high_res_data.shape[1]//2]
        gaussian = np.exp(-((high_res_data - mu)/self.high_res_sigma)**2/2)/np.sqrt(2*np.pi*self.high_res_sigma**2)
        # TODO: why 80?!
        high_res_gc = fftconvolve(high_res_data, gaussian, mode="same", axes=1)/80
        return high_res_gc

    def fit(self, low_res_data: Dict[str, np.ndarray], high_res_data: np.ndarray):
        """
        Train the model.

        Fits both PCAs and a least-squares linear map from the low-resolution
        PCA components to the high-resolution PCA components.

        Args:
          low_res_data: Low resolution data as a dictionary with the key set to `channel_{i}_{k}`, where i is a number between 1 and 4 and k is a letter between A and D. For each dictionary entry, a numpy array is expected with shape (train_id, ToF channel).
          high_res_data: Reference high resolution data with a one-to-one match to the low resolution data in the train_id dimension. Shape (train_id, ToF channel).
        """
        low_res = self.preprocess_low_res(low_res_data)
        high_res = self.preprocess_high_res(high_res_data)

        # fit PCA
        low_pca = self.lr_pca.fit_transform(low_res)
        high_pca = self.hr_pca.fit_transform(high_res)

        # Fit the linear model linking both PCA spaces. The PCA scores are
        # mean-centred on the training set, so no intercept term is fitted.
        self.lr_to_hr = np.linalg.lstsq(low_pca, high_pca, rcond=None)[0]

    def predict(self, low_res_data: Dict[str, np.ndarray]) -> np.ndarray:
        """
        Predict a high-resolution spectrum from a low resolution given one.

        Args:
          low_res_data: Low resolution data as a dictionary, in the same format as in the fit step.

        Returns: Predicted high resolution data with shape (train_id, features).
          Uncertainties are not part of the output yet (see TODO below).

        Raises:
          RuntimeError: If `fit` has not been called before.
        """
        if self.lr_to_hr is None:
            raise RuntimeError("Model.fit must be called before Model.predict.")

        low_res = self.preprocess_low_res(low_res_data)
        low_pca = self.lr_pca.transform(low_res)

        # map low-res PCA components onto high-res PCA components
        high_pca = low_pca @ self.lr_to_hr

        high_res_predicted = self.hr_pca.inverse_transform(high_pca)
        # TODO: Add uncertainties
        return high_res_predicted
......@@ -161,4 +161,4 @@ if __name__ == '__main__':
#model_instance.split(0.2)
#model_instance.fit()
#print(model_instance.predict([.9, 1000]))
#print("Accuracy: ", model_instance.model.score(model_instance.X_test, model_instance.y_test))
\ No newline at end of file
#print("Accuracy: ", model_instance.model.score(model_instance.X_test, model_instance.y_test))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment