Skip to content
Snippets Groups Projects
Commit 13e51a7d authored by Danilo Ferreira de Lima's avatar Danilo Ferreira de Lima
Browse files

Storing KDE of the XGM and intensity profiles.

parent f83e3c90
No related branches found
No related tags found
No related merge requests found
...@@ -52,11 +52,11 @@ class FitModel(RegressorMixin, BaseEstimator): ...@@ -52,11 +52,11 @@ class FitModel(RegressorMixin, BaseEstimator):
# fit result # fit result
self.pars: Optional[Dict[str, np.ndarray]] = None self.pars: Optional[Dict[str, np.ndarray]] = None
self.structure: Optional[Dict[str, Tuple[int, int]]] = None self.structure: Optional[Dict[str, Tuple[int, int]]] = None
# fit monitoring # fit monitoring
self.loss_train: List[float] = list() self.loss_train: List[float] = list()
self.loss_test: List[float] = list() self.loss_test: List[float] = list()
self.nll_train: Optional[np.ndarray] = None self.nll_train: Optional[np.ndarray] = None
self.nll_train_expected: Optional[np.ndarray] = None self.nll_train_expected: Optional[np.ndarray] = None
...@@ -163,7 +163,7 @@ class FitModel(RegressorMixin, BaseEstimator): ...@@ -163,7 +163,7 @@ class FitModel(RegressorMixin, BaseEstimator):
iprint=0) iprint=0)
# Inference # Inference
self.pars = FitModel.get_pars(sc_op[0], self.structure) self.pars = FitModel.get_pars(sc_op[0], self.structure)
self.nll_train = sc_op[1] self.nll_train = sc_op[1]
self.nll_train_expected = np.mean(self.nll(X, pars=self.pars), axis=0, keepdims=True) self.nll_train_expected = np.mean(self.nll(X, pars=self.pars), axis=0, keepdims=True)
...@@ -219,22 +219,22 @@ class FitModel(RegressorMixin, BaseEstimator): ...@@ -219,22 +219,22 @@ class FitModel(RegressorMixin, BaseEstimator):
-log p = log(2) + log(b) -log p = log(2) + log(b)
We return -log [p(M|X,Y=mu(X))/p(M|X_train,Y_train)] = -log p(M|X,Y=mu(X)) + log p(M|X_train, Y_traun) We return -log [p(M|X,Y=mu(X))/p(M|X_train,Y_train)] = -log p(M|X,Y=mu(X)) + log p(M|X_train, Y_traun)
Negative log likelihood L allows one Negative log likelihood L allows one
Args: Args:
X: The input data. X: The input data.
Y: The true result. If None, set it to the expectation. Y: The true result. If None, set it to the expectation.
Returns: negative log likelihood. Returns: negative log likelihood.
""" """
if pars is None: if pars is None:
pars = self.pars pars = self.pars
A = pars["A_inf"] A = pars["A_inf"]
b = pars["b_inf"] b = pars["b_inf"]
X2 = anp.square(X) X2 = anp.square(X)
Ap = pars["Ap_inf"] Ap = pars["Ap_inf"]
Y_pred = anp.matmul(X2, Ap) + anp.matmul(X, A) + b Y_pred = anp.matmul(X2, Ap) + anp.matmul(X, A) + b
log_inv_alpha = anp.matmul(X, pars["log_inv_alpha_prime"]) + pars["log_inv_alpha"] log_inv_alpha = anp.matmul(X, pars["log_inv_alpha_prime"]) + pars["log_inv_alpha"]
sqrt_alpha = anp.exp(-0.5*log_inv_alpha) sqrt_alpha = anp.exp(-0.5*log_inv_alpha)
#log_inv_tau1 = pars["log_inv_tau1"] #log_inv_tau1 = pars["log_inv_tau1"]
...@@ -399,7 +399,7 @@ class UncertaintyHolder(TransformerMixin, BaseEstimator): ...@@ -399,7 +399,7 @@ class UncertaintyHolder(TransformerMixin, BaseEstimator):
def uncertainty(self): def uncertainty(self):
"""The uncertainty recorded.""" """The uncertainty recorded."""
return self.unc return self.unc
class DataHolder(TransformerMixin, BaseEstimator): class DataHolder(TransformerMixin, BaseEstimator):
""" """
Keep track of relevance dictionaries. Keep track of relevance dictionaries.
...@@ -416,7 +416,7 @@ class DataHolder(TransformerMixin, BaseEstimator): ...@@ -416,7 +416,7 @@ class DataHolder(TransformerMixin, BaseEstimator):
data: Data. data: Data.
""" """
self.data = deepcopy(data) self.data = deepcopy(data)
def get_data(self) -> Dict[str, Any]: def get_data(self) -> Dict[str, Any]:
""" """
Get. Get.
...@@ -647,7 +647,7 @@ class MultiOutputWithStd(MetaEstimatorMixin, BaseEstimator): ...@@ -647,7 +647,7 @@ class MultiOutputWithStd(MetaEstimatorMixin, BaseEstimator):
return np.asarray(y).T, np.asarray(unc).T return np.asarray(y).T, np.asarray(unc).T
return np.asarray(y).T return np.asarray(y).T
class Model(TransformerMixin, BaseEstimator): class Model(TransformerMixin, BaseEstimator):
""" """
Object representing a previous fit of the model to be used to predict high-resolution Object representing a previous fit of the model to be used to predict high-resolution
...@@ -707,6 +707,8 @@ class Model(TransformerMixin, BaseEstimator): ...@@ -707,6 +707,8 @@ class Model(TransformerMixin, BaseEstimator):
self.fit_model = MultiOutputWithStd(BayesianRidge(n_iter=300, tol=1e-8, verbose=True), n_jobs=8) self.fit_model = MultiOutputWithStd(BayesianRidge(n_iter=300, tol=1e-8, verbose=True), n_jobs=8)
#self.fit_model = FitModel() #self.fit_model = FitModel()
self.kde_xgm = None
self.kde_intensity = None
self.mu_intensity = np.nan self.mu_intensity = np.nan
self.std_intensity = np.nan self.std_intensity = np.nan
...@@ -754,22 +756,22 @@ class Model(TransformerMixin, BaseEstimator): ...@@ -754,22 +756,22 @@ class Model(TransformerMixin, BaseEstimator):
Returns: Smoothened spectrum. Returns: Smoothened spectrum.
""" """
return self.y_model['smoothen'].transform(high_res_data) return self.y_model['smoothen'].transform(high_res_data)
def uniformize(self, intensity: np.ndarray) -> np.ndarray: def uniformize(self, intensity: np.ndarray) -> np.ndarray:
""" """
Calculate weights to uniformize data in variable intensity. Calculate weights to uniformize data in variable intensity.
Args: Args:
intensity: The variable to uniformize the weights on. intensity: The variable to uniformize the weights on.
Return: weights. Return: weights.
""" """
kde = KernelDensity() self.kde_xgm = KernelDensity()
kde.fit(intensity) self.kde_xgm.fit(intensity)
q = np.quantile(intensity, [0.10, 0.90]) q = np.quantile(intensity, [0.10, 0.90])
l, h = q[0], q[1] l, h = q[0], q[1]
x = intensity*((intensity > l) & (intensity < h)) + l*(intensity <= l) + h*(intensity >= h) x = intensity*((intensity > l) & (intensity < h)) + l*(intensity <= l) + h*(intensity >= h)
log_prob = kde.score_samples(x) log_prob = self.kde_xgm.score_samples(x)
w = np.exp(-log_prob) w = np.exp(-log_prob)
w = w/np.median(w) w = w/np.median(w)
return w return w
...@@ -793,7 +795,7 @@ class Model(TransformerMixin, BaseEstimator): ...@@ -793,7 +795,7 @@ class Model(TransformerMixin, BaseEstimator):
Returns: Smoothened high resolution spectrum. Returns: Smoothened high resolution spectrum.
""" """
if weights is None: if weights is None:
weights = np.ones(high_Res_data.shape[0]) weights = np.ones(high_res_data.shape[0])
print("Fitting PCA on low-resolution data.") print("Fitting PCA on low-resolution data.")
low_res_select = self.x_select.fit_transform(low_res_data) low_res_select = self.x_select.fit_transform(low_res_data)
n_components = min(self.x_model["pca"].n_components, low_res_select.shape[0]) n_components = min(self.x_model["pca"].n_components, low_res_select.shape[0])
...@@ -802,6 +804,8 @@ class Model(TransformerMixin, BaseEstimator): ...@@ -802,6 +804,8 @@ class Model(TransformerMixin, BaseEstimator):
print("Fitting PCA on high-resolution data.") print("Fitting PCA on high-resolution data.")
y_t = self.y_model.fit_transform(high_res_data, smoothen__energy=high_res_photon_energy) y_t = self.y_model.fit_transform(high_res_data, smoothen__energy=high_res_photon_energy)
intensity = np.sum(y_t, axis=1) intensity = np.sum(y_t, axis=1)
self.kde_intensity = KernelDensity()
self.kde_intensity.fit(intensity)
self.mu_intensity = np.mean(intensity, axis=0) self.mu_intensity = np.mean(intensity, axis=0)
self.sigma_intensity = np.mean(intensity, axis=0) self.sigma_intensity = np.mean(intensity, axis=0)
#self.fit_model.set_params(fex__gamma=1.0/float(x_t.shape[0])) #self.fit_model.set_params(fex__gamma=1.0/float(x_t.shape[0]))
...@@ -826,9 +830,7 @@ class Model(TransformerMixin, BaseEstimator): ...@@ -826,9 +830,7 @@ class Model(TransformerMixin, BaseEstimator):
print(f"Calculate PCA on {channel}") print(f"Calculate PCA on {channel}")
low_pca = self.channel_pca[channel].fit_transform(low_res_selected[channel]) low_pca = self.channel_pca[channel].fit_transform(low_res_selected[channel])
self.ood[channel].fit(low_pca) self.ood[channel].fit(low_pca)
#low_pca_rec = self.channel_pca[channel].inverse_transform(low_pca)
#self.channel_fit_model[channel].fit(low_pca, y_t)
print("End of fit.") print("End of fit.")
return high_res return high_res
...@@ -844,27 +846,12 @@ class Model(TransformerMixin, BaseEstimator): ...@@ -844,27 +846,12 @@ class Model(TransformerMixin, BaseEstimator):
Returns: Outlier score. If it is bigger than 0, this is an outlier. Returns: Outlier score. If it is bigger than 0, this is an outlier.
""" """
selection_model = self.x_select selection_model = self.x_select
quality = {channel: 0.0 for channel in low_res_data.keys()}
channels = list(low_res_data.keys()) channels = list(low_res_data.keys())
# check if each channel is close to the mean # check if each channel is close to the mean
low_res_selected = selection_model.transform(low_res_data, keep_dictionary_structure=True) low_res_selected = selection_model.transform(low_res_data, keep_dictionary_structure=True)
low_pca = {ch: self.channel_pca[ch].transform(low_res_selected[ch]) low_pca = {ch: self.channel_pca[ch].transform(low_res_selected[ch])
for ch in channels} for ch in channels}
return {ch: self.ood[ch].predict(low_pca[ch]) for ch in channels} return {ch: self.ood[ch].predict(low_pca[ch]) for ch in channels}
## for chi2
#deviation = {ch: low_pca[ch]
# for ch in channels}
#ndof = {ch: float(deviation[ch].shape[1] - 1)
# for ch in channels}
#chi2 = {ch: np.sum(deviation[ch]**2, axis=1, keepdims=True)
# for ch in channels}
#chi2_mu = {ch: scipy.stats.chi2.mean(ndof[ch])
# for ch in channels}
#chi2_sigma = {ch: scipy.stats.chi2.std(ndof[ch])
# for ch in channels}
#return {ch: (chi2[ch] - chi2_mu[ch])/chi2_sigma[ch]
# for ch in channels}
def check_compatibility(self, low_res_data: Dict[str, np.ndarray]) -> np.ndarray: def check_compatibility(self, low_res_data: Dict[str, np.ndarray]) -> np.ndarray:
""" """
...@@ -878,17 +865,16 @@ class Model(TransformerMixin, BaseEstimator): ...@@ -878,17 +865,16 @@ class Model(TransformerMixin, BaseEstimator):
""" """
low_res = self.x_select.transform(low_res_data) low_res = self.x_select.transform(low_res_data)
pca_model = self.x_model pca_model = self.x_model
#pca_model = self.x_model['pca']
#if 'fex' in self.x_model.named_steps:
# pca_model = self.x_model['fex'].named_steps['prepca']
low_pca = pca_model.transform(low_res) low_pca = pca_model.transform(low_res)
return self.ood['full'].predict(low_pca) return self.ood['full'].predict(low_pca)
#deviation = low_pca
#ndof = float(deviation.shape[1] - 1) def xgm_profile(self) -> KernelDensity:
#chi2 = np.sum(deviation**2, axis=1, keepdims=True) """Get KDE for the XGM intensity."""
#chi2_mu = scipy.stats.chi2.mean(ndof) return self.kde_xgm
#chi2_sigma = scipy.stats.chi2.std(ndof)
#return (chi2 - chi2_mu)/chi2_sigma def intensity_profile(self) -> KernelDensity:
"""Get KDE for the predicted intensity."""
return self.kde_intensity
def predict(self, low_res_data: Dict[str, np.ndarray]) -> Dict[str, np.ndarray]: def predict(self, low_res_data: Dict[str, np.ndarray]) -> Dict[str, np.ndarray]:
""" """
...@@ -945,7 +931,9 @@ class Model(TransformerMixin, BaseEstimator): ...@@ -945,7 +931,9 @@ class Model(TransformerMixin, BaseEstimator):
#self.channel_fit_model #self.channel_fit_model
DataHolder(self.mu_intensity), DataHolder(self.mu_intensity),
DataHolder(self.sigma_intensity), DataHolder(self.sigma_intensity),
self.ood self.ood,
self.kde_xgm,
self.kde_intensity,
], filename, compress='zlib') ], filename, compress='zlib')
@staticmethod @staticmethod
...@@ -964,7 +952,9 @@ class Model(TransformerMixin, BaseEstimator): ...@@ -964,7 +952,9 @@ class Model(TransformerMixin, BaseEstimator):
#channel_fit_model #channel_fit_model
mu_intensity, mu_intensity,
sigma_intensity, sigma_intensity,
ood ood,
kde_xgm,
kde_intensity,
) = joblib.load(filename) ) = joblib.load(filename)
obj = Model() obj = Model()
obj.x_select = x_select obj.x_select = x_select
...@@ -976,5 +966,7 @@ class Model(TransformerMixin, BaseEstimator): ...@@ -976,5 +966,7 @@ class Model(TransformerMixin, BaseEstimator):
obj.ood = ood obj.ood = ood
obj.mu_intensity = mu_intensity.get_data() obj.mu_intensity = mu_intensity.get_data()
obj.sigma_intensity = sigma_intensity.get_data() obj.sigma_intensity = sigma_intensity.get_data()
obj.kde_xgm = kde_xgm
obj.kde_intensity = kde_intensity
return obj return obj
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment