diff --git a/pes_to_spec/__init__.py b/pes_to_spec/__init__.py index ca6f3e9f1fcf657fec6836e8f13c3c178112bc3b..f40017d528d664e428b5f4c0f013848ca943d83d 100644 --- a/pes_to_spec/__init__.py +++ b/pes_to_spec/__init__.py @@ -2,4 +2,4 @@ Estimate high-resolution photon spectrometer data from low-resolution non-invasive measurements. """ -VERSION = "0.0.9" +VERSION = "0.1.2" diff --git a/pes_to_spec/model.py b/pes_to_spec/model.py index f54f7d43bd0d533256edbdbab8117e1996d68785..7cd34852939327276b9377d533c5edc0874408c1 100644 --- a/pes_to_spec/model.py +++ b/pes_to_spec/model.py @@ -3,23 +3,18 @@ from __future__ import annotations import joblib import numpy as np -import scipy.stats from scipy.signal import fftconvolve #from scipy.signal import find_peaks_cwt from scipy.optimize import fmin_l_bfgs_b from sklearn.decomposition import PCA -from sklearn.pipeline import Pipeline, FeatureUnion -from sklearn.preprocessing import FunctionTransformer from sklearn.base import TransformerMixin, BaseEstimator from sklearn.base import RegressorMixin +from sklearn.pipeline import Pipeline from sklearn.kernel_approximation import Nystroem -from sklearn.preprocessing import PolynomialFeatures -from sklearn.linear_model import ARDRegression +#from sklearn.linear_model import ARDRegression from sklearn.linear_model import BayesianRidge from sklearn.neighbors import KernelDensity from sklearn.ensemble import IsolationForest -#from sklearn.neighbors import LocalOutlierFactor -#from sklearn.covariance import EllipticEnvelope from functools import reduce from itertools import product @@ -166,6 +161,7 @@ class FitModel(RegressorMixin, BaseEstimator): self.pars = FitModel.get_pars(sc_op[0], self.structure) self.nll_train = sc_op[1] self.nll_train_expected = np.mean(self.nll(X, pars=self.pars), axis=0, keepdims=True) + return self def predict(self, X: np.ndarray, return_std: bool=False) -> Union[Tuple[np.ndarray, np.ndarray], np.ndarray]: """ @@ -681,6 +677,7 @@ class Model(TransformerMixin, BaseEstimator): n_nonlinear_kernel: int=0, poly: bool=False, ): + self.high_res_sigma = high_res_sigma # models self.x_select = SelectRelevantLowResolution(channels, tof_start, delta_tof, poly=poly) x_model_steps = list() @@ -768,10 +765,10 @@ class Model(TransformerMixin, BaseEstimator): Return: weights. """ - self.kde_xgm = KernelDensity() + self.kde_xgm = KernelDensity(bandwidth="scott", kernel="gaussian") self.kde_xgm.fit(intensity.reshape(-1, 1)) - self.mu_xgm = np.mean(intensity, axis=0) - self.sigma_xgm = np.mean(intensity, axis=0) + self.mu_xgm = np.mean(intensity.reshape(-1), axis=0) + self.sigma_xgm = np.std(intensity.reshape(-1), axis=0) q = np.quantile(intensity, [0.10, 0.90]) l, h = q[0], q[1] x = intensity*((intensity > l) & (intensity < h)) + l*(intensity <= l) + h*(intensity >= h) @@ -807,11 +804,7 @@ class Model(TransformerMixin, BaseEstimator): x_t = self.x_model.fit_transform(low_res_select) print("Fitting PCA on high-resolution data.") y_t = self.y_model.fit_transform(high_res_data, smoothen__energy=high_res_photon_energy) - intensity = np.sum(y_t, axis=1) - self.kde_intensity = KernelDensity() - self.kde_intensity.fit(intensity.reshape(-1, 1)) - self.mu_intensity = np.mean(intensity, axis=0) - self.sigma_intensity = np.std(intensity, axis=0) + #self.fit_model.set_params(fex__gamma=1.0/float(x_t.shape[0])) print("Fitting outlier detection") self.ood['full'].fit(x_t) @@ -827,6 +820,111 @@ class Model(TransformerMixin, BaseEstimator): high_pca_unc = np.sqrt(np.mean((high_res - high_pca_rec)**2, axis=0, keepdims=True)) self.y_model['unc'].set_uncertainty(high_pca_unc) + print("Calculate transfer function") + # Model: z(e) = conv(h(e), d(e)) + n(e) + # d: true high-resolution data + # h: the effect of the model + # z: estimated high-resolution data + # n: noise (uncertainty) + # e: energy + # true signal (as far as we can get -- it is smoothened, but this is the model target) + d = high_res[inliers] + D = np.fft.fft(d) + + y_pred, n = self.fit_model.predict(x_t[inliers], return_std=True) + z = self.y_model['pca'].inverse_transform(y_pred) + + #n = np.sqrt((self.y_model['pca'].inverse_transform(y_pred + n) - z)**2 + high_pca_unc**2) + e = high_res_photon_energy[0,:] if len(high_res_photon_energy.shape) == 2 else high_res_photon_energy + + Z = np.fft.fft(z) + #V = np.fft.fft(np.mean(n, axis=0)) + + de = e[1] - e[0] + E = np.fft.fftfreq(len(e), de) + e_axis = np.linspace(-0.5*(e[-1] - e[0]), 0.5*(e[-1] - e[0]), len(e)) + # generate a gaussian + gaussian = np.exp(-0.5*(e_axis)**2/self.high_res_sigma**2) + gaussian /= np.sum(gaussian, axis=0, keepdims=True) + gaussian = np.clip(gaussian, a_min=1e-6, a_max=None) + gaussian_ft = np.fft.fft(gaussian) + + H = np.mean(Z/D, axis=0) + N = np.absolute(gaussian_ft)**2 + S = np.mean(np.absolute(D)**2, axis=0) + H2 = np.absolute(H)**2 + nonzero = np.absolute(H) > 0.2 + Hinv = (1.0/H)*nonzero + # Wiener filter: + G = Hinv * (H2 * S) / (H2 * S + N) + + #import matplotlib.pyplot as plt + #from matplotlib.gridspec import GridSpec + #fig = plt.figure(figsize=(40, 40)) + #gs = GridSpec(2, 2) + #ax = fig.add_subplot(gs[0, 0]) + #ax.plot(np.fft.fftshift(np.mean(np.absolute(Z), axis=0)), c='b', lw=3, label="Prediction") + #ax.plot(np.fft.fftshift(np.mean(np.absolute(D), axis=0)), c='r', lw=3, label="True") + #ax.legend() + #ax.set(title=f"", + # xlabel="Reciprocal energy [1/eV]", + # ylabel="Intensity [a.u.]", + # yscale='log', + # ) + #ax = fig.add_subplot(gs[0, 1]) + #ax.plot(np.fft.fftshift(np.absolute(H)**2), c='b', lw=3, label=r"$|H|^2$") + #ax.plot(np.fft.fftshift(np.absolute(Hinv)), c='k', lw=3, label=r"$|H^{-1}|$") + #ax.plot(np.fft.fftshift(N), c='g', lw=3, label=r"$N$") + #ax.plot(np.fft.fftshift(S), c='r', lw=3, label=r"$S$") + #ax.legend() + #ax.set(title=f"", + # xlabel="Reciprocal energy [1/eV]", + # ylabel="Intensity [a.u.]", + # yscale='log', + # ) + #ax = fig.add_subplot(gs[1, 0]) + #ax.plot(np.fft.fftshift(np.absolute(H)), c='b', lw=3, label="H") + #ax.plot(np.fft.fftshift(np.absolute(np.mean(Z, axis=0)/np.mean(D, axis=0))), c='r', lw=3, label="mean Z/mean D") + #ax.legend() + #ax.set(title=f"", + # xlabel="Reciprocal energy [1/eV]", + # ylabel="Intensity [a.u.]", + # yscale="log", + # ) + #ax = fig.add_subplot(gs[1, 1]) + #ax.plot(np.fft.fftshift(np.absolute(G)), c='b', lw=3, label="H") + #ax.plot(np.fft.fftshift(np.absolute(np.mean(Z, axis=0)/np.mean(D, axis=0))), c='r', lw=3, label="mean Z/mean D") + #ax.legend() + #ax.set(title=f"", + # xlabel="Reciprocal energy [1/eV]", + # ylabel="Intensity [a.u.]", + # yscale="log", + # ) + #fig.savefig("tmp.png") + #plt.close(fig) + + Hmod = np.real(np.absolute(H)) + Gdir = np.fft.fftshift(np.fft.ifft(G)) + self.wiener_filter = Gdir + self.wiener_filter_ft = G + self.wiener_energy = e_axis + self.wiener_energy_ft = E + self.transfer_function = H + h = np.fft.fftshift(np.fft.ifft(H)) + hmod = np.real(np.absolute(h)) + self.impulse_response = h + energy_mu = np.sum(e_axis*hmod)/np.sum(hmod) + energy_var = np.sum(((e_axis - energy_mu)**2)*hmod)/np.sum(hmod) + self.resolution = np.sqrt(energy_var) + print("Resolution:", self.resolution) + + # get intensity effect + intensity = np.sum(z, axis=1) + self.kde_intensity = KernelDensity(bandwidth="scott", kernel="gaussian") + self.kde_intensity.fit(intensity.reshape(-1, 1)) + self.mu_intensity = np.mean(intensity.reshape(-1), axis=0) + self.sigma_intensity = np.std(intensity.reshape(-1), axis=0) + # for consistency check per channel selection_model = self.x_select low_res_selected = selection_model.transform(low_res_data, keep_dictionary_structure=True) @@ -912,10 +1010,19 @@ class Model(TransformerMixin, BaseEstimator): pca_unc = self.y_model['unc'].uncertainty() total_unc = np.sqrt(pca_unc**2 + unc**2) + M = self.wiener_filter.shape[0] + B = expected.shape[0] + assert expected.shape[1] == M + deconvolved = fftconvolve(expected, + np.broadcast_to(self.wiener_filter.reshape(1, -1), (B, M)), + mode="same", + axes=1) + return dict(expected=expected, unc=unc, pca=pca_unc, total_unc=total_unc, + deconvolved=deconvolved, Z_intensity=Z_intensity ) @@ -932,10 +1039,19 @@ class Model(TransformerMixin, BaseEstimator): self.fit_model, self.channel_pca, #self.channel_fit_model - DataHolder(self.mu_intensity), - DataHolder(self.sigma_intensity), - DataHolder(self.mu_xgm), - DataHolder(self.sigma_xgm), + DataHolder(dict(mu_intensity=self.mu_intensity, + sigma_intensity=self.sigma_intensity, + mu_xgm=self.mu_xgm, + sigma_xgm=self.sigma_xgm, + wiener_filter_ft=self.wiener_filter_ft, + wiener_filter=self.wiener_filter, + wiener_energy=self.wiener_energy, + wiener_energy_ft=self.wiener_energy_ft, + resolution=self.resolution, + transfer_function=self.transfer_function, + impulse_response=self.impulse_response, + ) + ), self.ood, self.kde_xgm, self.kde_intensity, @@ -955,10 +1071,7 @@ class Model(TransformerMixin, BaseEstimator): x_model, y_model, fit_model, channel_pca, #channel_fit_model - mu_intensity, - sigma_intensity, - mu_xgm, - sigma_xgm, + extra, ood, kde_xgm, kde_intensity, @@ -971,11 +1084,20 @@ class Model(TransformerMixin, BaseEstimator): obj.channel_pca = channel_pca #obj.channel_fit_model = channel_fit_model obj.ood = ood - obj.mu_intensity = mu_intensity.get_data() - obj.sigma_intensity = sigma_intensity.get_data() - obj.mu_xgm = mu_xgm.get_data() - obj.sigma_xgm = sigma_xgm.get_data() obj.kde_xgm = kde_xgm obj.kde_intensity = kde_intensity + + extra = extra.get_data() + obj.mu_intensity = extra["mu_intensity"] + obj.sigma_intensity = extra["sigma_intensity"] + obj.mu_xgm = extra["mu_xgm"] + obj.sigma_xgm = extra["sigma_xgm"] + obj.wiener_filter_ft = extra["wiener_filter_ft"] + obj.wiener_filter = extra["wiener_filter"] + obj.wiener_energy_ft = extra["wiener_energy_ft"] + obj.wiener_energy = extra["wiener_energy"] + obj.resolution = extra["resolution"] + obj.transfer_function = extra["transfer_function"] + obj.impulse_response = extra["impulse_response"] return obj diff --git a/pes_to_spec/test/offline_analysis.py b/pes_to_spec/test/offline_analysis.py index 790a5b368054825f9672b165fe753b4ddbfbe993..0abf2249411e6c2c59748a1691f59ca0e0e68185 100755 --- a/pes_to_spec/test/offline_analysis.py +++ b/pes_to_spec/test/offline_analysis.py @@ -18,8 +18,10 @@ matplotlib.use('Agg') import matplotlib.pyplot as plt from matplotlib.gridspec import GridSpec -from mpl_toolkits.axes_grid.inset_locator import InsetPosition +from mpl_toolkits.axes_grid1.inset_locator import InsetPosition import seaborn as sns +import scipy +from scipy.signal import fftconvolve from typing import Dict, Optional @@ -65,7 +67,8 @@ def plot_result(filename: str, spec_raw_int: Optional[np.ndarray]=None, pes: Optional[np.ndarray]=None, pes_to_show: Optional[str]="", - pes_bin: Optional[np.ndarray]=None): + pes_bin: Optional[np.ndarray]=None, + wiener: Optional[np.ndarray]=None): """ Plot result with uncertainty band. @@ -78,6 +81,7 @@ def plot_result(filename: str, pes: PES spectrum for the inset. pes_to_show: Name of the channel shown. pes_bin: PES bins. + wiener: A Wiener filter to use to improve the filter estimate. """ fig = plt.figure(figsize=(12, 8)) @@ -94,6 +98,9 @@ def plot_result(filename: str, #ax.fill_between(spec_raw_pe, spec_pred["expected"] - unc_pca, spec_pred["expected"] + unc_pca, facecolor='magenta', alpha=0.6, label="68% unc. (syst., PCA)") #if spec_raw_int is not None: # ax.plot(spec_raw_pe, spec_raw_int, c='b', lw=1, ls='--', label="High-resolution measurement") + #if wiener is not None: + # deconvolved = fftconvolve(spec_pred["expected"], wiener, mode="same") + ax.plot(spec_raw_pe, spec_pred["deconvolved"], c='g', ls='-.', lw=3, label="Wiener filter result") Y = np.amax(spec_smooth) ax.legend(frameon=False, borderaxespad=0, loc='upper left') ax.set(title=f"", #avg(stat unc) = {unc_stat}, avg(pca unc) = {unc_pca}", @@ -239,6 +246,30 @@ def main(): t += [time_ns() - start] t_names += ["Load"] + # plot Wiener filter + fig = plt.figure(figsize=(12, 8)) + gs = GridSpec(1, 1) + ax = fig.add_subplot(gs[0, 0]) + plt.plot(np.fft.fftshift(model.wiener_energy_ft), np.fft.fftshift(np.absolute(model.wiener_filter_ft))) + ax.set(title=f"", + xlabel=r"Reciprocal energy [1/eV]", + ylabel="Filter intensity [a.u.]", + yscale='log', + ) + fig.savefig(os.path.join(args.directory, "wiener_ft.png")) + plt.close(fig) + + fig = plt.figure(figsize=(12, 8)) + gs = GridSpec(1, 1) + ax = fig.add_subplot(gs[0, 0]) + plt.plot(model.wiener_energy, np.absolute(model.wiener_filter)) + ax.set(title=f"", + xlabel=r"Energy [eV]", + ylabel="Filter value [a.u.]", + ) + fig.savefig(os.path.join(args.directory, "wiener.png")) + plt.close(fig) + print("Check consistency") start = time_ns() Z = model.check_compatibility(pes_raw_t) @@ -297,7 +328,7 @@ def main(): ax2.tick_params(axis='both', which='major', labelsize=SMALL_SIZE) fig.savefig(os.path.join(args.directory, "intensity_vs_chi2.png")) plt.close(fig) - + fig = plt.figure(figsize=(12, 8)) gs = GridSpec(1, 1) ax = fig.add_subplot(gs[0, 0]) @@ -317,7 +348,7 @@ def main(): color='black', fontsize=15) fig.savefig(os.path.join(args.directory, "chi2.png")) plt.close(fig) - + fig = plt.figure(figsize=(12, 8)) gs = GridSpec(1, 1) ax = fig.add_subplot(gs[0, 0]) @@ -336,7 +367,7 @@ def main(): color='black', fontsize=15) fig.savefig(os.path.join(args.directory, "intensity.png")) plt.close(fig) - + # rmse rmse = np.sqrt(np.mean((spec_smooth - spec_pred["expected"])**2, axis=1)) fig = plt.figure(figsize=(12, 8)) @@ -350,7 +381,7 @@ def main(): ) fig.savefig(os.path.join(args.directory, "intensity_vs_rmse.png")) plt.close(fig) - + fig = plt.figure(figsize=(12, 8)) gs = GridSpec(1, 1) ax = fig.add_subplot(gs[0, 0]) @@ -370,7 +401,7 @@ def main(): color='black', fontsize=15) fig.savefig(os.path.join(args.directory, "rmse.png")) plt.close(fig) - + # SPEC integral w.r.t XGM intensity fig = plt.figure(figsize=(12, 8)) gs = GridSpec(1, 1) @@ -382,7 +413,7 @@ def main(): ) fig.savefig(os.path.join(args.directory, "xgm_vs_intensity.png")) plt.close(fig) - + # SPEC integral w.r.t XGM intensity fig = plt.figure(figsize=(12, 8)) gs = GridSpec(1, 1) @@ -394,7 +425,7 @@ def main(): ) fig.savefig(os.path.join(args.directory, "expected_vs_intensity.png")) plt.close(fig) - + fig = plt.figure(figsize=(12, 8)) gs = GridSpec(1, 1) ax = fig.add_subplot(gs[0, 0]) @@ -422,7 +453,8 @@ def main(): spec_raw_int[idx, :] if showSpec else None, #pes=-pes_raw[pes_to_show][idx, first:last], #pes_to_show=pes_to_show.replace('_', ' '), - #pes_bin=np.arange(first, last) + #pes_bin=np.arange(first, last), + wiener=model.wiener_filter ) for ch in channels: plot_pes(os.path.join(args.directory, f"test_pes_{tid}_{ch}.png"), diff --git a/pyproject.toml b/pyproject.toml index 32d592425209f6bcfc75d3000b62312009fc6419..a41df3d5f76768af6c1d4179e804a4e939c0c64f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,7 @@ dynamic = ["version", "readme"] dependencies = [ "numpy>=1.21", "scipy>=1.6", - "scikit-learn>=1.0.2", + "scikit-learn>=1.2.0", "autograd", ]