diff --git a/pes_to_spec/test/offline_analysis.py b/pes_to_spec/test/offline_analysis.py index cf5e482491e8c5432a7599b8bdabd0c0bad3441a..6c530092c3464c3dd3272d1b789dca55e64b24f7 100755 --- a/pes_to_spec/test/offline_analysis.py +++ b/pes_to_spec/test/offline_analysis.py @@ -17,15 +17,8 @@ from sklearn.decomposition import PCA from itertools import product -import matplotlib -matplotlib.use('Agg') - import pandas as pd from copy import deepcopy -import matplotlib.pyplot as plt -from matplotlib.gridspec import GridSpec -from mpl_toolkits.axes_grid1.inset_locator import InsetPosition -import seaborn as sns import scipy from scipy.signal import fftconvolve @@ -34,18 +27,6 @@ from typing import Dict, Optional from time import time_ns import pandas as pd -SMALL_SIZE = 12 -MEDIUM_SIZE = 18 -BIGGER_SIZE = 22 - -plt.rc('font', size=BIGGER_SIZE) # controls default text sizes -plt.rc('axes', titlesize=BIGGER_SIZE) # fontsize of the axes title -plt.rc('axes', labelsize=BIGGER_SIZE) # fontsize of the x and y labels -plt.rc('xtick', labelsize=BIGGER_SIZE) # fontsize of the tick labels -plt.rc('ytick', labelsize=BIGGER_SIZE) # fontsize of the tick labels -plt.rc('legend', fontsize=MEDIUM_SIZE) # legend fontsize -plt.rc('figure', titlesize=BIGGER_SIZE) # fontsize of the figure title - def get_gas(run, tids): gas_sources = [ "SA3_XTD10_PES/DCTRL/V30300S_NITROGEN", @@ -63,62 +44,7 @@ def get_gas(run, tids): return gas -def plot_pes(filename: str, pes_raw_int: np.ndarray, first: int, last: int): - """ - Plot low-resolution spectrum. - - Args: - filename: Output file name. - pes_raw_int: Low-resolution spectrum. - - """ - fig = plt.figure(figsize=(16, 8)) - gs = GridSpec(1, 1) - ax = fig.add_subplot(gs[0, 0]) - ax.plot(np.arange(first, last), pes_raw_int, c='b', lw=3) - #ax.legend() - ax.set(title=f"", - xlabel="Time-of-flight index", - ylabel="Counts [a.u.]") - ax.spines['top'].set_visible(False) - ax.spines['right'].set_visible(False) - fig.savefig(filename) - plt.close(fig) - -def pca_variance_plot(filename: str, variance_ratio: np.ndarray, n_comp: int): - """ - Plot variance contribution. - - Args: - filename: Output file name. - variance_ratio: Contribution of each component's variance. - - """ - fig = plt.figure(figsize=(8, 8)) - gs = GridSpec(1, 1) - ax = fig.add_subplot(gs[0, 0]) - c = np.cumsum(variance_ratio) - ax.bar(1+np.arange(len(variance_ratio)), variance_ratio*100, color='tab:red', alpha=0.3, label="Per component") - ax.plot(1+np.arange(len(variance_ratio)), c*100, c='tab:blue', lw=5, label="Cumulative") - ax.plot([n_comp, n_comp], [0, c[n_comp]*100], lw=3, ls='--', c='m', label="Components kept") - ax.plot([0, n_comp], [c[n_comp]*100, c[n_comp]*100], lw=3, ls='--', c='m') - ax.legend(frameon=False) - print(f"PCA plot: total n. components: {len(variance_ratio)}") - x_max = np.where(c > 0.99)[0][0] - print(f"Fraction of variance: {c[n_comp]}") - ax.set_yscale('log') - ax.set(title=f"", - xlabel="Component", - ylabel="Variance [%]", - xlim=(1, x_max), - ylim=(0.1, 100)) - ax.spines['top'].set_visible(False) - ax.spines['right'].set_visible(False) - plt.tight_layout() - fig.savefig(filename) - plt.close(fig) - -def plot_result(filename: str, +def save_result(filename: str, spec_pred: Dict[str, np.ndarray], spec_smooth: np.ndarray, spec_raw_pe: np.ndarray, @@ -152,9 +78,10 @@ def plot_result(filename: str, unc=unc, unc_pca=unc_pca, unc_stat=unc_stat, - beam_intensity=intensity*1e-3*np.ones_like(spec_raw_pe) + beam_intensity=intensity*1e-3*np.ones_like(spec_raw_pe), + deconvolved=spec_pred["deconvolved"] )) - df.to_csv(filename.replace('.pdf', '.csv')) + df.to_csv(filename) if pes is not None: pes_data = deepcopy(pes) pes_data['bin'] = np.arange(len(pes['channel_1_D'])) @@ -163,58 +90,31 @@ def plot_result(filename: str, df = pd.DataFrame(pes_data) df.to_csv(filename.replace('.pdf', '_pes.csv')) - fig = plt.figure(figsize=(12, 8)) - gs = GridSpec(1, 1) - ax = fig.add_subplot(gs[0, 0]) - ax.plot(spec_raw_pe, spec_smooth, c='b', lw=3, label="Grating spectrometer") - ax.plot(spec_raw_pe, spec_pred["expected"], c='r', ls='--', lw=3, label="Prediction") - #ax.fill_between(spec_raw_pe, spec_pred["expected"] - unc, spec_pred["expected"] + unc, facecolor='green', alpha=0.6, label="68% unc.") - ax.fill_between(spec_raw_pe, spec_pred["expected"] - unc, spec_pred["expected"] + unc, facecolor='gold', alpha=0.5, label="68% unc. (total)") - ax.fill_between(spec_raw_pe, spec_pred["expected"] - unc_pca, spec_pred["expected"] + unc_pca, facecolor='magenta', alpha=0.5, label="68% unc. (PCA only)") - #ax.fill_between(spec_raw_pe, spec_pred["expected"] - unc_stat, spec_pred["expected"] + unc_stat, facecolor='red', alpha=0.6, label="68% unc. (stat.)") - #ax.fill_between(spec_raw_pe, spec_pred["expected"] - unc_pca, spec_pred["expected"] + unc_pca, facecolor='magenta', alpha=0.6, label="68% unc. (syst., PCA)") - #if spec_raw_int is not None: - # ax.plot(spec_raw_pe, spec_raw_int, c='b', lw=1, ls='--', label="High-resolution measurement") - #if wiener is not None: - # deconvolved = fftconvolve(spec_pred["expected"], wiener, mode="same") - #ax.plot(spec_raw_pe, spec_pred["deconvolved"], c='g', ls='-.', lw=3, label="Wiener filter result") - Y = np.amax(spec_smooth) - ax.legend(frameon=False, borderaxespad=0, loc='upper left') - ax.set_title(f"Beam intensity: {intensity*1e-3:.1f} mJ", loc="left") - ax.spines['top'].set_visible(False) - ax.spines['right'].set_visible(False) - ax.set( - xlabel="Photon energy [eV]", - ylabel="Intensity [a.u.]", - ylim=(0, 1.3*Y)) - if (pes is not None) and (pes_to_show != ""): - ax2 = plt.axes([0,0,1,1]) - # Manually set the position and relative size of the inset axes within ax1 - #ip = InsetPosition(ax, [0.65,0.6,0.35,0.4]) - ip = InsetPosition(ax, [0.72,0.7,0.35,0.4]) - ax2.set_axes_locator(ip) - pes_bin = np.arange(first, last) - if pes_to_show == "sum": - pes_plot = sum([pes[k][pes_bin] for k in pes.keys()]) - pes_label = r"$\sum$ PES channels" - else: - pes_plot = pes[pes_to_show][pes_bin] - pes_label = pes_to_show - ax2.plot(pes_bin, pes_plot, c='black', lw=3) - ax2.set(title=f"Low-resolution example data", - xlabel="Bin", - ylabel=pes_label, - ylim=(0, None), - #labelsize=SMALL_SIZE, - #xticklabels=dict(fontdict=dict(fontsize=SMALL_SIZE)), - #yticklabels=dict(fontdict=dict(fontsize=SMALL_SIZE)), - ) - ax2.title.set_size(SMALL_SIZE) - ax2.xaxis.label.set_size(SMALL_SIZE) - ax2.yaxis.label.set_size(SMALL_SIZE) - ax2.tick_params(axis='both', which='major', labelsize=SMALL_SIZE) - fig.savefig(filename) - plt.close(fig) +def save_pes_result(filename: str, + pes: Optional[np.ndarray]=None, + first: Optional[int]=None, + last: Optional[int]=None, + ): + """ + Plot result with uncertainty band. + + Args: + filename: Output file name. + spec_pred: Predicted result with uncertainty bands in a dictionary. + spec_smooth: Smoothened expected result with shape (features,). + spec_raw_pe: x axis with the photon energy in eV. + spec_raw_int: Original true expected result with shape (features,). + pes: PES spectrum for the inset. + pes_to_show: Name of the channel shown. + intensity: The XGM intensity in uJ. + + """ + pes_data = deepcopy(pes) + pes_data['bin'] = np.arange(len(pes['channel_1_D'])) + pes_data['first'] = first*np.ones_like(pes_data['bin']) + pes_data['last'] = last*np.ones_like(pes_data['bin']) + df = pd.DataFrame(pes_data) + df.to_csv(filename) def main(): """ @@ -351,42 +251,6 @@ def main(): for k in pes_raw.keys(): pes_raw[k] = pes_raw[k][train_idx] - # apply only PES selection for plotting - x_select = SelectRelevantLowResolution(channels, tof_start=None, delta_tof=300, poly=True) - x_select.fit(pes_raw) - pes_raw_select = x_select.transform(pes_raw, pulse_energy=xgm_flux[train_idx]) - ch = channels[0] - idx = 0 - first = x_select.tof_start - x_select.delta_tof - last = x_select.tof_start + x_select.delta_tof - plot_pes(os.path.join(args.directory, f"pes_example_{ch}.pdf"), - -pes_raw[ch][idx, first:last], - first=first, - last=last) - - # apply PCA for plotting - B, P, _ = pes_raw_select.shape - pes_raw_select = pes_raw_select.reshape((B*P, -1)) - pca = PCA(None, whiten=True) - pca.fit(pes_raw_select) - pca_variance_plot(os.path.join(args.directory, f"pca_pes.pdf"), - pca.explained_variance_ratio_, - 600) - df = pd.DataFrame(dict(variance_ratio=pca.explained_variance_ratio_, - n_comp=600*np.ones_like(pca.explained_variance_ratio_), - )) - df.to_csv(os.path.join(args.directory, "pca_pes.csv")) - - pca_spec = PCA(None, whiten=True) - pca_spec.fit(spec_raw_int[train_idx]) - pca_variance_plot(os.path.join(args.directory, f"pca_spec.pdf"), - pca_spec.explained_variance_ratio_, - 20) - df = pd.DataFrame(dict(variance_ratio=pca_spec.explained_variance_ratio_, - n_comp=20*np.ones_like(pca_spec.explained_variance_ratio_), - )) - df.to_csv(os.path.join(args.directory, "pca_spec.csv")) - model.debug_peak_finding(pes_raw, os.path.join(args.directory, "test_peak_finding.pdf")) if len(args.model) == 0: print("Fitting") @@ -418,44 +282,36 @@ def main(): t += [time_ns() - start] t_names += ["Load"] + # save PCA information + pes_raw_select = model.x_select.transform(pes_raw, pulse_energy=xgm_flux[train_idx]) + ch = channels[0] + idx = 0 + first = model.x_select.tof_start - model.x_select.delta_tof + last = model.x_select.tof_start + model.x_select.delta_tof + B, P, _ = pes_raw_select.shape + pes_raw_select = pes_raw_select.reshape((B*P, -1)) + pca = PCA(None, whiten=True) + pca.fit(pes_raw_select) + df = pd.DataFrame(dict(variance_ratio=pca.explained_variance_ratio_, + n_comp=600*np.ones_like(pca.explained_variance_ratio_), + )) + df.to_csv(os.path.join(args.directory, "pca_pes.csv")) + + pca_spec = PCA(None, whiten=True) + pca_spec.fit(spec_raw_int[train_idx]) + df = pd.DataFrame(dict(variance_ratio=pca_spec.explained_variance_ratio_, + n_comp=20*np.ones_like(pca_spec.explained_variance_ratio_), + )) + df.to_csv(os.path.join(args.directory, "pca_spec.csv")) + # transfer function - fig = plt.figure(figsize=(12, 8)) - gs = GridSpec(1, 1) - ax = fig.add_subplot(gs[0, 0]) - plt.plot(model.wiener_energy, np.absolute(model.impulse_response)) - ax.set(title=f"", - xlabel=r"Energy [eV]", - ylabel="Response [a.u.]", - yscale='log', - ) - fig.savefig(os.path.join(args.directory, "impulse.pdf")) - plt.close(fig) print(f"Resolution: {model.resolution:.2f} eV") - # plot Wiener filter - fig = plt.figure(figsize=(12, 8)) - gs = GridSpec(1, 1) - ax = fig.add_subplot(gs[0, 0]) - plt.plot(np.fft.fftshift(model.wiener_energy_ft), np.fft.fftshift(np.absolute(model.wiener_filter_ft))) - ax.set(title=f"", - xlabel=r"Reciprocal energy [1/eV]", - ylabel="Filter intensity [a.u.]", - yscale='log', - ) - fig.savefig(os.path.join(args.directory, "wiener_ft.pdf")) - plt.close(fig) - - fig = plt.figure(figsize=(12, 8)) - gs = GridSpec(1, 1) - ax = fig.add_subplot(gs[0, 0]) - plt.plot(model.wiener_energy, np.absolute(model.wiener_filter)) - ax.set(title=f"", - xlabel=r"Energy [eV]", - ylabel="Filter value [a.u.]", - yscale='log', - ) - fig.savefig(os.path.join(args.directory, "wiener.pdf")) - plt.close(fig) + df = pd.DataFrame(dict(wiener_energy=model.wiener_energy, + wiener_filter=model.wiener_filter, + impulse=model.impulse_response, + resolution=model.resolution*np.ones_like(model.wiener_energy))) + df.to_csv(os.path.join(args.directory, "model.csv")) print("Check consistency") start = time_ns() @@ -494,214 +350,46 @@ def main(): chi2 = np.sum((spec_smooth[:, np.newaxis, :] - spec_pred["expected"])**2/(spec_pred["total_unc"]**2), axis=(-1, -2)) ndof = spec_smooth.shape[1] print(f"Chi2 after PCA: {np.mean(chi2):.2f}, ndof: {ndof}, chi2/ndof: {np.mean(chi2/ndof):.2f}") - fig = plt.figure(figsize=(12, 8)) - gs = GridSpec(1, 1) - ax = fig.add_subplot(gs[0, 0]) - ax.scatter(chi2/ndof, xgm_flux_t[:,0], c='r', s=20) - ax.set(title=f"", #avg(stat unc) = {unc_stat}, avg(pca unc) = {unc_pca}", - xlabel=r"$\chi^2/$ndof", - ylabel="Beam intensity [uJ]", - xlim=(0, 5), - ) - ax2 = plt.axes([0,0,1,1]) - # Manually set the position and relative size of the inset axes within ax1 - ip = InsetPosition(ax, [0.65,0.6,0.35,0.4]) - ax2.set_axes_locator(ip) - ax2.scatter(chi2/ndof, xgm_flux_t[:,0], c='r', s=30) - #ax2.scatter(chi2/ndof, np.sum(spec_pred["expected"], axis=1)*de, c='b', s=30) - #ax2.scatter(chi2/ndof, np.sum(spec_raw_int, axis=1)*de, c='g', s=30) - ax2.set(title="", - xlabel=r"$\chi^2/$ndof", - ylabel=f"Beam intensity [uJ]", - ) - ax2.title.set_size(SMALL_SIZE) - ax2.xaxis.label.set_size(SMALL_SIZE) - ax2.yaxis.label.set_size(SMALL_SIZE) - ax2.tick_params(axis='both', which='major', labelsize=SMALL_SIZE) - fig.savefig(os.path.join(args.directory, "intensity_vs_chi2.pdf")) - plt.close(fig) - - fig = plt.figure(figsize=(12, 8)) - gs = GridSpec(1, 1) - ax = fig.add_subplot(gs[0, 0]) - sns.histplot(x=chi2/ndof, kde=True, linewidth=3, ax=ax) - ax.set(title=f"", - xlabel=r"$\chi^2/$ndof", - ylabel="Counts [a.u.]", - xlim=(0, 5), - ) - #ax.text(0.90, 0.95, fr"$\mu = ${np.mean(chi2/ndof):.2f}", - # verticalalignment='top', horizontalalignment='right', - # transform=ax.transAxes, - # color='black', fontsize=15) - #ax.text(0.90, 0.90, fr"$\sigma = ${np.std(chi2/ndof):.2f}", - # verticalalignment='top', horizontalalignment='right', - # transform=ax.transAxes, - # color='black', fontsize=15) - fig.savefig(os.path.join(args.directory, "chi2.pdf")) - plt.close(fig) spec_smooth_pca = model.y_model['pca'].transform(spec_smooth) - chi2_prepca = np.sum((spec_smooth_pca[:, np.newaxis, :] - spec_pred["expected_pca"])**2/(spec_pred["expected_pca_unc"]**2), axis=(-1, -2)) + unc2 = spec_pred["expected_pca_unc"]**2 + pca_var = (spec_pred["expected_pca"].std(axis=0, keepdims=True)**2).reshape(1, 1, -1) + print("Expected pca std:", pca_var) + chi2_prepca = np.sum((spec_smooth_pca[:, np.newaxis, :] - spec_pred["expected_pca"])**2/unc2, axis=(-1, -2)) ndof_prepca = float(spec_smooth_pca.shape[-1]) print(f"Chi2 before PCA: {np.mean(chi2_prepca):.2f}, ndof: {ndof_prepca}, chi2/ndof: {np.mean(chi2_prepca/ndof_prepca):.2f} +/- {np.std(chi2_prepca/ndof_prepca):.2f}") - fig = plt.figure(figsize=(12, 8)) - gs = GridSpec(1, 1) - ax = fig.add_subplot(gs[0, 0]) - sns.histplot(x=chi2_prepca/ndof_prepca, kde=True, linewidth=3, ax=ax) - ax.set(title=f"", - xlabel=r"$\chi^2/$ndof", - ylabel="Counts [a.u.]", - xlim=(0, 5), - ) - #ax.text(0.90, 0.95, fr"$\mu = ${np.mean(chi2/ndof):.2f}", - # verticalalignment='top', horizontalalignment='right', - # transform=ax.transAxes, - # color='black', fontsize=15) - #ax.text(0.90, 0.90, fr"$\sigma = ${np.std(chi2/ndof):.2f}", - # verticalalignment='top', horizontalalignment='right', - # transform=ax.transAxes, - # color='black', fontsize=15) - fig.savefig(os.path.join(args.directory, "chi2_prepca.pdf")) - plt.close(fig) - - fig = plt.figure(figsize=(12, 8)) - gs = GridSpec(1, 1) - ax = fig.add_subplot(gs[0, 0]) - ax.scatter(chi2_prepca/ndof_prepca, xgm_flux_t[:,0], c='r', s=20) - ax.set(title=f"", - xlabel=r"$\chi^2/$ndof", - ylabel="Beam intensity [uJ]", - xlim=(0, 5), - ylim=(0, np.mean(xgm_flux_t) + 3*np.std(xgm_flux_t)) - ) - fig.savefig(os.path.join(args.directory, "intensity_vs_chi2_prepca.pdf")) - plt.close(fig) res_prepca = np.sum((spec_smooth_pca[:, np.newaxis, :] - spec_pred["expected_pca"])/spec_pred["expected_pca_unc"], axis=1) - n_plots = res_prepca.shape[1]//10 - fig = plt.figure(figsize=(8*n_plots, 8)) - gs = GridSpec(1, n_plots) - for i_plot in range(n_plots): - ax = fig.add_subplot(gs[0, i_plot]) - sns.kdeplot(data={f"Dim. {k+1}": res_prepca[:, k] for k in range(i_plot*10, i_plot*10 + 10)}, - linewidth=3, ax=ax) - ax.set(title=f"", - xlabel=r"residue/uncertainty [a.u.]", - ylabel="Counts [a.u.]", - xlim=(-3, 3), - ) - ax.legend(frameon=False) - fig.savefig(os.path.join(args.directory, "res_prepca.pdf")) - plt.close(fig) - - fig = plt.figure(figsize=(12, 8)) - gs = GridSpec(1, 1) - ax = fig.add_subplot(gs[0, 0]) - sns.histplot(x=xgm_flux_t[:,0], kde=True, linewidth=3, ax=ax) - ax.set(title=f"", - xlabel="Beam intensity [uJ]", - ylabel="Counts [a.u.]", - ) - #ax.text(0.90, 0.95, fr"$\mu = ${np.mean(xgm_flux_t[:,0]):.2f}", - # verticalalignment='top', horizontalalignment='right', - # transform=ax.transAxes, - # color='black', fontsize=15) - #ax.text(0.90, 0.90, fr"$\sigma = ${np.std(xgm_flux_t[:,0]):.2f}", - # verticalalignment='top', horizontalalignment='right', - # transform=ax.transAxes, - # color='black', fontsize=15) - plt.tight_layout() - fig.savefig(os.path.join(args.directory, "intensity.pdf")) - plt.close(fig) # rmse rmse = np.sqrt(np.mean((spec_smooth[:, np.newaxis, :] - spec_pred["expected"])**2, axis=(-1, -2))) - fig = plt.figure(figsize=(12, 8)) - gs = GridSpec(1, 1) - ax = fig.add_subplot(gs[0, 0]) - ax.scatter(rmse, xgm_flux_t[:,0], c='r', s=30) - ax = plt.gca() - ax.set(title=f"", - xlabel=r"Root-mean-squared error", - ylabel="Beam intensity [uJ]", - ) - fig.savefig(os.path.join(args.directory, "intensity_vs_rmse.pdf")) - plt.close(fig) - - fig = plt.figure(figsize=(12, 8)) - gs = GridSpec(1, 1) - ax = fig.add_subplot(gs[0, 0]) - sns.histplot(x=rmse, kde=True, linewidth=3, ax=ax) - ax.set(title=f"", - xlabel="Root-mean-squared error", - ylabel="Counts [a.u.]", - ) - #ax.text(0.90, 0.95, fr"$\mu = ${np.mean(rmse):.2f}", - # verticalalignment='top', horizontalalignment='right', - # transform=ax.transAxes, - # color='black', fontsize=15) - #ax.text(0.90, 0.90, fr"$\sigma = ${np.std(rmse):.2f}", - # verticalalignment='top', horizontalalignment='right', - # transform=ax.transAxes, - # color='black', fontsize=15) - fig.savefig(os.path.join(args.directory, "rmse.pdf")) - plt.close(fig) - - ## SPEC integral w.r.t XGM intensity - #fig = plt.figure(figsize=(12, 8)) - #gs = GridSpec(1, 1) - #ax = fig.add_subplot(gs[0, 0]) - #sns.regplot(x=np.sum(spec_raw_int_t, axis=1)*de, y=xgm_flux_t[:,0], color='r', robust=True, ax=ax) - #ax.set(title=f"", - # xlabel="SPEC (raw) integral", - # ylabel="XGM Intensity [uJ]", - # ) - #fig.savefig(os.path.join(args.directory, "xgm_vs_intensity.png")) - #plt.close(fig) - - ## SPEC integral w.r.t XGM intensity - #fig = plt.figure(figsize=(12, 8)) - #gs = GridSpec(1, 1) - #ax = fig.add_subplot(gs[0, 0]) - #sns.regplot(x=np.sum(spec_raw_int_t, axis=-1)*de, y=np.sum(spec_pred["expected"], axis=(-1, -2))*de, color='r', robust=True, ax=ax) - #ax.set(title=f"", - # xlabel="SPEC (raw) integral", - # ylabel="Predicted integral", - # ) - #fig.savefig(os.path.join(args.directory, "expected_vs_intensity.png")) - #plt.close(fig) - - #fig = plt.figure(figsize=(12, 8)) - #gs = GridSpec(1, 1) - #ax = fig.add_subplot(gs[0, 0]) - #sns.regplot(x=np.sum(spec_pred["expected"], axis=(-1, -2))*de, y=xgm_flux_t[:,0], color='r', robust=True, ax=ax) - #ax.set(title=f"", - # xlabel="Predicted integral", - # ylabel="XGM intensity [uJ]", - # ) - #fig.savefig(os.path.join(args.directory, "xgm_vs_expected.png")) - #plt.close(fig) - - df = pd.DataFrame(dict(chi2_prepca=chi2_prepca, - ndof=spec_smooth_pca.shape[-1]*np.ones_like(chi2_prepca), - xgm_flux_t=xgm_flux_t[:,0], - rmse=rmse, - )) + + q = dict(chi2_prepca=chi2_prepca, + ndof=spec_smooth_pca.shape[-1]*np.ones_like(chi2_prepca), + xgm_flux_t=xgm_flux_t[:,0], + rmse=rmse, + root_mean_squared_pca_unc=np.sqrt((spec_pred["expected_pca_unc"][:, 0, :]**2).sum(axis=-1)) + ) + q.update({f'res_prepca_{k}': res_prepca[:, k] + for k in range(res_prepca.shape[1]) + } + ) + q.update({f'unc_prepca_{k}': spec_pred["expected_pca_unc"][:, 0, k] + for k in range(spec_pred["expected_pca_unc"].shape[-1]) + } + ) + df = pd.DataFrame(q) df.to_csv(os.path.join(args.directory, "quality.csv")) first, last = model.get_low_resolution_range() - first = max(0, first+200) - last = min(last-200, pes_raw_t["channel_1_D"].shape[1]-1) - pes_to_show = "" #'channel_1_D' # plot high_int_idx = np.argsort(xgm_flux_t[:,0]) for q in [10, 25, 50, 75, 100]: qi = int(len(high_int_idx)*(q/100.0)) for idx in high_int_idx[qi-10:qi]: tid = test_tids[idx] - plot_result(os.path.join(args.directory, f"test_q{q}_{tid}.pdf"), + save_result(os.path.join(args.directory, f"test_q{q}_{tid}.csv"), {k: item[idx, 0, ...] if k != "pca" else item[0, ...] for k, item in spec_pred.items()}, @@ -709,15 +397,13 @@ def main(): spec_raw_pe_t[idx, :] if showSpec else None, #spec_raw_int_t[idx, :] if showSpec else None, intensity=xgm_flux_t[idx,0], + ) + save_pes_result(os.path.join(args.directory, f"test_q{q}_{tid}_pes.csv"), pes={k: -item[idx, :] for k, item in pes_raw_t.items()}, - pes_to_show=pes_to_show, first=first, last=last, ) - for ch in channels: - plot_pes(os.path.join(args.directory, f"test_pes_{tid}_{ch}.pdf"), - -pes_raw_t[ch][idx, first:last], first, last) if __name__ == '__main__': main() diff --git a/pes_to_spec/test/prepare_plots.py b/pes_to_spec/test/prepare_plots.py index 50343609101884fe255ef660dafa43dc3f53f64b..80c80d7e5c66de8fea9bf6eebde4c6ec54dda613 100755 --- a/pes_to_spec/test/prepare_plots.py +++ b/pes_to_spec/test/prepare_plots.py @@ -1,14 +1,20 @@ #!/usr/bin/env python +import os +import re + +import matplotlib +matplotlib.use('Agg') import pandas as pd import numpy as np import matplotlib.pyplot as plt from matplotlib.gridspec import GridSpec import seaborn as sns +from scipy.interpolate import make_interp_spline, BSpline SMALL_SIZE = 12 MEDIUM_SIZE = 18 -BIGGER_SIZE = 22 +BIGGER_SIZE = 24 plt.rc('font', size=BIGGER_SIZE) # controls default text sizes plt.rc('axes', titlesize=BIGGER_SIZE) # fontsize of the axes title @@ -24,8 +30,8 @@ def plot_final(df: pd.DataFrame, filename: str): ax = fig.add_subplot(gs[0, 0]) ax.plot(df.energy, df.spec, c='b', lw=3, label="Grating spectrometer") ax.plot(df.energy, df.prediction, c='r', ls='--', lw=3, label="Prediction") - ax.fill_between(df.energy, df.prediction - df.unc, df.prediction + df.unc, facecolor='gold', alpha=0.5, label="68% unc. (total)") - ax.fill_between(df.energy, df.prediction - df.unc_pca, df.prediction + df.unc_pca, facecolor='magenta', alpha=0.5, label="68% unc. (PCA only)") + ax.fill_between(df.energy, df.prediction - 2*df.unc, df.prediction + 2*df.unc, facecolor='gold', alpha=0.5, label="95% unc. (total)") + ax.fill_between(df.energy, df.prediction - 2*df.unc_pca, df.prediction + 2*df.unc_pca, facecolor='magenta', alpha=0.5, label="95% unc. (PCA only)") Y = np.amax(df.spec) ax.legend(frameon=False, borderaxespad=0, loc='upper left') ax.set_title(f"Beam intensity: {df.beam_intensity.iloc[0]:.1f} mJ", loc="left") @@ -35,6 +41,7 @@ def plot_final(df: pd.DataFrame, filename: str): xlabel="Photon energy [eV]", ylabel="Intensity [a.u.]", ylim=(0, 1.3*Y)) + plt.tight_layout() fig.savefig(filename) plt.close(fig) @@ -76,22 +83,52 @@ def plot_rmse_intensity(df: pd.DataFrame, filename: str): fig.savefig(filename) plt.close(fig) +def plot_residue(df: pd.DataFrame, filename: str): + cols = [k for k in df.columns if "res_prepca" in k] + df_res = df.loc[:, cols] + n_plots = len(df_res.columns)//10 + fig = plt.figure(figsize=(8*n_plots, 8)) + gs = GridSpec(1, n_plots) + for i_plot in range(n_plots): + ax = fig.add_subplot(gs[0, i_plot]) + sns.kdeplot(data={f"Dim. {k+1}": df_res.loc[:, cols[k]] for k in range(i_plot*10, i_plot*10 + 10)}, + linewidth=3, ax=ax) + ax.set(title=f"", + xlabel=r"residue/uncertainty [a.u.]", + ylabel="Counts [a.u.]", + xlim=(-3, 3), + ) + ax.legend(frameon=False) + fig.savefig(filename) + plt.close(fig) + def plot_chi2_intensity(df: pd.DataFrame, filename: str): fig = plt.figure(figsize=(12, 8)) gs = GridSpec(1, 1) ax = fig.add_subplot(gs[0, 0]) - ax.scatter(df.chi2_prepca/df.ndof.iloc[0], df.xgm_flux_t, c='r', s=30) + sns.kdeplot(x=df.chi2_prepca/df.ndof.iloc[0], y=df.xgm_flux_t*1e-3, + fill=True, + ax=ax) + sns.scatterplot(x=df.chi2_prepca/df.ndof.iloc[0], y=df.xgm_flux_t*1e-3, + s=200, + alpha=0.1, + #size=df.root_mean_squared_pca_unc, + #sizes=(20, 200), + ax=ax) ax = plt.gca() ax.set(title=f"", xlabel=r"$\chi^2/$ndof", - ylabel="Beam intensity [uJ]", + ylabel="Beam intensity [mJ]", xlim=(0, 5), - ylim=(0, df.xgm_flux_t.mean() + 3*df.xgm_flux_t.std()) + ylim=(0, df.xgm_flux_t.mean()*1e-3 + 3*df.xgm_flux_t.std()*1e-3) ) + ax.spines['top'].set_visible(False) + ax.spines['right'].set_visible(False) + plt.tight_layout() fig.savefig(filename) plt.close(fig) -def pca_variance_plot(df: pd.DataFrame, filename: str): +def pca_variance_plot(df: pd.DataFrame, filename: str, max_comp_frac: float=0.99): """ Plot variance contribution. @@ -100,25 +137,88 @@ def pca_variance_plot(df: pd.DataFrame, filename: str): variance_ratio: Contribution of each component's variance. """ - fig = plt.figure(figsize=(8, 8)) + fig = plt.figure(figsize=(12, 8)) gs = GridSpec(1, 1) ax = fig.add_subplot(gs[0, 0]) c = np.cumsum(df.variance_ratio) - n_comp = df.n_comp.iloc[0] + n_comp = int(df.n_comp.iloc[0]) ax.bar(1+np.arange(len(df.variance_ratio)), df.variance_ratio*100, color='tab:red', alpha=0.3, label="Per component") ax.plot(1+np.arange(len(df.variance_ratio)), c*100, c='tab:blue', lw=5, label="Cumulative") ax.plot([n_comp, n_comp], [0, c[n_comp]*100], lw=3, ls='--', c='m', label="Components kept") ax.plot([0, n_comp], [c[n_comp]*100, c[n_comp]*100], lw=3, ls='--', c='m') ax.legend(frameon=False) print(f"PCA plot: total n. components: {len(df.variance_ratio)}") - x_max = np.where(c > 0.99)[0][0] + x_max = np.where(c > max_comp_frac)[0][0] print(f"Fraction of variance: {c[n_comp]}") ax.set_yscale('log') ax.set(title=f"", xlabel="Component", ylabel="Variance [%]", xlim=(1, x_max), - ylim=(0.1, 100)) + ylim=(0.01, 100)) + ax.spines['top'].set_visible(False) + ax.spines['right'].set_visible(False) + plt.tight_layout() + fig.savefig(filename) + plt.close(fig) + +def moving_average(a, n=3): + ret = np.cumsum(a) + ret[n:] = ret[n:] - ret[:-n] + return ret[n - 1:] / n + +def plot_impulse(df: pd.DataFrame, filename: str): + """ + Plot variance contribution. + + Args: + filename: Output file name. + variance_ratio: Contribution of each component's variance. + + """ + fig = plt.figure(figsize=(12, 8)) + gs = GridSpec(1, 1) + ax = fig.add_subplot(gs[0, 0]) + x = df.wiener_energy.to_numpy() + y = np.absolute(df.impulse.to_numpy()) + #x_new = np.linspace(-6, 6, 601) + #spl = make_interp_spline(x, np.log10(y), k=3) + #y_new = np.power(10, spl(x_new)) + x_new = moving_average(x, n=10) + y_new = moving_average(y, n=10) + sel = (x_new >= -10) & (x_new <= 10) + ax.plot(x_new[sel], y_new[sel], c='tab:blue', lw=4) + ax.set_yscale('log') + ax.set(title=f"", + xlabel="Energy [eV]", + ylim=(1e-4, 0.5), + ylabel="Response [a.u.]", + ) + ax.spines['top'].set_visible(False) + ax.spines['right'].set_visible(False) + plt.tight_layout() + fig.savefig(filename) + plt.close(fig) + +def plot_wiener(df: pd.DataFrame, filename: str): + """ + Plot variance contribution. + + Args: + filename: Output file name. + variance_ratio: Contribution of each component's variance. + + """ + fig = plt.figure(figsize=(12, 8)) + gs = GridSpec(1, 1) + ax = fig.add_subplot(gs[0, 0]) + ax.plot(df.wiener_energy, np.absolute(df.wiener_filter), c='tab:blue', lw=3) + ax.set_yscale('log') + ax.set(title=f"", + xlabel="Energy [eV]", + ylim=(1e-3, 1), + ylabel="Response [a.u.]", + ) ax.spines['top'].set_visible(False) ax.spines['right'].set_visible(False) plt.tight_layout() @@ -134,36 +234,64 @@ def plot_pes(df: pd.DataFrame, channel:str, filename: str): pes_raw_int: Low-resolution spectrum. """ - fig = plt.figure(figsize=(16, 8)) + fig = plt.figure(figsize=(12, 8)) gs = GridSpec(1, 1) ax = fig.add_subplot(gs[0, 0]) first, last = df.loc[:, 'first'].iloc[0], df.loc[:, 'last'].iloc[0] - ax.plot(df.loc[(df.bin >= first) & (df.bin < last), "bin"], df.loc[(df.bin >= first) & (df.bin < last), channel], c='b', lw=3) - #ax.legend() + first = first+220 + last = last-270 + print("Range:", first, last) + sel = (df.bin >= first) & (df.bin < last) + x = df.loc[sel, "bin"] + if channel == "sum": + y = df.loc[sel, [k for k in df.columns if "channel_" in k]].sum(axis=1) + ax.plot(x, y, c='b', lw=5) + elif isinstance(channel, list): + for ch in channel: + sch = ch.replace('_', ' ') + y = df.loc[sel, ch] + ax.plot(x, y, lw=5, label=sch) + else: + y = df.loc[sel, channel] + ax.plot(x, y, c='b', lw=5) + ax.legend(frameon=False) ax.set(title=f"", xlabel="Time-of-flight index", ylabel="Counts [a.u.]") ax.spines['top'].set_visible(False) ax.spines['right'].set_visible(False) + plt.tight_layout() fig.savefig(filename) plt.close(fig) if __name__ == '__main__': indir = 'p900331r69t70' - channel = 'channel_4_A' - fname = 'test_q100_1724098413' - plot_final(pd.read_csv(f'{indir}/{fname}.csv'), f'{fname}.pdf') - plot_pes(pd.read_csv(f'{indir}/{fname}_pes.csv'), channel, f'{fname}_{channel}.pdf') + channel = ['channel_1_A', 'channel_4_A', 'channel_3_B'] + #channel = 'sum' + #for fname in os.listdir(indir): + # if re.match(r'test_q100_[0-9]*\.csv', fname): + # fname = fname[:-4] + # print(f"Plotting {fname}") + # plot_final(pd.read_csv(f'{indir}/{fname}.csv'), f'{fname}.pdf') + # plot_pes(pd.read_csv(f'{indir}/{fname}_pes.csv'), channel, f'{fname}_pes.pdf') - fname = 'test_q100_1724098596' - plot_final(pd.read_csv(f'{indir}/{fname}.csv'), f'{fname}.pdf') - plot_pes(pd.read_csv(f'{indir}/{fname}_pes.csv'), channel, f'{fname}_{channel}.pdf') + for fname in ('test_q100_1724098413', 'test_q100_1724098596', 'test_q50_1724099445'): + plot_final(pd.read_csv(f'{indir}/{fname}.csv'), f'{fname}.pdf') + plot_pes(pd.read_csv(f'{indir}/{fname}_pes.csv'), channel, f'{fname}_pes.pdf') plot_chi2(pd.read_csv(f'{indir}/quality.csv'), f'chi2_prepca.pdf') plot_chi2_intensity(pd.read_csv(f'{indir}/quality.csv'), f'intensity_vs_chi2_prepca.pdf') plot_rmse(pd.read_csv(f'{indir}/quality.csv'), f'rmse.pdf') plot_rmse_intensity(pd.read_csv(f'{indir}/quality.csv'), f'intensity_vs_rmse.pdf') - pca_variance_plot(pd.read_csv(f'{indir}/pca_spec.csv'), f'pca_spec.pdf') - pca_variance_plot(pd.read_csv(f'{indir}/pca_pes.csv'), f'pca_pes.pdf') + plot_residue(pd.read_csv(f'{indir}/quality.csv'), f'residue.pdf') + + df_model = pd.read_csv(f'{indir}/model.csv') + df_model.impulse = df_model.impulse.str.replace('i','j').apply(lambda x: np.complex(x)) + df_model.wiener_filter = df_model.wiener_filter.str.replace('i','j').apply(lambda x: np.complex(x)) + plot_impulse(df_model, f'impulse.pdf') + plot_wiener(df_model, f'wiener.pdf') + + pca_variance_plot(pd.read_csv(f'{indir}/pca_spec.csv'), f'pca_spec.pdf', max_comp_frac=0.99) + pca_variance_plot(pd.read_csv(f'{indir}/pca_pes.csv'), f'pca_pes.pdf', max_comp_frac=0.95)