Skip to content
Snippets Groups Projects
Commit 1e64d429 authored by Karim Ahmed's avatar Karim Ahmed
Browse files

Feat: Update Fit Summary notebook with new plots

parent f740e137
No related branches found
Tags 3.11.4
1 merge request!1055[Jungfrau][FF] Improve Fitting performance and stop using pydetlib + many refactors.
%% Cell type:markdown id: tags:
# Jungfrau Spectra Fit Summary
Author: European XFEL Detector Department, Version: 1.0
Summary for plotting Spectra Fit results for Jungfrau FF histogram and fitting notebook
%% Cell type:code id: tags:
``` python
in_folder = '/gpfs/exfel/exp/SPB/202330/p900343/raw' # RAW data path, required
out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/remove/JF4M_SPB_gain/r67" # Output path for gain data, required
metadata_folder = "" # Directory containing calibration_metadata.yml when run by xfel-calibrate.
runs = [67]
# Parameters used to access raw data.
karabo_da = [] # list of data aggregators, which corresponds to different JF modules. This is only needed for the detectors of one module.
karabo_id = "SPB_IRDA_JF4M" # detector identifier.
creation_time = "" # To overwrite the measured creation_time. Required Format: YYYY-MM-DD HR:MN:SC e.g. "2022-06-28 13:00:00"
fit_func = 'CHARGE_SHARING' # function used to fit the single photon peak\
g0_fit_dataset = 'gainMap_fit' # name of the data structure in the fit files
spectra_fit_temp = 'R{:04d}_{}_Gain_Spectra_{}_{}_Fit.h5'
histo_temp = 'R{:04d}_{}_Gain_Spectra_{}_Histo.h5'
```
%% Cell type:code id: tags:
``` python
import math
import multiprocessing as mp
import warnings
from IPython.display import Markdown, display
from logging import warning
from pathlib import Path
warnings.filterwarnings('ignore')
from h5py import File as h5file
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
from h5py import File as h5file
matplotlib.use("agg")
%matplotlib inline
from cal_tools.calcat_interface import CalCatApi
from cal_tools.plotting import init_jungfrau_geom
from cal_tools.restful_config import calibration_client
from cal_tools.calcat_interface import CalCatApi
```
%% Cell type:code id: tags:
``` python
expected_modules, geom = init_jungfrau_geom(karabo_id=karabo_id, karabo_da=karabo_da)
nmods = len(expected_modules)
```
%% Cell type:code id: tags:
``` python
calcat_client = calibration_client()
calcat = CalCatApi(client=calcat_client)
detector_id = calcat.detector(karabo_id)['id']
da_mapping = calcat.physical_detector_units(detector_id, pdu_snapshot_at=creation_time)
da_to_pdu = {k: v["physical_name"] for k, v in da_mapping.items()}
run = runs[0] # TODO: Update for multiple runs
proposal = list(filter(None, in_folder.strip('/').split('/')))[-2]
```
%% Cell type:markdown id: tags:
## Histograms for all cells for each module
%% Cell type:code id: tags:
``` python
proposal = list(filter(None, in_folder.strip('/').split('/')))[-2]
run = runs[0] # TODO this will need to be fixed when I start implementing multiple runs.
stacked_constants = np.full(geom.expected_data_shape, np.nan) # nmods, 512, 1024
def process_histogram(file_path, shared_dict):
try:
with h5file(file_path, 'r') as f:
histos = f["histos"][:]
edges = f["edges"][:]
except Exception as e:
warning(f"Error while loading Histogram file {file_path}: {e}")
shared_dict['bin_centers'] = None
shared_dict['mean_histos'] = None
return
bin_centers = (edges[1:] + edges[:-1]) / 2
mean_histos = histos.mean(axis=(2, 3)) # Shape: (bins, cells)
shared_dict['bin_centers'] = bin_centers
shared_dict['mean_histos'] = mean_histos
```
for i, da in enumerate (da_to_pdu.keys()):
with h5file(
Path(out_folder) / spectra_fit_temp.format(run, proposal.upper(), da, fit_func),
'r'
) as f:
# f[g0_fit_dataset] shape is 1024, 512, mem_cells
stacked_constants[i] = np.moveaxis(
np.mean(np.array(f[g0_fit_dataset]), axis=-1), 0, 1)
fig, ax = plt.subplots(figsize=(18, 10))
vmin, vmax = np.percentile(stacked_constants, [5, 95])
geom.plot_data_fast(
stacked_constants,
ax=ax,
vmin=vmin,
vmax=vmax,
colorbar={'shrink': 1, 'pad': 0.01},
)
ax.set_title(f'{karabo_id} - One photon peak position', size=18)
%% Cell type:code id: tags:
``` python
ncols = min(4, nmods)
nrows = math.ceil(nmods / ncols)
fig, axs = plt.subplots(
nrows, ncols, figsize=((5*ncols)//1.8, (5*nrows + 1)//1.8), squeeze=False)
axs = axs.flatten()
# Use a consistent color cycle for all subplots
custom_colors = [
'#0000FF', '#FF0000', '#00FF00', '#FFFF00', '#FF00FF', '#FFA500',
'#800080', '#008000', '#000080', '#FFC0CB', '#A52A2A', '#808080',
'#FFD700', '#8B4513', '#FF4500', '#2E8B57'
]
# Prepare shared memory and multiprocessing
manager = mp.Manager()
shared_dict_list = [manager.dict() for _ in range(nmods)]
with mp.Pool(processes=min(mp.cpu_count(), nmods)) as pool:
file_paths = [Path(out_folder) / histo_temp.format(run, proposal.upper(), da) for da in da_to_pdu.keys()]
pool.starmap(process_histogram, zip(file_paths, shared_dict_list))
for i, ((da, pdu), shared_dict) in enumerate(zip(da_to_pdu.items(), shared_dict_list)):
ax = axs[i]
bin_centers = shared_dict['bin_centers']
mean_histos = shared_dict['mean_histos']
# Missing histogram data for module
if bin_centers is None:
continue
for m in range(mean_histos.shape[1]): # Iterate over cells
ax.semilogy(
bin_centers, mean_histos[:, m],
color=custom_colors[m],
# Create only for the first subplot as legend is shared.
label=f'Cell {m}' if i == 0 else ""
)
ax.set_title(f"{da} ({pdu})")
ax.set_xlabel("ADC value")
ax.set_ylabel("Counts")
# Hide unused subplots
for i in range(nmods, len(axs)):
axs[i].set_visible(False)
plt.tight_layout()
# Add a single legend at the top of the figure
handles, labels = axs[0].get_legend_handles_labels()
fig.legend(
handles, labels, loc='upper center', ncol=min(16, 8),
bbox_to_anchor=(0.5, 1.05), fontsize='small')
plt.subplots_adjust(top=0.9)
plt.show()
```
%% Cell type:markdown id: tags:
%% Cell type:code id: tags:
``` python
display(Markdown(f"## Display fitting results using {fit_func} model"))
## Histogram data for all cells for each module
def plot_stacked_heatmap(geom, stacked_constants, title):
_, ax = plt.subplots(figsize=(8, 6))
vmin, vmax = np.nanpercentile(stacked_constants, [5, 95])
geom.plot_data_fast(
stacked_constants,
ax=ax,
vmin=vmin,
vmax=vmax,
colorbar={'shrink': 1, 'pad': 0.01, 'label': title},
)
ax.set_title(title, size=12)
plt.show()
def plot_cells_comparison(data, title):
n_cells = data.shape[-1]
if n_cells == 1: # no need to plot for single cell
return
# For multiple cells, create a grid layout
n_cols = min(4, n_cells) # Max 4 columns
n_rows = math.ceil(n_cells / n_cols)
fig, axes = plt.subplots(
n_rows, n_cols, figsize=(5*n_cols//2, 5*n_rows//2))
fig.suptitle(title, fontsize=12)
# Flatten axes array for easy indexing
axes = axes.flatten() if n_cells > 1 else [axes]
vmin, vmax = np.nanpercentile(data, [5, 95])
for i in range(n_cells):
ax = axes[i]
im = ax.imshow(
data[..., i],
cmap='viridis',
vmin=vmin,
vmax=vmax
)
ax.set_title(f'Cell {i}', size=10)
plt.colorbar(im, ax=ax)
# Hide unused subplots
for i in range(n_cells, len(axes)):
axes[i].set_visible(False)
plt.tight_layout()
plt.show()
def plot_histogram_of_values(data, title, bins=100):
data = data.flatten()
# Count failed fittings. -1000 and -1 used for failed fitting.
failed_percentage = (np.sum((data == -1000) | (data == -1)) / len(data)) * 100
# Separate valid data and failed fittings
valid_data = data[(data != -1000) & (data != -1)]
plt.figure(figsize=(6, 3))
# Use Interquartile Range (IQR) for defining histogram range.
q1, q3 = np.percentile(valid_data, [25, 75])
iqr = q3 - q1
lower = max(q1 - 1.5 * iqr, np.min(valid_data))
upper = min(q3 + 1.5 * iqr, np.max(valid_data))
plt.hist(data, bins=bins, range=(lower, upper), edgecolor='black')
plt.xlabel('Value')
plt.ylabel('Count')
plt.title(title, size=12)
mean = np.mean(valid_data)
median = np.median(valid_data)
plt.axvline(
mean,
color='r',
linestyle='dashed',
linewidth=2,
label=f'Mean: {mean:.2f}'
)
plt.axvline(
median,
color='g',
linestyle='dashed',
linewidth=2,
label=f'Median: {median:.2f}'
)
# Add text box with failed fitting percentage.
plt.text(
0.05, 0.95,
f"Failed Fittings:\n {failed_percentage:.2f}%",
transform=plt.gca().transAxes,
verticalalignment='top',
bbox=dict(
boxstyle='round',
facecolor='white',
alpha=0.8),
fontsize=10)
plt.legend()
plt.tight_layout()
plt.show()
```
%% Cell type:code id: tags:
``` python
if nmods > 4:
fixed_cols = 4
row, col = math.ceil(nmods / fixed_cols), 4
else:
row, col = 1, nmods
fig, axs = plt.subplots(row, col, figsize=(20, 10)) # Adjust for spatial histograms
axs = axs.ravel()
for i, da in enumerate (da_to_pdu.keys()):
with h5file(
Path(out_folder) / histo_temp.format(run, proposal.upper(), da),
'r'
) as f:
histos = f["histos"][:]
row, col = divmod(i, 4)
for m in range(histos.shape[1]):
cell_hist = histos[m]
axs[i].plot(cell_hist.ravel(), label=f'Cell {m}')
axs[i].set_title(f"{da} ({da_to_pdu[da]})")
for i, ax in enumerate(axs):
if i > nmods-1:
ax.set_visible(False) # Hide unused subplots
# Create a legend for the whole figure
handles, labels = axs[0].get_legend_handles_labels()
fig.legend(handles, labels, loc="center right" if nmods > 4 else "upper right")
plt.tight_layout(pad=3)
plt.show()
# Define datasets to plot
datasets = {
"gainMap_fit": 'Single Photon Peak Position (ADU)',
'sigmamap': 'Peak Width (σ)',
'alphamap': 'Charge Sharing Probability (α)', # 'Charge sharing parameter'
'chi2map': 'Goodness of Fit (χ²/ndf)' # 'Chi-square of fit'
}
for dataset, title in datasets.items():
stacked_constants = np.full(geom.expected_data_shape, np.nan)
all_data = []
av_modules = []
for i, da in enumerate(da_to_pdu.keys()):
file_path = Path(out_folder) / spectra_fit_temp.format(run, proposal.upper(), da, fit_func)
try:
with h5file(file_path, 'r') as f:
data = np.array(f[dataset])
stacked_constants[i] = np.moveaxis(np.mean(data, axis=-1), 0, 1)
all_data.append(data)
av_modules.append(da)
except Exception as e:
warning(f"Error while loading Fitting file {file_path}: {e}")
# Help to avoid plotting missing module.
# Plot stacked heatmap
plot_stacked_heatmap(geom, stacked_constants, title)
plot_histogram_of_values(
np.concatenate(all_data), f'Distribution of {title}')
# Plot cell comparison for the the last module module
plot_cells_comparison(all_data[-1], f'{title} - Module {av_modules[-1]}({da_to_pdu[av_modules[-1]]})')
```
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment