Compare revisions

Karim Ahmed · Karim Ahmed · Philipp Schmidt · Philipp Schmidt · Thomas Kluyver · Philipp Schmidt
--- a/notebooks/AGIPD/Characterize_AGIPD_Gain_FlatFields_NBC.ipynb
+++ b/notebooks/AGIPD/Characterize_AGIPD_Gain_FlatFields_NBC.ipynb
 %% Cell type:markdown id: tags:

 # Gain Characterization #


 %% Cell type:code id: tags:

 ``` python
 in_folder = "/gpfs/exfel/exp/SPB/202030/p900138/scratch/karnem/r0203_r0204_v01/" # the folder to read histograms from, required
 out_folder = ""  # the folder to output to, required
 hist_file_template = "hists_m{:02d}_sum.h5" # the template to use to access histograms
 modules = [10] # modules to correct, set to -1 for all, range allowed

 raw_folder = "/gpfs/exfel/exp/MID/202030/p900137/raw" # Path to raw image data used to create histograms
 proc_folder = "" # Path to corrected image data used to create histograms

 run = 449 # run number of the image data used to create histograms

 karabo_id = "MID_DET_AGIPD1M-1" # karabo_id
 karabo_da = ['-1']  # a list of data aggregators names, Default [-1] for selecting all data aggregators
 receiver_id = "{}CH0" # inset for receiver devices
 path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data
 h5path = 'INSTRUMENT/{}/DET/{}:xtdf/' # path in the HDF5 file to images
 h5path_idx = 'INDEX/{}/DET/{}:xtdf/' # path in the HDF5 file to the index of the images
 h5path_ctrl = '/CONTROL/{}/MDL/FPGA_COMP' # path to control information
 karabo_id_control = "MID_IRU_AGIPD1M1" # karabo-id for control device
 karabo_da_control = 'AGIPD1MCTRL00' # karabo DA for control information

 use_dir_creation_date = True # use the creation date of the input dir for database queries
 cal_db_interface = "tcp://max-exfl016:8015#8045" # the database interface to use
 cal_db_timeout = 30000 # in milliseconds
 local_output = True # output constants locally
 db_output = False # output constants to database

 # Fit parameters
 peak_range = [-30, 30, 35, 70, 95, 135, 145, 220] # where to look for the peaks, [a0, b0, a1, b1, ...] exactly 8 elements
 peak_width_range = [0, 30, 0, 35, 0, 40, 0, 45] # fit limits on the peak widths, [a0, b0, a1, b1, ...] exactly 8 elements
 peak_norm_range = [0.0, -1, 0, -1, 0, -1, 0, -1] # fit limits on the peak normalisations, [a0, b0, a1, b1, ...]; -1 is replaced by None before the fit (presumably meaning no limit - confirm)

 # Bad-pixel thresholds (gain evaluation error). Contribute to BadPixel bit "Gain_Evaluation_Error"
 peak_lim = [-30, 30] # Limit of position of noise peak
 d0_lim = [10, 80] # hard limits for distance between noise and first peak
 peak_width_lim = [0.9, 1.55, 0.95, 1.65] # hard limits on the peak widths for first and second peak, in units of the noise peak. 4 parameters.
 chi2_lim = [0, 3.0] # Hard limit on chi2/nDOF value

 intensity_lim = 15 # Threshold on standard deviation of a histogram in ADU. Contribute to BadPixel bit "No_Entry"
 gain_lim = [0.8, 1.2] # Threshold on gain in relative number. Contribute to BadPixel bit "Gain_deviation"

 cell_range = [1, 3] # range of cells to be considered, [0,0] for all
 pixel_range = [0, 0, 32, 32] # range of pixels x1,y1,x2,y2 to consider [0,0,512,128] for all
 max_bins = 0 # Maximum number of bins to consider, 0 for all bins
 batch_size = [1, 8, 8] # batch size: [cell,x,y]
 fit_range = [0, 0] # range of a histogram considered for fitting in ADU. Dynamically evaluated in case [0,0]
 n_peaks_fit = 4 # Number of gaussian peaks to fit including noise peak
 fix_peaks = False # Fix distance between photon peaks
 do_minos = False # This is an additional feature of minuit to evaluate errors.
 sigma_limit = 0. # If >0, repeat fit keeping only bins within mu +- sigma_limit*sigma

 # Detector conditions
 max_cells = 0 # number of memory cells used, set to 0 to automatically infer
 bias_voltage = 300  # Bias voltage
 acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine
 gain_setting = 0.1 # the gain setting, use 0.1 to try to auto-determine
 photon_energy = 8.05 # photon energy in keV
 integration_time = -1 # integration time, negative values for auto-detection.
 ```

 %% Cell type:code id: tags:

 ``` python
 import glob
 import os
 import traceback
 import warnings
 from multiprocessing import Pool

 import h5py
 import matplotlib.pyplot as plt
 import numpy as np
 import sharedmem
 import XFELDetAna.xfelpyanatools as xana
-from cal_tools.agipdlib import get_bias_voltage
 from cal_tools.agipdutils_ff import (
    BadPixelsFF,
    any_in,
    fit_n_peaks,
    gaussian,
    gaussian_sum,
    get_mask,
    get_starting_parameters,
    set_par_limits,
 )
 from cal_tools.ana_tools import get_range, save_dict_to_hdf5
 from iminuit import Minuit
 from XFELDetAna.plotting.heatmap import heatmapPlot
 from XFELDetAna.plotting.simpleplot import simplePlot

 # %load_ext autotime
 %matplotlib inline
 warnings.filterwarnings('ignore')
 ```

 %% Cell type:code id: tags:

 ``` python
 # The flat [a0, b0, a1, b1, ...] parameter lists are turned into one
 # (lower, upper) row per peak. The number of rows is inferred from the
 # length of the list, so that the lists are not tied to exactly 4 peaks.
 peak_range = np.reshape(peak_range, (-1, 2))
 peak_width_range = np.reshape(peak_width_range, (-1, 2))
 peak_width_lim = np.reshape(peak_width_lim, (-1, 2))
 # -1 in the flat list is replaced by None before reshaping
 peak_norm_range = [None if x == -1 else x for x in peak_norm_range]
 peak_norm_range = np.reshape(peak_norm_range, (-1, 2))
 # Only the first of the given modules is processed by this notebook
 module = modules[0]
 ```

 %% Cell type:code id: tags:

 ``` python
-# This is never used in this notebook and should be removed
-
-# if bias_voltage == 0:
-#     # Read the bias voltage from files, if recorded.
-#     # If not available, make use of the historical voltage the detector is running at
-#     control_filename = f'{raw_folder}/r{run:04d}/RAW-R{run:04d}-{karabo_da_control}-S00000.h5'
-#     bias_voltage = get_bias_voltage(control_filename, karabo_id_control)
-#     bias_voltage = bias_voltage if bias_voltage is not None else 300
-# print(f"Bias voltage: {bias_voltage}V")
-```
-
-%% Cell type:code id: tags:
-
-``` python
 def idx_gen(batch_start, batch_size):
    """
    Generate the (cell, x, y) index triples of one batch.

    Along each of the three axes the indices run from batch_start
    (inclusive) to batch_start + batch_size (exclusive). The last
    axis varies fastest.
    """
    yield from (
        (cell, row, col)
        for cell in range(batch_start[0], batch_start[0] + batch_size[0])
        for row in range(batch_start[1], batch_start[1] + batch_size[1])
        for col in range(batch_start[2], batch_start[2] + batch_size[2])
    )
 ```

 %% Cell type:code id: tags:

 ``` python
 # Number of selected pixels along each axis
 n_pixels_x = pixel_range[2] - pixel_range[0]
 n_pixels_y = pixel_range[3] - pixel_range[1]

 hist_path = f"{in_folder}/{hist_file_template.format(module)}"
 hist_data = {}
 with h5py.File(hist_path, 'r') as hf:
    # The histogram metadata is read completely
    for name in ('cellId', 'hRange', 'nBins'):
        hist_data[name] = np.array(hf[name][()])

    # [0, 0] selects all cells stored in the file
    if cell_range == [0,0]:
        cell_range[1] = hist_data['cellId'].shape[0]

    # 0 selects all bins stored in the file
    if max_bins == 0:
        max_bins = hist_data['nBins']

    # Only the requested cells and bins are read from the file
    cell_sel = slice(cell_range[0], cell_range[1])
    hist_data['cellId'] = hist_data['cellId'][cell_sel]
    hist_data['hist'] = np.array(hf['hist'][cell_sel, :max_bins, :])

 n_cells = cell_range[1] - cell_range[0]
 # Unfold the last axis into (512, 128) pixels, then cut out pixel_range
 hist_data['hist'] = hist_data['hist'].reshape(n_cells, max_bins, 512, 128)[
    :, :, pixel_range[0]:pixel_range[2], pixel_range[1]:pixel_range[3]]

 print(f'Data shape {hist_data["hist"].shape}')

 # Bin centres, restricted to the bins that were read
 bin_edges = np.linspace(hist_data['hRange'][0], hist_data['hRange'][1], int(hist_data['nBins']+1))
 x = 0.5 * (bin_edges[1:] + bin_edges[:-1])[:max_bins]


 # Start indices [cell, x, y] of all batches
 batches = [
    [c_idx, x_idx, y_idx]
    for c_idx in range(0, n_cells, batch_size[0])
    for x_idx in range(0, n_pixels_x, batch_size[1])
    for y_idx in range(0, n_pixels_y, batch_size[2])
 ]

 print(f'Number of batches {len(batches)}')
 ```

 %% Cell type:code id: tags:

 ``` python
 def fit_batch(batch_start):
    """
    Fit the histograms of all (cell, x, y) entries of one batch.

    The results are written in place into the arrays of the global
    `fit_result` dictionary, nothing is returned. As long as an entry
    ends up with an empty mask, its fit result is used as starting
    parameters of the following entry; otherwise the starting
    parameters are evaluated anew from the next histogram.

    :param batch_start: indices [cell, x, y] of the first entry of the batch
    """
    mask_shape = fit_result['mask'].shape
    prev = None
    for c_idx, x_idx, y_idx in idx_gen(batch_start, batch_size):
        # A batch may reach beyond the selected cells/pixels if their
        # number is not a multiple of the batch size. Such entries are
        # skipped: flagging them as failed below would raise an uncaught
        # IndexError from inside the except clause.
        if (c_idx >= mask_shape[0] or x_idx >= mask_shape[1]
                or y_idx >= mask_shape[2]):
            continue

        # Fresh dictionary per entry, nothing is carried over between fits
        current_result = {}
        try:
            y = hist_data['hist'][c_idx, :, x_idx, y_idx]

            if prev is None:
                prev, _ = get_starting_parameters(x, y, peak_range, n_peaks=n_peaks_fit)

            if fit_range == [0, 0]:
                # Dynamic range: from 2 sigma below the noise peak
                # up to 1 sigma above the last fitted peak
                last_peak = n_peaks_fit - 1
                frange = (prev['g0mean']-2*prev['g0sigma'],
                          prev[f'g{last_peak}mean'] + prev[f'g{last_peak}sigma'])
            else:
                frange = fit_range

            set_par_limits(prev, peak_range, peak_norm_range,
                           peak_width_range, n_peaks_fit)
            minuit = fit_n_peaks(x, y, prev, frange,
                                 do_minos=do_minos, n_peaks=n_peaks_fit,
                                 fix_d01=fix_peaks, sigma_limit=sigma_limit,)

            # FIXME: this line is wrong if fix_peaks is True
            # NOTE(review): using the width of frange in ADU as number of
            # fitted bins presumably assumes 1 ADU wide bins - confirm
            ndof = np.rint(frange[1]-frange[0])-len(minuit.args)
            current_result['chi2_ndof'] = minuit.fval/ndof
            res = minuit.fitarg
            if fix_peaks : ## set g2 and g3 mean correctly
                for i in range(2,n_peaks_fit):
                    d = res['g1mean'] - res['g0mean']
                    res[f'g{i}mean'] = res['g0mean'] + d*i
            current_result.update(res)
            current_result.update(minuit.get_fmin())

            fit_result['chi2_ndof'][c_idx, x_idx, y_idx] = current_result['chi2_ndof']

            # Store only the parameters an output array was allocated for
            for key in res.keys():
                if key in fit_result:
                    fit_result[key][c_idx, x_idx, y_idx] = res[key]

            fit_result['mask'][c_idx, x_idx, y_idx] = get_mask(current_result,
                                                                    peak_lim,
                                                                    d0_lim, chi2_lim,
                                                                    peak_width_lim)
        except Exception as e:
            fit_result['mask'][c_idx, x_idx,
                                    y_idx] = BadPixelsFF.FIT_FAILED.value
            print(c_idx, x_idx, y_idx, e, traceback.format_exc())

        # Only a good fit is used to seed the fit of the next entry
        if fit_result['mask'][c_idx, x_idx, y_idx] == 0:
            prev = res
        else:
            prev = None
 ```

 %% Cell type:markdown id: tags:

 ## Single fit ##

 Left plot shows starting parameters for fitting. Right plot shows result of the fit. Errors are evaluated with minos.

 %% Cell type:code id: tags:

 ``` python
 # Example histogram of cell index 1 and pixel (1, 1). The indices are
 # clipped, so that the cell also runs if only one cell or pixel is selected.
 hist_shape = hist_data['hist'].shape
 hist = hist_data['hist'][min(1, hist_shape[0]-1), :,
                          min(1, hist_shape[2]-1), min(1, hist_shape[3]-1)]
 prev, shapes = get_starting_parameters(x, hist, peak_range, n_peaks=n_peaks_fit)

 if fit_range == [0, 0]:
    # Dynamic range: from 2 sigma below the noise peak up to 1 sigma
    # above the last fitted peak, the same as used in fit_batch
    last_peak = n_peaks_fit - 1
    frange = (prev['g0mean']-2*prev['g0sigma'],
              prev[f'g{last_peak}mean'] + prev[f'g{last_peak}sigma'])
 else:
    frange = fit_range

 set_par_limits(prev, peak_range, peak_norm_range,
               peak_width_range, n_peaks=n_peaks_fit)
 # For this single example fit minos is always requested
 minuit = fit_n_peaks(x, hist, prev, frange,
                     do_minos=True, n_peaks=n_peaks_fit,
                     fix_d01=fix_peaks,
                     sigma_limit=sigma_limit,
                    )
 print (minuit.get_fmin())
 minuit.print_matrix()
 print(minuit.get_param_states())
 ```

 %% Cell type:code id: tags:

 ``` python
 # Collect the datasets shown in the right panel of the single fit plot
 res = minuit.fitarg
 if fix_peaks :
    # With a fixed peak distance, peaks 2 and above are placed at
    # multiples of the distance between noise and first photon peak
    for i in range(2,n_peaks_fit):
        d = res['g1mean'] - res['g0mean']
        res[f'g{i}mean'] = res['g0mean'] + d*i
 err = minuit.errors
 p = minuit.args
 ya = np.arange(0,1e4)
 y = gaussian_sum(x,n_peaks_fit, *p)
 peak_colors = ['g', 'y', 'b', 'orange']

 peak_hist = hist.copy()
 d=[]
 if sigma_limit > 0 :
    # Bins within sigma_limit standard deviations of any fitted peak
    sel2 = np.zeros(x.shape, dtype=bool)
    for i in range(n_peaks_fit):
        sel2 |= np.abs(x - res[f'g{i}mean']) < sigma_limit*res[f'g{i}sigma']
    peak_hist[~sel2] = 0
    # Remaining bins are drawn as a separate dataset
    valley_hist = hist.copy()
    valley_hist[sel2] = 0
    d.append({'x': x,
              'y': valley_hist.astype(np.float64),
              'y_err': np.sqrt(valley_hist),
              'drawstyle': 'bars',
              'errorstyle': 'bars',
              'transparency': '95%',
              'errorcoarsing': 3,
              'label': 'X-ray Data'
             })
    htitle = f'X-ray Data, (μ±{sigma_limit:0.1f}σ)'
 else :
    htitle = 'X-ray Data'

 d.append({'x': x,
          'y': peak_hist.astype(np.float64),
          'y_err': np.sqrt(peak_hist),
          'drawstyle': 'bars',
          'errorstyle': 'bars',
          'errorcoarsing': 3,
          'label': htitle,
         }
        )
 # Fitted sum of gaussians; y2 is the residual in units of sqrt(counts)
 d.append({'x': x,
          'y': y,
          'y2': (hist-y)/np.sqrt(hist),
          'drawstyle':'line',
          'drawstyle2': 'steps-mid',
          'label': 'Fit'
         }
        )

 for i in range(n_peaks_fit):
    # Colors are cycled in case more peaks than colors are fitted
    color = peak_colors[i % len(peak_colors)]
    # Individual gaussian of the peak
    d.append({'x': x,
             'y': gaussian(x, res[f'g{i}n'], res[f'g{i}mean'], res[f'g{i}sigma']),
             'drawstyle':'line',
             'color': color,
             })
    # Vertical dashed line at the fitted peak position
    d.append({'x': np.full_like(ya, res[f'g{i}mean']),
              'y': ya,
              'drawstyle': 'line',
              'linestyle': 'dashed',
              'color': color,
              'label': f'peak {i} = {res[f"g{i}mean"]:0.1f} $ \\pm $ {err[f"g{i}mean"]:0.2f} ADU' })
 ```

 %% Cell type:code id: tags:

 ``` python
 fig, (ax1, ax2) = plt.subplots(1, 2)
 fig.set_size_inches(16, 7)

 # Left panel: data and gaussian of the starting parameters of each peak.
 # The first three entries of a shape are passed to gaussian(), the
 # fourth one selects the bins belonging to the peak.
 for i, shape in enumerate(shapes):
    idx = shape[3]
    ax1.errorbar(
        x[idx], hist[idx],
        np.sqrt(hist[idx]),
        marker='+', ls='',
    )
    yg = gaussian(x[idx], *shape[:3])
    peak_label = f'Peak {i}: {shape[1]:0.1f} $ \\pm $ {shape[2]:0.2f} ADU'
    ax1.plot(x[idx], yg, label=peak_label)
 ax1.grid(True)
 ax1.set_xlabel("Signal [ADU]")
 ax1.set_ylabel("Counts")
 ax1.legend(ncol=2)

 # Right panel: result of the fit within the fitted range
 _ = xana.simplePlot(
    d,
    use_axis=ax2,
    x_label='Signal [ADU]',
    y_label='Counts',
    secondpanel=True, y_log=False,
    x_range=(frange[0], frange[1]),
    y_range=(1., np.max(hist)*1.6),
    legend='top-left-frame-ncol2',
 )

 plt.show()
 ```

 %% Cell type:markdown id: tags:

 ## All fits ##

 %% Cell type:code id: tags:

 ``` python
 # Allocate one shared array per stored quantity: the fit arguments
 # except their limit_/fix_ entries, plus chi2/nDOF, mask and gain
 keys = [name for name in minuit.fitarg.keys()
        if 'limit_' not in name and 'fix_' not in name]
 keys.extend(['chi2_ndof', 'mask', 'gain'])

 result_shape = [n_cells, n_pixels_x, n_pixels_y]
 fit_result = {}
 for key in keys:
    # The mask holds integers, everything else single precision floats
    dtype = 'i4' if key == 'mask' else 'f4'
    fit_result[key] = sharedmem.empty(result_shape, dtype=dtype)
 ```

 %% Cell type:code id: tags:

 ``` python
 # Perform fitting
 # Every batch is processed by fit_batch in a worker of the pool.
 # fit_batch writes into the fit_result arrays and has no return
 # statement, hence const_out only holds one None per batch.
 with Pool() as pool:
    const_out = pool.map(fit_batch, batches)
 ```

 %% Cell type:code id: tags:

 ``` python
 # Evaluate bad pixels
 # Gain: distance between noise and first photon peak per keV
 fit_result['gain'] = (fit_result['g1mean'] - fit_result['g0mean'])/photon_energy

 # Calculate histogram width and evaluate cut
 h_sums = np.sum(hist_data['hist'], axis=1)
 hist_norm = hist_data['hist'] / h_sums[:, None, :, :]
 hist_mean = np.sum(hist_norm * x[None, :, None, None], axis=1)
 hist_sqr = (x[None, :, None, None] - hist_mean[:, None, ...])**2
 hist_std = np.sqrt(np.sum(hist_norm * hist_sqr, axis=1))

 # An empty histogram has a zero sum and therefore a NaN width. As any
 # comparison with NaN is False, such entries are selected explicitly.
 no_entry = np.isnan(hist_std) | (hist_std < intensity_lim)
 fit_result['mask'][no_entry] |= BadPixelsFF.NO_ENTRY.value

 # Bad pixel on gain deviation
 # The mean gain per cell is evaluated from entries without any flag
 gains = np.copy(fit_result['gain'])
 gains[fit_result['mask']>0] = np.nan
 gain_mean = np.nanmean(gains, axis=(1,2))

 gain_high = fit_result['gain'] > gain_mean[:,None,None]*gain_lim[1]
 gain_low = fit_result['gain'] < gain_mean[:,None,None]*gain_lim[0]
 fit_result['mask'][gain_high | gain_low] |= BadPixelsFF.GAIN_DEVIATION.value
 ```

 %% Cell type:code id: tags:

 ``` python
 # Save fit results
 # One file per module, all arrays are stored below the 'data' key
 out_name = f'{out_folder}/fits_m{module:02d}.h5'
 os.makedirs(out_folder, exist_ok=True)
 print(f'Save to file: {out_name}')
 save_dict_to_hdf5({'data': fit_result}, out_name)
 ```

 %% Cell type:markdown id: tags:

 ## Summary across cells ##

 %% Cell type:code id: tags:

 ``` python
 # Axis labels of the summary plots, in the order of the plotted keys.
 # Backslashes of the LaTeX commands are escaped explicitly.
 labels = [
    "Noise peak [ADU]",
    "First photon peak [ADU]",
    f"gain [ADU/keV] $\\gamma$={photon_energy} [keV]",
    "$\\chi^2$/nDOF",
    "Fraction of bad pixels",
 ]

 for i, key in enumerate(['g0mean', 'g1mean', 'gain', 'chi2_ndof', 'mask']):
    fig = plt.figure(figsize=(20,5))
    ax = fig.add_subplot(121)
    data = fit_result[key]
    if key == 'mask':
        # Any flag counts as bad, the mean over cells gives the fraction
        data = data > 0
        vmin, vmax = [0, 1]
    else:
        vmin, vmax = get_range(data, 5)
    # Left: map of the mean over all cells
    _ = heatmapPlot(
        np.mean(data, axis=0).T,
        add_panels=False, cmap='viridis', use_axis=ax,
        vmin=vmin, vmax=vmax, lut_label=labels[i]
    )

    # Right: distribution of all values, not shown for the mask
    if key != 'mask':
        vmin, vmax = get_range(data, 7)
        ax = fig.add_subplot(122)
        _ = xana.histPlot(
            ax, data.flatten(),
            bins=45,range=[vmin, vmax],
            log=True,color='red',histtype='stepfilled'
        )
        ax.set_xlabel(labels[i])
        ax.set_ylabel("Counts")
 ```

 %% Cell type:markdown id: tags:

 ## Histograms of fit parameters ##

 %% Cell type:code id: tags:

 ``` python
 # Distribution of the histogram widths together with the applied threshold
 fig = plt.figure(figsize=(10, 5))
 ax = fig.add_subplot(111)
 a = ax.hist(hist_std.flatten(), bins=100, range=(0,100) )
 # Red line at the threshold, as high as the most populated bin
 highest_bin = np.nanmax(a[0])
 ax.plot([intensity_lim, intensity_lim], [0, highest_bin], linewidth=1.5, color='red' )
 ax.set_xlabel('Histogram width [ADU]', fontsize=14)
 ax.set_ylabel('Number of histograms', fontsize=14)
 n_below = hist_std[hist_std<intensity_lim].shape[0]
 ax.set_title(f'{n_below} histograms below threshold in {intensity_lim} ADU',
              fontsize=14, fontweight='bold')
 ax.grid()
 ax.set_yscale('log')
 ```

 %% Cell type:code id: tags:

 ``` python
 def plot_par_distr(par):
    """
    Plot the distribution of one fit parameter for every fitted peak.

    One panel per peak is drawn, showing the values of all fits (red)
    and of the fits without any mask flag (green).

    :param par: name of the parameter without the peak prefix,
                e.g. 'mean' or 'sigma'
    """
    fig = plt.figure(figsize=(16, 5))
    good = fit_result['mask'] == 0

    for peak in range(n_peaks_fit):
        values = fit_result[f"g{peak}{par}"]
        # Peak positions are shown within their search range,
        # any other parameter within a fixed range
        if par == 'mean':
            plt_range = [peak_range[peak][0], peak_range[peak][1]]
        else:
            plt_range = (-1, 50)

        # One bin per unit of the plotted range
        num_bins = int(plt_range[1] - plt_range[0])
        ax = fig.add_subplot(1, n_peaks_fit, peak+1)
        _ = xana.histPlot(ax, values.flatten(),
                          bins=num_bins, range=plt_range,
                          log=True, color='red',
                          label='all fits',)

        a = ax.hist(values[good].flatten(),
                    bins=num_bins, range=plt_range,
                    log=True, color='g',
                    label='good fits only',
                   )
        ax.set_xlabel(f"g{peak} {par} [ADU]")
        ax.legend()

 plot_par_distr('mean')
 plot_par_distr('sigma')
 ```

 %% Cell type:code id: tags:

 ``` python
 # Distributions of peak distance and gain, for all and for good fits
 sel = fit_result['mask'] == 0

 gain = fit_result["gain"]
 dsets = {'d01 [ADU]': fit_result["g1mean"] - fit_result["g0mean"],
         'gain [ADU/keV]': gain,
         'gain relative to module mean': gain / np.nanmean(gain_mean),
        }
 # Thresholds drawn as vertical lines, selected by a part of the label
 thresholds = {'d01': d0_lim, 'rel': gain_lim}

 fig = plt.figure(figsize=(16,5))
 for i, (par, data) in enumerate(dsets.items()):
    ax = fig.add_subplot(1, 3, i+1)
    plt_range = get_range(data, 10)
    num_bins = 100
    _ = xana.histPlot(ax, data.flatten(),
                      bins=num_bins, range=plt_range,
                      log=True, color='red',
                      label='all fits',)

    a = ax.hist(data[sel].flatten(),
                bins=num_bins, range=plt_range,
                log=True, color='g',
                label='good fits only',
               )
    ax.set_xlabel(par)
    ax.legend()
    for tag, limits in thresholds.items():
        if tag in par:
            ax.axvline(limits[0])
            ax.axvline(limits[1])
 ```

 %% Cell type:markdown id: tags:

 ## Summary across pixels ##

 Mean and median values are calculated across all pixels for each memory cell.

 %% Cell type:code id: tags:

 ``` python
 def plot_error_band(key, x, ax):
    """
    Plot mean and median of a fit result per cell with their spread.

    Entries with any mask flag are excluded. The bands show
    mean +- standard deviation and median +- median absolute deviation,
    both evaluated over all pixels of a cell. If fit errors were stored
    as `error_<key>`, the mean fit error is drawn as an additional band.

    :param key: key of the quantity in the global `fit_result`
    :param x: positions on the x axis, one per cell
    :param ax: matplotlib axes to draw on
    """
    cdata = np.copy(fit_result[key])
    cdata[fit_result['mask']>0] = np.nan

    mean = np.nanmean(cdata, axis=(1,2))
    median = np.nanmedian(cdata, axis=(1,2))
    std = np.nanstd(cdata, axis=(1,2))
    mad = np.nanmedian(np.abs(cdata - median[:,None,None]), axis=(1,2))

    # The color is given by keyword only; an additional format string
    # color would be redundant
    ax.plot(x, mean, color='#3F7F4C', label=" mean value ")
    ax.plot(x, median, 'o', color='red', label=" median value ")
    ax.fill_between(x, mean-std, mean+std,
                     alpha=0.6, edgecolor='#3F7F4C', facecolor='#7EFF99',
                     linewidth=1, linestyle='dashdot', antialiased=True,
                     label=" mean value $ \\pm $ std ")

    ax.fill_between(x, median-mad, median+mad,
                     alpha=0.3, edgecolor='red', facecolor='red',
                     linewidth=1, linestyle='dashdot', antialiased=True,
                     label=" median value $ \\pm $ mad ")

    if f'error_{key}' in fit_result:
        cerr = np.copy(fit_result[f'error_{key}'])
        cerr[fit_result['mask']>0] = np.nan

        meanerr = np.nanmean(cerr, axis=(1,2))
        ax.fill_between(x, mean-meanerr, mean+meanerr,
                 alpha=0.6, edgecolor='#089FFF', facecolor='#089FFF',
                 linewidth=1, linestyle='dashdot', antialiased=True,
                 label=" mean fit error ")


 # One x position per selected cell. cell_range is used as a half-open
 # slice when the histograms are read, hence its end is excluded here.
 # A separate name keeps the bin centres stored in `x` intact.
 # NOTE(review): these are positions on the cell axis of the input file,
 # presumably identical to the memory cell IDs - confirm
 cell_positions = np.arange(cell_range[0], cell_range[1])

 for i, key in enumerate(['g0mean', 'g1mean', 'gain', 'chi2_ndof']):

    fig = plt.figure(figsize=(10, 5))
    ax = fig.add_subplot(111)
    plot_error_band(key, cell_positions, ax)

    ax.set_xlabel('Memory Cell ID', fontsize=14)
    ax.set_ylabel(labels[i], fontsize=14)
    ax.grid()
    ax.legend()
 ```

 %% Cell type:markdown id: tags:

 ## Cut flow ##

 %% Cell type:code id: tags:

 ``` python
 fig, ax = plt.subplots()
 fig.set_size_inches(10, 5)

 # Cuts in the order they enter the cut flow, with the label of their bar
 cuts = [
    ('Fit failed', BadPixelsFF.FIT_FAILED),
    ('Accurate covar', BadPixelsFF.ACCURATE_COVAR),
    ('Chi2/nDOF', BadPixelsFF.CHI2_THRESHOLD),
    ('Gain', BadPixelsFF.GAIN_THRESHOLD),
    ('Noise peak', BadPixelsFF.NOISE_PEAK_THRESHOLD),
    ('Peak width', BadPixelsFF.PEAK_WIDTH_THRESHOLD),
    ('No Entry', BadPixelsFF.NO_ENTRY),
    ('Gain deviation', BadPixelsFF.GAIN_DEVIATION),
 ]
 cut_labels = [label for label, _ in cuts]

 bar_pos = np.arange(len(cuts))
 width = 0.3

 msk = fit_result['mask']
 n_fits = np.prod(msk.shape)

 # cut_flow: entries selected by this or any previous cut
 # single_cut: entries selected by this cut alone
 cut_flow = []
 single_cut = []
 applied_bits = 0
 for _, flag in cuts:
    applied_bits |= flag.value
    cut_flow.append(any_in(msk, applied_bits))
    single_cut.append(any_in(msk, flag.value))

 # Percentage of fits which are not selected by the cuts
 cut_flow = (1 - np.sum(cut_flow, axis=(1,2,3))/n_fits)*100
 single_cut = (1 - np.sum(single_cut, axis=(1,2,3))/n_fits)*100

 ax.bar(bar_pos, single_cut, width, label='Only this cut')
 ax.bar(bar_pos, cut_flow, width, label='Cut flow')
 ax.set_xticks(bar_pos)
 ax.set_xticklabels(cut_labels, rotation=90)
 # The lower axis limit follows the cut flow after the 'Peak width' cut
 ax.set_ylim(cut_flow[5]-0.5, 100)
 ax.grid(True)
 ax.legend()
 plt.show()
 ```

 %% Cell type:markdown id: tags:

 # Gain Characterization #


 %% Cell type:code id: tags:

 ``` python
 in_folder = "/gpfs/exfel/exp/SPB/202030/p900138/scratch/karnem/r0203_r0204_v01/" # the folder to read histograms from, required
 out_folder = ""  # the folder to output to, required
 hist_file_template = "hists_m{:02d}_sum.h5" # the template to use to access histograms
 modules = [10] # modules to correct, set to -1 for all, range allowed

 raw_folder = "/gpfs/exfel/exp/MID/202030/p900137/raw" # Path to raw image data used to create histograms
 proc_folder = "" # Path to corrected image data used to create histograms

 run = 449 # run number of the image data used to create histograms

 karabo_id = "MID_DET_AGIPD1M-1" # karabo_id
 karabo_da = ['-1']  # a list of data aggregators names, Default [-1] for selecting all data aggregators
 receiver_id = "{}CH0" # inset for receiver devices
 path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data
 h5path = 'INSTRUMENT/{}/DET/{}:xtdf/' # path in the HDF5 file to images
 h5path_idx = 'INDEX/{}/DET/{}:xtdf/' # path in the HDF5 file to the index of the images
 h5path_ctrl = '/CONTROL/{}/MDL/FPGA_COMP' # path to control information
 karabo_id_control = "MID_IRU_AGIPD1M1" # karabo-id for control device
 karabo_da_control = 'AGIPD1MCTRL00' # karabo DA for control information

 use_dir_creation_date = True # use the creation date of the input dir for database queries
 cal_db_interface = "tcp://max-exfl016:8015#8045" # the database interface to use
 cal_db_timeout = 30000 # in milliseconds
 local_output = True # output constants locally
 db_output = False # output constants to database

 # Fit parameters
 peak_range = [-30, 30, 35, 70, 95, 135, 145, 220] # where to look for the peaks, [a0, b0, a1, b1, ...] exactly 8 elements
 peak_width_range = [0, 30, 0, 35, 0, 40, 0, 45] # fit limits on the peak widths, [a0, b0, a1, b1, ...] exactly 8 elements
 peak_norm_range = [0.0, -1, 0, -1, 0, -1, 0, -1] # fit limits on the peak normalisations, [a0, b0, a1, b1, ...]; -1 is replaced by None before the fit (presumably meaning no limit - confirm)

 # Bad-pixel thresholds (gain evaluation error). Contribute to BadPixel bit "Gain_Evaluation_Error"
 peak_lim = [-30, 30] # Limit of position of noise peak
 d0_lim = [10, 80] # hard limits for distance between noise and first peak
 peak_width_lim = [0.9, 1.55, 0.95, 1.65] # hard limits on the peak widths for first and second peak, in units of the noise peak. 4 parameters.
 chi2_lim = [0, 3.0] # Hard limit on chi2/nDOF value

 intensity_lim = 15 # Threshold on standard deviation of a histogram in ADU. Contribute to BadPixel bit "No_Entry"
 gain_lim = [0.8, 1.2] # Threshold on gain in relative number. Contribute to BadPixel bit "Gain_deviation"

 cell_range = [1, 3] # range of cells to be considered, [0,0] for all
 pixel_range = [0, 0, 32, 32] # range of pixels x1,y1,x2,y2 to consider [0,0,512,128] for all
 max_bins = 0 # Maximum number of bins to consider, 0 for all bins
 batch_size = [1, 8, 8] # batch size: [cell,x,y]
 fit_range = [0, 0] # range of a histogram considered for fitting in ADU. Dynamically evaluated in case [0,0]
 n_peaks_fit = 4 # Number of gaussian peaks to fit including noise peak
 fix_peaks = False # Fix distance between photon peaks
 do_minos = False # This is an additional feature of minuit to evaluate errors.
 sigma_limit = 0. # If >0, repeat fit keeping only bins within mu +- sigma_limit*sigma

 # Detector conditions
 max_cells = 0 # number of memory cells used, set to 0 to automatically infer
 bias_voltage = 300  # Bias voltage
 acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine
 gain_setting = 0.1 # the gain setting, use 0.1 to try to auto-determine
 photon_energy = 8.05 # photon energy in keV
 integration_time = -1 # integration time, negative values for auto-detection.
 ```

 %% Cell type:code id: tags:

 ``` python
 import glob
 import os
 import traceback
 import warnings
 from multiprocessing import Pool

 import h5py
 import matplotlib.pyplot as plt
 import numpy as np
 import sharedmem
 import XFELDetAna.xfelpyanatools as xana
-from cal_tools.agipdlib import get_bias_voltage
 from cal_tools.agipdutils_ff import (
    BadPixelsFF,
    any_in,
    fit_n_peaks,
    gaussian,
    gaussian_sum,
    get_mask,
    get_starting_parameters,
    set_par_limits,
 )
 from cal_tools.ana_tools import get_range, save_dict_to_hdf5
 from iminuit import Minuit
 from XFELDetAna.plotting.heatmap import heatmapPlot
 from XFELDetAna.plotting.simpleplot import simplePlot

 # %load_ext autotime
 %matplotlib inline
 warnings.filterwarnings('ignore')
 ```

 %% Cell type:code id: tags:

 ``` python
 # The flat [a0, b0, a1, b1, ...] parameter lists are turned into one
 # (lower, upper) row per peak. The number of rows is inferred from the
 # length of the list, so that the lists are not tied to exactly 4 peaks.
 peak_range = np.reshape(peak_range, (-1, 2))
 peak_width_range = np.reshape(peak_width_range, (-1, 2))
 peak_width_lim = np.reshape(peak_width_lim, (-1, 2))
 # -1 in the flat list is replaced by None before reshaping
 peak_norm_range = [None if x == -1 else x for x in peak_norm_range]
 peak_norm_range = np.reshape(peak_norm_range, (-1, 2))
 # Only the first of the given modules is processed by this notebook
 module = modules[0]
 ```

 %% Cell type:code id: tags:

 ``` python
-# This is never used in this notebook and should be removed
-
-# if bias_voltage == 0:
-#     # Read the bias voltage from files, if recorded.
-#     # If not available, make use of the historical voltage the detector is running at
-#     control_filename = f'{raw_folder}/r{run:04d}/RAW-R{run:04d}-{karabo_da_control}-S00000.h5'
-#     bias_voltage = get_bias_voltage(control_filename, karabo_id_control)
-#     bias_voltage = bias_voltage if bias_voltage is not None else 300
-# print(f"Bias voltage: {bias_voltage}V")
-```
-
-%% Cell type:code id: tags:
-
-``` python
 def idx_gen(batch_start, batch_size):
    """
    Generate the (cell, x, y) index triples of one batch.

    Along each of the three axes the indices run from batch_start
    (inclusive) to batch_start + batch_size (exclusive). The last
    axis varies fastest.
    """
    yield from (
        (cell, row, col)
        for cell in range(batch_start[0], batch_start[0] + batch_size[0])
        for row in range(batch_start[1], batch_start[1] + batch_size[1])
        for col in range(batch_start[2], batch_start[2] + batch_size[2])
    )
 ```

 %% Cell type:code id: tags:

 ``` python
 # Number of selected pixels along each axis
 n_pixels_x = pixel_range[2] - pixel_range[0]
 n_pixels_y = pixel_range[3] - pixel_range[1]

 hist_path = f"{in_folder}/{hist_file_template.format(module)}"
 hist_data = {}
 with h5py.File(hist_path, 'r') as hf:
    # The histogram metadata is read completely
    for name in ('cellId', 'hRange', 'nBins'):
        hist_data[name] = np.array(hf[name][()])

    # [0, 0] selects all cells stored in the file
    if cell_range == [0,0]:
        cell_range[1] = hist_data['cellId'].shape[0]

    # 0 selects all bins stored in the file
    if max_bins == 0:
        max_bins = hist_data['nBins']

    # Only the requested cells and bins are read from the file
    cell_sel = slice(cell_range[0], cell_range[1])
    hist_data['cellId'] = hist_data['cellId'][cell_sel]
    hist_data['hist'] = np.array(hf['hist'][cell_sel, :max_bins, :])

 n_cells = cell_range[1] - cell_range[0]
 # Unfold the last axis into (512, 128) pixels, then cut out pixel_range
 hist_data['hist'] = hist_data['hist'].reshape(n_cells, max_bins, 512, 128)[
    :, :, pixel_range[0]:pixel_range[2], pixel_range[1]:pixel_range[3]]

 print(f'Data shape {hist_data["hist"].shape}')

 # Bin centres, restricted to the bins that were read
 bin_edges = np.linspace(hist_data['hRange'][0], hist_data['hRange'][1], int(hist_data['nBins']+1))
 x = 0.5 * (bin_edges[1:] + bin_edges[:-1])[:max_bins]


 # Start indices [cell, x, y] of all batches
 batches = [
    [c_idx, x_idx, y_idx]
    for c_idx in range(0, n_cells, batch_size[0])
    for x_idx in range(0, n_pixels_x, batch_size[1])
    for y_idx in range(0, n_pixels_y, batch_size[2])
 ]

 print(f'Number of batches {len(batches)}')
 ```

 %% Cell type:code id: tags:

 ``` python
 def fit_batch(batch_start):
    """
    Fit the histograms of all (cell, x, y) entries of one batch.

    The results are written in place into the arrays of the global
    `fit_result` dictionary, nothing is returned. As long as an entry
    ends up with an empty mask, its fit result is used as starting
    parameters of the following entry; otherwise the starting
    parameters are evaluated anew from the next histogram.

    :param batch_start: indices [cell, x, y] of the first entry of the batch
    """
    mask_shape = fit_result['mask'].shape
    prev = None
    for c_idx, x_idx, y_idx in idx_gen(batch_start, batch_size):
        # A batch may reach beyond the selected cells/pixels if their
        # number is not a multiple of the batch size. Such entries are
        # skipped: flagging them as failed below would raise an uncaught
        # IndexError from inside the except clause.
        if (c_idx >= mask_shape[0] or x_idx >= mask_shape[1]
                or y_idx >= mask_shape[2]):
            continue

        # Fresh dictionary per entry, nothing is carried over between fits
        current_result = {}
        try:
            y = hist_data['hist'][c_idx, :, x_idx, y_idx]

            if prev is None:
                prev, _ = get_starting_parameters(x, y, peak_range, n_peaks=n_peaks_fit)

            if fit_range == [0, 0]:
                # Dynamic range: from 2 sigma below the noise peak
                # up to 1 sigma above the last fitted peak
                last_peak = n_peaks_fit - 1
                frange = (prev['g0mean']-2*prev['g0sigma'],
                          prev[f'g{last_peak}mean'] + prev[f'g{last_peak}sigma'])
            else:
                frange = fit_range

            set_par_limits(prev, peak_range, peak_norm_range,
                           peak_width_range, n_peaks_fit)
            minuit = fit_n_peaks(x, y, prev, frange,
                                 do_minos=do_minos, n_peaks=n_peaks_fit,
                                 fix_d01=fix_peaks, sigma_limit=sigma_limit,)

            # FIXME: this line is wrong if fix_peaks is True
            # NOTE(review): using the width of frange in ADU as number of
            # fitted bins presumably assumes 1 ADU wide bins - confirm
            ndof = np.rint(frange[1]-frange[0])-len(minuit.args)
            current_result['chi2_ndof'] = minuit.fval/ndof
            res = minuit.fitarg
            if fix_peaks : ## set g2 and g3 mean correctly
                for i in range(2,n_peaks_fit):
                    d = res['g1mean'] - res['g0mean']
                    res[f'g{i}mean'] = res['g0mean'] + d*i
            current_result.update(res)
            current_result.update(minuit.get_fmin())

            fit_result['chi2_ndof'][c_idx, x_idx, y_idx] = current_result['chi2_ndof']

            # Store only the parameters an output array was allocated for
            for key in res.keys():
                if key in fit_result:
                    fit_result[key][c_idx, x_idx, y_idx] = res[key]

            fit_result['mask'][c_idx, x_idx, y_idx] = get_mask(current_result,
                                                                    peak_lim,
                                                                    d0_lim, chi2_lim,
                                                                    peak_width_lim)
        except Exception as e:
            fit_result['mask'][c_idx, x_idx,
                                    y_idx] = BadPixelsFF.FIT_FAILED.value
            print(c_idx, x_idx, y_idx, e, traceback.format_exc())

        # Only a good fit is used to seed the fit of the next entry
        if fit_result['mask'][c_idx, x_idx, y_idx] == 0:
            prev = res
        else:
            prev = None
 ```

 %% Cell type:markdown id: tags:

 ## Single fit ##

 Left plot shows starting parameters for fitting. Right plot shows result of the fit. Errors are evaluated with minos.

 %% Cell type:code id: tags:

 ``` python
 # Example fit of a single histogram: index 1 along the cell axis, pixel (1, 1).
 hist = hist_data['hist'][1,:,1, 1]
 prev, shapes = get_starting_parameters(x, hist, peak_range, n_peaks=n_peaks_fit)

 if fit_range == [0, 0]:
    # Default range: from 2 sigma below the first peak to 1 sigma above the
    # last fitted peak. The last peak is g{n_peaks_fit-1}, as in fit_batch,
    # instead of a hard-coded g3, so the cell also works with fewer peaks.
    frange = (prev['g0mean']-2*prev['g0sigma'],
              prev[f'g{n_peaks_fit-1}mean'] + prev[f'g{n_peaks_fit-1}sigma'])
 else:
    frange = fit_range

 set_par_limits(prev, peak_range, peak_norm_range,
                peak_width_range, n_peaks=n_peaks_fit)
 # Minos is always switched on for this example fit, independent of do_minos.
 minuit = fit_n_peaks(x, hist, prev, frange,
                     do_minos=True, n_peaks=n_peaks_fit,
                     fix_d01=fix_peaks,
                     sigma_limit=sigma_limit,
                    )
 print (minuit.get_fmin())
 minuit.print_matrix()
 print(minuit.get_param_states())
 ```

 %% Cell type:code id: tags:

 ``` python
 # Collect the fitted parameters of the example fit and build the list of
 # plot descriptions (d) that is handed to the plotting cell.
 res = minuit.fitarg
 if fix_peaks :
    # With fixed peak distance, g2, g3, ... sit at multiples of the g0-g1 distance.
    for i in range(2,n_peaks_fit):
        d01 = res['g1mean'] - res['g0mean']
        res[f'g{i}mean'] = res['g0mean'] + d01*i
 err = minuit.errors
 p = minuit.args
 ya = np.arange(0,1e4)
 y = gaussian_sum(x,n_peaks_fit, *p)
 peak_colors = ['g', 'y', 'b', 'orange']

 peak_hist = hist.copy()
 d=[]
 if sigma_limit > 0 :
    # Bins within sigma_limit sigma of any fitted peak. Looping over
    # n_peaks_fit (instead of naming g0..g3) keeps this valid when fewer
    # than four peaks are fitted.
    sel2 = np.zeros(x.shape, dtype=bool)
    for i in range(n_peaks_fit):
        sel2 |= np.abs(x - res[f'g{i}mean']) < sigma_limit*res[f'g{i}sigma']
    # Split the histogram into the part near the peaks and the remainder.
    peak_hist[~sel2] = 0
    valley_hist = hist.copy()
    valley_hist[sel2] = 0
    d.append({'x': x,
              'y': valley_hist.astype(np.float64),
              'y_err': np.sqrt(valley_hist),
              'drawstyle': 'bars',
              'errorstyle': 'bars',
              'transparency': '95%',
              'errorcoarsing': 3,
              'label': 'X-ray Data'
             })
    htitle = f'X-ray Data, (μ±{sigma_limit:0.1f}σ)'
 else :
    htitle = 'X-ray Data'

 d.append({'x': x,
          'y': peak_hist.astype(np.float64),
          'y_err': np.sqrt(peak_hist),
          'drawstyle': 'bars',
          'errorstyle': 'bars',
          'errorcoarsing': 3,
          'label': htitle,
         }
        )
 # Fit curve; y2 holds the residuals in units of sqrt(counts).
 d.append({'x': x,
          'y': y,
          'y2': (hist-y)/np.sqrt(hist),
          'drawstyle':'line',
          'drawstyle2': 'steps-mid',
          'label': 'Fit'
         }
        )

 # One Gaussian and one dashed vertical line at the fitted mean per peak.
 for i in range(n_peaks_fit):
    d.append({'x': x,
             'y': gaussian(x, res[f'g{i}n'], res[f'g{i}mean'], res[f'g{i}sigma']),
             'drawstyle':'line',
             'color': peak_colors[i],
             })
    # Raw f-string: keeps the LaTeX "\pm" from being parsed as a Python escape.
    d.append({'x': np.full_like(ya, res[f'g{i}mean']),
              'y': ya,
              'drawstyle': 'line',
              'linestyle': 'dashed',
              'color': peak_colors[i],
              'label': rf'peak {i} = {res[f"g{i}mean"]:0.1f} $ \pm $ {err[f"g{i}mean"]:0.2f} ADU' })
 ```

 %% Cell type:code id: tags:

 ``` python
 fig, (ax1, ax2) = plt.subplots(1, 2)
 fig.set_size_inches(16, 7)
 # Left panel: data points and Gaussian of every starting-parameter estimate.
 # Each entry of shapes is used as (Gaussian arguments..., bin selection):
 # the first three items go to gaussian(), the fourth indexes x and hist.
 for i, shape in enumerate(shapes):
    idx = shape[3]
    ax1.errorbar(
        x[idx], hist[idx],
        np.sqrt(hist[idx]),
        marker='+', ls='',
    )
    yg = gaussian(x[idx], *shape[:3])
    # Raw f-string: keeps the LaTeX "\pm" from being parsed as a Python escape.
    peak_label = rf'Peak {i}: {shape[1]:0.1f} $ \pm $ {shape[2]:0.2f} ADU'
    ax1.plot(x[idx], yg, label=peak_label)
 ax1.grid(True)
 ax1.set_xlabel("Signal [ADU]")
 ax1.set_ylabel("Counts")
 ax1.legend(ncol=2)

 # Right panel: fit result, restricted to the fit range.
 _ = xana.simplePlot(
    d,
    use_axis=ax2,
    x_label='Signal [ADU]',
    y_label='Counts',
    secondpanel=True, y_log=False,
    x_range=(frange[0], frange[1]),
    y_range=(1., np.max(hist)*1.6),
    legend='top-left-frame-ncol2',
 )

 plt.show()
 ```

 %% Cell type:markdown id: tags:

 ## All fits ##

 %% Cell type:code id: tags:

 ``` python
 # Allocate one output array per stored quantity: every fit argument that is
 # not a limit_/fix_ setting, plus chi2_ndof, mask and gain.
 fit_result = {}
 keys = [x for x in list(minuit.fitarg.keys())
         if not ('limit_' in x or 'fix_' in x)]
 keys.extend(['chi2_ndof', 'mask', 'gain'])
 for key in keys:
    # The mask holds integer bit flags, everything else is float.
    dtype = 'i4' if key == 'mask' else 'f4'
    fit_result[key] = sharedmem.empty([n_cells, n_pixels_x, n_pixels_y], dtype=dtype)
 ```

 %% Cell type:code id: tags:

 ``` python
 # Perform fitting
 # fit_batch has no return statement: the workers write into the fit_result
 # arrays allocated with sharedmem.empty, so const_out carries no fit data.
 with Pool() as pool:
    const_out = pool.map(fit_batch, batches)
 ```

 %% Cell type:code id: tags:

 ``` python
 # Evaluate bad pixels
 # Gain: distance between the first two fitted peaks divided by photon_energy.
 fit_result['gain'] = (fit_result['g1mean'] - fit_result['g0mean'])/photon_energy

 # Calculate histogram width and evaluate cut
 # Each histogram is normalised by its sum over the bin axis; the weighted
 # mean and standard deviation over the bin centres x give its width.
 h_sums = np.sum(hist_data['hist'], axis=1)
 hist_norm = hist_data['hist'] / h_sums[:, None, :, :]
 hist_mean = np.sum(hist_norm[:, :max_bins, ...] *
                   x[None, :, None, None], axis=1)
 hist_sqr = (x[None, :, None, None] - hist_mean[:, None, ...])**2
 hist_std = np.sqrt(np.sum(hist_norm[:, :max_bins, ...] * hist_sqr, axis=1))

 # NOTE(review): a histogram whose sum is 0 gives NaN here, and
 # NaN < intensity_lim is False, so such pixels are not flagged NO_ENTRY by
 # this cut - confirm this is intended.
 fit_result['mask'][hist_std<intensity_lim] |= BadPixelsFF.NO_ENTRY.value

 # Bad pixel on gain deviation
 # Mean gain per memory cell over the pixels that are unmasked so far.
 gains = np.copy(fit_result['gain'])
 gains[fit_result['mask']>0] = np.nan
 gain_mean = np.nanmean(gains, axis=(1,2))

 # Flag gains outside [gain_lim[0], gain_lim[1]] times the mean of their cell.
 fit_result['mask'][fit_result['gain'] > gain_mean[:,None,None]*gain_lim[1] ] |=  BadPixelsFF.GAIN_DEVIATION.value
 fit_result['mask'][fit_result['gain'] < gain_mean[:,None,None]*gain_lim[0] ] |=  BadPixelsFF.GAIN_DEVIATION.value
 ```

 %% Cell type:code id: tags:

 ``` python
 # Save fit results
 # Create out_folder if it does not exist yet, then hand all fit_result
 # arrays, nested under the key 'data', to save_dict_to_hdf5. The file name
 # carries the two-digit module number.
 os.makedirs(out_folder, exist_ok=True)
 out_name = f'{out_folder}/fits_m{module:02d}.h5'
 print(f'Save to file: {out_name}')
 save_dict_to_hdf5({'data': fit_result}, out_name)
 ```

 %% Cell type:markdown id: tags:

 ## Summary across cells ##

 %% Cell type:code id: tags:

 ``` python
 # Axis labels, in the same order as the keys plotted below.
 # Raw strings keep the LaTeX backslashes (\gamma, \chi) from being parsed as
 # (invalid) Python escape sequences.
 labels = [
    "Noise peak [ADU]",
    "First photon peak [ADU]",
    rf"gain [ADU/keV] $\gamma$={photon_energy} [keV]",
    r"$\chi^2$/nDOF",
    "Fraction of bad pixels",
 ]

 for i, key in enumerate(['g0mean', 'g1mean', 'gain', 'chi2_ndof', 'mask']):
    fig = plt.figure(figsize=(20,5))
    ax = fig.add_subplot(121)
    data = fit_result[key]
    if key == 'mask':
        # Any set bit counts as bad, so the cell average is a fraction in [0, 1].
        data = data > 0
        vmin, vmax = [0, 1]
    else:
        vmin, vmax = get_range(data, 5)
    # Left: map of the value averaged over the memory-cell axis.
    _ = heatmapPlot(
        np.mean(data, axis=0).T,
        add_panels=False, cmap='viridis', use_axis=ax,
        vmin=vmin, vmax=vmax, lut_label=labels[i]
    )

    if key != 'mask':
        # Right: distribution of all individual values, with a wider range.
        vmin, vmax = get_range(data, 7)
        ax = fig.add_subplot(122)
        _ = xana.histPlot(
            ax, data.flatten(),
            bins=45,range=[vmin, vmax],
            log=True,color='red',histtype='stepfilled'
        )
        ax.set_xlabel(labels[i])
        ax.set_ylabel("Counts")
 ```

 %% Cell type:markdown id: tags:

 ## Histograms of fit parameters ##

 %% Cell type:code id: tags:

 ``` python
 # Distribution of the histogram widths, with the intensity_lim cut in red.
 fig = plt.figure(figsize=(10, 5))
 ax = fig.add_subplot(1, 1, 1)
 a = ax.hist(hist_std.flatten(), range=(0, 100), bins=100)
 # Vertical marker from 0 up to the highest bin content.
 ax.plot([intensity_lim] * 2, [0, np.nanmax(a[0])],
         color='red', linewidth=1.5)
 ax.set_xlabel('Histogram width [ADU]', fontsize=14)
 ax.set_ylabel('Number of histograms', fontsize=14)
 ax.set_title(
    f'{np.count_nonzero(hist_std < intensity_lim)} histograms below threshold in {intensity_lim} ADU',
    fontweight='bold', fontsize=14)
 ax.grid()
 ax.set_yscale('log')
 ```

 %% Cell type:code id: tags:

 ``` python
 def plot_par_distr(par):
    """Histogram one fit parameter for every fitted peak.

    Draws one panel per peak with the values of all fits (red) and of the
    fits whose mask is 0 (green).

    :param par: Parameter suffix appended to ``g<i>`` to build the
        ``fit_result`` key, e.g. 'mean' or 'sigma'.
    """
    figure = plt.figure(figsize=(16, 5))
    good = fit_result['mask'] == 0

    for peak in range(n_peaks_fit):
        values = fit_result[f"g{peak}{par}"]
        # Means are shown inside the search window of their peak, any
        # other parameter in a fixed window.
        if par == 'mean':
            window = [peak_range[peak][0], peak_range[peak][1]]
        else:
            window = (-1, 50)
        n_bins = int(window[1] - window[0])

        panel = figure.add_subplot(1, n_peaks_fit, peak + 1)
        _ = xana.histPlot(panel, values.flatten(),
                          bins=n_bins, range=window,
                          log=True, color='red',
                          label='all fits')
        panel.hist(values[good].flatten(),
                   bins=n_bins, range=window,
                   log=True, color='g',
                   label='good fits only')
        panel.set_xlabel(f"g{peak} {par} [ADU]")
        panel.legend()

 plot_par_distr('mean')
 plot_par_distr('sigma')
 ```

 %% Cell type:code id: tags:

 ``` python
 # Distributions of the peak distance and of the gain, for all fits (red) and
 # for fits with mask 0 (green), with the corresponding cut values marked.
 sel = fit_result['mask'] == 0

 dsets = {
    'd01 [ADU]': fit_result["g1mean"] - fit_result["g0mean"],
    'gain [ADU/keV]': fit_result["gain"],
    'gain relative to module mean': fit_result["gain"] / np.nanmean(gain_mean),
 }
 fig = plt.figure(figsize=(16,5))
 for i, (par, data) in enumerate(dsets.items()):
    num_bins = 100
    plt_range = get_range(data, 10)
    ax = fig.add_subplot(1, 3, i+1)
    _ = xana.histPlot(ax, data.flatten(),
                      range=plt_range, bins=num_bins,
                      color='red', log=True,
                      label='all fits')

    a = ax.hist(data[sel].flatten(),
                range=plt_range, bins=num_bins,
                color='g', log=True,
                label='good fits only')
    ax.set_xlabel(f"{par}")
    ax.legend()
    # Mark the cut limits that apply to this quantity.
    if 'd01' in par:
        ax.axvline(d0_lim[0])
        ax.axvline(d0_lim[1])
    if 'rel' in par:
        ax.axvline(gain_lim[0])
        ax.axvline(gain_lim[1])
 ```

 %% Cell type:markdown id: tags:

 ## Summary across pixels ##

 Mean and median values are calculated across all pixels for each memory cell.

 %% Cell type:code id: tags:

 ``` python
 def plot_error_band(key, x, ax):
    """Plot per-cell mean and median of one fit result with spread bands.

    Entries whose mask is non-zero are excluded. Statistics are taken over
    the two pixel axes, leaving one value per memory cell. If
    ``fit_result`` contains ``error_<key>``, the mean of that array is
    drawn as an additional band around the mean.

    :param key: Name of the ``fit_result`` array to plot.
    :param x: Horizontal positions, one per memory cell.
    :param ax: Matplotlib axes to draw on.
    """

    cdata = np.copy(fit_result[key])
    cdata[fit_result['mask']>0] = np.nan

    mean = np.nanmean(cdata, axis=(1,2))
    median = np.nanmedian(cdata, axis=(1,2))
    std = np.nanstd(cdata, axis=(1,2))
    # Median absolute deviation around the per-cell median.
    mad = np.nanmedian(np.abs(cdata - median[:,None,None]), axis=(1,2))

    # No format string here: a colour given in the format string ('k') is
    # redundant with the color keyword, which matplotlib reports as a warning
    # while using the keyword anyway.
    ax.plot(x, mean, color='#3F7F4C', label=" mean value ")
    ax.plot(x, median, 'o', color='red', label=" median value ")
    # Raw strings keep the LaTeX "\pm" from being parsed as a Python escape.
    ax.fill_between(x, mean-std, mean+std,
                     alpha=0.6, edgecolor='#3F7F4C', facecolor='#7EFF99',
                     linewidth=1, linestyle='dashdot', antialiased=True,
                     label=r" mean value $ \pm $ std ")

    ax.fill_between(x, median-mad, median+mad,
                     alpha=0.3, edgecolor='red', facecolor='red',
                     linewidth=1, linestyle='dashdot', antialiased=True,
                     label=r" median value $ \pm $ mad ")

    if f'error_{key}' in fit_result:
        cerr = np.copy(fit_result[f'error_{key}'])
        cerr[fit_result['mask']>0] = np.nan

        meanerr = np.nanmean(cerr, axis=(1,2))
        ax.fill_between(x, mean-meanerr, mean+meanerr,
                 alpha=0.6, edgecolor='#089FFF', facecolor='#089FFF',
                 linewidth=1, linestyle='dashdot', antialiased=True,
                 label=" mean fit error ")


 # Horizontal axis: one unit-spaced position per selected memory cell,
 # cell_range[0] .. cell_range[1]-1. np.linspace(*cell_range, n_cells) would
 # include the end point and stretch the spacing to slightly more than 1.
 x = np.arange(cell_range[0], cell_range[1])

 # labels[i] is indexed in step with the keys below.
 for i, key in enumerate(['g0mean', 'g1mean', 'gain', 'chi2_ndof']):

    fig = plt.figure(figsize=(10, 5))
    ax = fig.add_subplot(111)
    plot_error_band(key, x, ax)

    ax.set_xlabel('Memory Cell ID', fontsize=14)
    ax.set_ylabel(labels[i], fontsize=14)
    ax.grid()
    ax.legend()
 ```

 %% Cell type:markdown id: tags:

 ## Cut flow ##

 %% Cell type:code id: tags:

 ``` python
 fig, ax = plt.subplots()
 fig.set_size_inches(10, 5)

 # Cuts in the order of the cut flow; labels follows the same order.
 cuts = [
    BadPixelsFF.FIT_FAILED,
    BadPixelsFF.ACCURATE_COVAR,
    BadPixelsFF.CHI2_THRESHOLD,
    BadPixelsFF.GAIN_THRESHOLD,
    BadPixelsFF.NOISE_PEAK_THRESHOLD,
    BadPixelsFF.PEAK_WIDTH_THRESHOLD,
    BadPixelsFF.NO_ENTRY,
    BadPixelsFF.GAIN_DEVIATION,
 ]
 labels = ['Fit failed',
         'Accurate covar',
         'Chi2/nDOF',
         'Gain',
         'Noise peak',
         'Peak width',
         'No Entry',
         'Gain deviation']

 n_bars = len(cuts)
 x = np.arange(n_bars)
 width = 0.3

 msk = fit_result['mask']
 n_fits = np.prod(msk.shape)

 # y: any_in() evaluated with the bits of this cut and of all cuts before it
 #    (cumulative cut flow).
 # y2: any_in() evaluated with the bit of this cut alone.
 y = []
 cumulative = 0
 for cut in cuts:
    cumulative |= cut.value
    y.append(any_in(msk, cumulative))
 y2 = [any_in(msk, cut.value) for cut in cuts]

 # Percentage of fits that are not selected, per bar.
 y = (1 - np.sum(y, axis=(1,2,3))/n_fits)*100
 y2 = (1 - np.sum(y2, axis=(1,2,3))/n_fits)*100

 ax.bar(x, y2, width, label='Only this cut')
 ax.bar(x, y, width, label='Cut flow')
 ax.set_xticks(x)
 ax.set_xticklabels(labels, rotation=90)
 # NOTE(review): the lower limit is taken from the sixth bar (index 5), so
 # bars after it may fall below the visible range - confirm this is intended.
 ax.set_ylim(y[5]-0.5, 100)
 ax.grid(True)
 ax.legend()
 plt.show()
 ```

--- a/tests/test_webservice.py
+++ b/tests/test_webservice.py
@@ -14,6 +14,7 @@ from webservice.webservice import (  # noqa: import not at top of file
    run_action,
    wait_on_transfer,
    get_slurm_partition,
+    get_slurm_nice
 )


@@ -143,13 +144,15 @@ async def test_wait_on_transfer_exceptions(
        ('sim', ['DARK', '1', '2', '3', '4'], 1, "success: simulated"),
    ],
 )
-async def test_run_action(mode, cmd, retcode, expected):
+async def test_run_action(mode, cmd, retcode, expected, monkeypatch):
    job_db = mock.Mock()

    async def mock_run_proc_async(*args):
        return retcode, b'Submitted job: 42'

-    webservice.webservice.run_proc_async = mock_run_proc_async
+    monkeypatch.setattr(
+        webservice.webservice, 'run_proc_async', mock_run_proc_async
+    )
    ret = await run_action(job_db, cmd, mode, 1, 1, 1)
    assert ret.lower().startswith(expected)

@@ -177,3 +180,44 @@ async def test_get_slurm_partition(proposal_number,

    ret = await get_slurm_partition(client, action, proposal_number)
    assert ret == expected_result
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    'cycle, num_jobs, expected_result',
+    [
+        ('202201', 0, 0), ('202201', 10, 3*10**2),  # user proposal
+        ('202221', 0, 5), ('202221', 10, 5+3*10**2),  # commissioning
+    ]
+)
+async def test_get_slurm_nice_values(fp, cycle, num_jobs, expected_result):
+    """ Test get_slurm_nice values."""
+
+    fp.register(
+        ['squeue', '-h', '-o', '%.20j', '-p', 'upex-higher', '--me'],
+        stdout='\n'.join(
+            [f'correct_SPB_{i}' for i in range(num_jobs)] +
+            [f'correct_FXE_{i}' for i in range(num_jobs*2)]).encode('ascii'),
+        returncode=0)
+
+    ret = await get_slurm_nice(
+        'upex-higher', 'SPB', cycle, job_penalty=3, commissioning_penalty=5)
+    assert ret == expected_result
+
+@pytest.mark.asyncio
+async def test_get_slurm_nice_fails(fp):
+    """Test corner cases for get_slurm_nice."""
+
+    # non-zero returncode
+    fp.register(
+        ['squeue', '-h', '-o', '%.20j', '-p', 'upex-higher', '--me'],
+        stdout='', returncode=1)
+
+    assert await get_slurm_nice('upex-higher', 'SPB', '202201') == 0
+
+    # exfel is special
+    fp.register(
+        ['squeue', '-h', '-o', '%.20j', '-p', 'exfel', '--me'],
+        stdout='\n'.join([f'correct_SPB_{i}' for i in range(10)]),
+        returncode=0)
+
+    assert await get_slurm_nice('exfel', 'SPB', '202201') == 0
--- a/webservice/config/webservice.yaml
+++ b/webservice/config/webservice.yaml
@@ -31,7 +31,8 @@ kafka:
 correct:
    in-folder: /gpfs/exfel/exp/{instrument}/{cycle}/p{proposal}/raw
    out-folder: /gpfs/exfel/d/proc/{instrument}/{cycle}/p{proposal}/{run}
-    sched-prio: 80
+    commissioning-penalty: 1250
+    job-penalty: 2
    cmd : >-
        python -m xfel_calibrate.calibrate {detector} CORRECT
        --slurm-scheduling {sched_prio}
@@ -45,7 +46,8 @@ correct:
 dark:
    in-folder: /gpfs/exfel/exp/{instrument}/{cycle}/p{proposal}/raw
    out-folder: /gpfs/exfel/u/usr/{instrument}/{cycle}/p{proposal}/dark/runs_{runs}
-    sched-prio: 10
+    commissioning-penalty: 1250
+    job-penalty: 2
    cmd: >-
        python -m xfel_calibrate.calibrate {detector} DARK
        --concurrency-par karabo_da

--- a/webservice/webservice.py
+++ b/webservice/webservice.py
@@ -669,6 +669,55 @@ async def get_slurm_partition(mdc: MetadataClient,
    return partition


+async def get_slurm_nice(partition: str, instrument: str,
+                         cycle: Union[int, str], job_penalty: int = 2,
+                         commissioning_penalty: int = 1250) -> int:
+    """Compute priority adjustment based on cycle and number of running
+       jobs.
+
+       The nice value is computed with
+           base_penalty + job_penalty * num_jobs**2
+
+       base_penalty is 0 for user proposals and commissioning_penalty
+       for commissioning proposals. The number of jobs is computed by
+       calling `squeue` and counting based on job name.
+
+       The default penalty values give commissioning proposals a
+       penalty of 25 running jobs.
+
+       :param partition: Partition to run jobs in.
+       :param instrument: Instrument to run jobs for.
+       :param cycle: Cycle of proposal to run jobs for.
+       :param job_penalty: Scaling factor per job, 2 by default.
+       :param commissioning_penalty: Base penalty for commissioning,
+           1250 by default.
+       :return: Nice value to be passed to sbatch --nice
+    """
+
+    if partition == 'exfel':
+        return 0  # Don't apply degressive priority on exfel.
+
+    # List all names for jobs running in the specified partition.
+    returncode, job_names = await run_proc_async(
+        ['squeue', '-h', '-o', '%.20j', '-p', partition, '--me'])
+
+    if returncode != 0:
+        logging.error(f'Non-zero return code {returncode} from '
+                      f'`squeue` upon counting number of jobs')
+        return 0  # Fallback if something went wrong.
+
+    # Base value depending on proposal type using cycle, assuming that
+    # user proposals follow the pattern xxxx0y, while different kinds of
+    # commissioning proposals use xxxx2y or xxxx3y.
+    base_nice = 0 if str(cycle)[4] == '0' else commissioning_penalty
+
+    # Count number of jobs
+    num_jobs = sum((1 for job in job_names.decode('ascii').split('\n')
+                    if f'correct_{instrument}' in job))
+
+    return base_nice + num_jobs**2 * job_penalty
+
+
 async def update_darks_paths(mdc: MetadataClient, rid: int, in_path: str,
                             out_path: str, report_path: str):
    """Update data paths in MyMDC to provide Globus access
@@ -1209,6 +1258,10 @@ class ActionsServer:
        ret = []

        partition = await get_slurm_partition(self.mdc, action, proposal)
+        nice = await get_slurm_nice(
+            partition, instrument, cycle,
+            commissioning_penalty=self.config[action]['commissioning-penalty'],
+            job_penalty=self.config[action]['job-penalty'])

        # run xfel_calibrate
        for karabo_id, dconfig in detectors.items():
@@ -1216,7 +1269,7 @@ class ActionsServer:
            del dconfig['detector-type']
            cmd = self.config[action]['cmd'].format(
                detector=detector,
-                sched_prio=str(self.config[action]['sched-prio']),
+                sched_prio=nice,
                partition=partition,
                action=action, instrument=instrument,
                cycle=cycle, proposal=proposal,
No results found