fix: put back out_folder as it is mandatory for xfel-calibrate

4c6abd97 · Karim Ahmed · 4d2ea47a · 4c6abd97
Commit 4c6abd97 authored 4 months ago by Karim Ahmed
--- a/notebooks/Jungfrau/Jungfrau_Create_Gain_maps_NBC.ipynb
+++ b/notebooks/Jungfrau/Jungfrau_Create_Gain_maps_NBC.ipynb
@@ -37,7 +37,8 @@
   "outputs": [],
   "source": [
    "in_folder = '/gpfs/exfel/exp/MID/202330/p900322/raw'  # RAW data path, required\n",
-    "fit_data_folder = \"/gpfs/exfel/data/scratch/ahmedk/test/jf_ff/gain_maps\"  # Output path for gain data, required\n",
+    "out_folder = \"/gpfs/exfel/data/scratch/ahmedk/test/jf_ff/gain_maps\"  # Output path for gain data, required\n",
+    "fit_data_folder = \"\"  # One can add fit data files in a separate path an point to it here.\n",
    "metadata_folder = ''  # Directory containing calibration_metadata.yml when run by xfel-calibrate\n",
    "runs = [94]  # Number of high gain\n",
    "\n",
@@ -138,7 +139,9 @@
   "outputs": [],
   "source": [
    "run = runs[0]\n",
-    "fit_data_folder = Path(fit_data_folder)\n",
+    "out_folder = Path(out_folder)\n",
+    "if not fit_data_folder:\n",
+    "    fit_data_folder = out_folder\n",
    "proposal = list(filter(None, in_folder.strip('/').split('/')))[-2]\n",
    "file_loc = raw_data_location_string(proposal=proposal, runs=runs)\n",
    "report = get_report(metadata_folder)\n",

 %% Cell type:markdown id: tags:

 # Jungfrau Detector Flatfield Gain Calibration - Part 2: Gain Map Generation

 Author: European XFEL Detector Group, Version: 1.0

 This notebook performs the final steps in the Jungfrau detector flatfield gain calibration process, focusing on gain map creation and refinement.

 #### Overview

 1. Load Fit Results: Import photon peak fit data from previous processing step (Histogram creation and Fitting)
 2. Retrieve Old Gain Map: Access previous gain calibration from database or file
 3. Calculate New Gain Map:
   - Use fit results for high gain (G0)
   - Apply gain ratios for medium (G1) and low (G2) gains
 4. Evaluate Bad Pixels:
   - Identify pixels with failed fits
   - Flag pixels with insufficient data
   - Detect pixels with high gain deviation
 5. Correct Bad Pixels: Replace identified bad pixels with median values
 6. Generate Bad Pixel Map: Create a map of all identified bad pixels
 7. Save:
   - Store new gain maps for all gain levels
   - Save bad pixel map
   - Optionally inject new calibration maps into database
 8. Visualize Results: Plot gain maps and bad pixel distributions

 %% Cell type:code id: tags:

 ``` python
 in_folder = '/gpfs/exfel/exp/MID/202330/p900322/raw'  # RAW data path, required
-fit_data_folder = "/gpfs/exfel/data/scratch/ahmedk/test/jf_ff/gain_maps"  # Output path for gain data, required
+out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/jf_ff/gain_maps"  # Output path for gain data, required
+fit_data_folder = ""  # One can add fit data files in a separate path an point to it here.
 metadata_folder = ''  # Directory containing calibration_metadata.yml when run by xfel-calibrate
 runs = [94]  # Number of high gain

 karabo_id = 'MID_EXP_JF500K2'  # karabo prefix of Jungfrau devices
 karabo_da = ['']
 fit_func = 'CHARGE_SHARING'  # function used to fit the single photon peak. The 3 available options are: CHARGE_SHARING, CHARGE_SHARING_2, and GAUSS
 gains = [0, 1, 2]

 # Parameter conditions
 bias_voltage = -1  # detector bias voltage
 integration_time = -1.  # the detector acquisition rate, use 0 to try to auto-determine
 gain_setting = -1  # gain setting parameter conditions (High or low CDS) - Set to -1 to derive from CONTROL file.
 gain_mode = -1  # number of memory cells - Set to -1 to derive from CONTROL file.
 memory_cells = -1  # number of memory cells - Set to -1 to derive from CONTROL file.
 # Condition limits
 bias_voltage_lims = [0, 200]  # Acceptable deviations for bias voltages operating conditions.
 integration_time_lims = [0.1, 1000]  # Acceptable deviations for integration time operating conditions.

 adc_fit = True
 strixel_sensor = ""  # reordering for strixel detector layout. Possible strixels to choose from are A0123 and A1256.
 spectra_fit_temp = 'R{:04d}_{}_Gain_Spectra_{}_{}_Fit.h5'  # 'R{:04d}_{proposal.upper()}_Gain_Spectra_{da}_{fit_func}_Fit.h5'
 creation_time = ""  # To overwrite the measured creation_time. Required Format: YYYY-MM-DD HR:MN:SC e.g. "2022-06-28 13:00:00"
 g_map_old = ''  # '/gpfs/exfel/data/user/mramilli/jungfrau/module_PSI_gainmaps/M302/gainMaps_M302_2022-02-23.h5'  # old gain map file path to calculate gain ratios G0/G1 and G1/G2. Set to "" to get last gain constant from the database.
 old_gain_dataset_name = 'gain_map_g0'  # name of the data structure in the old gain map
 correct_offset = False  # correct the photon peak value with a pedestal fit position
 db_output = False  # A boolean to inject the gain maps to the database.
 local_output = True  # A boolean to store a local version of the constants in the out-folder
 g0_fit_dataset = 'gainMap_fit'  # name of the data structure in the fit files
 E_ph = 8.048  # photon energy of the peak fitted
 badpixel_threshold_sigma = 3.  # number of std in gain distribution to mark bad pixels
 roi = [0, 1024, 0, 512]  # ROI to consider to evaluate gain distribution (for bad pixels evaluation)

 # CALCAT API parameters
 cal_db_interface = "tcp://max-exfl-cal001:8020"  # the database interface to use
 cal_db_timeout = 180000


 def find_das(in_folder, runs, karabo_da):
    run = runs[0]
    if (karabo_da is not None) and karabo_da != [""]:
        return karabo_da
    from pathlib import Path
    import re
    karabo_da = set()
    for file in Path(in_folder, f'r{run:04d}').iterdir():
        da = re.search(r'JNGFR\d{2}', file.name)
        if da:
            karabo_da.add(da.group())
    return sorted(karabo_da)
 ```

 %% Cell type:code id: tags:

 ``` python
 import matplotlib.pyplot as plt
 import numpy as np
 from IPython.display import Markdown, display
 from h5py import File as h5file
 from logging import warning

 import cal_tools.restful_config as rest_cfg

 from pathlib import Path

 import matplotlib.pyplot as plt
 from extra_data import RunDirectory
 from iCalibrationDB import Conditions, Constants
 from XFELDetAna.plotting.heatmap import heatmapPlot

 from cal_tools.calcat_interface import JUNGFRAU_CalibrationData
 from cal_tools.enums import BadPixels
 from cal_tools.jungfrau.jfstrixel import from_strixel
 from cal_tools.tools import (
    calcat_creation_time,
    get_report,
    raw_data_location_string,
    save_const_to_h5,
    send_to_db,
 )

 %matplotlib inline
 ```

 %% Cell type:markdown id: tags:

 ## Open the photon peak fit file

 %% Cell type:code id: tags:

 ``` python
 run = runs[0]
-fit_data_folder = Path(fit_data_folder)
+out_folder = Path(out_folder)
+if not fit_data_folder:
+    fit_data_folder = out_folder
 proposal = list(filter(None, in_folder.strip('/').split('/')))[-2]
 file_loc = raw_data_location_string(proposal=proposal, runs=runs)
 report = get_report(metadata_folder)
 in_folder = Path(in_folder)
 ```

 %% Cell type:code id: tags:

 ``` python
 g0_new = dict()
 for da in karabo_da:
    with h5file(
        fit_data_folder / spectra_fit_temp.format(run, proposal.upper(), da, fit_func),
        'r'
    ) as f:
        if strixel_sensor:
            g0_new[da] = np.transpose(
                from_strixel(np.transpose(f[g0_fit_dataset][()])),
                kind=strixel_sensor)
        else:
            g0_new[da] = f[g0_fit_dataset][()]

        print("Fit gain data shape:", g0_new[da].shape)

        if integration_time < 0  and 'integration_time' in f.attrs.keys():
            integration_time = np.float32(f.attrs['integration_time'])
            print(f'Integration time: {integration_time}')
        else:
            print(f'integration time not found!\nUsing default value {integration_time}')

        if bias_voltage < 0 and 'bias_voltage' in f.attrs.keys():
            bias_voltage = np.float32(f.attrs['bias_voltage'])
            print(f'Bias voltage: {bias_voltage}')
        else:
            print(f'Bias voltage not found!\nUsing default value {bias_voltage}')

        if gain_setting < 0 and 'gain_setting' in f.attrs.keys():
            gain_setting = bool(f.attrs['gain_setting'])
            print(f'Gain setting: {gain_setting}')
        else:
            print(f'Gain setting not found!\nUsing default value {gain_setting}')

        if gain_mode < 0 and 'gain_mode' in f.attrs.keys():
            gain_mode = bool(f.attrs['gain_mode'])
            print(f'Gain mode: {gain_mode}')
        else:
            print(f'Gain mode not found!\nUsing default value {gain_mode}')

        if memory_cells < 0 and 'memory_cells' in f.attrs.keys():
            memory_cells = f.attrs['memory_cells']
            print(f'Memory cells: {memory_cells}')
        else:
            print(f'Memory cells not found!\nUsing default value {memory_cells}')

        # Run's creation time:
        creation_time = calcat_creation_time(in_folder, run, creation_time)
        print(f"Creation time: {creation_time}")
 ```

 %% Cell type:markdown id: tags:

 ## Plot Spectra fit map

 %% Cell type:code id: tags:

 ``` python
 for da in karabo_da:
    for cell in range(g0_new[da].shape[-1]):
        vmin, vmax = np.percentile(g0_new[da][..., cell], [5, 95])
        fig = heatmapPlot(
            np.swapaxes(g0_new[da][..., cell], 0, 1),
            y_label="Row",
            x_label="Column",
            lut_label="Output [keV]",
            title=f'Cell {cell} for module {da}',
            aspect=1.,
            log=False,
            vmin=vmin,
            vmax=vmax,
            panel_top_low_lim=35.,
            panel_top_high_lim=45.,
            panel_side_low_lim=35.,
            panel_side_high_lim=45.,
        )
        plt.show()
 ```

 %% Cell type:markdown id: tags:

 ## Retrieve the old gain map

 Currently only the high gain map can be generated. To calculate the new medium and low gain maps, we depend on the past gain maps to calculate both gain maps from the ratio on G0/G1 and G1/G2. If we can't use any old gain maps, it would be impossible to calculate gain maps for medium and low gain at the moment.

 %% Cell type:code id: tags:

 ``` python
 jf_cal = JUNGFRAU_CalibrationData(
    detector_name=karabo_id,
    sensor_bias_voltage=bias_voltage,
    event_at=None,
    modules=karabo_da,
    memory_cells=memory_cells,
    integration_time=integration_time,
    gain_setting=gain_setting,
    gain_mode=0,  # Always retrieve dynamic RelativeGain constant
    client=rest_cfg.calibration_client(),
 )
 gmap_old = dict()
 gmap_new = dict()
 bad_pixel_maps = dict()

 da_to_pdu = {}
 for mod_info in jf_cal.physical_detector_units.values():
    da_to_pdu[mod_info["karabo_da"]] = mod_info["physical_name"]

 jf_metadata = jf_cal.metadata(calibrations=["RelativeGain10Hz"])
 const_data = jf_cal.ndarray_map(metadata=jf_metadata)
 jf_cal.display_markdown_retrieved_constants(metadata=jf_metadata)

 for da in karabo_da:
    if const_data[da].get("RelativeGain10Hz") is not None:
        gmap_old[da] = np.array(
            const_data[da]["RelativeGain10Hz"],
            dtype=np.float32,
        )
    else:
        gmap_old[da] = None
        warning("Gain not found")

    gmap_new[da] = np.zeros(
        (
            gmap_old[da].shape[0],
            gmap_old[da].shape[1],
            g0_new[da].shape[2],
            len(gains)
        ),
        dtype=np.float32)
    bad_pixel_maps[da] = np.zeros(gmap_new[da].shape, np.uint32)
 ```

 %% Cell type:code id: tags:

 ``` python
 # Create masks for failed fit and no entry pixels
 for da in karabo_da:
    no_entries = np.logical_and(g0_new[da] < 0., g0_new[da] > -2.) # no entries in pixel is marked with -1. in the gain map
    failed_fit = g0_new[da] < -999. # failed fit are marked with -1000. in the gain map

    # replace placeholder values with nan
    g0_new[da][no_entries] = np.nan
    g0_new[da][failed_fit] = np.nan

    no_entries = np.broadcast_to(np.expand_dims(no_entries, axis=len(no_entries.shape)), gmap_new[da].shape)
    failed_fit = np.broadcast_to(np.expand_dims(failed_fit, axis=len(failed_fit.shape)), gmap_new[da].shape)

    bad_pixel_maps[da][no_entries] |= BadPixels.FF_NO_ENTRIES.value
    bad_pixel_maps[da][failed_fit] |= BadPixels.FF_GAIN_EVAL_ERROR.value
 ```

 %% Cell type:code id: tags:

 ``` python
 _n = 0
 for da in karabo_da:
    for col in range(10, 200):
        for row in range(10, 200):
            if bad_pixel_maps[da][col, row, 0, 0] == BadPixels.FF_GAIN_EVAL_ERROR.value and _n < 20:
                print(f'Module {da}: Column-{col}, Row-{row}')
                _n += 1
 ```

 %% Cell type:markdown id: tags:

 ## Calculate gain

 %% Cell type:code id: tags:

 ``` python
 for da in karabo_da:

    if correct_offset:
        display(Markdown("#### Correct with better pedestal estimation"))
        with h5file(
            fit_data_folder / spectra_fit_temp.format(run, proposal.upper(), da, fit_func),
            'r'
        ) as fc:
            corr = np.array(fc[g0_fit_dataset])
            if strixel_sensor:
                corr = np.transpose(
                    from_strixel(np.transpose(corr)), kind=strixel_sensor)

            g0_new[da] -= corr

    # Calculate gain map in ADC units/keV
    g0_new[da] = g0_new[da] / E_ph

    if adc_fit:
        # Calculate gain ratios from old gain map
        g_zeros = np.zeros((gmap_old[da].shape[0], gmap_old[da].shape[1]), dtype=np.float32)

        g1g0 = g_zeros.copy()
        g2g1 = g_zeros.copy()

        g1g0 = gmap_old[da][..., 0, 1] / gmap_old[da][..., 0, 0]
        g2g1 = gmap_old[da][..., 0, 2] / gmap_old[da][..., 0, 1]

        # Calculate new G1 and G2 values from gain ratios and G0 fit value
        for cell in range(0, gmap_new[da].shape[2]):
            gmap_new[da][..., cell, 0] = g0_new[da][..., cell]
            gmap_new[da][..., cell, 1] = g0_new[da][..., cell] * g1g0
            gmap_new[da][..., cell, 2] = g0_new[da][..., cell] * g1g0 * g2g1
    else:
        g_ratio = np.broadcast_to(np.expand_dims(g0_new[da], axis=3), gmap_old[da].shape)
        gmap_new[da] = g_ratio * gmap_old[da]
 ```

 %% Cell type:code id: tags:

 ``` python
 # plotting the gain distribution per memory cell
 # the vertical bars indicate the cuts

 n_bins = 100
 props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
 for da in karabo_da:
    for cell in range(0, gmap_new[da].shape[2]):
        x_low = np.nanmin(gmap_new[da][..., cell, 0])
        x_up = np.nanmax(gmap_new[da][..., cell, 0])
        #x_low = 20.
        #x_up = 50.
        x_bins = np.linspace(x_low, x_up, n_bins)
        mdn = np.nanmedian(gmap_new[da][..., cell, 0])
        std = np.nanstd(gmap_new[da][..., cell, 0])
        _h, _e = np.histogram(gmap_new[da][..., cell, 0].ravel(), bins=x_bins)
        _x = (_e[1:] + _e[:-1])/2.
        f, a = plt.subplots()
        a.plot(_x, _h, drawstyle='steps', linewidth=3)
        a.axvline(x=mdn+badpixel_threshold_sigma*std, linestyle='--')
        a.axvline(x=mdn-badpixel_threshold_sigma*std, linestyle='--')
        a.grid(True)

        textstr = '\n'.join((
                r'$\mathrm{median}=%.2e$' % (mdn),
                r'$\sigma=%.2e$' % (std)))

        a.text(
            0.05,
            0.95,
            textstr,
            transform=a.transAxes,
            fontsize=20,
            verticalalignment='center',
            bbox=props,
        )

        a.set_yscale('log')
        a.set_xlabel('Gain [ADCu/keV]', fontsize=20)
        plt.show()
 ```

 %% Cell type:code id: tags:

 ``` python
 def replace_with_median(data, bpix_map):
    """
    replaces the bad pixels with the median value of the other pixels
    args:
    data (array, float): map in; with shape (column, row, cell, gain)
    bpix_map (array, uint32): bad pixel map

    returns the new map
    """
    b_p_mask = bpix_map
    b_p_mask[b_p_mask > 0] = 1
    mskd = np.ma.MaskedArray(
        data, b_p_mask,
        hard_mask=True,
        keep_mask=False,
        fill_value=np.nan
    ).filled()
    median_value = np.nanmedian(mskd, axis=(0, 1))
    for cell in range(median_value.shape[0]):
        for gain in range(median_value.shape[1]):
            mskd[..., cell, gain][bpix_map[..., cell, gain] > 0] = median_value[cell, gain]

    return mskd


 def eval_bpidx(d_in, badpixel_threshold_sigma=badpixel_threshold_sigma, roi=roi):
    """
    evaluates the indexes of the pixels with high deviation from the median value
    args:
    * d_in (array, float): the map with the values; with shape (columns, rows, cells, gain)
    * badpixel_threshold_sigma (float): number of std outside which a value indicates a bad pixel
    * roi (int, tuple): ROI to use to calculate median and std: (col_min, col_max, row_min, row_max)

    returns mask where True indicates bad pixel
    """
    d_eval = d_in[roi[0]:roi[1], roi[2]:roi[3]]
    mdn = np.nanmedian(d_eval, axis=(0, 1))[None, None, :, :]
    std = np.nanstd(d_eval, axis=(0, 1))[None, None, :, :]
    idx = (d_in > badpixel_threshold_sigma*std+mdn) | (d_in < (-badpixel_threshold_sigma)*std+mdn)

    return idx
 ```

 %% Cell type:code id: tags:

 ``` python
 for da in karabo_da:
    # Create map of bad pixels with too high gain deviation
    idxs = eval_bpidx(gmap_new[da])
    bad_pixel_maps[da][idxs] |= BadPixels.FF_GAIN_DEVIATION.value

    # Replace all the bad pixels with the median gain value across the module
    gmap_new[da] = replace_with_median(gmap_new[da], bad_pixel_maps[da])
 ```

 %% Cell type:markdown id: tags:

 ### New gain map

 #### Plot new gain map for the first gain and for each cell

 %% Cell type:code id: tags:

 ``` python
 for da in karabo_da:
    for cell in range(gmap_new[da].shape[-2]):
        vmin, vmax = np.percentile(gmap_new[da][..., cell, 0], [5, 95])
        fig = heatmapPlot(
            np.swapaxes(gmap_new[da][..., cell, 0], 0, 1),
            y_label="Row",
            x_label="Column",
            lut_label="Output [keV]",
            title=f'Cell {cell} for module {da}({da_to_pdu[da]})',
            aspect=1.,
            log=False,
            vmin=vmin,
            vmax=vmax,
            panel_top_low_lim=25.,
            panel_top_high_lim=45.,
            panel_side_low_lim=25.,
            panel_side_high_lim=45.,
        )
        plt.show()
 ```

 %% Cell type:markdown id: tags:

 ## Save and Inject gain map

 %% Cell type:code id: tags:

 ``` python

 def create_constants(bad_pixels, gmap, gain_mode):
    constants = {
        "BadPixelsFF": bad_pixels,  # 512, 1024, 1, 3
        "RelativeGain": gmap.copy() if gain_mode else gmap
    }
    if gain_mode:
        constants["RelativeGain"][..., 1:] *= -1
    return constants

 for gain_mode in [0, 1]:
    for da in karabo_da:
        # Inject constants for fixed and adaptive gain maps.
        conditions = Conditions.Dark.jungfrau(
            memory_cells=memory_cells,
            bias_voltage=bias_voltage,
            integration_time=integration_time,
            gain_setting=gain_setting,
            gain_mode=gain_mode,
        )

        for parm in conditions.parameters:
            if parm.name == "Integration Time":
                print('Setting integration time limits.')
                parm.lower_deviation = integration_time - integration_time_lims[0]
                parm.upper_deviation = integration_time_lims[1] - integration_time
            elif parm.name == "Sensor Bias Voltage":
                print('Setting bias voltage limits.')
                parm.lower_deviation = bias_voltage - bias_voltage_lims[0]
                parm.upper_deviation = bias_voltage_lims[1] - bias_voltage

        constants = create_constants(
            bad_pixels=bad_pixel_maps[da],
            gmap=gmap_new[da],
            gain_mode=gain_mode,
        )

        pdu = da_to_pdu[da]
        for cname, cdata in constants.items():
            if db_output:
                print(f"Storing maps for {'fixed' if gain_mode else 'adaptive'} gain mode.")
                const = getattr(Constants.jungfrau, cname)()
                const.data = cdata
                send_to_db(
                    db_module=pdu,
                    karabo_id=karabo_id,
                    constant=const,
                    condition=conditions,
                    file_loc=file_loc,
                    report_path=report,
                    cal_db_interface=cal_db_interface,
                    creation_time=creation_time,
                )

            if local_output and gain_mode == 0:  # Store locally adaptive gain maps only.
                save_const_to_h5(
                    db_module=pdu,
                    karabo_id=karabo_id,
                    constant=getattr(Constants.jungfrau, cname)(),
                    condition=conditions,
                    data=cdata,
                    file_loc=file_loc,
                    report=report,
                    creation_time=creation_time,
                    out_folder=fit_data_folder,
                )
                print(f"Calibration constant {cname} is stored locally at {fit_data_folder}.\n")
 ```

 %% Cell type:markdown id: tags:

 ## Plot Gain map for the 1st cell and each gain

 %% Cell type:code id: tags:

 ``` python
 for da in karabo_da:
    for g in gains:  # TODO: Isn't it always 3 gains??
        vmin, vmax = np.percentile(gmap_new[da][..., 0, g], [5, 95])
        f_im = heatmapPlot(
            np.swapaxes(gmap_new[da][..., 0, g], 0, 1),
            title=f"1st cell of G{g} gain map for {da}({da_to_pdu[da]})",
            y_label="Row",
            x_label="Column",
            lut_label="G{:01d}[ADCu/keV]".format(g),
            aspect=1.,
            vmin=vmin,
            vmax=vmax,
        )
        plt.show()
 ```