Karim Ahmed
--- a/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb

+ 12

− 8
+++ b/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb

+ 12

− 8
 %% Cell type:markdown id: tags:

 # Jungfrau Offline Correction #

 Author: European XFEL Detector Group, Version: 2.0

 Offline Calibration for the Jungfrau Detector

 %% Cell type:code id: tags:

 ``` python
 in_folder = "/gpfs/exfel/exp/SPB/202130/p900204/raw"  # the folder to read data from, required
 out_folder =  "/gpfs/exfel/data/scratch/ahmedk/test/remove"  # the folder to output to, required
 run = 112  # run to process, required
 sequences = [-1]  # sequences to correct, set to [-1] for all, range allowed
 sequences_per_node = 1  # number of sequence files per cluster node if run as slurm job, set to 0 to not run SLURM parallel

 # Parameters used to access raw data.
 karabo_id = "SPB_IRDA_JF4M"  # karabo prefix of Jungfrau devices
 karabo_da = ['JNGFR01', 'JNGFR02', 'JNGFR03', 'JNGFR04', 'JNGFR05', 'JNGFR06', 'JNGFR07', 'JNGFR08']  # data aggregators
 receiver_template = "JNGFR{:02d}"  # Detector receiver template for accessing raw data files. e.g. "JNGFR{:02d}"
 instrument_source_template = '{}/DET/{}:daqOutput'  # template for source name (filled with karabo_id & receiver_id). e.g. 'SPB_IRDA_JF4M/DET/JNGFR01:daqOutput'
 ctrl_source_template = '{}/DET/CONTROL'  # template for control source name (filled with karabo_id_control)
 karabo_id_control = ""  # if control is on a different ID, set to empty string if it is the same a karabo-id

 # Parameters for calibration database.
 use_dir_creation_date = True  # use the creation data of the input dir for database queries
 cal_db_interface = "tcp://max-exfl016:8017#8025" # the database interface to use
 cal_db_timeout = 180000  # timeout on caldb requests

 # Parameters affecting corrected data.
 overwrite = True  # set to True if existing data should be overwritten
 relative_gain = True  # do relative gain correction
 plt_images = 100  # Number of images to plot after applying selected corrections.
 limit_images = 0  # ONLY FOR TESTING. process only first N images, Use 0 to process all.

 # Parameters for retrieving calibration constants
 manual_slow_data = False  # if true, use manually entered bias_voltage and integration_time values
 integration_time = 4.96  # integration time in us, will be overwritten by value in file
 gain_setting = 0  # 0 for dynamic gain, 1 for dynamic HG0, will be overwritten by value in file
 gain_mode = 0  # 0 for runs with dynamic gain setting, 1 for fixgain. It will be overwritten by value in file, if manual_slow_data is set to True.
 mem_cells = 0  # leave memory cells equal 0, as it is saved in control information starting 2019.
 bias_voltage = 180  # will be overwritten by value in file

 # Parameters for plotting
 skip_plots = False  # exit after writing corrected files


 def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):
    from xfel_calibrate.calibrate import balance_sequences as bs
    return bs(in_folder, run, sequences, sequences_per_node, karabo_da)
 ```

 %% Cell type:code id: tags:

 ``` python
 import multiprocessing
 import warnings
 from functools import partial
 from pathlib import Path

 import h5py
 import matplotlib
 import matplotlib.pyplot as plt
 import numpy as np
 import pasha as psh
 import tabulate
 from IPython.display import Latex, Markdown, display
 from extra_data import H5File, RunDirectory
 from matplotlib.colors import LogNorm

 from cal_tools import h5_copy_except
 from cal_tools.jungfraulib import JungfrauCtrl
 from cal_tools.enums import BadPixels
 from cal_tools.step_timing import StepTimer
 from cal_tools.tools import (
    get_constant_from_db_and_time,
    get_dir_creation_date,
    get_pdu_from_db,
    map_seq_files,
    write_compressed_frames,
 )
 from iCalibrationDB import Conditions, Constants

 warnings.filterwarnings('ignore')

 matplotlib.use('agg')
 %matplotlib inline
 ```

 %% Cell type:code id: tags:

 ``` python
 in_folder = Path(in_folder)
 out_folder = Path(out_folder)
 run_dc = RunDirectory(in_folder / f'r{run:04d}')
 instrument_src = instrument_source_template.format(karabo_id, receiver_template)

 if out_folder.exists() and not overwrite:
    raise AttributeError("Output path exists! Exiting")
 else:
    out_folder.mkdir(parents=True, exist_ok=True)

 print(f"Run is: {run}")
 print(f"Instrument H5File source: {instrument_src}")
 print(f"Process modules: {karabo_da}")

 creation_time = None
 if use_dir_creation_date:
    creation_time = get_dir_creation_date(in_folder, run)
    print(f"Using {creation_time} as creation time")

 if karabo_id_control == "":
    karabo_id_control = karabo_id
 ```

 %% Cell type:code id: tags:

 ``` python
 # Read available sequence files to correct.
 mapped_files, num_seq_files = map_seq_files(
    run_dc, karabo_id, karabo_da, sequences)

 if not len(mapped_files):
    raise IndexError(
        "No sequence files available to correct for the selected sequences and karabo_da.")
 ```

 %% Cell type:code id: tags:

 ``` python
 print(f"Processing a total of {num_seq_files} sequence files")
 table = []
 fi = 0
 for kda, sfiles in mapped_files.items():
    for k, f in enumerate(sfiles):
        if k == 0:
            table.append((fi, kda, k, f))
        else:
            table.append((fi, "", k,  f))
        fi += 1
 md = display(Latex(tabulate.tabulate(
    table, tablefmt='latex',
    headers=["#", "module", "# module", "file"])))
 ```

 %% Cell type:code id: tags:

 ``` python
 ctrl_src = ctrl_source_template.format(karabo_id_control)
 ctrl_data = JungfrauCtrl(run_dc, ctrl_src)
 try:
    this_run_mcells, sc_start = ctrl_data.get_memory_cells()

    if this_run_mcells == 1:
        memory_cells = 1
        print("Run is in single cell mode.\n"
              f"Storage cell start: {sc_start:02d}")
    else:
        memory_cells = 16
        print(f"Run is in burst mode.\n"
              f"Storage cell start: {sc_start:02d}")
 except KeyError as e:
    print("WARNING: KeyError while reading number of memory cells.")
    if mem_cells == 0:
        memory_cells = 1
    else:
        memory_cells = mem_cells
    print(f"WARNING: Set memory cells to {memory_cells}, as "
          "it is not saved in control information.")

 if not manual_slow_data:
    integration_time = ctrl_data.get_integration_time()
    bias_voltage = ctrl_data.get_bias_voltage()
    gain_setting = ctrl_data.get_gain_setting()
    gain_mode = ctrl_data.get_gain_mode()

 print(f"Integration time is {integration_time} us")
 print(f"Gain setting is {gain_setting} (run settings: "
      f"{ctrl_data.run_settings.value if ctrl_data.run_settings else ctrl_data.run_settings})")  # noqa
 print(f"Gain mode is {gain_mode}")
 print(f"Bias voltage is {bias_voltage} V")
 print(f"Number of memory cells are {memory_cells}")
 ```

 %% Cell type:markdown id: tags:

 ### Retrieving calibration constants ###

 %% Cell type:code id: tags:

 ``` python
 condition = Conditions.Dark.jungfrau(
    memory_cells=memory_cells,
    bias_voltage=bias_voltage,
    integration_time=integration_time,
    gain_setting=gain_setting,
    gain_mode=gain_mode,
 )

-db_modules = get_pdu_from_db(
-    karabo_id=karabo_id,
-    karabo_da=karabo_da,
-    constant=Constants.jungfrau.Offset(),
-    condition=condition,
-    cal_db_interface=cal_db_interface,
-    snapshot_at=creation_time)
-
 def get_constants_for_module(karabo_da: str):
    """ Get calibration constants for given module of Jungfrau

    :return:
        offset_map (offset map),
        mask (mask of bad pixels),
        gain_map (map of relative gain factors),
        db_module (name of DB module),
        when (dictionaty: constant - creation time)
    """

    when = {}
    retrieval_function = partial(
        get_constant_from_db_and_time,
        karabo_id=karabo_id,
        karabo_da=karabo_da,
        cal_db_interface=cal_db_interface,
        creation_time=creation_time,
        timeout=cal_db_timeout,
        print_once=False,
    )
    offset_map, when["Offset"] = retrieval_function(
        condition=condition,
        constant=Constants.jungfrau.Offset(),
        empty_constant=np.zeros((512, 1024, memory_cells, 3))
    )
    mask, when["BadPixelsDark"] = retrieval_function(
        condition=condition,
        constant=Constants.jungfrau.BadPixelsDark(),
        empty_constant=np.zeros((512, 1024, memory_cells, 3), dtype=np.uint32),
    )
    mask_ff, when["BadPixelsFF"] = retrieval_function(
        condition=condition,
        constant=Constants.jungfrau.BadPixelsFF(),
        empty_constant=None
    )
    gain_map, when["Gain"] = retrieval_function(
        condition=condition,
        constant=Constants.jungfrau.RelativeGain(),
        empty_constant=None
    )

    # combine masks
    if mask_ff is not None:
        mask |= np.moveaxis(mask_ff, 0, 1)

    if memory_cells > 1:
        # move from x, y, cell, gain to cell, x, y, gain
        offset_map = np.moveaxis(offset_map, [0, 1], [1, 2])
        mask = np.moveaxis(mask, [0, 1], [1, 2])
    else:
        offset_map = np.squeeze(offset_map)
        mask = np.squeeze(mask)

    if gain_map is not None:
        if memory_cells > 1:
            gain_map = np.moveaxis(gain_map, [0, 2], [2, 0])
            # add extra empty cell constant
            b = np.ones(((1,)+gain_map.shape[1:]))
            gain_map = np.concatenate((gain_map, b), axis=0)
        else:
            gain_map = np.moveaxis(np.squeeze(gain_map), 1, 0)

    return offset_map, mask, gain_map, karabo_da, when

 with multiprocessing.Pool() as pool:
    r = pool.map(get_constants_for_module, karabo_da)

 # Print timestamps for the retrieved constants.
 constants = {}
 for offset_map, mask, gain_map, k_da, when in r:
    print(f'Constants for module {k_da}:')
    for const in when:
        print(f'  {const} injected at {when[const]}')

    if gain_map is None:
        print("No gain map found")
        relative_gain = False

    constants[k_da] = (offset_map, mask, gain_map)
 ```

 %% Cell type:code id: tags:

 ``` python
 # Correct a chunk of images for offset and gain
 def correct_train(wid, index, d):
    d = d.astype(np.float32)  # [cells, x, y]
    g = gain[index]

    # Jungfrau gains 0[00], 1[01], 3[11]
    g[g==3] = 2

    # Select memory cells
    if memory_cells > 1:
        """
        Even though it is correct to assume that memory cells pattern
        can be the same across all trains (for one correction run
        taken with one acquisition), it is preferred to not assume
        this to account for exceptions that can happen.
        """
        m = memcells[index].copy()
        # 255 is a cell value pointing to no cell image data (image of 0 pixels).
        # Corresponding image will be corrected with constant of cell 0. To avoid values of 0.
        # This line is depending on not storing the modified memory cells in the corrected data.
        m[m==255] = 0

        offset_map_cell = offset_map[m, ...]  # [16 + empty cell, x, y]
        mask_cell = mask[m, ...]
    else:
        offset_map_cell = offset_map
        mask_cell = mask

    # Offset correction
    offset = np.choose(g, np.moveaxis(offset_map_cell, -1, 0))

    d -= offset

    # Gain correction
    if relative_gain:
        if memory_cells > 1:
            gain_map_cell = gain_map[m, ...]
        else:
            gain_map_cell = gain_map
        cal = np.choose(g, np.moveaxis(gain_map_cell, -1, 0))
        d /= cal

    msk = np.choose(g, np.moveaxis(mask_cell, -1, 0))

    data_corr[index, ...] = d
    mask_corr[index, ...] = msk
 ```

 %% Cell type:code id: tags:

 ``` python
 step_timer = StepTimer()

 n_cpus = multiprocessing.cpu_count()
 context = psh.context.ProcessContext(num_workers=n_cpus)
 print(f"Using {n_cpus} workers for correction.")
 ```

 %% Cell type:markdown id: tags:

 ### Correcting RAW data ###

 %% Cell type:code id: tags:

 ``` python
 fim_data = {}
 gim_data = {}
 rim_data = {}
 msk_data = {}

 # Loop over modules
 for local_karabo_da, mapped_files_module in mapped_files.items():
    instrument_src_kda = instrument_src.format(int(local_karabo_da[-2:]))
    data_path = "INSTRUMENT/"+instrument_src_kda+"/data"

    for sequence_file in mapped_files_module:  # noqa
        sequence_file = Path(sequence_file)
        seq_dc = H5File(sequence_file)

        # Save corrected data in an output file with name
        # of corresponding raw sequence file.
        ofile_name = sequence_file.name.replace("RAW", "CORR")
        out_file = out_folder / ofile_name

        # load shape of data for memory cells, and detector size (imgs, cells, x, y)
        # dshape[0] = number of available images to correct.
        dshape = seq_dc[instrument_src_kda, "data.adc"].shape

        if dshape[0] == 0:
            print(f"\t- WARNING: No image data for {ofile_name}: data shape is {dshape}")
            continue

        # load number of data available, including trains with empty data.
        n_trains = len(seq_dc.train_ids)

        n_imgs = dshape[0]

        # For testing, only correct limit_images
        if limit_images > 0:
            n_imgs = min(n_imgs, limit_images)

        print(f"\nNumber of images to correct: {n_imgs} for {ofile_name}")
        if n_trains - dshape[0] != 0:
            print(f"\t- WARNING: {sequence_file.name} has {n_trains - dshape[0]} "
                  "trains with empty data.")

        # Just in case if n_imgs is less than the chosen plt_images.
        plt_images = min(plt_images, n_imgs)

        # load constants from the constants dictionary.
        offset_map, mask, gain_map = constants[local_karabo_da]

        # Allocate shared arrays.
        data_corr = context.alloc(shape=dshape, dtype=np.float32)
        mask_corr = context.alloc(shape=dshape, dtype=np.uint32)

        step_timer.start()
        seq_dc = seq_dc.select(
            instrument_src_kda, "*", require_all=True).select_trains(np.s_[:n_imgs])

        data = seq_dc[instrument_src_kda, "data.adc"].ndarray()
        gain = seq_dc[instrument_src_kda, "data.gain"].ndarray()
        memcells = seq_dc[instrument_src_kda, "data.memoryCell"].ndarray()

        rim_data[local_karabo_da] = data[:plt_images, 0, ...].copy()

        context.map(correct_train, data)
        step_timer.done_step(f'Correction time.')

        step_timer.start()
        # Create CORR files and add corrected data sources.
        # Exclude raw data images (data/adc)
        with h5py.File(out_file, 'w') as ofile:
            # Copy RAW non-calibrated sources.
            with h5py.File(sequence_file, 'r') as sfile:
                h5_copy_except.h5_copy_except_paths(
                    sfile, ofile, [f"{data_path}/adc"])

            # Create datasets with the available corrected data
            ddset = ofile.create_dataset(
                f"{data_path}/adc",
                data=data_corr,
                chunks=((1,) + dshape[1:]),  # 1 chunk == 1 image
                dtype=np.float32,
            )

            write_compressed_frames(
                mask_corr,
                ofile,
                dataset_path=f"{data_path}/mask",
                comp_threads=n_cpus,
            )

        step_timer.done_step(f'Saving data time.')

        # Prepare plotting arrays
        step_timer.start()

        # TODO: Print out which cell is being selected for plotting
        fim_data[local_karabo_da] = data_corr[:plt_images, 0, ...].copy()
        msk_data[local_karabo_da] = mask_corr[:plt_images, 0, ...].copy()
        gim_data[local_karabo_da] = gain[:plt_images, 0, ...].copy()

        step_timer.done_step(f'Preparing plotting data of {plt_images} images.')
 ```

 %% Cell type:markdown id: tags:

 ### Processing time summary ###

 %% Cell type:code id: tags:

 ``` python
 print(f"Total processing time {step_timer.timespan():.01f} s")
 step_timer.print_summary()
 ```

 %% Cell type:code id: tags:

 ``` python
 if skip_plots:
    print('Skipping plots')
    import sys
    sys.exit(0)
 ```

 %% Cell type:code id: tags:

 ``` python
 def do_2d_plot(data, edges, y_axis, x_axis, title):
    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(111)
    extent = [
        np.min(edges[1]),
        np.max(edges[1]),
        np.min(edges[0]),
        np.max(edges[0]),
    ]

    im = ax.imshow(
        data[::-1, :],
        extent=extent,
        aspect="auto",
        norm=LogNorm(vmin=1, vmax=np.max(data))
    )
    ax.set_xlabel(x_axis)
    ax.set_ylabel(y_axis)
    ax.set_title(title)
    cb = fig.colorbar(im)
    cb.set_label("Counts")
 ```

 %% Cell type:code id: tags:

 ``` python
+db_modules = get_pdu_from_db(
+    karabo_id=karabo_id,
+    karabo_da=karabo_da,
+    constant=Constants.jungfrau.Offset(),
+    condition=condition,
+    cal_db_interface=cal_db_interface,
+    snapshot_at=creation_time)
+```
+
+%% Cell type:code id: tags:
+
+``` python
 for pdu, mod in zip(db_modules, karabo_da):
    h, ex, ey = np.histogram2d(
        rim_data[mod].flatten(),
        gim_data[mod].flatten(),
        bins=[100, 4],
        range=[[0, 10000], [0, 4]],
    )
    do_2d_plot(
        h, (ex, ey),
        "Signal (ADU)",
        "Gain Bit Value (high gain=0[00], medium gain=1[01], low gain=3[11])",
        f"Module {mod} ({pdu})")
 ```

 %% Cell type:markdown id: tags:

 ### Mean RAW Preview ###

 The per pixel mean of the sequence file of RAW data

 %% Cell type:code id: tags:

 ``` python
 for pdu, mod in zip(db_modules, karabo_da):
    fig = plt.figure(figsize=(20, 10))
    ax = fig.add_subplot(111)

    im = ax.imshow(
        np.mean(rim_data[mod],axis=0),
        vmin=min(0.75*np.median(rim_data[mod][rim_data[mod] > 0]), 2000),
        vmax=max(1.5*np.median(rim_data[mod][rim_data[mod] > 0]), 16000),
        cmap="jet")

    ax.set_title(f'Module {mod} ({pdu})')
    cb = fig.colorbar(im, ax=ax)
 ```

 %% Cell type:markdown id: tags:

 ### Mean CORRECTED Preview ###

 The per pixel mean of the sequence file of CORR data

 %% Cell type:code id: tags:

 ``` python
 for pdu, mod in zip(db_modules, karabo_da):
    fig = plt.figure(figsize=(20, 10))
    ax = fig.add_subplot(111)

    im = ax.imshow(
        np.mean(fim_data[mod],axis=0),
        vmin=min(0.75*np.median(fim_data[mod][fim_data[mod] > 0]), -0.5),
        vmax=max(2.*np.median(fim_data[mod][fim_data[mod] > 0]), 100),
        cmap="jet")

    ax.set_title(f'Module {mod} ({pdu})', size=18)
    cb = fig.colorbar(im, ax=ax)
 ```

 %% Cell type:markdown id: tags:

 ### Single Train Preview ###

 A single image from the first train

 %% Cell type:code id: tags:

 ``` python
 for pdu, mod in zip(db_modules, karabo_da):
    fig = plt.figure(figsize=(20, 10))
    ax = fig.add_subplot(111)

    im = ax.imshow(
        fim_data[mod][0, ...],
        vmin=min(0.75*np.median(fim_data[mod][0, ...]), -0.5),
        vmax=max(2.*np.median(fim_data[mod][0, ...]), 100),
        cmap="jet")

    ax.set_title(f'Module {mod} ({pdu})', size=18)
    cb = fig.colorbar(im, ax=ax)
 ```

 %% Cell type:markdown id: tags:

 ## Signal Distribution ##

 %% Cell type:code id: tags:

 ``` python
 for pdu, mod in zip(db_modules, karabo_da):
    fig = plt.figure(figsize=(20,10))
    ax = fig.add_subplot(211)
    h = ax.hist(
        fim_data[mod].flatten(),
        bins=1000,
        range=(-100, 1000),
        log=True,
    )
    l = ax.set_xlabel("Signal (keV)")
    l = ax.set_ylabel("Counts")
    _ = ax.set_title(f'Module {mod} ({pdu})')

    ax = fig.add_subplot(212)
    h = ax.hist(
        fim_data[mod].flatten(),
        bins=1000,
        range=(-1000, 10000),
        log=True,
    )
    l = ax.set_xlabel("Signal (keV)")
    l = ax.set_ylabel("Counts")
    _ = ax.set_title(f'Module {mod} ({pdu})')
 ```

 %% Cell type:markdown id: tags:

 ### Maximum GAIN Preview ###

 The per pixel maximum of the first train of the GAIN data

 %% Cell type:code id: tags:

 ``` python
 for pdu, mod in zip(db_modules, karabo_da):
    fig = plt.figure(figsize=(20, 10))
    ax = fig.add_subplot(111)
    im = ax.imshow(
        np.max(gim_data[mod], axis=0),
        vmin=0, vmax=3, cmap="jet")
    ax.set_title(f'Module {mod} ({pdu})', size=18)
    cb = fig.colorbar(im, ax=ax)
 ```

 %% Cell type:markdown id: tags:

 ## Bad Pixels ##
 The mask contains dedicated entries for all pixels and memory cells as well as all three gains stages. Each mask entry is encoded in 32 bits as:

 %% Cell type:code id: tags:

 ``` python
 table = []
 for item in BadPixels:
    table.append(
        (item.name, f"{item.value:016b}"))
 md = display(Latex(tabulate.tabulate(
    table, tablefmt='latex',
    headers=["Bad pixel type", "Bit mask"])))
 ```

 %% Cell type:markdown id: tags:

 ### Single Image Bad Pixels ###

 A single image bad pixel map for the first image of the first train

 %% Cell type:code id: tags:

 ``` python
 for pdu, mod in zip(db_modules, karabo_da):
    fig = plt.figure(figsize=(20, 10))
    ax = fig.add_subplot(111)
    im = ax.imshow(
        np.log2(msk_data[mod][0,...]),
        vmin=0, vmax=32, cmap="jet")
    ax.set_title(f'Module {mod} ({pdu})', size=18)
    cb = fig.colorbar(im, ax=ax)
 ```