[Jungfrau][DARK][CORRECT] Feat/fixed gain jungfrau

Merged Karim Ahmed requested to merge feat/Fixed_gain_jungfrau into master
%% Cell type:markdown id: tags:
# Jungfrau Offline Correction #
Author: European XFEL Detector Group, Version: 2.0
Offline Calibration for the Jungfrau Detector
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/exp/SPB/202130/p900204/raw" # the folder to read data from, required
out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/remove" # the folder to output to, required
run = 112 # run to process, required
sequences = [-1] # sequences to correct, set to [-1] for all, range allowed
sequences_per_node = 1 # number of sequence files per cluster node if run as slurm job, set to 0 to not run SLURM parallel
# Parameters used to access raw data.
karabo_id = "SPB_IRDA_JF4M" # karabo prefix of Jungfrau devices
karabo_da = ['JNGFR01', 'JNGFR02', 'JNGFR03', 'JNGFR04', 'JNGFR05', 'JNGFR06', 'JNGFR07', 'JNGFR08'] # data aggregators
receiver_template = "JNGFR{:02d}" # Detector receiver template for accessing raw data files. e.g. "JNGFR{:02d}"
path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # template to use for file name
instrument_source_template = '{}/DET/{}:daqOutput' # template for source name (filled with karabo_id & receiver_id). e.g. 'SPB_IRDA_JF4M/DET/JNGFR01:daqOutput'
ctrl_source_template = '{}/DET/CONTROL' # template for control source name (filled with karabo_id_control)
karabo_id_control = "" # if control is on a different ID; set to an empty string if it is the same as karabo_id
karabo_da_control = "JNGFRCTRL00" # file inset for control data
# Parameters for calibration database.
use_dir_creation_date = True # use the creation date of the input dir for database queries
cal_db_interface = "tcp://max-exfl016:8017#8025" # the database interface to use
cal_db_timeout = 180000 # timeout on caldb requests
# Parameters affecting corrected data.
overwrite = True # set to True if existing data should be overwritten
relative_gain = True # do relative gain correction
plt_images = 100 # Number of images to plot after applying selected corrections.
limit_images = 0 # ONLY FOR TESTING. Process only the first N images; use 0 to process all.
# Parameters for retrieving calibration constants
manual_slow_data = False # if true, use manually entered bias_voltage and integration_time values
integration_time = 4.96 # integration time in us, will be overwritten by value in file
gain_setting = 0 # 0 for dynamic gain, 1 for dynamic HG0, will be overwritten by value in file
mem_cells = 0 # leave memory cells equal to 0, as it is saved in control information starting from 2019.
bias_voltage = 180 # will be overwritten by value in file
# TODO: Remove
db_module = "" # ID of module in calibration database, this parameter is ignored in the notebook. TODO: remove from calibration_configurations.
def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):
    from xfel_calibrate.calibrate import balance_sequences as bs
    return bs(in_folder, run, sequences, sequences_per_node, karabo_da)
```
%% Cell type:code id: tags:
``` python
import multiprocessing
import warnings
from functools import partial
from pathlib import Path
import h5py
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pasha as psh
import tabulate
from IPython.display import Latex, Markdown, display
from extra_data import H5File, RunDirectory
from matplotlib.colors import LogNorm
from cal_tools import h5_copy_except
from cal_tools.jungfraulib import JungfrauCtrl
from cal_tools.enums import BadPixels
from cal_tools.step_timing import StepTimer
from cal_tools.tools import (
    get_constant_from_db_and_time,
    get_dir_creation_date,
    map_seq_files,
    write_compressed_frames,
)
from iCalibrationDB import Conditions, Constants
warnings.filterwarnings('ignore')
matplotlib.use('agg')
%matplotlib inline
```
%% Cell type:code id: tags:
``` python
in_folder = Path(in_folder)
out_folder = Path(out_folder)

run_dc = RunDirectory(in_folder / f'r{run:04d}')
instrument_src = instrument_source_template.format(karabo_id, receiver_template)

if out_folder.exists() and not overwrite:
    raise AttributeError("Output path exists! Exiting")
else:
    out_folder.mkdir(parents=True, exist_ok=True)

print(f"Run is: {run}")
print(f"Instrument H5File source: {instrument_src}")
print(f"Process modules: {karabo_da}")

creation_time = None
if use_dir_creation_date:
    creation_time = get_dir_creation_date(in_folder, run)
    print(f"Using {creation_time} as creation time")

if karabo_id_control == "":
    karabo_id_control = karabo_id
```
%% Cell type:code id: tags:
``` python
# Read available sequence files to correct.
mapped_files, num_seq_files = map_seq_files(
    run_dc, karabo_id, karabo_da, sequences)

if not len(mapped_files):
    raise IndexError(
        "No sequence files available to correct for the selected sequences and karabo_da.")
```
%% Cell type:code id: tags:
``` python
print(f"Processing a total of {num_seq_files} sequence files")
table = []
fi = 0
if len(mapped_files) > 0:  # create table
    for kda, sfiles in mapped_files.items():
        for k, f in enumerate(sfiles):
            if k == 0:
                table.append((fi, kda, k, f))
            else:
                table.append((fi, "", k, f))
            fi += 1
md = display(Latex(tabulate.tabulate(
    table, tablefmt='latex',
    headers=["#", "module", "# module", "file"])))
```
%% Cell type:code id: tags:
``` python
ctrl_src = ctrl_source_template.format(karabo_id_control)
ctrl_data = JungfrauCtrl(run_dc, ctrl_src)

try:
    this_run_mcells, sc_start = ctrl_data.get_memory_cells()
    if this_run_mcells == 1:
        memory_cells = 1
        print("Run is in single cell mode.\n"
              f"Storage cell start: {sc_start:02d}")
    else:
        memory_cells = 16
        print("Run is in burst mode.\n"
              f"Storage cell start: {sc_start:02d}")
except KeyError as e:
    print("WARNING: KeyError while reading number of memory cells.")
    if mem_cells == 0:
        memory_cells = 1
    else:
        memory_cells = mem_cells
    print(f"WARNING: Set memory cells to {memory_cells}, as "
          "it is not saved in control information.")

if not manual_slow_data:
    integration_time = ctrl_data.get_integration_time()
    bias_voltage = ctrl_data.get_bias_voltage()
    gain_setting = ctrl_data.get_gain_setting()
    gain_mode = ctrl_data.get_gain_mode()

print(f"Integration time is {integration_time} us")
print(f"Gain setting is {gain_setting} (run settings: "
      f"{ctrl_data.run_settings.value if ctrl_data.run_settings else ctrl_data.run_settings})")  # noqa
print(f"Gain mode is {gain_mode}")
print(f"Bias voltage is {bias_voltage} V")
print(f"Number of memory cells is {memory_cells}")
```
%% Cell type:markdown id: tags:
### Retrieving calibration constants ###
%% Cell type:code id: tags:
``` python
condition = Conditions.Dark.jungfrau(
    memory_cells=memory_cells,
    bias_voltage=bias_voltage,
    integration_time=integration_time,
    gain_setting=gain_setting,
    gain_mode=gain_mode,
)


def get_constants_for_module(karabo_da: str):
    """Get calibration constants for the given Jungfrau module.

    :return:
        offset_map (offset map),
        mask (mask of bad pixels),
        gain_map (map of relative gain factors),
        karabo_da (data aggregator name of the module),
        when (dictionary: constant -> creation time)
    """
    when = {}
    retrieval_function = partial(
        get_constant_from_db_and_time,
        karabo_id=karabo_id,
        karabo_da=karabo_da,
        condition=condition,
        cal_db_interface=cal_db_interface,
        creation_time=creation_time,
        timeout=cal_db_timeout,
        print_once=False,
    )
    offset_map, when["Offset"] = retrieval_function(
        constant=Constants.jungfrau.Offset(),
        empty_constant=np.zeros((1024, 512, 1, 3))
    )
    mask, when["BadPixelsDark"] = retrieval_function(
        constant=Constants.jungfrau.BadPixelsDark(),
        empty_constant=np.zeros((1024, 512, 1, 3)),
    )
    mask_ff, when["BadPixelsFF"] = retrieval_function(
        constant=Constants.jungfrau.BadPixelsFF(),
        empty_constant=None
    )
    gain_map, when["Gain"] = retrieval_function(
        constant=Constants.jungfrau.RelativeGain(),
        empty_constant=None
    )

    # Combine masks.
    if mask_ff is not None:
        mask |= np.moveaxis(mask_ff, 0, 1)

    # Move from (x, y, cell, gain) to (cell, x, y, gain).
    offset_map = np.squeeze(offset_map)
    mask = np.squeeze(mask)
    if memory_cells > 1:
        offset_map = np.moveaxis(np.moveaxis(offset_map, 0, 2), 0, 2)
        mask = np.moveaxis(np.moveaxis(mask, 0, 2), 0, 2)

    if gain_map is not None:
        if memory_cells > 1:
            gain_map = np.moveaxis(np.moveaxis(gain_map, 0, 2), 0, 1)
        else:
            gain_map = np.squeeze(gain_map)
            gain_map = np.moveaxis(gain_map, 1, 0)

    return offset_map, mask, gain_map, karabo_da, when


with multiprocessing.Pool() as pool:
    r = pool.map(get_constants_for_module, karabo_da)

# Print timestamps for the retrieved constants.
constants = {}
for offset_map, mask, gain_map, k_da, when in r:
    print(f'Constants for module {k_da}:')
    for const in when:
        print(f'  {const} injected at {when[const]}')
    if gain_map is None:
        print("No gain map found")
        relative_gain = False
    constants[k_da] = (offset_map, mask, gain_map)
```
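%% Cell type:markdown id: tags:
The axis reordering above is easiest to check on a dummy array. The following is a minimal sketch (illustrative only, with a made-up 16-cell burst-mode shape) showing that the two `np.moveaxis` calls used in `get_constants_for_module` turn an `(x, y, cell, gain)` constant into `(cell, x, y, gain)`:
%% Cell type:code id: tags:
``` python
# Hypothetical burst-mode constant with axes (x, y, cell, gain).
dummy = np.zeros((1024, 512, 16, 3))

# Same two moveaxis calls as above: (x, y, cell, gain) -> (cell, x, y, gain).
reordered = np.moveaxis(np.moveaxis(dummy, 0, 2), 0, 2)
assert reordered.shape == (16, 1024, 512, 3)
```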
%% Cell type:code id: tags:
``` python
# Correct a chunk of images for offset and gain
def correct_train(wid, index, d):
    d = d.astype(np.float32)  # [2, x, y]
    g = gain[index]
    m = memcells[index]
    g[g==3] = 2

    if 0 <= index < plt_images:
        r_data[index, ...] = d[0, ...]

    # Select memory cells.
    # TODO: This needs to be revisited, as it results in copying data
    # to a new array on every train, even when there's the same pattern
    # of memory cells on every train.
    if memory_cells > 1:
        m[m>16] = 0  # TODO: this is wrong and needs to be updated with burst mode.
        # For an invalid image a memory cell of 255 is set.
        # These images don't need to be processed.
        offset_map_cell = offset_map[m, ...]
        mask_cell = mask[m, ...]
    else:
        offset_map_cell = offset_map
        mask_cell = mask

    # Offset correction
    offset = np.choose(g, np.moveaxis(offset_map_cell, -1, 0))
    d -= offset

    # Gain correction
    if relative_gain:
        if memory_cells > 1:
            gain_map_cell = gain_map[m, ...]
        else:
            gain_map_cell = gain_map
        cal = np.choose(g, np.moveaxis(gain_map_cell, -1, 0))
        d /= cal

    msk = np.choose(g, np.moveaxis(mask_cell, -1, 0))

    # Store a sample of the data for plotting.
    if 0 <= index < plt_images:
        if memory_cells == 1:
            g_data[index, ...] = g
        else:
            g_data[index, ...] = g[1, ...]

    data_corr[index, ...] = d
    mask_corr[index, ...] = msk
```
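%% Cell type:markdown id: tags:
The per-pixel constant selection in `correct_train` relies on `np.choose`: after mapping gain bit 3 to 2, the gain values 0, 1, 2 pick one of the three gain planes of a constant for every pixel. A small self-contained sketch with toy shapes and hypothetical values:
%% Cell type:code id: tags:
``` python
# Toy 2x2 "image" of gain bits (3 would already be mapped to 2, as above).
g_toy = np.array([[0, 1],
                  [2, 0]])

# Toy constant with axes (x, y, gain): one value per pixel and gain stage.
const_toy = np.stack([np.full((2, 2), 10.),   # gain 0
                      np.full((2, 2), 20.),   # gain 1
                      np.full((2, 2), 30.)],  # gain 2
                     axis=-1)

# Same pattern as in correct_train: per pixel, pick the plane named by the gain bit.
per_pixel = np.choose(g_toy, np.moveaxis(const_toy, -1, 0))
print(per_pixel)  # [[10. 20.], [30. 10.]]
```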
%% Cell type:code id: tags:
``` python
step_timer = StepTimer()
n_cpus = multiprocessing.cpu_count()
context = psh.context.ProcessContext(num_workers=n_cpus)
print(f"Using {n_cpus} workers for correction.")
```
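%% Cell type:markdown id: tags:
A minimal sketch of the `pasha` pattern used below, assuming only the calls already present in this notebook (`context.alloc` and `context.map`): the kernel is called once per entry along the first axis with `(worker_id, index, value)` and writes its result into a shared output array allocated from the same context.
%% Cell type:code id: tags:
``` python
# Illustrative only; the actual correction uses `context` and `correct_train` below.
toy_context = psh.context.ProcessContext(num_workers=2)

# Toy input: 4 "trains" of 3x3 data, and a shared output array of the same shape.
toy_data = np.arange(4 * 9, dtype=np.float32).reshape(4, 3, 3)
toy_out = toy_context.alloc(shape=toy_data.shape, dtype=np.float32)

def toy_kernel(wid, index, d):
    # Trivial "correction": subtract a constant offset from this train.
    toy_out[index] = d - 1.0

toy_context.map(toy_kernel, toy_data)
print(toy_out[0, 0, 0])  # -1.0
```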
%% Cell type:markdown id: tags:
### Correcting RAW data ###
%% Cell type:code id: tags:
``` python
fim_data = {}
gim_data = {}
rim_data = {}
msk_data = {}

# Loop over modules
for local_karabo_da, mapped_files_module in mapped_files.items():
    instrument_src_kda = instrument_src.format(int(local_karabo_da[-2:]))
    data_path = "INSTRUMENT/" + instrument_src_kda + "/data"

    for sequence_file in mapped_files_module:  # noqa
        sequence_file = Path(sequence_file)
        seq_dc = H5File(sequence_file)

        # Save corrected data in an output file with the name
        # of the corresponding raw sequence file.
        ofile_name = sequence_file.name.replace("RAW", "CORR")
        out_file = out_folder / ofile_name

        # Load the data shape to get memory cells and detector size (imgs, cells, x, y).
        # dshape[0] = number of available images to correct.
        dshape = seq_dc[instrument_src_kda, "data.adc"].shape
        if dshape[0] == 0:
            print(f"\t- WARNING: No image data for {ofile_name}: data shape is {dshape}")
            continue

        sensor_size = dshape[1:]

        # Load the number of data entries available, including trains with empty data.
        n_trains = seq_dc.get_data_counts(instrument_src_kda, "data.adc").shape[0]
        n_imgs = dshape[0]

        # For testing, only correct limit_images
        if limit_images > 0:
            n_imgs = min(n_imgs, limit_images)
        print(f"\nNumber of images to correct: {n_imgs} for {ofile_name}")

        if n_trains - dshape[0] != 0:
            print(f"\t- WARNING: {sequence_file.name} has {n_trains - dshape[0]} "
                  "trains with empty data.")

        # Just in case n_imgs is less than the chosen plt_images.
        plt_images = min(plt_images, n_imgs)

        # Load constants from the constants dictionary.
        offset_map, mask, gain_map = constants[local_karabo_da]

        # Allocate shared arrays for plotting.
        g_data = context.alloc(
            shape=(plt_images, dshape[2], dshape[3]), dtype=np.uint8)
        r_data = context.alloc(
            shape=(plt_images, dshape[2], dshape[3]), dtype=np.float32)
        # Shared arrays for corrected data.
        data_corr = context.alloc(shape=dshape, dtype=np.float32)
        mask_corr = context.alloc(shape=dshape, dtype=np.uint32)

        step_timer.start()
        seq_dc = seq_dc.select(
            instrument_src_kda, "*", require_all=True).select_trains(np.s_[:n_imgs])

        data = seq_dc[instrument_src_kda, "data.adc"].ndarray()
        gain = seq_dc[instrument_src_kda, "data.gain"].ndarray()
        memcells = seq_dc[instrument_src_kda, "data.memoryCell"].ndarray()

        context.map(correct_train, data)
        step_timer.done_step('Correction time.')

        step_timer.start()
        # Create CORR files and add corrected data sources.
        # Exclude raw data images (data/adc).
        with h5py.File(out_file, 'w') as ofile:
            # Copy RAW non-calibrated sources.
            with h5py.File(sequence_file, 'r') as sfile:
                h5_copy_except.h5_copy_except_paths(
                    sfile, ofile, [f"{data_path}/adc"])

            # Create datasets with the available corrected data.
            ddset = ofile.create_dataset(
                f"{data_path}/adc",
                data=data_corr,
                chunks=((1,) + dshape[1:]),  # 1 chunk == 1 image
                dtype=np.float32,
            )

            write_compressed_frames(
                mask_corr,
                ofile,
                dataset_path=f"{data_path}/mask",
                comp_threads=n_cpus,
            )
        step_timer.done_step('Saving data time.')

        # Prepare plotting arrays.
        step_timer.start()
        if memory_cells == 1:
            fim_data[local_karabo_da] = data_corr[:plt_images, ...].copy()
            msk_data[local_karabo_da] = mask_corr[:plt_images, ...].copy()
        else:
            fim_data[local_karabo_da] = data_corr[:plt_images, 1, ...].copy()
            msk_data[local_karabo_da] = mask_corr[:plt_images, 1, ...].copy()
        gim_data[local_karabo_da] = g_data
        rim_data[local_karabo_da] = r_data
        step_timer.done_step(f'Preparing plotting data of {plt_images} images.')
```
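%% Cell type:markdown id: tags:
As a quick sanity check (a sketch, not part of the notebook's processing), a CORR file written above can be opened with the same `extra_data.H5File` interface used for the RAW input; the corrected frames stay under the original source and key names. The snippet below assumes `out_file` and `instrument_src_kda` still refer to the last sequence file processed in the loop:
%% Cell type:code id: tags:
``` python
# Illustrative only: inspect the last corrected file written above.
corr_dc = H5File(out_file)
corr_adc = corr_dc[instrument_src_kda, "data.adc"].ndarray()
print(corr_adc.shape, corr_adc.dtype)  # (n_imgs, cells, x, y), float32
```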
%% Cell type:markdown id: tags:
### Processing time summary ###
%% Cell type:code id: tags:
``` python
print(f"Total processing time {step_timer.timespan():.01f} s")
step_timer.print_summary()
```
%% Cell type:code id: tags:
``` python
def do_2d_plot(data, edges, y_axis, x_axis, title):
    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(111)
    extent = [
        np.min(edges[1]),
        np.max(edges[1]),
        np.min(edges[0]),
        np.max(edges[0]),
    ]
    im = ax.imshow(
        data[::-1, :],
        extent=extent,
        aspect="auto",
        norm=LogNorm(vmin=1, vmax=np.max(data))
    )
    ax.set_xlabel(x_axis)
    ax.set_ylabel(y_axis)
    ax.set_title(title)
    cb = fig.colorbar(im)
    cb.set_label("Counts")
```
%% Cell type:code id: tags:
``` python
for mod in rim_data:
    h, ex, ey = np.histogram2d(
        rim_data[mod].flatten(),
        gim_data[mod].flatten(),
        bins=[100, 4],
        range=[[0, 10000], [0, 4]],
    )
    do_2d_plot(
        h, (ex, ey),
        "Signal (ADU)",
        "Gain Bit Value",
        f"Module {mod}")
```
%% Cell type:markdown id: tags:
### Mean RAW Preview ###
The per-pixel mean of the RAW data in the sequence file
%% Cell type:code id: tags:
``` python
for mod in rim_data:
    fig = plt.figure(figsize=(20, 10))
    ax = fig.add_subplot(111)
    im = ax.imshow(
        np.mean(rim_data[mod], axis=0),
        vmin=min(0.75*np.median(rim_data[mod][rim_data[mod] > 0]), 2000),
        vmax=max(1.5*np.median(rim_data[mod][rim_data[mod] > 0]), 16000),
        cmap="jet")
    ax.set_title(f'Module {mod}')
    cb = fig.colorbar(im, ax=ax)
```
%% Cell type:markdown id: tags:
### Mean CORRECTED Preview ###
The per-pixel mean of the CORR data in the sequence file
%% Cell type:code id: tags:
``` python
for mod in rim_data:
    fig = plt.figure(figsize=(20, 10))
    ax = fig.add_subplot(111)
    im = ax.imshow(
        np.mean(fim_data[mod], axis=0),
        vmin=min(0.75*np.median(fim_data[mod][fim_data[mod] > 0]), -0.5),
        vmax=max(2.*np.median(fim_data[mod][fim_data[mod] > 0]), 100),
        cmap="jet")
    ax.set_title(f'Module {mod}', size=18)
    cb = fig.colorbar(im, ax=ax)
```
%% Cell type:markdown id: tags:
### Single Train Preview ###
A single image from the first train
%% Cell type:code id: tags:
``` python
for mod in rim_data:
    fig = plt.figure(figsize=(20, 10))
    ax = fig.add_subplot(111)
    im = ax.imshow(
        fim_data[mod][0, ...],
        vmin=min(0.75*np.median(fim_data[mod][0, ...]), -0.5),
        vmax=max(2.*np.median(fim_data[mod][0, ...]), 100),
        cmap="jet")
    ax.set_title(f'Module {mod}', size=18)
    cb = fig.colorbar(im, ax=ax)
```
%% Cell type:markdown id: tags:
## Signal Distribution ##
%% Cell type:code id: tags:
``` python
for mod in rim_data:
    fig = plt.figure(figsize=(20, 10))
    ax = fig.add_subplot(211)
    h = ax.hist(
        fim_data[mod].flatten(),
        bins=1000,
        range=(-100, 1000),
        log=True,
    )
    l = ax.set_xlabel("Signal (keV)")
    l = ax.set_ylabel("Counts")
    _ = ax.set_title(f'Module {mod}')

    ax = fig.add_subplot(212)
    h = ax.hist(
        fim_data[mod].flatten(),
        bins=1000,
        range=(-1000, 10000),
        log=True,
    )
    l = ax.set_xlabel("Signal (keV)")
    l = ax.set_ylabel("Counts")
    _ = ax.set_title(f'Module {mod}')
```
%% Cell type:markdown id: tags:
### Maximum GAIN Preview ###
The per-pixel maximum of the GAIN data over the plotted images
%% Cell type:code id: tags:
``` python
for mod in rim_data:
    fig = plt.figure(figsize=(20, 10))
    ax = fig.add_subplot(111)
    im = ax.imshow(
        np.max(gim_data[mod], axis=0),
        vmin=0, vmax=3, cmap="jet")
    ax.set_title(f'Module {mod}', size=18)
    cb = fig.colorbar(im, ax=ax)
```
%% Cell type:markdown id: tags:
## Bad Pixels ##
The mask contains dedicated entries for all pixels and memory cells as well as for all three gain stages. Each mask entry is encoded in 32 bits as:
%% Cell type:code id: tags:
``` python
table = []
for item in BadPixels:
    table.append(
        (item.name, f"{item.value:016b}"))
md = display(Latex(tabulate.tabulate(
    table, tablefmt='latex',
    headers=["Bad pixel type", "Bit mask"])))
```
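%% Cell type:markdown id: tags:
To relate these bit masks to the corrected data, a short sketch (illustrative only, reusing the `msk_data` arrays prepared for plotting above) counts how many pixels of each bad pixel type are flagged in the first plotted image of each module:
%% Cell type:code id: tags:
``` python
# Illustrative only: per-type bad pixel counts for the first plotted image.
for mod in msk_data:
    print(f"Module {mod}:")
    for item in BadPixels:
        n_bad = np.count_nonzero(msk_data[mod][0, ...] & item.value)
        if n_bad > 0:
            print(f"  {item.name}: {n_bad} pixels")
```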
%% Cell type:markdown id: tags:
### Single Image Bad Pixels ###
The bad pixel map for the first image of the first train
%% Cell type:code id: tags:
``` python
for mod in rim_data:
    fig = plt.figure(figsize=(20, 10))
    ax = fig.add_subplot(111)
    im = ax.imshow(
        np.log2(msk_data[mod][0, ...]),
        vmin=0, vmax=32, cmap="jet")
    ax.set_title(f'Module {mod}', size=18)
    cb = fig.colorbar(im, ax=ax)
```