Feat/add badpixelff jungfrau

Merged David Hammer requested to merge feat/add_badpixelff_jungfrau into master
%% Cell type:markdown id: tags:
# Jungfrau Offline Correction #
Author: European XFEL Detector Group, Version: 0.1
Offline Calibration for the Jungfrau Detector
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/exp/FXE/201901/p002210/raw" # the folder to read data from, required
out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/jf" # the folder to output to, required
sequences = [-1] # sequences to correct, set to -1 for all, range allowed
run = 249 # runs to process, required
karabo_id = "FXE_XAD_JF1M" # karabo prefix of Jungfrau devices
karabo_da = ['JNGFR01'] # data aggregators
receiver_id = "RECEIVER-{}" # inset for receiver devices
receiver_control_id = "CONTROL" # inset for control devices
path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # template to use for file name, double escape sequence number
h5path = '/INSTRUMENT/{}/DET/{}:daqOutput/data' # path in H5 file under which images are located
h5path_run = '/RUN/{}/DET/{}' # path to run data
h5path_cntrl = '/CONTROL/{}/DET/{}' # path to control data
karabo_id_control = "" # karabo_id for the control devices; set to empty string if it is the same as karabo_id
karabo_da_control = "JNGFR01" # file inset for control data
use_dir_creation_date = True # use the creation data of the input dir for database queries
cal_db_interface = "tcp://max-exfl016:8017#8025" #"tcp://max-exfl016:8015#8025" # the database interface to use
cal_db_timeout = 180000 # timeout on caldb requests
overwrite = True # set to True if existing data should be overwritten
no_relative_gain = False # do not do relative gain correction
bias_voltage = 180 # will be overwritten by value in file
sequences_per_node = 5 # number of sequence files per cluster node if run as slurm job, set to 0 to not run SLURM parallel
photon_energy = 9.2 # photon energy in keV
chunk_size_idim = 1 # chunking size of imaging dimension, adjust if user software is sensitive to this.
integration_time = 4.96 # integration time in us, will be overwritten by value in file
mem_cells = 0. # leave memory cells equal to 0, as it is saved in control information starting in 2019
gmapfile = "" # variable is not used but kept for backward compatibility
db_module = ["Jungfrau_M233"] # ID of module in calibration database
manual_slow_data = False # if true, use manually entered bias_voltage and integration_time values
chunk_size = 0
def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):
    from xfel_calibrate.calibrate import balance_sequences as bs
    return bs(in_folder, run, sequences, sequences_per_node, karabo_da)
```
%% Cell type:code id: tags:
``` python
import copy
import multiprocessing
import os
import time
import warnings
from functools import partial
import h5py
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import tabulate
from cal_tools.enums import BadPixels
from cal_tools.tools import (get_constant_from_db_and_time,
                             get_dir_creation_date, map_modules_from_folder)
from iCalibrationDB import (Conditions, ConstantMetaData, Constants, Detectors,
                            Versions)
from IPython.display import Latex, display
from matplotlib.colors import LogNorm
warnings.filterwarnings('ignore')
matplotlib.use('agg')
%matplotlib inline
```
%% Cell type:code id: tags:
``` python
h5path = h5path.format(karabo_id, receiver_id)
ped_dir = "{}/r{:04d}".format(in_folder, run)
if ped_dir[-1] == "/":
    ped_dir = ped_dir[:-1]

if not os.path.exists(out_folder):
    os.makedirs(out_folder)
elif not overwrite:
    raise AttributeError("Output path exists! Exiting")

fp_name_contr = path_template.format(run, karabo_da_control, 0)
fp_path_contr = '{}/{}'.format(ped_dir, fp_name_contr)

if sequences[0] == -1:
    sequences = None

print(f"Run is: {run}")
print(f"HDF5 path: {h5path}")
print(f"Process modules: {karabo_da}")

creation_time = None
if use_dir_creation_date:
    creation_time = get_dir_creation_date(in_folder, run)
    print(f"Using {creation_time} as creation time")

if karabo_id_control == "":
    karabo_id_control = karabo_id
```
%% Cell type:code id: tags:
``` python
def check_memoryCells(file_name, path):
    with h5py.File(file_name, 'r') as f:
        t_stamp = np.array(f[path + '/storageCells/timestamp'])
        st_cells = np.array(f[path + '/storageCells/value'])
        sc_start = np.array(f[path + '/storageCellStart/value'])

    valid_train = t_stamp > 0
    n_scs = st_cells[valid_train][0] + 1
    sc_s = sc_start[valid_train][0]

    return n_scs, sc_s
```
%% Cell type:code id: tags:
``` python
# set everything up filewise
mmf = map_modules_from_folder(in_folder, run, path_template, karabo_da, sequences)
mapped_files, mod_ids, total_sequences, sequences_qm, _ = mmf
print(f"Processing a total of {total_sequences} sequence files")
table = []
fi = 0
if total_sequences > 0:  # create table
    for i, key in enumerate(mapped_files):
        for k, f in enumerate(list(mapped_files[key].queue)):
            if k == 0:
                table.append((fi, karabo_da[i], k, f))
            else:
                table.append((fi, "", k, f))
            fi += 1
    md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
                                         headers=["#", "module", "# module", "file"])))
# restore the queue
mmf = map_modules_from_folder(in_folder, run, path_template, karabo_da, sequences)
mapped_files, mod_ids, total_sequences, sequences_qm, _ = mmf
```
%% Cell type:code id: tags:
``` python
if not manual_slow_data:
    with h5py.File(fp_path_contr.format(0), 'r') as f:
        run_path = h5path_run.format(karabo_id_control, receiver_control_id)
        integration_time = float(f[f'{run_path}/exposureTime/value'][()]*1e6)
        bias_voltage = int(np.squeeze(f[f'{run_path}/vHighVoltage/value'])[0])

control_path = h5path_cntrl.format(karabo_id_control, receiver_control_id)
try:
    this_run_mcells, sc_start = check_memoryCells(fp_path_contr.format(0), control_path)
    if this_run_mcells == 1:
        memoryCells = 1
        print(f'Dark runs in single cell mode\n storage cell start: {sc_start:02d}')
    else:
        memoryCells = 16
        print(f'Dark runs in burst mode\n storage cell start: {sc_start:02d}')
except Exception as e:
    if "Unable to open object" in str(e):
        if mem_cells == 0:
            memoryCells = 1
        else:
            memoryCells = mem_cells
        print(f'Set memory cells to {memoryCells}, as it is not saved in control information.')
    else:
        print(f"Error trying to access memory cell from control information: {e}")

print(f"Integration time is {integration_time} us")
print(f"Bias voltage is {bias_voltage} V")
print(f"Number of memory cells is {memoryCells}")
```
%% Cell type:code id: tags:
``` python
condition = Conditions.Dark.jungfrau(memory_cells=memoryCells,
                                     bias_voltage=bias_voltage,
                                     integration_time=integration_time)

def get_constants_for_module(karabo_da: str):
    """ Get calibration constants for a given module of Jungfrau

    :param karabo_da: Data aggregator name of the module, e.g. "JNGFR01"
    :return:
        offset_map (offset map), mask (mask of bad pixels),
        gain_map (map of relative gain factors), db_module (name of DB module),
        when (dictionary: constant - creation time)
    """
    when = {}

    # TODO: Remove condition + constant retrieval duplication from notebook
    offset_map, when['Offset'] = \
        get_constant_from_db_and_time(karabo_id, karabo_da,
                                      Constants.jungfrau.Offset(),
                                      condition,
                                      np.zeros((1024, 512, 1, 3)),
                                      cal_db_interface,
                                      creation_time=creation_time,
                                      timeout=cal_db_timeout)
    mask, when['BadPixelsDark'] = \
        get_constant_from_db_and_time(karabo_id, karabo_da,
                                      Constants.jungfrau.BadPixelsDark(),
                                      condition,
                                      np.zeros((1024, 512, 1, 3)),
                                      cal_db_interface,
                                      creation_time=creation_time,
                                      timeout=cal_db_timeout)
    mask_ff, when['BadPixelsFF'] = \
        get_constant_from_db_and_time(karabo_id, karabo_da,
                                      Constants.jungfrau.BadPixelsFF(),
                                      condition,
                                      None,
                                      cal_db_interface,
                                      creation_time=creation_time,
                                      timeout=cal_db_timeout)
    gain_map, when['Gain'] = \
        get_constant_from_db_and_time(karabo_id, karabo_da,
                                      Constants.jungfrau.RelativeGain(),
                                      condition,
                                      None,
                                      cal_db_interface,
                                      creation_time=creation_time,
                                      timeout=cal_db_timeout)

    # Combine the flat-field bad pixels (if any) with the dark bad pixels
    if mask_ff is not None:
        mask |= np.moveaxis(mask_ff, 0, 1)

    # move from x, y, cell, gain to cell, x, y, gain
    offset_map = np.squeeze(offset_map)
    mask = np.squeeze(mask)
    if memoryCells > 1:
        offset_map = np.moveaxis(np.moveaxis(offset_map, 0, 2), 0, 2)
        mask = np.moveaxis(np.moveaxis(mask, 0, 2), 0, 2)

    if gain_map is not None:
        if memoryCells > 1:
            gain_map = np.moveaxis(np.moveaxis(gain_map, 0, 2), 0, 1)
        else:
            gain_map = np.squeeze(gain_map)
            gain_map = np.moveaxis(gain_map, 1, 0)

    return offset_map, mask, gain_map, karabo_da, when

# Retrieve Offset, BadPixels and gain constants for each Jungfrau module,
# in parallel over modules
with multiprocessing.Pool() as pool:
    r = pool.map(get_constants_for_module, karabo_da)

constants = {}
for offset_map, mask, gain_map, k_da, when in r:
    print(f'Constants for module {k_da}:')
    for const in when:
        print(f'  {const} injected at {when[const]}')
    if gain_map is None:
        print("  No gain map found")
        no_relative_gain = True
    constants[k_da] = (offset_map, mask, gain_map)
```
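%% Cell type:markdown id: tags:
As a minimal sketch of the merging step above (toy shapes, not the real module dimensions; reuses the numpy import from the top of the notebook): the flat-field bad-pixel map comes with its first two axes swapped relative to the dark map, which is why it is transposed with `np.moveaxis` before the bitwise OR.
%% Cell type:code id: tags:
``` python
mask_dark_demo = np.zeros((4, 2, 1, 3), dtype=np.uint32)  # toy stand-in for the dark mask
mask_ff_demo = np.zeros((2, 4, 1, 3), dtype=np.uint32)    # toy FF mask, first two axes swapped
mask_dark_demo[0, 0, 0, 0] = 1 << 0                       # a dark-derived bad pixel
mask_ff_demo[1, 3, 0, 0] = 1 << 5                         # a flat-field-derived bad pixel

# Same operation as `mask |= np.moveaxis(mask_ff, 0, 1)` above
combined = mask_dark_demo | np.moveaxis(mask_ff_demo, 0, 1)
print(np.count_nonzero(combined))  # -> 2: both defect classes end up in one mask
```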
%% Cell type:code id: tags:
``` python
def copy_and_sanitize_non_cal_data(infile, outfile, h5base):
    """ Copy and sanitize data in `infile` that is not touched by the correction """
    if h5base.startswith("/"):
        h5base = h5base[1:]

    dont_copy = ["adc", ]
    dont_copy = [h5base + "/{}".format(do) for do in dont_copy]

    def visitor(k, item):
        if k not in dont_copy:
            if isinstance(item, h5py.Group):
                outfile.create_group(k)
            elif isinstance(item, h5py.Dataset):
                group = str(k).split("/")
                group = "/".join(group[:-1])
                infile.copy(k, outfile[group])

    infile.visititems(visitor)
```
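%% Cell type:markdown id: tags:
A minimal illustration of the `visititems` pattern above (reusing `h5py` and `numpy` imported earlier): every group and dataset is copied except those listed in `dont_copy`. The demo uses in-memory files, so nothing is written to disk.
%% Cell type:code id: tags:
``` python
with h5py.File('demo_in.h5', 'w', driver='core', backing_store=False) as demo_in, \
     h5py.File('demo_out.h5', 'w', driver='core', backing_store=False) as demo_out:
    demo_in.create_dataset('data/adc', data=np.arange(4))      # will be skipped
    demo_in.create_dataset('data/trainId', data=np.arange(4))  # will be copied
    copy_and_sanitize_non_cal_data(demo_in, demo_out, 'data')
    print(list(demo_out['data']))  # -> ['trainId']
```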
%% Cell type:code id: tags:
``` python
# Correct a chunk of images for offset and gain
def correct_chunk(offset_map, mask, gain_map, memoryCells, no_relative_gain, inp):
    import copy

    import h5py
    import numpy as np

    fim_data = None
    gim_data = None
    rim_data = None
    msk_data = None
    err = ''

    try:
        d, g, m, ind, copy_sample = inp
        g[g == 3] = 2

        if copy_sample and ind == 0:
            if memoryCells == 1:
                rim_data = np.squeeze(copy.copy(d))
            else:
                rim_data = np.squeeze(copy.copy(d[:, 0, ...]))

        # Select memory cells
        if memoryCells > 1:
            m[m > 16] = 0
            offset_map_cell = offset_map[m, ...]
            mask_cell = mask[m, ...]
        else:
            offset_map_cell = offset_map
            mask_cell = mask

        # Offset correction
        offset = np.choose(g, (offset_map_cell[..., 0], offset_map_cell[..., 1], offset_map_cell[..., 2]))
        d -= offset

        # Gain correction
        if not no_relative_gain:
            if memoryCells > 1:
                gain_map_cell = gain_map[m, ...]
            else:
                gain_map_cell = gain_map
            cal = np.choose(g, (gain_map_cell[..., 0], gain_map_cell[..., 1], gain_map_cell[..., 2]))
            d /= cal

        msk = np.choose(g, (mask_cell[..., 0], mask_cell[..., 1], mask_cell[..., 2]))

        # Store sample of data for plotting
        if copy_sample and ind == 0:
            if memoryCells == 1:
                fim_data = np.squeeze(copy.copy(d))
                gim_data = np.squeeze(copy.copy(g))
                msk_data = np.squeeze(copy.copy(msk))
            else:
                fim_data = np.squeeze(copy.copy(d[:, 1, ...]))
                gim_data = np.squeeze(copy.copy(g[:, 1, ...]))
                msk_data = np.squeeze(copy.copy(msk[:, 1, ...]))
    except Exception as e:
        err = e

    return ind, d, msk, rim_data, fim_data, gim_data, msk_data, err
```
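%% Cell type:markdown id: tags:
The core of `correct_chunk` is the per-pixel selection of the constant matching the recorded gain stage via `np.choose`. A toy example with made-up values (illustration only):
%% Cell type:code id: tags:
``` python
g_demo = np.array([[0, 1], [2, 0]])  # gain bit per pixel (value 3 already remapped to 2)
# One constant per gain stage, stacked along the last axis like offset_map_cell[..., gain]
offsets_demo = np.stack([np.full((2, 2), o) for o in (10., 20., 30.)], axis=-1)

offset = np.choose(g_demo, (offsets_demo[..., 0],
                            offsets_demo[..., 1],
                            offsets_demo[..., 2]))
print(offset)  # [[10. 20.] [30. 10.]] -- each pixel gets the constant of its own gain stage
```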
%% Cell type:code id: tags:
``` python
fim_data = {}
gim_data = {}
rim_data = {}
msk_data = {}

# Loop over modules
pool = multiprocessing.Pool()
for i, key in enumerate(mapped_files):
    # Loop over sequences for given module
    for k, f in enumerate(list(mapped_files[key].queue)):
        offset_map, mask, gain_map = constants[karabo_da[i]]
        h5path_f = h5path.format(int(karabo_da[i][-2:]))
        with h5py.File(f, 'r') as infile:
            # The processed files are saved here in a folder with the run name.
            out_file = "{}/{}".format(out_folder, f.split("/")[-1])
            out_file = out_file.replace("RAW", "CORR")
            print(f'Process file: {f}, with path {h5path_f}')
            try:
                with h5py.File(out_file, "w") as ofile:
                    copy_and_sanitize_non_cal_data(infile, ofile, h5path_f)

                    oshape = infile[h5path_f + "/adc"].shape
                    print(f'Data shape: {oshape}')
                    if not oshape[0]:
                        raise ValueError(f"No image data: shape {oshape}")

                    # Chunk always contains >= 1 complete image
                    chunk_shape = (chunk_size_idim, 1) + oshape[-2:]

                    ddset = ofile.create_dataset(h5path_f + "/adc",
                                                 oshape,
                                                 chunks=chunk_shape,
                                                 dtype=np.float32)

                    mskset = ofile.create_dataset(h5path_f + "/mask",
                                                  oshape,
                                                  chunks=chunk_shape,
                                                  dtype=np.uint32,
                                                  compression="gzip", compression_opts=1, shuffle=True)

                    # Parallelize over chunks of images
                    inp = []
                    max_ind = oshape[0]
                    ind = 0

                    # If chunk size is not given, expect up to 12+1 chunks
                    if chunk_size == 0:
                        chunk_size = max_ind // 12
                    print(f'Chunk size: {chunk_size}')

                    ts = time.time()
                    while ind < max_ind:
                        d = infile[h5path_f + "/adc"][ind:ind + chunk_size, ...].astype(np.float32)
                        g = infile[h5path_f + "/gain"][ind:ind + chunk_size, ...]
                        if h5path_f + "/memoryCell" in infile:
                            m = infile[h5path_f + "/memoryCell"][ind:ind + chunk_size, ...]
                        else:
                            m = None
                        print(f'To process: {d.shape}')
                        inp.append((d, g, m, ind, k == 0))
                        ind += chunk_size

                    print('Preparation time: ', time.time() - ts)
                    ts = time.time()

                    print(f'Run {len(inp)} processes')
                    p = partial(correct_chunk, offset_map, mask, gain_map, memoryCells, no_relative_gain)

                    r = pool.map(p, inp)

                    if k == 0:
                        (_, _, _,
                         rim_data[karabo_da[i]], fim_data[karabo_da[i]],
                         gim_data[karabo_da[i]], msk_data[karabo_da[i]], _) = r[0]
                    print('Correction time: ', time.time() - ts)
                    ts = time.time()

                    for rr in r:
                        ind, cdata, cmask, _, _, _, _, err = rr
                        data_size = cdata.shape[0]
                        ddset[ind:ind + data_size, ...] = cdata
                        mskset[ind:ind + data_size, ...] = cmask
                        if err != '':
                            print(f'Error: {err}')

                    print('Saving time: ', time.time() - ts)
            except Exception as e:
                print(f"Error: {e}")
pool.close()
```
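%% Cell type:markdown id: tags:
A quick sanity check (toy numbers) of the "12+1 chunks" remark above: the chunk size comes from integer division, so the leftover images form one extra, final chunk.
%% Cell type:code id: tags:
``` python
max_ind_demo = 100
chunk_demo = max_ind_demo // 12            # -> 8
n_chunks = -(-max_ind_demo // chunk_demo)  # ceiling division -> 13 chunks, i.e. 12 + 1
print(chunk_demo, n_chunks)
```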
%% Cell type:code id: tags:
``` python
def do_2d_plot(data, edges, y_axis, x_axis, title):
    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(111)
    extent = [np.min(edges[1]), np.max(edges[1]), np.min(edges[0]), np.max(edges[0])]
    im = ax.imshow(data[::-1, :], extent=extent, aspect="auto", norm=LogNorm(vmin=1, vmax=np.max(data)))
    ax.set_xlabel(x_axis)
    ax.set_ylabel(y_axis)
    ax.set_title(title)
    cb = fig.colorbar(im)
    cb.set_label("Counts")
```
%% Cell type:code id: tags:
``` python
for mod in rim_data:
    h, ex, ey = np.histogram2d(rim_data[mod].flatten(),
                               gim_data[mod].flatten(),
                               bins=[100, 4],
                               range=[[0, 10000], [0, 4]])
    do_2d_plot(h, (ex, ey), "Signal (ADU)", "Gain Bit Value", f'Module {mod}')
```
%% Cell type:markdown id: tags:
### Mean RAW Preview ###
The per-pixel mean of the RAW data in the first sequence file
%% Cell type:code id: tags:
``` python
for mod in rim_data:
    fig = plt.figure(figsize=(20, 10))
    ax = fig.add_subplot(111)
    im = ax.imshow(np.mean(rim_data[mod], axis=0),
                   vmin=min(0.75 * np.median(rim_data[mod][rim_data[mod] > 0]), 2000),
                   vmax=max(1.5 * np.median(rim_data[mod][rim_data[mod] > 0]), 16000), cmap="jet")
    ax.set_title(f'Module {mod}')
    cb = fig.colorbar(im, ax=ax)
```
%% Cell type:markdown id: tags:
### Mean CORRECTED Preview ###
The per-pixel mean of the CORRECTED data in the first sequence file
%% Cell type:code id: tags:
``` python
for mod in rim_data:
    fig = plt.figure(figsize=(20, 10))
    ax = fig.add_subplot(111)
    im = ax.imshow(np.mean(fim_data[mod], axis=0),
                   vmin=min(0.75 * np.median(fim_data[mod][fim_data[mod] > 0]), -0.5),
                   vmax=max(2. * np.median(fim_data[mod][fim_data[mod] > 0]), 100), cmap="jet")
    ax.set_title(f'Module {mod}', size=18)
    cb = fig.colorbar(im, ax=ax)
```
%% Cell type:markdown id: tags:
### Single Train Preview ###
A single image from the first train
%% Cell type:code id: tags:
``` python
for mod in rim_data:
    fig = plt.figure(figsize=(20, 10))
    ax = fig.add_subplot(111)
    im = ax.imshow(fim_data[mod][0, ...],
                   vmin=min(0.75 * np.median(fim_data[mod][0, ...]), -0.5),
                   vmax=max(2. * np.median(fim_data[mod][0, ...]), 100), cmap="jet")
    ax.set_title(f'Module {mod}', size=18)
    cb = fig.colorbar(im, ax=ax)
```
%% Cell type:markdown id: tags:
## Signal Distribution ##
%% Cell type:code id: tags:
``` python
for mod in rim_data:
    fig = plt.figure(figsize=(20, 10))
    ax = fig.add_subplot(211)
    h = ax.hist(fim_data[mod].flatten(), bins=1000, range=(-100, 1000), log=True)
    l = ax.set_xlabel("Signal (keV)")
    l = ax.set_ylabel("Counts")
    _ = ax.set_title(f'Module {mod}')

    ax = fig.add_subplot(212)
    h = ax.hist(fim_data[mod].flatten(), bins=1000, range=(-1000, 10000), log=True)
    l = ax.set_xlabel("Signal (keV)")
    l = ax.set_ylabel("Counts")
    _ = ax.set_title(f'Module {mod}')
```
%% Cell type:markdown id: tags:
### Maximum GAIN Preview ###
The per-pixel maximum of the GAIN data for the first train
%% Cell type:code id: tags:
``` python
for mod in rim_data:
    fig = plt.figure(figsize=(20, 10))
    ax = fig.add_subplot(111)
    im = ax.imshow(np.max(gim_data[mod], axis=0), vmin=0, vmax=3, cmap="jet")
    ax.set_title(f'Module {mod}', size=18)
    cb = fig.colorbar(im, ax=ax)
```
%% Cell type:markdown id: tags:
## Bad Pixels ##
The mask contains dedicated entries for all pixels and memory cells as well as all three gain stages. Each mask entry is encoded in 32 bits as:
%% Cell type:code id: tags:
``` python
table = []
for item in BadPixels:
    table.append((item.name, f"{item.value:016b}"))
md = display(Latex(tabulate.tabulate(table, tablefmt='latex', headers=["Bad pixel type", "Bit mask"])))
```
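%% Cell type:markdown id: tags:
Since each mask entry is a bit field, individual defect classes can be selected with a bitwise AND. A minimal sketch (the enum member below is just one example from `BadPixels`):
%% Cell type:code id: tags:
``` python
bit = BadPixels.OFFSET_OUT_OF_THRESHOLD.value
toy_mask = np.array([0, bit, bit | 2, 4], dtype=np.uint32)
print((toy_mask & bit) != 0)  # -> [False  True  True False]
```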
%% Cell type:markdown id: tags:
### Single Image Bad Pixels ###
A single image bad pixel map for the first image of the first train
%% Cell type:code id: tags:
``` python
for mod in rim_data:
    fig = plt.figure(figsize=(20, 10))
    ax = fig.add_subplot(111)
    im = ax.imshow(np.log2(msk_data[mod][0, ...]), vmin=0, vmax=32, cmap="jet")
    ax.set_title(f'Module {mod}', size=18)
    cb = fig.colorbar(im, ax=ax)
```