remove unused import and refactor docstring

94d66af9 · Karim Ahmed · 338c9467 · 94d66af9 · 94d66af9
Commit 94d66af9 authored 2 years ago by Karim Ahmed
--- a/notebooks/Jungfrau/Jungfrau_dark_analysis_all_gains_burst_mode_NBC.ipynb
+++ b/notebooks/Jungfrau/Jungfrau_dark_analysis_all_gains_burst_mode_NBC.ipynb
@@ -89,11 +89,7 @@
    "from XFELDetAna.plotting.histogram import histPlot\n",
    "from cal_tools import jungfraulib, step_timing\n",
    "from cal_tools.ana_tools import save_dict_to_hdf5\n",
-    "from cal_tools.enums import (\n",
-    "    BadPixels,\n",
-    "    JungfrauGainMode,\n",
-    "    JungfrauSettings,\n",
-    "    )\n",
+    "from cal_tools.enums import BadPixels, JungfrauGainMode\n",
    "from cal_tools.tools import (\n",
    "    get_dir_creation_date,\n",
    "    get_pdu_from_db,\n",

 %% Cell type:markdown id: tags:

 # Jungfrau Dark Image Characterization #

 Author: European XFEL Detector Group, Version: 2.0

 Analyzes Jungfrau dark image data to deduce offset, noise and resulting bad pixel maps.

 %% Cell type:code id: tags:

 ``` python
 in_folder = '/gpfs/exfel/exp/SPB/202130/p900204/raw/'  # folder under which runs are located, required
 out_folder = '/gpfs/exfel/data/scratch/ahmedk/test/remove' # path to place reports at, required
 metadata_folder = ''  # Directory containing calibration_metadata.yml when run by xfel-calibrate
 run_high = 141 # run number for G0 dark run, required
 run_med = 142 # run number for G1 dark run, required
 run_low = 143 # run number for G2 dark run, required

 # Parameters used to access raw data.
 karabo_da = ['JNGFR01', 'JNGFR02','JNGFR03','JNGFR04', 'JNGFR05', 'JNGFR06','JNGFR07','JNGFR08'] # list of data aggregators, which corresponds to different JF modules
 karabo_id = 'SPB_IRDA_JF4M'  # karabo_id (detector identifier) prefix of Jungfrau detector to process.
 karabo_id_control = ''  # if control is on a different ID, set to empty string if it is the same a karabo-id
 receiver_template = 'JNGFR{:02}' # inset for receiver devices
 instrument_source_template = '{}/DET/{}:daqOutput'  # template for instrument source name (filled with karabo_id & receiver_id). e.g. 'SPB_IRDA_JF4M/DET/JNGFR01:daqOutput'
 ctrl_source_template = '{}/DET/CONTROL'  # template for control source name (filled with karabo_id_control)

 # Parameters for calibration database and storing constants.
 use_dir_creation_date = True  # use dir creation date
 cal_db_interface = 'tcp://max-exfl016:8016'  # calibrate db interface to connect to
 cal_db_timeout = 300000 # timeout on caldb requests
 local_output = True  # output constants locally
 db_output = False  # output constants to database

 # Parameters affecting creating dark calibration constants.
 badpixel_threshold_sigma = 5.  # bad pixels defined by values outside n times this std from median
 offset_abs_threshold_low = [1000, 10000, 10000]  # absolute bad pixel threshold in terms of offset, lower values
 offset_abs_threshold_high = [8000, 15000, 15000]  # absolute bad pixel threshold in terms of offset, upper values
 max_trains = 0  # Maximum trains to process darks. Set to 0 to process all available train images.
 min_trains = 1  # Minimum number of trains that should be available to process dark constants. Default 1.
 manual_slow_data = False  # if true, use manually entered bias_voltage and integration_time values
 time_limits = 0.025  # to find calibration constants later on, the integration time is allowed to vary by 0.5 us

 # Parameters to be used for injecting dark calibration constants.
 integration_time = 1000 # integration time in us, will be overwritten by value in file
 gain_setting = 0  # 0 for dynamic, forceswitchg1, forceswitchg2, 1 for dynamichg0, fixgain1, fixgain2. Will be overwritten by value in file
 gain_mode = 0  # 1 if medium and low runs are  fixgain1 and fixgain2, otherwise 0. It will be overwritten by value in file, if manual_slow_data
 bias_voltage = 90  # sensor bias voltage in V, will be overwritten by value in file
 memory_cells = 16  # number of memory cells

 # TODO: this is used for only Warning check at AGIPD dark.
 # Need to rethink if it makes sense to use it here as well.
 operation_mode = 'ADAPTIVE_GAIN'  # Detector operation mode, optional
 ```

 %% Cell type:code id: tags:

 ``` python
 import glob
 import os
 import warnings
 from pathlib import Path
 warnings.filterwarnings('ignore')

 import matplotlib
 import matplotlib.pyplot as plt
 import multiprocessing
 import numpy as np
 import pasha as psh
 from IPython.display import Markdown, display
 from extra_data import RunDirectory

 matplotlib.use('agg')
 %matplotlib inline

 from XFELDetAna.plotting.heatmap import heatmapPlot
 from XFELDetAna.plotting.histogram import histPlot
 from cal_tools import jungfraulib, step_timing
 from cal_tools.ana_tools import save_dict_to_hdf5
-from cal_tools.enums import (
-    BadPixels,
-    JungfrauGainMode,
-    JungfrauSettings,
-    )
+from cal_tools.enums import BadPixels, JungfrauGainMode
 from cal_tools.tools import (
    get_dir_creation_date,
    get_pdu_from_db,
    get_random_db_interface,
    get_report,
    save_const_to_h5,
    send_to_db,
 )
 from iCalibrationDB import Conditions, Constants
 ```

 %% Cell type:code id: tags:

 ``` python
 # Constants relevant for the analysis
 run_nums = [run_high, run_med, run_low]  # run number for G0/HG0, G1, G2
 sensor_size = (1024, 512)
 gains = [0, 1, 2]

 fixed_settings = [
    JungfrauGainMode.FIX_GAIN_1.value, JungfrauGainMode.FIX_GAIN_2.value]
 dynamic_settings = [
    JungfrauGainMode.FORCE_SWITCH_HG1.value, JungfrauGainMode.FORCE_SWITCH_HG2.value]

 creation_time = None
 if use_dir_creation_date:
    creation_time = get_dir_creation_date(in_folder, run_high)
    print(f"Using {creation_time} as creation time")
 os.makedirs(out_folder, exist_ok=True)

 cal_db_interface = get_random_db_interface(cal_db_interface)
 print(f'Calibration database interface: {cal_db_interface}')

 if karabo_id_control == "":
    karabo_id_control = karabo_id
 ```

 %% Cell type:code id: tags:

 ``` python
 proposal = list(filter(None, in_folder.strip('/').split('/')))[-2]
 file_loc = f"proposal:{proposal} runs:{run_high} {run_med} {run_low}"

 report = get_report(metadata_folder)

 step_timer = step_timing.StepTimer()
 ```

 %% Cell type:markdown id: tags:

 ## Reading control data

 %% Cell type:code id: tags:

 ``` python
 step_timer.start()
 gain_runs = dict()

 med_low_settings = []

 ctrl_src = ctrl_source_template.format(karabo_id_control)

 for gain, run_n in enumerate(run_nums):
    run_dc = RunDirectory(f"{in_folder}/r{run_n:04d}/")
    gain_runs[run_n] = [gain, run_dc]
    ctrl_data = jungfraulib.JungfrauCtrl(run_dc, ctrl_src)
    # Read control data for the high gain run only.
    if run_n == run_high:

        run_mcells, sc_start = ctrl_data.get_memory_cells()

        if not manual_slow_data:
            integration_time = ctrl_data.get_integration_time()
            bias_voltage = ctrl_data.get_bias_voltage()
            gain_setting = ctrl_data.get_gain_setting()
            print(f"Gain setting is {gain_setting} ({ctrl_data.run_settings})")
            print(f"Integration time is {integration_time} us")
            print(f"Bias voltage is {bias_voltage} V")
        if run_mcells == 1:
            memory_cells = 1
            print('Dark runs in single cell mode, '
                  f'storage cell start: {sc_start:02d}')
        else:
            memory_cells = 16
            print('Dark runs in burst mode, '
                  f'storage cell start: {sc_start:02d}')
    else:
        gain_mode = ctrl_data.get_gain_mode()
        med_low_settings.append(ctrl_data.run_mode)

 # A transparent workaround for old raw data with wrong/missing medium and low settings
 if med_low_settings == [None, None]:
    print("WARNING: run.settings is not stored in the data to read. "
          f"Hence assuming gain_mode = {gain_mode} for adaptive old data.")
 elif med_low_settings == ["dynamic", "forceswitchg1"]:
    print(f"WARNING: run.settings for medium and low gain runs are wrong {med_low_settings}. "
          f"This is an expected bug for old raw data. Setting gain_mode to {gain_mode}.")
 # Validate that med_low_settings is not a mix of adaptive and fixed settings.
 elif not (sorted(med_low_settings) in [fixed_settings, dynamic_settings]):  # noqa
    raise ValueError(
        "Medium and low run settings are not as expected. "
        f"Either {dynamic_settings} or {fixed_settings} are expected.\n"
        f"Got {sorted(med_low_settings)} for both runs, respectively.")

 print(f"Gain mode is {gain_mode} ({med_low_settings})")

 step_timer.done_step(f'Reading control data.')
 ```

 %% Cell type:code id: tags:

 ``` python
 # set the operating condition
 condition = Conditions.Dark.jungfrau(
    memory_cells=memory_cells,
    bias_voltage=bias_voltage,
    integration_time=integration_time,
    gain_setting=gain_setting,
    gain_mode=gain_mode,
 )

 db_modules = get_pdu_from_db(
    karabo_id=karabo_id,
    karabo_da=karabo_da,
    constant=Constants.jungfrau.Offset(),
    condition=condition,
    cal_db_interface=cal_db_interface,
    snapshot_at=creation_time)
 ```

 %% Cell type:code id: tags:

 ``` python
 # Use only high gain threshold for all gains in case of fixed_gain.

 if gain_mode:  # fixed_gain
    offset_abs_threshold = [[offset_abs_threshold_low[0]]*3, [offset_abs_threshold_high[0]]*3]
 else:
    offset_abs_threshold = [offset_abs_threshold_low, offset_abs_threshold_high]
 ```

 %% Cell type:code id: tags:

 ``` python
 context = psh.context.ThreadContext(num_workers=multiprocessing.cpu_count())
 ```

 %% Cell type:code id: tags:

 ``` python
 """
 All jungfrau runs are taken through one acquisition, except for the forceswitch runs.
 While taking non-fixed dark runs, a procedure of multiple acquisitions is used to switch the storage cell indices.

 This is done for medium and low gain dark dynamic runs, only [forceswitchg1, forceswitchg2]:
 Switching the cell indices in burst mode is a work around for hardware procedure
 deficiency that produces wrong data for dark runs except for the first storage cell.
 This is why multiple acquisitions are taken to switch the used storage cells and
 acquire data through two cells for each of the 16 cells instead of acquiring darks through all 16 cells.
 """

 print(f"Maximum trains to process is set to {max_trains}")

 noise_map = dict()
 offset_map = dict()
 bad_pixels_map = dict()

 for mod in karabo_da:
    step_timer.start()
    instrument_src = instrument_source_template.format(
        karabo_id, receiver_template.format(int(mod[-2:])))

    print(f"\n- Instrument data path for {mod} is {instrument_src}.")

    offset_map[mod] = context.alloc(shape=(sensor_size+(memory_cells, 3)), fill=0)
    noise_map[mod] = context.alloc(like=offset_map[mod], fill=0)
    bad_pixels_map[mod] = context.alloc(like=offset_map[mod], dtype=np.uint32, fill=0)

    for run_n, [gain, run_dc] in gain_runs.items():

        def process_cell(worker_id, array_index, cell_number):
            """Compute offset, noise and wrong-gain flags for one memory cell.

            Invoked through `context.map(process_cell, range(memory_cells))`.
            Reads the arrays of the run currently being processed (`images`,
            `gain_vals`, `acelltable`) and writes the results into the
            per-module maps at index `[..., cell_number, gain]`.

            :param worker_id: not used in the body.
            :param array_index: not used in the body.
            :param cell_number: memory cell whose entries are selected
                from the cell table.
            """
            # Mask of the cell-table entries that belong to this memory cell.
            # NOTE(review): assumes `acelltable` spans the trailing axes of
            # `images`/`gain_vals`, so masking leaves the selected images on axis 2.
            in_cell = acelltable == cell_number
            cell_images = images[..., in_cell]
            cell_gains = gain_vals[..., in_cell]

            # Per-pixel mean and standard deviation over the selected images.
            offset_map[mod][..., cell_number, gain] = cell_images.mean(axis=2)
            noise_map[mod][..., cell_number, gain] = cell_images.std(axis=2)

            # Check if there are wrong gain values: a pixel whose averaged gain
            # differs from the expected raw gain value had a wrong gain in at
            # least one of the selected images.
            # TODO: Set a threshold for acceptable gain average values across trains.
            wrong_gain = cell_gains.mean(axis=2) != raw_g
            cell_flags = bad_pixels_map[mod][..., cell_number, gain]
            cell_flags[wrong_gain] |= BadPixels.WRONG_GAIN_VALUE.value
        print(f"Gain stage {gain}, run {run_n}")

        # load shape of data for memory cells, and detector size (imgs, cells, x, y)
        n_imgs = run_dc[instrument_src, "data.adc"].shape[0]
        # load number of data available, including trains with empty data.
        n_trains = len(run_dc.train_ids)
        instr_dc = run_dc.select(instrument_src, require_all=True)
        empty_trains = n_trains - n_imgs
        if empty_trains != 0:
            print(f"\tWARNING: {mod} has {empty_trains} trains with empty data out of {n_trains} trains")  # noqa
        if max_trains > 0:
            n_imgs = min(n_imgs, max_trains)
        print(f"Processing {n_imgs} images.")
        # Select only requested number of images to process darks.
        instr_dc = instr_dc.select_trains(np.s_[:n_imgs])

        if n_imgs < min_trains:
            raise ValueError(
                f"Less than {min_trains} trains are available in RAW data."
                " Not enough data to process darks.")

        images = np.transpose(
            instr_dc[instrument_src, "data.adc"].ndarray(), (3, 2, 1, 0))
        acelltable = np.transpose(instr_dc[instrument_src, "data.memoryCell"].ndarray())
        gain_vals = np.transpose(
            instr_dc[instrument_src, "data.gain"].ndarray(), (3, 2, 1, 0))

        # define gain value as saved in raw gain map
        raw_g = 3 if gain == 2 else gain

        if memory_cells == 1:
            acelltable -= sc_start
        # Only for dynamic medium and low gain runs [forceswitchg1, forceswitchg2] in burst mode.

        if gain_mode == 0 and gain > 0 and memory_cells == 16:
            # 255 similar to the receiver which uses the 255
            # value to indicate a cell without an image.
            # image shape for forceswitchg1 and forceswitchg2 = (1024, 512, 2, trains)
            # compared to expected shape of (1024, 512, 16, trains) for high gain run.
            acelltable[1:] = 255

        # Calculate offset and noise maps
        context.map(process_cell, range(memory_cells))

    step_timer.done_step(f'Creating Offset and noise constants for a module.')
 ```

 %% Cell type:markdown id: tags:

 ## Offset and Noise Maps ##

 Below, offset and noise maps for the high ($g_0$), medium ($g_1$) and low ($g_2$) gain stages are shown for every memory cell, alongside the distribution of these values. One expects block-like structures mapping to the ASICs of the detector.

 %% Cell type:code id: tags:

 ``` python
 g_name = ['G0', 'G1', 'G2']
 g_range = [(0, 8000), (8000, 16000), (8000, 16000)]
 n_range = [(0., 50.), (0., 50.), (0., 50.)]

 unit = '[ADCu]'
 ```

 %% Cell type:code id: tags:

 ``` python
 # TODO: Fix plots arrangement and speed for Jungfrau burst mode.
 step_timer.start()
 for pdu, mod in zip(db_modules, karabo_da):
    for g_idx in gains:
        for cell in range(0, memory_cells):
            f_o0 = heatmapPlot(
                np.swapaxes(offset_map[mod][..., cell, g_idx], 0, 1),
                y_label="Row",
                x_label="Column",
                lut_label=unit,
                aspect=1.,
                vmin=g_range[g_idx][0],
                vmax=g_range[g_idx][1],
                title=f'Pedestal {g_name[g_idx]} - Cell {cell:02d} - Module {mod} ({pdu})')

            fo0, ax_o0 = plt.subplots()
            res_o0 = histPlot(
                ax_o0, offset_map[mod][..., cell, g_idx],
                bins=800,
                range=g_range[g_idx],
                facecolor='b',
                histotype='stepfilled',
            )

            ax_o0.tick_params(axis='both',which='major',labelsize=15)
            ax_o0.set_title(
                f'Module pedestal distribution - Cell {cell:02d} - Module {mod} ({pdu})',
                fontsize=15)
            ax_o0.set_xlabel(f'Pedestal {g_name[g_idx]} {unit}',fontsize=15)
            ax_o0.set_yscale('log')

            f_n0 = heatmapPlot(
                np.swapaxes(noise_map[mod][..., cell, g_idx], 0, 1),
                y_label="Row",
                x_label="Column",
                lut_label= unit,
                aspect=1.,
                vmin=n_range[g_idx][0],
                vmax=n_range[g_idx][1],
                title=f"RMS noise {g_name[g_idx]} - Cell {cell:02d} - Module {mod} ({pdu})",
            )

            fn0, ax_n0 = plt.subplots()
            res_n0 = histPlot(
                ax_n0,
                noise_map[mod][..., cell, g_idx],
                bins=100,
                range=n_range[g_idx],
                facecolor='b',
                histotype='stepfilled',
            )

            ax_n0.tick_params(axis='both', which='major', labelsize=15)
            ax_n0.set_title(
                f'Module noise distribution - Cell {cell:02d} - Module {mod} ({pdu})',
                fontsize=15)
            ax_n0.set_xlabel(
                f'RMS noise {g_name[g_idx]} ' + unit, fontsize=15)
            plt.show()
 step_timer.done_step(f'Plotting offset and noise maps.')
 ```

 %% Cell type:markdown id: tags:

 ## Bad Pixel Map ##

 The bad pixel map is deduced by comparing offset and noise of each pixel ($v_i$) and each gain ($g$) against the median value for that gain stage:

 $$
 v_i > \mathrm{median}(v_{k,g}) + n \sigma_{v_{k,g}}
 $$
 or
 $$
 v_i < \mathrm{median}(v_{k,g}) - n \sigma_{v_{k,g}}
 $$

 Values are encoded in a 32 bit mask, where for the dark image deduced bad pixels the following non-zero entries are relevant:

 %% Cell type:code id: tags:

 ``` python
 def print_bp_entry(bp):
    """Print a bad-pixel flag as name, 32-bit binary mask and decimal value.

    :param bp: object exposing `name` and an integer `value`.
    """
    flag_value = bp.value
    print(f"{bp.name:<30s} {flag_value:032b} -> {int(flag_value)}")

 print_bp_entry(BadPixels.OFFSET_OUT_OF_THRESHOLD)
 print_bp_entry(BadPixels.NOISE_OUT_OF_THRESHOLD)
 print_bp_entry(BadPixels.OFFSET_NOISE_EVAL_ERROR)
 print_bp_entry(BadPixels.WRONG_GAIN_VALUE)

 def eval_bpidx(d):
    """Flag values lying more than n standard deviations from the median.

    Median and standard deviation are evaluated NaN-aware over the first two
    axes of `d`, i.e. separately for every index of the two trailing axes.
    n is the notebook parameter `badpixel_threshold_sigma`.

    :param d: 4-dimensional array of values to evaluate.
    :return: boolean array with the shape of `d`; True where a value lies
        above median + n * std or below median - n * std.
    """
    # Re-insert the two reduced axes so the statistics broadcast against `d`.
    centre = np.nanmedian(d, axis=(0, 1))[np.newaxis, np.newaxis, :, :]
    spread = np.nanstd(d, axis=(0, 1))[np.newaxis, np.newaxis, :, :]

    too_high = d > badpixel_threshold_sigma * spread + centre
    too_low = d < (-badpixel_threshold_sigma) * spread + centre
    return too_high | too_low
 ```

 %% Cell type:code id: tags:

 ``` python
 step_timer.start()

 for pdu, mod in zip(db_modules, karabo_da):
    display(Markdown(f"### Badpixels for module {mod} ({pdu}):"))
    offset_abs_threshold = np.array(offset_abs_threshold)

    bad_pixels_map[mod][eval_bpidx(offset_map[mod])] |= BadPixels.OFFSET_OUT_OF_THRESHOLD.value

    bad_pixels_map[mod][~np.isfinite(offset_map[mod])] |= BadPixels.OFFSET_NOISE_EVAL_ERROR.value

    bad_pixels_map[mod][eval_bpidx(noise_map[mod])] |= BadPixels.NOISE_OUT_OF_THRESHOLD.value

    bad_pixels_map[mod][~np.isfinite(noise_map[mod])] |= BadPixels.OFFSET_NOISE_EVAL_ERROR.value

    bad_pixels_map[mod][(offset_map[mod] < offset_abs_threshold[0][None, None, None, :]) | (offset_map[mod] > offset_abs_threshold[1][None, None, None, :])] |= BadPixels.OFFSET_OUT_OF_THRESHOLD.value  # noqa

    for g_idx in gains:
        for cell in range(memory_cells):
            bad_pixels = bad_pixels_map[mod][:, :, cell, g_idx]
            fn_0 = heatmapPlot(
                np.swapaxes(bad_pixels, 0, 1),
                y_label="Row",
                x_label="Column",
                lut_label=f"Badpixels {g_name[g_idx]} [ADCu]",
                aspect=1.,
                vmin=0, vmax=5,
                title=f'G{g_idx} Bad pixel map - Cell {cell:02d} - Module {mod} ({pdu})')
 step_timer.done_step(f'Creating bad pixels constant and plotting it for a module.')
 ```

 %% Cell type:markdown id: tags:

 ## Inject and save calibration constants

 %% Cell type:code id: tags:

 ``` python
 step_timer.start()
 for mod, db_mod in zip(karabo_da, db_modules):
    constants = {
        'Offset': np.moveaxis(offset_map[mod], 0, 1),
        'Noise': np.moveaxis(noise_map[mod], 0, 1),
        'BadPixelsDark': np.moveaxis(bad_pixels_map[mod], 0, 1),
    }

    md = None

    for key, const_data in constants.items():

        const =  getattr(Constants.jungfrau, key)()
        const.data = const_data

        for parm in condition.parameters:
            if parm.name == "Integration Time":
                parm.lower_deviation = time_limits
                parm.upper_deviation = time_limits

        if db_output:
            md = send_to_db(
                db_module=db_mod,
                karabo_id=karabo_id,
                constant=const,
                condition=condition,
                file_loc=file_loc,
                report_path=report,
                cal_db_interface=cal_db_interface,
                creation_time=creation_time,
                timeout=cal_db_timeout,
            )
        if local_output:
            md = save_const_to_h5(
                db_module=db_mod,
                karabo_id=karabo_id,
                constant=const,
                condition=condition,
                data=const.data,
                file_loc=file_loc,
                report=report,
                creation_time=creation_time,
                out_folder=out_folder,
            )
            print(f"Calibration constant {key} is stored locally at {out_folder}.\n")

 print("Constants parameter conditions are:\n")
 print(
    f"• Bias voltage: {bias_voltage}\n"
    f"• Memory cells: {memory_cells}\n"
    f"• Integration time: {integration_time}\n"
    f"• Gain setting: {gain_setting}\n"
    f"• Gain mode: {gain_mode}\n"
    f"• Creation time: {md.calibration_constant_version.begin_at if md is not None else creation_time}\n")  # noqa
 step_timer.done_step("Injecting constants.")
 ```

 %% Cell type:code id: tags:

 ``` python
 print(f"Total processing time {step_timer.timespan():.01f} s")
 step_timer.print_summary()
 ```

--- a/src/cal_tools/jungfraulib.py
+++ b/src/cal_tools/jungfraulib.py
@@ -85,6 +85,8 @@ class JungfrauCtrl():
    def get_gain_setting(self) -> int:
        """Get run gain settings to identify if run is in
        High CDS or Low CDS.
+        - `1` if run_settings = highgain0.
+        - `0` if run_settings = gain0 or None.
        """
        if self.run_settings == JungfrauSettings.HIGH_GAIN_0:
            return 1
@@ -92,12 +94,9 @@ class JungfrauCtrl():
            return 0

    def get_gain_mode(self) -> int:
-        """From Jungfrau SW V5 gain_mode is deducted from
-        /RUN/..../gainMode.
-        With different enums: dynamic, fixg1, fixg2,
-        forceswitchg1, forceswitchg2. which are used to decide
-        if the value is 0 or 1.
-        Gain Mode is fixed (1) if the enum is fixg1 or fixg2.
+        """Get gain mode value. Fixed `1` or Adaptive `0`.
+        - `0` if run_mode = dynamic, forceswitchg1, forceswitchg2, or None.
+        - `1` if run_mode = fixg1 or fixg2.
        """
        if self.run_mode in [
            JungfrauGainMode.FIX_GAIN_1.value,