cleared notebook outputs

dd58ef97 · Marco Ramilli · 6bd71ca2 · dd58ef97
Commit dd58ef97 authored 4 years ago by Marco Ramilli
--- a/notebooks/Jungfrau/Jungfrau_dark_analysis_all_gains_burst_mode_NBC.ipynb
+++ b/notebooks/Jungfrau/Jungfrau_dark_analysis_all_gains_burst_mode_NBC.ipynb
@@ -13,7 +13,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -57,7 +57,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
   "metadata": {
    "code_folding": []
   },
@@ -97,20 +97,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Using 2020-03-04 11:22:16.791550 as creation time\n",
-      "Calibration database interface: tcp://max-exfl016:8016\n",
-      "Path inset  JNGFR03\n",
-      "Receiver Id  JNGFR03\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "path_inset = karabo_da[0] # karabo_da is a concurrency parameter\n",
    "receiver_id = receiver_id.format(int(path_inset[-2:]))\n",
@@ -152,7 +141,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -171,35 +160,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Gain stage 0, run 126\n",
-      "Integration time is 250.00001525878906 us\n",
-      "Bias voltage is 90 V\n",
-      "Dark runs in single cell mode\n",
-      " storage cell start: 15\n",
-      "Reading data from /gpfs/exfel/exp/FXE/202030/p900121/raw//r0126//RAW-R0126-JNGFR03-S{:05d}.h5\n",
-      "Run is: 126\n",
-      "HDF5 path: /INSTRUMENT/FXE_XAD_JF500K/DET/JNGFR03:daqOutput/data\n"
-     ]
-    },
-    {
-     "ename": "KeyboardInterrupt",
-     "evalue": "",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
-      "\u001b[0;32m<ipython-input-5-2cdbfae5a631>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m     72\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     73\u001b[0m             \u001b[0midxs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnonzero\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtrainId\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 74\u001b[0;31m             \u001b[0mimages\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mimages\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m...\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0midxs\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     75\u001b[0m             \u001b[0mgainmaps\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgainmaps\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m...\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0midxs\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     76\u001b[0m             \u001b[0mfr_num\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfr_num\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m...\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0midxs\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "chunkSize = 100\n",
    "filep_size = 1000\n",

 %% Cell type:markdown id: tags:

 # Jungfrau Dark Image Characterization #

 Version: 0.1, Author: M. Ramilli, S. Hauf

 Analyzes Jungfrau dark image data to deduce offset, noise and resulting bad pixel maps

 %% Cell type:code id: tags:

 ``` python
 cluster_profile = 'noDB'  # the ipcluster profile name
 in_folder = '/gpfs/exfel/exp/FXE/202030/p900121/raw/'  # folder under which runs are located, required
 out_folder = '/gpfs/exfel/exp/HSLAB/201831/p900053/proc/proc_data/FXE/p900121/test_dark/' # path to place reports at, required
 sequences = 1  # number of sequence files in that run
 run_high = 130 # run number for G0 dark run, required
 run_med = 131 # run number for G1 dark run, required
 run_low = 132 # run number for G2 dark run, required

 karabo_da = ['JNGFR03'] # list of data aggregators, which corresponds to different JF modules
 karabo_id = "FXE_XAD_JF500K"  # bla karabo prefix of Jungfrau devices
 karabo_id_control = ""  # if control is on a different ID, set to empty string if it is the same a karabo-id
 receiver_id = 'JNGFR{:02d}' # inset for receiver devices
 receiver_control_id = "CONTROL" # inset for control devices
 path_template = 'RAW-R{:04d}-{}-S{{:05d}}.h5'  # template to use for file name, double escape sequence number
 h5path = '/INSTRUMENT/{}/DET/{}:daqOutput/data'  # path in H5 file under which images are located
 h5path_run = '/RUN/{}/DET/{}' # path to run data
 h5path_cntrl = '/CONTROL/{}/DET/{}' # path to control data
 karabo_da_control = "JNGFRCTRL00" # file inset for control data

 use_dir_creation_date = True # use dir creation date
 cal_db_interface = 'tcp://max-exfl016:8016'  # calibrate db interface to connect to
 cal_db_timeout = 300000 # timeout on caldb requests
 local_output = True # output constants locally
 db_output = False # output constants to database

 integration_time = 1000 # integration time in us, will be overwritten by value in file
 bias_voltage = 90  # sensor bias voltage in V, will be overwritten by value in file
 badpixel_threshold_sigma = 5.  # bad pixels defined by values outside n times this std from median
 offset_abs_threshold_low = [1000, 10000, 10000]  # absolute bad pixel threshold in terms of offset, lower values
 offset_abs_threshold_high = [8000, 15000, 15000]  # absolute bad pixel threshold in terms of offset, upper values
 chunkSize = 10  # iteration chunk size, needs to match or be less than number of images in a sequence file
 imageRange = [0, 500]  # image range in which to evaluate
 memoryCells = 16  # number of memory cells
 db_module = ["Jungfrau_M260"] # ID of module in calibration database
 manual_slow_data = False  # if true, use manually entered bias_voltage and integration_time values
 time_limits = 0.025 #  to find calibration constants later on, the integration time is allowed to vary by 0.5 us
 ```

 %% Cell type:code id: tags:

 ``` python
 import warnings
 warnings.filterwarnings('ignore')

 import matplotlib
 matplotlib.use('agg')
 import numpy as np
 import h5py
 from h5py import File as h5file

 from cal_tools.enums import BadPixels
 from cal_tools.tools import get_dir_creation_date, get_random_db_interface
 from cal_tools.ana_tools import save_dict_to_hdf5
 from iCalibrationDB import ConstantMetaData, Constants, Conditions, Detectors, Versions

 from XFELDetAna.util import env
 env.iprofile = cluster_profile

 from XFELDetAna.detectors.jungfrau import readerPSI as jfreaderPSI
 from XFELDetAna.detectors.jungfrau import reader as jfreader
 from XFELDetAna.detectors.jungfrau.jf_chunk_reader import JFChunkReader
 from XFELDetAna.detectors.jungfrau.util import count_n_files, rollout_data, sanitize_data_cellid
 import glob
 import matplotlib.pyplot as plt
 %matplotlib inline

 from XFELDetAna.plotting.histogram import histPlot
 from XFELDetAna.plotting.heatmap import heatmapPlot

 import os
 os.makedirs(out_folder, exist_ok=True)
 ```

 %% Cell type:code id: tags:

 ``` python
 path_inset = karabo_da[0] # karabo_da is a concurrency parameter
 receiver_id = receiver_id.format(int(path_inset[-2:]))
 proposal = list(filter(None, in_folder.strip('/').split('/')))[-2]
 file_loc = 'proposal:{} runs:{} {} {}'.format(proposal, run_high, run_med, run_low)

 # TODO
 # this trick is needed until proper mapping is introduced
 if len(db_module)>1:
    db_module = db_module[int(path_inset[-2:])-1]
 else:
    db_module = db_module[0]

 # Constants relevant for the analysis
 run_nums = [run_high, run_med, run_low] # run number for G0/HG0, G1, G2
 sensorSize = [1024, 512]
 blockSize = [1024, 512]
 xRange = [0, 0+sensorSize[0]]
 yRange = [0, 0+sensorSize[1]]
 gains = [0, 1, 2]
 h5path = h5path.format(karabo_id, receiver_id)

 creation_time = None
 if use_dir_creation_date:
    creation_time = get_dir_creation_date(in_folder, run_high)
    print("Using {} as creation time".format(creation_time))

 cal_db_interface = get_random_db_interface(cal_db_interface)
 print('Calibration database interface: {}'.format(cal_db_interface))

 offset_abs_threshold = [offset_abs_threshold_low, offset_abs_threshold_high]

 if karabo_id_control == "":
    karabo_id_control = karabo_id

 print('Path inset ', path_inset)
 print('Receiver Id ', receiver_id)
 ```

-%% Output
-
-    Using 2020-03-04 11:22:16.791550 as creation time
-    Calibration database interface: tcp://max-exfl016:8016
-    Path inset  JNGFR03
-    Receiver Id  JNGFR03
-
 %% Cell type:code id: tags:

 ``` python
 def check_memoryCells(file_name, path):
    with h5file(file_name, 'r') as f:
        t_stamp = np.array(f[path + '/storageCells/timestamp'])
        st_cells = np.array(f[path + '/storageCells/value'])
        sc_start = np.array(f[path + '/storageCellStart/value'])

    valid_train = t_stamp > 0
    n_scs = st_cells[valid_train][0] + 1
    sc_s = sc_start[valid_train][0]

    return n_scs, sc_s
 ```

 %% Cell type:code id: tags:

 ``` python
 chunkSize = 100
 filep_size = 1000
 noiseCal = None
 noise_map = None
 offset_map = None
 memoryCells = None
 for i, r_n in enumerate(run_nums):

    gain = i
    print(f"Gain stage {gain}, run {r_n}")
    valid_data = []
    valid_cellids = []
    if r_n is not None:
        n_tr = 0
        n_empty_trains = 0
        n_empty_sc = 0

        ped_dir = "{}/r{:04d}/".format(in_folder, r_n)
        fp_name = path_template.format(r_n, karabo_da_control)
        fp_path = '{}/{}'.format(ped_dir, fp_name)

        n_files = len(glob.glob("{}/*{}*.h5".format(ped_dir, path_inset)))
        myRange = range(0, n_files)
        control_path = h5path_cntrl.format(karabo_id_control, receiver_control_id)

        this_run_mcells, sc_start = check_memoryCells(fp_path.format(0).format(myRange[0]), control_path)

        if noise_map is None:
            if not manual_slow_data:
                with h5py.File(fp_path.format(0), 'r') as f:
                    run_path = h5path_run.format(karabo_id_control, receiver_control_id)
                    integration_time = float(f[f'{run_path}/exposureTime/value'][()]*1e6)
                    bias_voltage = int(np.squeeze(f[f'{run_path}/vHighVoltage/value'])[0])
            print("Integration time is {} us".format(integration_time))
            print("Bias voltage is {} V".format(bias_voltage))
            if this_run_mcells == 1:
                memoryCells = 1
                print('Dark runs in single cell mode\n storage cell start: {:02d}'.format(sc_start))
            else:
                memoryCells = 16
                print('Dark runs in burst mode\n storage cell start: {:02d}'.format(sc_start))

            noise_map = np.zeros(sensorSize+[memoryCells, 3])
            offset_map = np.zeros(sensorSize+[memoryCells, 3])

        fp_name = path_template.format(r_n, path_inset)
        fp_path = '{}/{}'.format(ped_dir, fp_name)
        myRange_P = range(0, sequences)
        path = h5path

        print("Reading data from {}".format(fp_path))
        print("Run is: {}".format(r_n))
        print("HDF5 path: {}".format(h5path))

        imageRange = [0, filep_size*len(myRange)]
        reader = JFChunkReader(filename = fp_path, readFun = jfreader.readData, size = filep_size, chunkSize = chunkSize,
                               path = h5path, image_range=imageRange, pixels_x = sensorSize[0], pixels_y = sensorSize[1],
                               x_range = xRange, y_range = yRange, imagesPerChunk=chunkSize, filesRange = myRange,
                               memoryCells=this_run_mcells, blockSize=blockSize)

        for data in reader.readChunks():

            images = np.array(data[0], dtype=np.float)
            gainmaps = np.array(data[1], dtype=np.uint16)
            trainId = np.array(data[2])
            fr_num = np.array(data[3])
            acelltable = np.array(data[4])
            n_tr += acelltable.shape[-1]
            this_tr = acelltable.shape[-1]



            idxs = np.nonzero(trainId)[0]
            images = images[..., idxs]
            gainmaps = gainmaps[..., idxs]
            fr_num = fr_num[..., idxs]
            acelltable = acelltable[..., idxs]

            if memoryCells == 1:
                acelltable -= sc_start

            n_empty_trains += this_tr - acelltable.shape[-1]
            n_empty_sc += len(acelltable[acelltable > 15])

            if i > 0 and memoryCells == 16: ## throwing away the second memory cell entry in lower gains
                acelltable[1] = 255


            images, gainmaps, acelltable = rollout_data([images, gainmaps, acelltable]) # makes 4-dim vecs into 3-dim
                                                                                        # makes 2-dim into 1-dim
                                                                                        # leaves  1-dim and 3-dim vecs

            images, gainmaps, acelltable = sanitize_data_cellid([images, gainmaps], acelltable) # removes entries with cellID 255
            valid_data.append(images)
            valid_cellids.append(acelltable)

        valid_data = np.concatenate(valid_data, axis=2)
        valid_cellids = np.concatenate(valid_cellids, axis=0)

        for cell in range(memoryCells):

            thiscell = valid_data[...,valid_cellids == cell]
            noise_map[...,cell,gain] = np.std(thiscell, axis=2)
            offset_map[...,cell,gain] = np.mean(thiscell, axis=2)


        print('G{:01d} dark calibration'.format(i))
        print('Missed {:d} out of {:d} trains'.format(n_empty_trains, n_tr))
        print('Lost {:d} images out of {:d}'.format(n_empty_sc, this_run_mcells * (n_tr - n_empty_trains)))

    else:
        print('missing G{:01d}'.format(i))

 ```

-%% Output
-
-    Gain stage 0, run 126
-    Integration time is 250.00001525878906 us
-    Bias voltage is 90 V
-    Dark runs in single cell mode
-     storage cell start: 15
-    Reading data from /gpfs/exfel/exp/FXE/202030/p900121/raw//r0126//RAW-R0126-JNGFR03-S{:05d}.h5
-    Run is: 126
-    HDF5 path: /INSTRUMENT/FXE_XAD_JF500K/DET/JNGFR03:daqOutput/data
-
-    ---------------------------------------------------------------------------
-    KeyboardInterrupt                         Traceback (most recent call last)
-    <ipython-input-5-2cdbfae5a631> in <module>
-         72
-         73             idxs = np.nonzero(trainId)[0]
-    ---> 74             images = images[..., idxs]
-         75             gainmaps = gainmaps[..., idxs]
-         76             fr_num = fr_num[..., idxs]
-    KeyboardInterrupt:
-
 %% Cell type:markdown id: tags:

 ## Offset and Noise Maps ##

 Below offset and noise maps for the high ($g_0$) gain stage are shown, alongside the distribution of these values. One expects block-like structures mapping to the ASICs of the detector

 %% Cell type:code id: tags:

 ``` python
 import matplotlib.pyplot as plt
 from XFELDetAna.core.util import remove_nans
 %matplotlib inline
 #%matplotlib notebook

 from XFELDetAna.plotting.histogram import histPlot
 from XFELDetAna.plotting.heatmap import heatmapPlot

 g_name = ['G0', 'G1', 'G2']
 g_range = [(0, 8000), (8000, 16000), (8000, 16000)]
 n_range = [(0., 50.), (0., 50.), (0., 50.)]

 unit = '[ADCu]'
 ```

 %% Cell type:code id: tags:

 ``` python

 for g_idx in gains:
    for cell in range(0, memoryCells):
        f_o0 = heatmapPlot(np.swapaxes(offset_map[..., cell, g_idx], 0, 1),
                           y_label="Row",
                           x_label="Column",
                           lut_label="Pedestal {} [ADCu]".format(g_name[g_idx]),
                           aspect=1.,
                           vmin=g_range[g_idx][0],
                           vmax=g_range[g_idx][1])

        fo0, ax_o0 = plt.subplots()
        res_o0 = histPlot(ax_o0, offset_map[..., cell, g_idx],
                          bins=800,
                          range=g_range[g_idx],
                          facecolor='b',
                          histotype='stepfilled')

        ax_o0.tick_params(axis='both',which='major',labelsize=15)
        ax_o0.set_title('Module pedestal distribution Cell {:02d}'.format(cell), fontsize=15)
        ax_o0.set_xlabel('Pedestal {} [ADCu]'.format(g_name[g_idx]),fontsize=15)
        ax_o0.set_yscale('log')

        f_n0 = heatmapPlot(np.swapaxes(noise_map[..., cell, g_idx], 0, 1),
                           y_label="Row",
                           x_label="Column",
                           lut_label="RMS noise {} ".format(g_name[g_idx]) + unit,
                           aspect=1.,
                           vmin=n_range[g_idx][0],
                           vmax=n_range[g_idx][1])

        fn0, ax_n0 = plt.subplots()
        res_n0 = histPlot(ax_n0, noise_map[..., cell, g_idx],
                          bins=100,
                          range=n_range[g_idx],
                          facecolor='b',
                          histotype='stepfilled')

        ax_n0.tick_params(axis='both',which='major',labelsize=15)
        ax_n0.set_title('Module noise distribution Cell {:02d}'.format(cell), fontsize=15)
        ax_n0.set_xlabel('RMS noise {} '.format(g_name[g_idx]) + unit,fontsize=15)
        #ax_n0.set_yscale('log')

        plt.show()


 ```

 %% Cell type:markdown id: tags:

 ## Bad Pixel Map ###

 The bad pixel map is deduced by comparing offset and noise of each pixel ($v_i$) and each gain ($g$) against the median value for that gain stage:

 $$
 v_i > \mathrm{median}(v_{k,g}) + n \sigma_{v_{k,g}}
 $$
 or
 $$
 v_i < \mathrm{median}(v_{k,g}) - n \sigma_{v_{k,g}}
 $$

 Values are encoded in a 32 bit mask, where for the dark image deduced bad pixels the following non-zero entries are relevant:

 %% Cell type:code id: tags:

 ``` python
 def print_bp_entry(bp):
    print("{:<30s} {:032b}".format(bp.name, bp.value))

 print_bp_entry(BadPixels.OFFSET_OUT_OF_THRESHOLD)
 print_bp_entry(BadPixels.NOISE_OUT_OF_THRESHOLD)
 print_bp_entry(BadPixels.OFFSET_NOISE_EVAL_ERROR)
 ```

 %% Cell type:code id: tags:

 ``` python
 bad_pixels_map = np.zeros(noise_map.shape, np.uint32)
 def eval_bpidx(d):

    mdn = np.nanmedian(d, axis=(0, 1))[None, None, :, :]
    std = np.nanstd(d, axis=(0, 1))[None, None, :, :]
    idx = (d > badpixel_threshold_sigma*std+mdn) | (d < (-badpixel_threshold_sigma)*std+mdn)

    return idx

 offset_abs_threshold = np.array(offset_abs_threshold)

 bad_pixels_map[eval_bpidx(offset_map)] = BadPixels.OFFSET_OUT_OF_THRESHOLD.value
 bad_pixels_map[~np.isfinite(offset_map)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR.value
 bad_pixels_map[eval_bpidx(noise_map)] |= BadPixels.NOISE_OUT_OF_THRESHOLD.value
 bad_pixels_map[~np.isfinite(noise_map)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR.value
 bad_pixels_map[(offset_map < offset_abs_threshold[0][None, None, None, :]) | (offset_map > offset_abs_threshold[1][None, None, None, :])] |= BadPixels.OFFSET_OUT_OF_THRESHOLD.value

 for g_idx in gains:
    for cell in range(0, memoryCells):

        bad_pixels = bad_pixels_map[:, :, cell, g_idx]

        fn_0 = heatmapPlot(np.swapaxes(bad_pixels, 0, 1),
                           y_label="Row",
                           x_label="Column",
                           lut_label="Badpixels {} [ADCu]".format(g_name[g_idx]),
                           aspect=1.,
                           vmin=0)
 ```

 %% Cell type:code id: tags:

 ``` python
 constants = {'Offset': np.moveaxis(offset_map, 0, 1),
             'Noise': np.moveaxis(noise_map, 0, 1),
             'BadPixelsDark': np.moveaxis(bad_pixels_map, 0, 1)}

 for key, const_data in constants.items():

    metadata = ConstantMetaData()
    const =  getattr(Constants.jungfrau, key)()
    const.data = const_data
    metadata.calibration_constant = const

    # set the operating condition
    condition = Conditions.Dark.jungfrau(memory_cells=memoryCells, bias_voltage=bias_voltage,
                                         integration_time=integration_time)

    for parm in condition.parameters:
        if parm.name == "Integration Time":
            parm.lower_deviation = time_limits
            parm.upper_deviation = time_limits

    device = getattr(Detectors, db_module)
    metadata.detector_condition = condition

    # specify the version for this constant
    if creation_time is None:
        metadata.calibration_constant_version = Versions.Now(device=device)
    else:
        metadata.calibration_constant_version = Versions.Timespan(device=device,
                                                                  start=creation_time)

    metadata.calibration_constant_version.raw_data_location = file_loc
    if db_output:
        metadata.send(cal_db_interface, timeout=cal_db_timeout)
        print('Constants {} is sent to the data base'.format(key))

    # save everything to file.
    # one file - one entrt in the DB
    if local_output:
        dpar = {}
        for parm in metadata.detector_condition.parameters:
            dpar[parm.name] = {'lower_deviation': parm.lower_deviation,
                               'upper_deviation': parm.upper_deviation,
                               'value': parm.value}

        data_to_store = {}
        data_to_store['condition'] = dpar
        data_to_store['db_module'] = db_module
        data_to_store['constant'] = key
        data_to_store['data'] = const_data
        data_to_store['creation_time'] = creation_time
        data_to_store['dclass'] = 'jungfrau'
        data_to_store['file_loc'] = file_loc

        ofile = "{}/const_{}_{}.h5".format(out_folder, key, db_module)
        save_dict_to_hdf5(data_to_store, ofile)
 ```