use send_to_db in injecting constants

Merged Karim Ahmed requested to merge feat/send_to_db_in_nbs into master
All threads resolved!
%% Cell type:markdown id: tags:
# Characterize Dark Images for AGIPD64K #
Author: K. Ahmed, Version: 0.1
The following code analyzes a set of dark images taken with the single-module AGIPD (AGIPD64K) detector to deduce detector offsets and noise. Data for the detector's three gain stages must be present, recorded in three separate runs.
%% Cell type:code id: tags:
``` python
# Inputs exposed to xfel-calibrate package should be in this first cell.
# Parameters for accessing files.
in_folder = "/gpfs/exfel/exp/SPB/202030/p900138/raw" # path to input data, required
out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/SPB/AGIPD/DARK/202030/p900138/" # path to output to, required
sequences = [0] # sequence files to evaluate.
run_high = 33 # run number in which high gain data was recorded, required
run_med = 34 # run number in which medium gain data was recorded, required
run_low = 35 # run number in which low gain data was recorded, required
local_output = True # output constants locally
db_output = False # output constants to database
cal_db_interface = "tcp://max-exfl016:8020" # the database interface to use
cal_db_timeout = 3000000 # timeout on caldb requests
instrument = "SPB"
module_name = 'AGIPD64K'
channel = 16
path_template = "RAW-R{:04d}-{}-S{:05d}"
# Parameters for taking dark.
mem_cells = 0 # number of memory cells used, set to 0 to automatically infer
bias_voltage = 300 # detector bias voltage
acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine
gain_setting = 0.1 # the gain setting can be 0 or 1; the default of 0.1 means no gain setting (None)
dont_use_dir_date = False # don't use the dir creation date for determining the creation time
thresholds_offset_sigma = 3. # thresholds in terms of n sigma noise for offset deduced bad pixels
thresholds_offset_hard = [4000, 8500] # thresholds in absolute ADU terms for offset deduced bad pixels
thresholds_noise_sigma = 5. # thresholds in terms of n sigma noise for noise deduced bad pixels
thresholds_noise_hard = [4, 20] # thresholds in absolute ADU terms for noise deduced bad pixels
# Plotting parameters
high_res_badpix_3d = False # set this to True if you need high-resolution 3d bad pixel plots. Runtime: ~ 1h
# Parameters for ipcluster
cluster_profile = "noDB" # The ipcluster profile to use
```
%% Cell type:code id: tags:
``` python
# imports and things that do not usually need to be changed
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')
from collections import OrderedDict
import os
import h5py
import numpy as np
import traceback
import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt
%matplotlib inline
from cal_tools.tools import (gain_map_files, parse_runs,
                             run_prop_seq_from_path, get_notebook_name,
                             get_dir_creation_date, save_const_to_h5,
                             get_random_db_interface, send_to_db)
from cal_tools.influx import InfluxLogger
from cal_tools.enums import BadPixels
from cal_tools.plotting import show_overview, plot_badpix_3d, create_constant_overview
# make sure a cluster is running with ipcluster start --n=32, give it a while to start
from ipyparallel import Client
view = Client(profile=cluster_profile)[:]
view.use_dill()
from iCalibrationDB import ConstantMetaData, Constants, Conditions, Detectors, Versions
gains = np.arange(3)
max_cells = mem_cells
offset_runs = OrderedDict()
offset_runs["high"] = parse_runs(run_high)[0]
offset_runs["med"] = parse_runs(run_med)[0]
offset_runs["low"] = parse_runs(run_low)[0]
creation_time = None
if not dont_use_dir_date:
    creation_time = get_dir_creation_date(in_folder, run_high)
run, prop, seq = run_prop_seq_from_path(in_folder)
logger = InfluxLogger(detector="AGIPD", instrument=instrument, mem_cells=mem_cells,
                      notebook=get_notebook_name(), proposal=prop)
print("Using {} as creation time of constant.".format(creation_time))
cal_db_interface = get_random_db_interface(cal_db_interface)
print('Calibration database interface: {}'.format(cal_db_interface))
# Temporary: use the full AGIPD1M-1 identifiers for testing the single module
loc = "SPB_DET_AGIPD1M-1"
dinstance = "AGIPD1M1"
print("Detector in use is {}".format(loc))
# The default gain_setting of 0.1 means no gain setting was applied (None)
if gain_setting == 0.1:
    gain_setting = None
```
%% Cell type:code id: tags:
``` python
print("Parameters are:")
print("Proposal: {}".format(prop))
print("Memory cells: {}/{}".format(mem_cells, max_cells))
print("Runs: {}".format([ v for v in offset_runs.values()]))
print("Sequences: {}".format(sequences))
print("Using DB: {}".format(db_output))
print("Input: {}".format(in_folder))
print("Output: {}".format(out_folder))
print("Bias voltage: {}V".format(bias_voltage))
print("Gain setting: {}".format(gain_setting))
```
%% Cell type:markdown id: tags:
Dark raw files:
%% Cell type:code id: tags:
``` python
# set everything up filewise
if not os.path.exists(out_folder):
    os.makedirs(out_folder)
path_inset = "AGIPD{}".format(channel)
raw_files = []
total_file_size = 0
for sequence in sequences:
    for run in [run_high, run_med, run_low]:
        h5file = path_template.format(run, path_inset, sequence)
        print(h5file)
        rfile = "{}/r{:04d}/{}.h5".format(in_folder, run, h5file)
        raw_files.append(rfile)
        total_file_size += os.path.getsize(rfile)
if len(raw_files) < 1:
    print("WARNING: NO FILES TO CREATE THE DARK!")
else:
    total_file_size = total_file_size / 1e9
    total_sequences = len(raw_files)
    print("The total size of the processed data: {}GB".format(total_file_size))
```
%% Cell type:markdown id: tags:
## Calculate Offsets, Noise and Thresholds ##
The calculation is performed per pixel and per memory cell. The offset is the median value of the dark data for a given gain stage, the noise is the corresponding standard deviation, and the gain-bit value is the median of the gain array.
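For orientation, the following is a minimal, self-contained sketch (with toy array shapes and random data, not real AGIPD files) of the per-pixel, per-memory-cell statistics that the next cell computes:

``` python
# Toy illustration of the per-cell dark statistics; shapes and values are placeholders.
import numpy as np

n_x, n_y, n_cells, n_trains = 8, 8, 4, 20
rng = np.random.default_rng(0)
adu = rng.normal(5000, 10, (n_x, n_y, n_trains * n_cells))  # analog signal frames
gain_bits = rng.normal(6000, 5, adu.shape)                  # gain-bit frames
cell_ids = np.tile(np.arange(n_cells), n_trains)            # memory cell of each frame

offset = np.zeros((n_x, n_y, n_cells))
noise = np.zeros_like(offset)
gain_med = np.zeros_like(offset)
for cc in range(n_cells):
    sel = cell_ids == cc
    offset[..., cc] = np.median(adu[..., sel], axis=2)          # offset: per-cell median
    noise[..., cc] = np.std(adu[..., sel], axis=2)              # noise: per-cell standard deviation
    gain_med[..., cc] = np.median(gain_bits[..., sel], axis=2)  # gain-bit median
```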
%% Cell type:code id: tags:
``` python
import copy
from functools import partial
def characterize_module(cells, bp_thresh, loc, acq_rate, inp):
    import numpy as np
    import copy
    import h5py
    import traceback
    from cal_tools.enums import BadPixels
    from cal_tools.agipdlib import get_num_cells, get_acq_rate

    filename, filename_out, channel = inp
    if cells == 0:
        cells = get_num_cells(filename, loc, channel)
    if acq_rate == 0.:
        acq_rate = get_acq_rate(filename, loc, channel)

    thresholds_offset_hard, thresholds_offset_sigma, thresholds_noise_hard, thresholds_noise_sigma = bp_thresh

    infile = h5py.File(filename, "r", driver="core")
    count = np.squeeze(infile["/INDEX/{}/DET/{}CH0:xtdf/image/count".format(loc, channel)])
    first = np.squeeze(infile["/INDEX/{}/DET/{}CH0:xtdf/image/first".format(loc, channel)])
    last_index = int(first[count != 0][-1]+count[count != 0][-1])
    first_index = int(first[count != 0][0])
    im = np.array(infile["/INSTRUMENT/{}/DET/{}CH0:xtdf/image/data".format(loc, channel)][first_index:last_index,...])
    cellIds = np.squeeze(infile["/INSTRUMENT/{}/DET/{}CH0:xtdf/image/cellId".format(loc, channel)][first_index:last_index,...])
    infile.close()

    ga = im[:, 1, ...]
    im = im[:, 0, ...].astype(np.float32)
    im = np.rollaxis(im, 2)
    im = np.rollaxis(im, 2, 1)
    ga = np.rollaxis(ga, 2)
    ga = np.rollaxis(ga, 2, 1)

    mcells = cells
    offset = np.zeros((im.shape[0], im.shape[1], mcells))
    gains = np.zeros((im.shape[0], im.shape[1], mcells))
    noise = np.zeros((im.shape[0], im.shape[1], mcells))

    for cc in np.unique(cellIds[cellIds < mcells]):
        cellidx = cellIds == cc
        offset[...,cc] = np.median(im[..., cellidx], axis=2)
        noise[...,cc] = np.std(im[..., cellidx], axis=2)
        gains[...,cc] = np.median(ga[..., cellidx], axis=2)

    # bad pixels
    bp = np.zeros(offset.shape, np.uint32)
    # offset related bad pixels
    offset_mn = np.nanmedian(offset, axis=(0,1))
    offset_std = np.nanstd(offset, axis=(0,1))

    bp[(offset < offset_mn-thresholds_offset_sigma*offset_std) |
       (offset > offset_mn+thresholds_offset_sigma*offset_std)] |= BadPixels.OFFSET_OUT_OF_THRESHOLD.value
    bp[(offset < thresholds_offset_hard[0]) | (offset > thresholds_offset_hard[1])] |= BadPixels.OFFSET_OUT_OF_THRESHOLD.value
    bp[~np.isfinite(offset)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR.value

    # noise related bad pixels
    noise_mn = np.nanmedian(noise, axis=(0,1))
    noise_std = np.nanstd(noise, axis=(0,1))

    bp[(noise < noise_mn-thresholds_noise_sigma*noise_std) |
       (noise > noise_mn+thresholds_noise_sigma*noise_std)] |= BadPixels.NOISE_OUT_OF_THRESHOLD.value
    bp[(noise < thresholds_noise_hard[0]) | (noise > thresholds_noise_hard[1])] |= BadPixels.NOISE_OUT_OF_THRESHOLD.value
    bp[~np.isfinite(noise)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR.value

    return offset, noise, gains, bp, cells, acq_rate
gg = 0
start = datetime.now()
all_cells = []
all_acq_rate = []
for gain, fname_in in enumerate(raw_files):
    inp = []
    # replace RAW with CORR in the .h5 file name
    fout = os.path.abspath("{}/{}".format(out_folder, (os.path.split(fname_in)[-1]).replace("RAW", "CORR")))
    inp.append((fname_in, fout, channel))
    p = partial(characterize_module, max_cells,
                (thresholds_offset_hard, thresholds_offset_sigma,
                 thresholds_noise_hard, thresholds_noise_sigma), loc, acq_rate)
    results = list(map(p, inp))
    #results = view.map_sync(p, inp)
    for ii, r in enumerate(results):
        offset, noise, gain, bp, thiscell, thisacq = r
        all_cells.append(thiscell)
        all_acq_rate.append(thisacq)
        # allocate the output arrays only for the first gain stage
        if gg == 0:
            offset_g = np.zeros((offset.shape[0], offset.shape[1], offset.shape[2], 3))
            noise_g = np.zeros_like(offset_g)
            gain_g = np.zeros_like(offset_g)
            badpix_g = np.zeros_like(offset_g, np.uint32)
        offset_g[...,gg] = offset
        noise_g[...,gg] = noise
        gain_g[...,gg] = gain
        badpix_g[...,gg] = bp
    gg += 1

duration = (datetime.now()-start).total_seconds()
logger.runtime_summary_entry(success=True, runtime=duration,
                             total_sequences=total_sequences,
                             filesize=total_file_size)
logger.send()
max_cells = np.max(all_cells)
print("Using {} memory cells".format(max_cells))
acq_rate = np.max(all_acq_rate)
print("Using {} MHz acquisition rate".format(acq_rate))
```
%% Cell type:markdown id: tags:
The thresholds for gain switching are then defined as the mean value between the individual gain-bit levels. Note that these thresholds need to be refined with charge-induced thresholds, as the two are not the same.
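Concretely, for each pixel and memory cell the high-to-medium threshold is the midpoint of the high- and medium-gain bit medians, and analogously for medium-to-low. A minimal sketch with placeholder scalar values (not calibration results):

``` python
# Placeholder gain-bit medians for the three stages of one pixel/cell.
g_high, g_med, g_low = 6000.0, 7200.0, 8400.0
thr_high_to_med = (g_high + g_med) / 2  # switching threshold between high and medium gain
thr_med_to_low = (g_med + g_low) / 2    # switching threshold between medium and low gain
```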
%% Cell type:code id: tags:
``` python
thresholds_g = np.zeros((gain_g.shape[0], gain_g.shape[1], gain_g.shape[2], 5))
thresholds_g[...,0] = (gain_g[...,1]+gain_g[...,0])/2
thresholds_g[...,1] = (gain_g[...,2]+gain_g[...,1])/2
# also store the gain-bit medians of the three gain stages
for i in range(3):
    thresholds_g[...,2+i] = gain_g[...,i]
```
%% Cell type:code id: tags:
``` python
res = {'Offset': offset_g,
       'Noise': noise_g,
       'ThresholdsDark': thresholds_g,
       'BadPixelsDark': badpix_g
       }

if local_output:
    ofile = "{}/agipd_offset_store_{}_{}.h5".format(out_folder, "_".join(offset_runs.values()), module_name)
    store_file = h5py.File(ofile, "w")
    store_file["{}/Offset/0/data".format(module_name)] = offset_g
    store_file["{}/Noise/0/data".format(module_name)] = noise_g
    store_file["{}/Threshold/0/data".format(module_name)] = thresholds_g
    store_file["{}/BadPixels/0/data".format(module_name)] = badpix_g
    store_file.close()
```
%% Cell type:code id: tags:
``` python
proposal = list(filter(None, in_folder.strip('/').split('/')))[-2]
file_loc = 'proposal:{} runs:{} {} {}'.format(proposal, run_low, run_med, run_high)
```
%% Cell type:code id: tags:
``` python
for const in res:
    metadata = ConstantMetaData()
    dconst = getattr(Constants.AGIPD, const)()
    dconst.data = res[const]
    metadata.calibration_constant = dconst

    # set the operating condition
    condition = Conditions.Dark.AGIPD(memory_cells=max_cells,
                                      bias_voltage=bias_voltage,
                                      acquisition_rate=acq_rate,
                                      gain_setting=gain_setting)
    detinst = getattr(Detectors, dinstance)
    # AGIPD_SIV1_AGIPDV11_M001Test
    device = Detectors.AGIPD.AGIPD_SIV1_AGIPDV11_M001Test

    metadata.detector_condition = condition

    # specify a version for this constant
    if creation_time is None:
        metadata.calibration_constant_version = Versions.Now(device=device)
    else:
        metadata.calibration_constant_version = Versions.Timespan(device=device,
                                                                  start=creation_time)
    metadata.calibration_constant_version.raw_data_location = file_loc

    if db_output:
        send_to_db(device, dconst, condition, file_loc,
                   cal_db_interface, creation_time=creation_time,
                   timeout=cal_db_timeout)

    if local_output:
        save_const_to_h5(device, dconst, condition,
                         dconst.data, file_loc, creation_time, out_folder)
        print(f"Calibration constant {const} is stored locally.")
```
%% Cell type:markdown id: tags:
## Single-Cell Overviews ##
Single-cell overviews allow identification of potential effects on all memory cells, e.g. at the sensor level. Additionally, they serve as a first sanity check on expected behaviour: for example, structuring at the ASIC level may be visible in the offsets, but otherwise no immediate artifacts should be visible.
%% Cell type:markdown id: tags:
### High Gain ###
%% Cell type:code id: tags:
``` python
cell = 3
gain = 0
out_folder = None
# attach module name for plotting.
res_da = {}
res_da[module_name] = res
show_overview(res_da, cell, gain, out_folder=out_folder, infix="_".join(offset_runs.values()))
```
%% Cell type:markdown id: tags:
### Medium Gain ###
%% Cell type:code id: tags:
``` python
cell = 3
gain = 1
show_overview(res_da, cell, gain, out_folder=out_folder, infix="_".join(offset_runs.values()))
```
%% Cell type:markdown id: tags:
### Low Gain ###
%% Cell type:code id: tags:
``` python
cell = 3
gain = 2
show_overview(res_da, cell, gain, out_folder=out_folder, infix="_".join(offset_runs.values()))
```
%% Cell type:markdown id: tags:
## Global Bad Pixel Behaviour ##
The following plots show the results of the bad pixel evaluation for all evaluated memory cells. Cells are stacked along the Z-dimension, while pixel values in x/y are rebinned with a factor of 2. This suppresses isolated single bad pixels; hence, any bad pixels spanning at least 4 pixels in the x/y-plane, or at least two memory cells, are indicated. Colors encode the bad pixel type, or mixed type.
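To picture the effect of the rebinning, here is a toy sketch that flags a rebinned block as bad if any pixel inside it is bad (an assumption for illustration; the actual rebinning in plot_badpix_3d may differ in detail):

``` python
# Toy example: rebin an 8x8 bad-pixel mask by a factor of 2 in x/y.
import numpy as np

mask = np.zeros((8, 8), dtype=bool)
mask[3, 4] = True                   # one isolated bad pixel
blocks = mask.reshape(4, 2, 4, 2)   # group pixels into 2x2 blocks
rebinned = blocks.any(axis=(1, 3))  # a block is flagged if any of its pixels is flagged
print(rebinned.sum(), "of", rebinned.size, "blocks flagged")
```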
%% Cell type:markdown id: tags:
### High Gain ###
%% Cell type:code id: tags:
``` python
cols = {BadPixels.NOISE_OUT_OF_THRESHOLD.value: (BadPixels.NOISE_OUT_OF_THRESHOLD.name, '#FF000080'),
        BadPixels.OFFSET_NOISE_EVAL_ERROR.value: (BadPixels.OFFSET_NOISE_EVAL_ERROR.name, '#0000FF80'),
        BadPixels.OFFSET_OUT_OF_THRESHOLD.value: (BadPixels.OFFSET_OUT_OF_THRESHOLD.name, '#00FF0080'),
        BadPixels.OFFSET_OUT_OF_THRESHOLD.value | BadPixels.NOISE_OUT_OF_THRESHOLD.value: ('MIXED', '#DD00DD80')}
rebin = 8 if not high_res_badpix_3d else 2
gain = 0
badpix_g_da = {}
badpix_g_da[module_name] = badpix_g
for mod, data in badpix_g_da.items():
    plot_badpix_3d(data[...,gain], cols, title=mod, rebin_fac=rebin)
```
%% Cell type:markdown id: tags:
### Medium Gain ###
%% Cell type:code id: tags:
``` python
gain = 1
for mod, data in badpix_g_da.items():
    plot_badpix_3d(data[...,gain], cols, title=mod, rebin_fac=rebin)
```
%% Cell type:markdown id: tags:
### Low Gain ###
%% Cell type:code id: tags:
``` python
gain = 2
for mod, data in badpix_g_da.items():
    plot_badpix_3d(data[...,gain], cols, title=mod, rebin_fac=rebin)
```
%% Cell type:markdown id: tags:
## Aggregate Values and Per-Cell Behaviour ##
The following tables and plots give an overview of statistical aggregates for each constant, as well as per-cell behaviour.
%% Cell type:code id: tags:
``` python
offset_g_da = {}
offset_g_da[module_name] = offset_g
create_constant_overview(offset_g_da, "Offset (ADU)", max_cells, 4000, 8000,
                         out_folder=out_folder, infix="_".join(offset_runs.values()))
```
%% Cell type:code id: tags:
``` python
noise_g_da = {}
noise_g_da[module_name] = noise_g
create_constant_overview(noise_g_da, "Noise (ADU)", max_cells, 0, 100,
                         out_folder=out_folder, infix="_".join(offset_runs.values()))
```
%% Cell type:code id: tags:
``` python
thresholds_g_da = {}
thresholds_g_da[module_name] = thresholds_g
create_constant_overview(thresholds_g_da, "Threshold (ADU)", max_cells, 3000, 8000, 2,
                         out_folder=out_folder, infix="_".join(offset_runs.values()))
```
%% Cell type:code id: tags:
``` python
bad_pixel_aggregate_g = OrderedDict()
for m, d in badpix_g_da.items():
    bad_pixel_aggregate_g[m] = d.astype(np.bool).astype(np.float)
create_constant_overview(bad_pixel_aggregate_g, "Bad pixel fraction", max_cells, 0, 0.10, 3,
                         out_folder=out_folder, infix="_".join(offset_runs.values()))
```
%% Cell type:code id: tags:
``` python
```
%% Cell type:code id: tags:
``` python
```