Steffen Hauf · ab0ace14 · 005fd5f1 · 74c36b4d · 0099a7a4 · ab0ace14
--- a/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb

+ 13

− 4
+++ b/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb

+ 13

− 4
 %% Cell type:markdown id: tags:

 # Jungfrau Offline Correction #

 Author: European XFEL Detector Group, Version: 0.1

 Offline Calibration for the Jungfrau Detector

 %% Cell type:code id: tags:

 ``` python
-in_folder = "/gpfs/exfel/exp/FXE/201802/p002271/ra" # the folder to read data from, required
+in_folder = "/gpfs/exfel/exp/FXE/201802/p002271/raw" # the folder to read data from, required
 run = 132 # runs to process, required
-out_folder =  "/gpfs/exfel/data/scratch/haufs/test/"  # the folder to output to, required
+out_folder =  "/gpfs/exfel/data/scratch/xcal/test/"  # the folder to output to, required
 calfile =  "" # path to calibration file. Leave empty if all data should come from DB, not actually used, makes automode happy
 sequences = [-1] # sequences to correct, set to -1 for all, range allowed
 mem_cells = 1 # memory cells in data, not actually used, makes automode happy
 overwrite = True # set to True if existing data should be overwritten
 no_relative_gain = False # do not do relative gain correction
 cluster_profile = "noDB" # cluster profile to use
 bias_voltage = 90 # will be overwritten by value in file
 cal_db_interface = "tcp://max-exfl016:8016" #"tcp://max-exfl016:8015#8025" # the database interface to use
 use_dir_creation_date = True # use the creation data of the input dir for database queries
 sequences_per_node = 5 # number of sequence files per cluster node if run as slurm job, set to 0 to not run SLURM parallel
 photon_energy = 9.2 # photon energy in keV
 nodb = False # if set only file-based constants will be used
 chunk_size_idim = 1  # chunking size of imaging dimension, adjust if user software is sensitive to this.
 path_template = 'RAW-R{:04d}-{}-S{{:05d}}.h5'  # template to use for file name, double escape sequence number
 path_template_control = 'RAW-R{:04d}-{}-S{{:05d}}.h5'  # template to use for file name, double escape sequence number
 integration_time = 1000 # integration time in us, will be overwritten by value in file
 h5path = '/INSTRUMENT/{}/DET/{}:daqOutput/data'  # path in H5 file under which images are located
 gmapfile = "/gpfs/exfel/data/scratch/xcal/jfgain/gainMaps_M233.h5" #temporary gain calibration file, not in the DB; leave empty if using DB
 memcells = 1 # number of memory cells
 karabo_id = "FXE_XAD_JF1M" # karabo prefix of Jungfrau devices
 karabo_id_control = ""  # if control is on a different ID, set to empty string for using the same as for data
 receiver_id = "RECEIVER" # inset for receiver devices
 control_id = "CONTROL" # inset for control devices
 db_module = "Jungfrau_M233" # ID of module in calibration database
 path_inset = "JNGFR02" # file inset for image data
 path_inset_control = "JNGFR01" # file inset for control data
 manual_slow_data = False  # if true, use manually entered bias_voltage and integration_time values

 def balance_sequences(in_folder, run, sequences, sequences_per_node):
    import glob
    import re
    import numpy as np
    if sequences_per_node != 0:
        sequence_files = glob.glob("{}/r{:04d}/*-S*.h5".format(in_folder, run))
        seq_nums = set()
        for sf in sequence_files:
            seqnum = re.findall(r".*-S([0-9]*).h5", sf)[0]
            seq_nums.add(int(seqnum))
        seq_nums -= set(sequences)
        return [l.tolist() for l in np.array_split(list(seq_nums),
                                                   len(seq_nums)//sequences_per_node+1)]
    else:
        return sequences

 ```

 %% Cell type:code id: tags:

 ``` python
 ```

 %% Cell type:code id: tags:

 ``` python
 import warnings
 warnings.filterwarnings('ignore')

 h5path = h5path.format(karabo_id, receiver_id)
 import matplotlib
 matplotlib.use('agg')
 import matplotlib.pyplot as plt
 %matplotlib inline
 import numpy as np
 import XFELDetAna.xfelpyanatools as xana
 import XFELDetAna.xfelpycaltools as xcal

 from cal_tools.enums import BadPixels
 from iCalibrationDB import ConstantMetaData, Constants, Conditions, Detectors, Versions

 from cal_tools.tools import (get_dir_creation_date)

 import os
 import h5py

 from XFELDetAna.util import env
 env.iprofile = cluster_profile

 from XFELDetAna.detectors.jungfrau import readerPSI as jfreaderPSI
 from XFELDetAna.detectors.jungfrau import reader as jfreader
 from XFELDetAna.detectors.jungfrau.jf_chunk_reader import JFChunkReader
 from XFELDetAna.detectors.jungfrau.util import non_empty_trains

 # so constants relevant for the analysis
 cpuCores = 1
 is_parallel = False
 sensorSize = [1024, 512]
 blockSize = [1024, 512]
 xRange = [0, 0+sensorSize[0]]
 yRange = [0, 0+sensorSize[1]]
 gains = [0, 1, 2]

 ped_dir = "{}/r{:04d}".format(in_folder, run)

 if ped_dir[-1] == "/":
    ped_dir = ped_dir[:-1]
 out_folder = "{}/{}".format(out_folder, os.path.split(ped_dir)[-1])

 if not os.path.exists(out_folder):
    os.makedirs(out_folder)
 elif not overwrite:
    raise AttributeError("Output path exists! Exiting")



 fp_name = path_template.format(run, path_inset)
 fp_path = '{}/{}'.format(ped_dir, fp_name)

 fp_name_contr = path_template_control.format(run, path_inset_control)
 fp_path_contr = '{}/{}'.format(ped_dir, fp_name_contr)



 filep_size = 500
 myRange_P = sequences
 path = h5path

 if sequences[0] == -1:
    sequences = None

 print("Reading data from {}".format(fp_path))
 print("Run is: {}".format(run))
 print("HDF5 path: {}".format(h5path))

 creation_time = None
 if use_dir_creation_date:
    creation_time = get_dir_creation_date(in_folder, run)
    print("Using {} as creation time".format(creation_time))

 cal_timeout = 600000 #ms

 if karabo_id_control == "":
    karabo_id_control = karabo_id
 ```

+%% Output
+
+    Disabled GPU usage after pyCuda import failed!: libcuda.so.1: cannot open shared object file: No such file or directory
+    Using Cython were available
+    Reading data from /gpfs/exfel/exp/FXE/201802/p002271/raw/r0132/RAW-R0132-JNGFR02-S{:05d}.h5
+    Run is: 132
+    HDF5 path: /INSTRUMENT/FXE_XAD_JF1M/DET/RECEIVER:daqOutput/data
+    Using 2019-06-01 13:59:58.020575 as creation time
+
 %% Cell type:code id: tags:

 ``` python
 import h5py
 if not manual_slow_data:
    with h5py.File(fp_path_contr.format(0), 'r') as f:
        integration_time = int(f['/RUN/{}/DET/{}/exposureTime/value'.format(karabo_id_control, control_id)][()]*1e6)
        bias_voltage = int(np.squeeze(f['/RUN/{}/DET/{}/vHighVoltage/value'.format(karabo_id_control, control_id)])[0])
 print("Integration time is {} us".format(integration_time))
 print("Bias voltage is {} V".format(bias_voltage))
 ```

 %% Cell type:code id: tags:

 ``` python
 dirlist = sorted(os.listdir(ped_dir))
 file_list = []
 total_sequences = 0
 fsequences = []
 for entry in dirlist:
    #only h5 file
    abs_entry = "{}/{}".format(ped_dir, entry)
    if os.path.isfile(abs_entry) and os.path.splitext(abs_entry)[1] == ".h5":

        if sequences is None:
            for seq in range(len(dirlist)):

                if path_template.format(run, path_inset).format(seq) in abs_entry:
                    file_list.append(abs_entry)
                    total_sequences += 1
                    fsequences.append(seq)
        else:
            for seq in sequences:
                if path_template.format(run, path_inset).format(seq) in abs_entry:
                    file_list.append(os.path.abspath(abs_entry))
                    total_sequences += 1
                    fsequences.append(seq)
 sequences = fsequences
 ```

 %% Cell type:code id: tags:

 ``` python
 import copy
 from IPython.display import HTML, display, Markdown, Latex
 import tabulate
 print("Processing a total of {} sequence files".format(total_sequences))
 table = []


 for k, f in enumerate(file_list):
    table.append((k, f))
-
-md = display(Latex(tabulate.tabulate(table, tablefmt='latex', headers=["#", "file"])))
+if len(table) > 0:
+    md = display(Latex(tabulate.tabulate(table, tablefmt='latex', headers=["#", "file"])))
 ```

 %% Cell type:code id: tags:

 ``` python
 def get_PSI_gainmaps(fname, dset):
    with h5py.File(fname, 'r') as f_g:
        gain_map = np.array(f_g[dset])
        gain_map = np.transpose(gain_map, (2, 1, 0, 3))
        gain_map = np.expand_dims(gain_map, axis=0)

        return gain_map

 ```

 %% Cell type:code id: tags:

 ``` python
 ## offset

 metadata = ConstantMetaData()
 offset = Constants.jungfrau.Offset()
 metadata.calibration_constant = offset

 # set the operating condition
 condition = Conditions.Dark.jungfrau(memory_cells=1, bias_voltage=bias_voltage,
                                     integration_time=integration_time)
 device = getattr(Detectors, db_module)
 metadata.detector_condition = condition

 # specify the a version for this constant
 if creation_time is None:
        metadata.calibration_constant_version = Versions.Now(device=device)
        metadata.retrieve(cal_db_interface)
 else:
    metadata.calibration_constant_version = Versions.Timespan(device=device,
                                                              start=creation_time)
    metadata.retrieve(cal_db_interface, when=creation_time.isoformat(), timeout=3000000)
 offset_map = metadata.calibration_constant.data

 ## noise

 metadata = ConstantMetaData()
 noise = Constants.jungfrau.Noise()

 metadata.calibration_constant = noise

 # set the operating condition
 condition = Conditions.Dark.jungfrau(memory_cells=1, bias_voltage=bias_voltage,
                                     integration_time=integration_time)



 metadata.detector_condition = condition

 # specify the a version for this constant
 if creation_time is None:
        metadata.calibration_constant_version = Versions.Now(device=device)
        metadata.retrieve(cal_db_interface)
 else:
    metadata.calibration_constant_version = Versions.Timespan(device=device,
                                                              start=creation_time)
    metadata.retrieve(cal_db_interface, when=creation_time.isoformat(), timeout=3000000)
 noise_map = metadata.calibration_constant.data

 ## bad pixels

 metadata = ConstantMetaData()
 bpix = Constants.jungfrau.BadPixelsDark()
 metadata.calibration_constant = bpix

 # set the operating condition
 condition = Conditions.Dark.jungfrau(memory_cells=1, bias_voltage=bias_voltage,
                                     integration_time=integration_time)



 metadata.detector_condition = condition

 # specify the a version for this constant
 if creation_time is None:
        metadata.calibration_constant_version = Versions.Now(device=device)
        metadata.retrieve(cal_db_interface)
 else:
    metadata.calibration_constant_version = Versions.Timespan(device=device,
                                                              start=creation_time)
    metadata.retrieve(cal_db_interface, when=creation_time.isoformat(), timeout=3000000)
 mask = metadata.calibration_constant.data


 ```

 %% Cell type:code id: tags:

 ``` python
 # gain correction
 if gmapfile:
    gain_map = get_PSI_gainmaps(gmapfile, 'gain_map_g0')
    print('Not Using DB gain maps ')
    print('maps from: {}'.format(gmapfile))
 ```

 %% Cell type:code id: tags:

 ``` python
 def copy_and_sanitize_non_cal_data(infile, outfile, h5base):
    """ Copy and sanitize data in `infile` that is not touched by `correctLPD`
    """

    if h5base.startswith("/"):
        h5base = h5base[1:]
    dont_copy = ["adc",]
    dont_copy = [h5base+"/{}".format(do)
                for do in dont_copy]

    def visitor(k, item):
        if k not in dont_copy:
            if isinstance(item, h5py.Group):
                outfile.create_group(k)
            elif isinstance(item, h5py.Dataset):
                group = str(k).split("/")
                group = "/".join(group[:-1])
                infile.copy(k, outfile[group])

    infile.visititems(visitor)

 ```

 %% Cell type:code id: tags:

 ``` python
 import copy
 offset_map = np.squeeze(offset_map)
 mask = np.squeeze(mask)

 fim_data = None
 gim_data = None
 rim_data = None
 msk_data = None
 for k, f in enumerate(file_list):
    with h5py.File(f, 'r') as infile:
        out_file = "{}/{}".format(out_folder, f.split("/")[-1])
        out_file = out_file.replace("RAW", "CORR")
        with h5py.File(out_file, "w") as ofile:
            copy_and_sanitize_non_cal_data(infile, ofile, h5path)
            d = infile[h5path+"/adc"][()].astype(np.float32)
            if k == 0:
                rim_data = np.squeeze(copy.copy(d))
            g = infile[h5path+"/gain"][()]
            oshape = d.shape
            ddset = ofile.create_dataset(h5path+"/adc",
                                         oshape,
                                         chunks=(chunk_size_idim, 1, oshape[1], oshape[2]),
                                         dtype=np.float32)

            mskset = ofile.create_dataset(h5path+"/mask",
                                          oshape,
                                          chunks=(chunk_size_idim, 1, oshape[1], oshape[2]),
                                          dtype=np.uint32,
                                          compression="gzip", compression_opts=1, shuffle=True)
            g[g==3] = 2

            #offset correction
            offset = np.choose(g, (offset_map[...,0], offset_map[...,1], offset_map[...,2]))
            d -= offset

            #gain correction
            cal = np.choose(g, (gain_map[..., 0], gain_map[..., 1], gain_map[..., 2]))
            d /= cal

            ddset[...] = d

            msk = np.choose(g, (mask[...,0], mask[...,1], mask[...,2]))
            mskset[...] = msk
            if k == 0:
                fim_data = np.squeeze(copy.copy(d))
                gim_data = np.squeeze(copy.copy(g))
                msk_data = np.squeeze(copy.copy(msk))
 ```

 %% Cell type:code id: tags:

 ``` python
 def do_2d_plot(data, edges, y_axis, x_axis):
    from matplotlib.colors import LogNorm
    fig = plt.figure(figsize=(10,10))
    ax = fig.add_subplot(111)
    extent = [np.min(edges[1]), np.max(edges[1]),np.min(edges[0]), np.max(edges[0])]
    im = ax.imshow(data[::-1,:], extent=extent, aspect="auto", norm=LogNorm(vmin=1, vmax=np.max(data)))
    ax.set_xlabel(x_axis)
    ax.set_ylabel(y_axis)
    cb = fig.colorbar(im)
    cb.set_label("Counts")

 ```

 %% Cell type:code id: tags:

 ``` python
 h, ex, ey = np.histogram2d(rim_data.flatten(), gim_data.flatten(),
                                                           bins=[100, 4], range=[[0, 10000], [0,4]])
 do_2d_plot(h, (ex, ey), "Signal (ADU)", "Gain Bit Value")
 ```

 %% Cell type:markdown id: tags:

 ### Mean RAW Preview ###

 The per pixel mean of the sequence file of RAW data

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20,10))
 ax = fig.add_subplot(111)
 im = ax.imshow(np.mean(rim_data,axis=0),
               vmin=min(0.75*np.median(rim_data[rim_data > 0]), 2000),
               vmax=max(1.5*np.median(rim_data[rim_data > 0]), 16000), cmap="jet")
 cb = fig.colorbar(im, ax=ax)
 ```

 %% Cell type:markdown id: tags:

 ### Mean CORRECTED Preview ###

 The per pixel mean of the sequence file of CORR data

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20,10))
 ax = fig.add_subplot(111)
 im = ax.imshow(np.mean(fim_data,axis=0),
               vmin=min(0.75*np.median(fim_data[fim_data > 0]), -0.5),
               vmax=max(2.*np.median(fim_data[fim_data > 0]), 100), cmap="jet")
 cb = fig.colorbar(im, ax=ax)
 ```

 %% Cell type:markdown id: tags:

 ### Single Train Preview ###

 A single image from the first train

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20,10))
 ax = fig.add_subplot(111)
 im = ax.imshow(fim_data[0,...],
               vmin=min(0.75*np.median(fim_data[0,...]), -0.5),
               vmax=max(2.*np.median(fim_data[0,...]), 1000), cmap="jet")
 cb = fig.colorbar(im, ax=ax)
 ```

 %% Cell type:markdown id: tags:

 ## Signal Distribution ##

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20,10))
 ax = fig.add_subplot(211)
 h = ax.hist(fim_data.flatten(), bins=1000, range=(-100, 1000), log=True)
 l = ax.set_xlabel("Signal (keV)")
 l = ax.set_ylabel("Counts")

 ax = fig.add_subplot(212)
 h = ax.hist(fim_data.flatten(), bins=1000, range=(-100, 10000), log=True)
 l = ax.set_xlabel("Signal (keV)")
 l = ax.set_ylabel("Counts")
 ```

 %% Cell type:markdown id: tags:

 ### Maximum GAIN Preview ###

 The per pixel maximum of the first train of the GAIN data

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20,10))
 ax = fig.add_subplot(111)
 im = ax.imshow(np.max(gim_data, axis=0), vmin=0,
               vmax=3, cmap="jet")
 cb = fig.colorbar(im, ax=ax)
 ```

 %% Cell type:markdown id: tags:

 ## Bad Pixels ##
 The mask contains dedicated entries for all pixels and memory cells as well as all three gains stages. Each mask entry is encoded in 32 bits as:

 %% Cell type:code id: tags:

 ``` python
 from cal_tools.enums import BadPixels
 from IPython.display import HTML, display, Markdown, Latex
 import tabulate
 table = []
 for item in BadPixels:
    table.append((item.name, "{:016b}".format(item.value)))
 md = display(Latex(tabulate.tabulate(table, tablefmt='latex', headers=["Bad pixel type", "Bit mask"])))
 ```

 %% Cell type:markdown id: tags:

 ### Single Image Bad Pixels ###

 A single image bad pixel map for the first image of the first train

 %% Cell type:code id: tags:

 ``` python
 fig = plt.figure(figsize=(20,10))
 ax = fig.add_subplot(111)
 im = ax.imshow(np.log2(msk_data[0,...]), vmin=0, vmax=32, cmap="jet")
 cb = fig.colorbar(im, ax=ax)
 ```

 %% Cell type:code id: tags:

 ``` python
 ```