Karim Ahmed
--- a/notebooks/ePix100/Correction_ePix100_NBC.ipynb

+ 3

− 2
+++ b/notebooks/ePix100/Correction_ePix100_NBC.ipynb

+ 3

− 2
 %% Cell type:markdown id: tags:

 # ePix100 Data Correction

 Author: European XFEL Detector Group, Version: 2.0

 The following notebook provides data correction of images acquired with the ePix100 detector.

 The sequence of corrections applied is:
 Offset --> Common Mode Noise --> Relative Gain --> Charge Sharing --> Absolute Gain.

 Offset, common mode and gain corrected data is saved to /data/image/pixels in the CORR files.

 If pattern classification is applied (charge sharing correction), this data will be saved to /data/image/pixels_classified, while the corresponding patterns will be saved to /data/image/patterns in the CORR files.

 %% Cell type:code id: tags:

 ``` python
 in_folder = "/gpfs/exfel/exp/HED/202202/p003121/raw" # input folder, required
 out_folder = ""  # output folder, required
 metadata_folder = ""  # Directory containing calibration_metadata.yml when run by xfel-calibrate
 sequences = [-1]  # sequences to correct, set to -1 for all, range allowed
 sequences_per_node = 1  # number of sequence files per cluster node if run as slurm job, set to 0 to not run SLURM parallel
 run = 156  # which run to read data from, required

 # Parameters for accessing the raw data.
 karabo_id = "HED_IA1_EPX100-1"  # karabo_id of the detector
 karabo_da = "EPIX01"  # data aggregators
 db_module = ""  # module id in the database
 receiver_template = "RECEIVER"  # detector receiver template for accessing raw data files
 path_template = 'RAW-R{:04d}-{}-S{{:05d}}.h5'  # the template to use to access data
 instrument_source_template = '{}/DET/{}:daqOutput'  # instrument detector data source in h5files

 # Parameters affecting writing corrected data.
 chunk_size_idim = 1  # H5 chunking size of output data

 # Only for testing
 limit_images = 0  # ONLY FOR TESTING. process only first N images, 0 - process all.

 # Parameters for the calibration database.
 cal_db_interface = "tcp://max-exfl016:8015#8025"  # calibration DB interface to use
 cal_db_timeout = 300000  # timeout on caldb requests
 creation_time = ""  # The timestamp to use with Calibration DB. Required Format: "YYYY-MM-DD hh:mm:ss" e.g. 2019-07-04 11:02:41

 # Conditions for retrieving calibration constants.
 bias_voltage = 200  # bias voltage
 in_vacuum = False  # detector operated in vacuum
 integration_time = -1  # Detector integration time, Default value -1 to use the value from the slow data.
 fix_temperature = -1  # fixed temperature value in Kelvin, Default value -1 to use the value from files.
 gain_photon_energy = 8.048  # Photon energy used for gain calibration
 photon_energy = 0.  # Photon energy to calibrate in number of photons, 0 for calibration in keV

 # Flags to select type of applied corrections.
 pattern_classification = True  # do clustering.
 relative_gain = True  # Apply relative gain correction.
 absolute_gain = True  # Apply absolute gain correction (implies relative gain).
 common_mode = True  # Apply common mode correction.

 # Parameters affecting applied correction.
 cm_min_frac = 0.25  # No CM correction is performed if after masking the ratio of good pixels falls below this
 cm_noise_sigma = 5.  # CM correction noise standard deviation
 split_evt_primary_threshold = 7.  # primary threshold for split event correction
 split_evt_secondary_threshold = 5.  # secondary threshold for split event correction
 split_evt_mip_threshold = 1000.  # minimum ionizing particle threshold


 def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):
    """Forward the arguments unchanged to `xfel_calibrate.calibrate.balance_sequences`.

    The import is done inside the function, so `xfel_calibrate` is only
    needed when this function is actually called.
    """
    from xfel_calibrate import calibrate

    return calibrate.balance_sequences(
        in_folder, run, sequences, sequences_per_node, karabo_da)
 ```

 %% Cell type:code id: tags:

 ``` python
 import tabulate
 import warnings
 from logging import warning

 import h5py
 import pasha as psh
 import numpy as np
 import matplotlib.pyplot as plt
 from IPython.display import Latex, display
 from extra_data import RunDirectory, H5File
 from pathlib import Path

 from XFELDetAna import xfelpyanatools as xana
 from XFELDetAna import xfelpycaltools as xcal
 from calibration_client import CalibrationClient
-from cal_tools import h5_copy_except, restful_config
-from cal_tools.epix100 import epix100lib
+from cal_tools import h5_copy_except
 from cal_tools.calcat_interface import EPIX100_CalibrationData
+from cal_tools.epix100 import epix100lib
+from cal_tools.restful_config import restful_config
 from cal_tools.tools import (
    calcat_creation_time,
    load_constants_dict,
    CalibrationMetadata,
 )
 from cal_tools.step_timing import StepTimer

 warnings.filterwarnings('ignore')

 prettyPlotting = True

 %matplotlib inline
 ```

 %% Cell type:code id: tags:

 ``` python
 x = 708  # rows of the ePix100
 y = 768  # columns of the ePix100

 # Requesting absolute gain correction switches relative gain correction on too.
 if absolute_gain:
    relative_gain = True

 # Unit shown on plot labels; starts as ADU.
 plot_unit = 'ADU'
 ```

 %% Cell type:code id: tags:

 ``` python
 in_folder, out_folder = Path(in_folder), Path(out_folder)

 # Make sure the destination exists before anything is written into it.
 out_folder.mkdir(parents=True, exist_ok=True)

 run_folder = in_folder.joinpath(f"r{run:04d}")
 instrument_src = instrument_source_template.format(karabo_id, receiver_template)

 # Summarise where data is read from and written to.
 print(
    f"Correcting run: {run_folder}",
    f"Instrument H5File source: {instrument_src}",
    f"Data corrected files are stored at: {out_folder}",
    sep="\n",
 )
 ```

 %% Cell type:code id: tags:

 ``` python
 # Replace the `creation_time` parameter with the value returned by
 # calcat_creation_time (an object providing `.isoformat()`).
 creation_time = calcat_creation_time(in_folder, run, creation_time)
 print(f"Using {creation_time.isoformat()} as creation time")

 # Fall back to the output folder when no metadata folder was given.
 metadata = CalibrationMetadata(metadata_folder or out_folder)
 # Constant paths are saved under retrieved-constants in calibration_metadata.yml.
 # NOTE: this notebook shouldn't overwrite calibration metadata file.
 const_yaml = metadata.get("retrieved-constants", {})
 ```

 %% Cell type:code id: tags:

 ``` python
 run_dc = RunDirectory(run_folder, _use_voview=False)

 # Paths of all files in the run that match the detector's karabo_id.
 seq_files = [
    Path(file_access.filename)
    for file_access in run_dc.select(f"*{karabo_id}*").files
 ]

 # Keep only the requested sequences, unless all of them were asked for.
 if sequences != [-1]:
    seq_files = [
        seq_path for seq_path in seq_files
        if any(seq_path.match(f"*-S{s:05d}.h5") for s in sequences)
    ]

 if not seq_files:
    raise IndexError("No sequence files available for the selected sequences.")

 print(f"Processing a total of {len(seq_files)} sequence files")
 ```

 %% Cell type:code id: tags:

 ``` python
 step_timer = StepTimer()
 ```

 %% Cell type:code id: tags:

 ``` python
 step_timer.start()

 sensorSize = [x, y]
 # Sensor area will be analysed according to blocksize
 blockSize = [sensorSize[0]//2, sensorSize[1]//2]
 xcal.defaultBlockSize = blockSize
 memoryCells = 1  # ePIX has no memory cells
 run_parallel = False

 # Read control data.
 # NOTE(review): the instrument source string is rebuilt here instead of
 # going through `instrument_source_template` - confirm this is intended.
 ctrl_data = epix100lib.epix100Ctrl(
    run_dc=run_dc,
    instrument_src=f"{karabo_id}/DET/{receiver_template}:daqOutput",
    ctrl_src=f"{karabo_id}/DET/CONTROL",
    )

 # A negative `integration_time` parameter means: take it from the control data.
 if integration_time < 0:
    integration_time = ctrl_data.get_integration_time()
    integration_time_str_add = ""
 else:
    integration_time_str_add = "(manual input)"

 # A negative `fix_temperature` parameter means: take it from the control data.
 # Both representations are kept, converted with the 273.15 offset.
 if fix_temperature < 0:
    # `get_temprature` is the method name as spelled in this call.
    temperature = ctrl_data.get_temprature()
    temperature_k = temperature + 273.15
    temp_str_add = ""
 else:
    temperature_k = fix_temperature
    temperature = fix_temperature - 273.15
    temp_str_add = "(manual input)"

 print(f"Bias voltage is {bias_voltage} V")
 print(f"Detector integration time is set to {integration_time} \u03BCs {integration_time_str_add}")
 print(f"Mean temperature: {temperature:0.2f}°C / {temperature_k:0.2f} K {temp_str_add}")
 print(f"Operated in vacuum: {in_vacuum}")
 ```

 %% Cell type:code id: tags:

 ``` python
 # Table of sequence files to process
 table = [(k, f) for k, f in enumerate(seq_files)]

 if len(table):
    md = display(Latex(tabulate.tabulate(
        table,
        tablefmt='latex',
        headers=["#", "file"]
    )))
 ```

 %% Cell type:code id: tags:

 ``` python
 # Connect to CalCat.
 calcat_config = restful_config['calcat']
 client = CalibrationClient(
    base_api_url=calcat_config['base-api-url'],
    use_oauth2=calcat_config['use-oauth2'],
    client_id=calcat_config['user-id'],
    client_secret=calcat_config['user-secret'],
    user_email=calcat_config['user-email'],
    token_url=calcat_config['token-url'],
    refresh_url=calcat_config['refresh-url'],
    auth_url=calcat_config['auth-url'],
    scope='')
 ```

 %% Cell type:markdown id: tags:

 ## Retrieving calibration constants

 As a first step, dark maps have to be loaded.

 %% Cell type:code id: tags:

 ``` python
 constant_names = ["OffsetEPix100", "NoiseEPix100"]
 if relative_gain:
    constant_names.append("RelativeGainEPix100")

 if not const_yaml:
    # Nothing stored in the metadata file: query the constants through
    # the CalCat interface for the current operating conditions.
    epix_cal = EPIX100_CalibrationData(
        detector_name=karabo_id,
        sensor_bias_voltage=bias_voltage,
        integration_time=integration_time,
        sensor_temperature=temperature_k,
        in_vacuum=in_vacuum,
        source_energy=gain_photon_energy,
        event_at=creation_time,
        snapshot_at=creation_time,
        client=client,
    )
    const_data = epix_cal.ndarray_map(calibrations=constant_names)[karabo_da]
 else:
    # Used while reproducing corrected data.
    print(f"Using stored constants in {metadata.filename}")
    const_data, _ = load_constants_dict(const_yaml[karabo_da]["constants"])

 # Dark constants are mandatory: fail if any of them is missing.
 missing_dark_constants = {
    name for name in ["OffsetEPix100", "NoiseEPix100"]
    if name not in const_data
 }
 if missing_dark_constants:
    raise KeyError(
        f"Dark constants {missing_dark_constants} are not available for correction.")

 # The gain constant is optional: warn and switch gain corrections off.
 if relative_gain and "RelativeGainEPix100" not in const_data:
    warning("RelativeGainEPix100 is not found in the calibration database.")
    relative_gain = False
    absolute_gain = False
 ```

 %% Cell type:code id: tags:

 ``` python
 # Initializing some parameters.
 hscale = 1
 stats = True
 hrange = np.array([-50, 1000])
 nbins = hrange[1] - hrange[0]
 commonModeBlockSize = [x//2, y//2]
 ```

 %% Cell type:code id: tags:

 ``` python
 # Settings that are identical for both histogram calculators below.
 shared_hist_args = dict(
    parallel=run_parallel,
    nCells=memoryCells,
    blockSize=blockSize,
 )

 # Histogram of the offset corrected data.
 histCalOffsetCor = xcal.HistogramCalculator(
    sensorSize, bins=nbins, range=hrange, **shared_hist_args)

 # *****************Histogram Calculators****************** #
 # Histogram of the fully corrected data.
 histCalCor = xcal.HistogramCalculator(
    sensorSize, bins=1050, range=[-50, 1000], **shared_hist_args)
 ```

 %% Cell type:code id: tags:

 ``` python
 if common_mode:
    # Histogram of the data after common mode correction.
    histCalCMCor = xcal.HistogramCalculator(
        sensorSize,
        bins=nbins,
        range=hrange,
        parallel=run_parallel,
        nCells=memoryCells,
        blockSize=blockSize,
    )

    # Arguments shared by the three common mode corrections;
    # they only differ in their orientation.
    cm_shared_args = dict(
        shape=sensorSize,
        blockSize=commonModeBlockSize,
        nCells=memoryCells,
        noiseMap=const_data['NoiseEPix100'],
        runParallel=run_parallel,
        parallel=run_parallel,
        stats=stats,
        minFrac=cm_min_frac,
        noiseSigma=cm_noise_sigma,
    )
    cmCorrectionB = xcal.CommonModeCorrection(
        orientation='block', **cm_shared_args)
    cmCorrectionR = xcal.CommonModeCorrection(
        orientation='row', **cm_shared_args)
    cmCorrectionC = xcal.CommonModeCorrection(
        orientation='col', **cm_shared_args)
 ```

 %% Cell type:code id: tags:

 ``` python
 if relative_gain:
    # The median of the gain map is the single scaling constant: it
    # normalises the per-pixel map below and later scales the data
    # in the absolute gain step.
    gain_cnst = np.median(const_data["RelativeGainEPix100"])
    hscale = gain_cnst
    plot_unit = 'keV'
    if photon_energy > 0:
        # Raw string: `\g` is not a valid escape sequence in a normal
        # string literal. The resulting text is unchanged.
        plot_unit = r'$\gamma$'
        hscale /= photon_energy

    # Per-pixel correction map, normalised by the median gain.
    gainCorrection = xcal.RelativeGainCorrection(
        sensorSize,
        gain_cnst/const_data["RelativeGainEPix100"][..., None],
        nCells=memoryCells,
        parallel=run_parallel,
        blockSize=blockSize,
        gains=None,
    )

    # Histogram of the data after relative gain correction.
    histCalRelGainCor = xcal.HistogramCalculator(
        sensorSize,
        bins=nbins,
        range=hrange,
        parallel=run_parallel,
        nCells=memoryCells,
        blockSize=blockSize
    )

    if absolute_gain:
        # Histogram of the data after absolute gain correction; its
        # range is scaled by `hscale`.
        histCalAbsGainCor = xcal.HistogramCalculator(
            sensorSize,
            bins=nbins,
            range=hrange*hscale,
            parallel=run_parallel,
            nCells=memoryCells,
            blockSize=blockSize
        )
 ```

 %% Cell type:code id: tags:

 ``` python
 if pattern_classification:
    patternClassifier = xcal.PatternClassifier(
        [x, y],
        const_data["NoiseEPix100"],
        split_evt_primary_threshold,
        split_evt_secondary_threshold,
        split_evt_mip_threshold,
        tagFirstSingles=0,
        nCells=memoryCells,
        allowElongated=False,
        blockSize=[x, y],
        parallel=run_parallel,
    )

    # Settings shared by the three histogram calculators of this cell.
    clu_hist_args = dict(
        bins=nbins,
        parallel=run_parallel,
        nCells=memoryCells,
        blockSize=blockSize,
    )
    # Classified data before absolute gain scaling.
    histCalCSCor = xcal.HistogramCalculator(
        sensorSize, range=hrange, **clu_hist_args)
    # Classified data after absolute gain scaling: all clusters.
    histCalGainCorClusters = xcal.HistogramCalculator(
        sensorSize, range=hrange*hscale, **clu_hist_args)
    # Classified data after absolute gain scaling: singles only.
    histCalGainCorSingles = xcal.HistogramCalculator(
        sensorSize, range=hrange*hscale, **clu_hist_args)
 ```

 %% Cell type:markdown id: tags:

 ## Applying corrections

 %% Cell type:code id: tags:

 ``` python
 def correct_train(wid, index, tid, d):
    """Correct the image of one train and store it in the output arrays.

    Steps, in order: offset subtraction, optional common mode correction
    (block, row, column), optional relative gain correction, optional
    pattern classification and optional absolute gain scaling. The
    histogram calculators are filled after each step.

    Results are written at position `index` into the notebook-level
    arrays `data` and, when pattern classification is enabled,
    `data_clu` and `data_patterns`.

    :param wid: first argument passed by the caller of this kernel; unused.
    :param index: position in the output arrays to write to.
    :param tid: third argument passed by the caller of this kernel; unused.
    :param d: per-train data, indexed with `pixel_data[0]` and then
        `pixel_data[1]` to get the raw image.
    """

    # Append a trailing axis and convert to float32.
    d = d[pixel_data[0]][pixel_data[1]][..., np.newaxis].astype(np.float32)
    # Keep only entries along the last axis that have a value above 0.
    d = np.compress(
        np.any(d > 0, axis=(0, 1)), d, axis=2)

    # Offset correction.
    d -= const_data["OffsetEPix100"]

    histCalOffsetCor.fill(d)
    # Common Mode correction.
    if common_mode:
        # Block CM
        d = cmCorrectionB.correct(d)
        # Row CM
        d = cmCorrectionR.correct(d)
        # COL CM
        d = cmCorrectionC.correct(d)
        histCalCMCor.fill(d)

    # relative gain correction.
    if relative_gain:
        d = gainCorrection.correct(d)
        histCalRelGainCor.fill(d)

    # The gain correction is currently applying
    # an absolute correction (not a relative correction
    # as implied by the name);
    # it changes the scale (the unit of measurement)
    # of the data from ADU to either keV or n_of_photons.
    # But the pattern classification relies on comparing
    # data with the NoiseEPix100 map, which is still in ADU.
    #
    # The best solution is to do a relative gain
    # correction first and apply the global absolute
    # gain to the data at the end, after clustering.

    if pattern_classification:

        d_clu, patterns = patternClassifier.classify(d)
        # Zero everything below the primary threshold times the noise map.
        # The noise constant is stored under "NoiseEPix100", the same key
        # that is requested and validated when the constants are retrieved.
        d_clu[d_clu < (split_evt_primary_threshold*const_data["NoiseEPix100"])] = 0

        data_clu[index, ...] = np.squeeze(d_clu)
        data_patterns[index, ...] = np.squeeze(patterns)

        histCalCSCor.fill(d_clu)

    # absolute gain correction
    # changes data from ADU to keV (or n. of photons)
    if absolute_gain:

        d = d * gain_cnst
        if photon_energy > 0:
            d /= photon_energy
        histCalAbsGainCor.fill(d)

        if pattern_classification:
            # Modify pattern classification.
            d_clu = d_clu * gain_cnst

            if photon_energy > 0:
                d_clu /= photon_energy

            # Overwrite the unscaled classified data stored above.
            data_clu[index, ...] = np.squeeze(d_clu)

            histCalGainCorClusters.fill(d_clu)

            d_sing = d_clu[patterns==100] # pattern 100 corresponds to single photons events
            if len(d_sing):
                histCalGainCorSingles.fill(d_sing)

    data[index, ...] = np.squeeze(d)
    histCalCor.fill(d)
 ```

 %% Cell type:code id: tags:

 ``` python
 # (source, key) pair used to address the raw image data.
 pixel_data = (instrument_src, "data.image.pixels")

 # 10 is a number chosen after testing 1 ... 71 parallel threads
 context = psh.context.ThreadContext(num_workers=10)
 ```

 %% Cell type:code id: tags:

 ``` python
 # Correct every selected sequence file and write one CORR file per RAW file.
 # `data`, `data_clu` and `data_patterns` are re-allocated for each file and
 # filled by `correct_train`.
 for f in seq_files:

    seq_dc = H5File(f)

    # Length of the data-counts array for the image key.
    n_imgs = seq_dc.get_data_counts(*pixel_data).shape[0]

    # Data shape in seq_dc excluding trains with empty images.
    dshape = seq_dc[pixel_data].shape
    dataset_chunk = ((chunk_size_idim,) + dshape[1:])  # e.g. (1, pixels_x, pixels_y)

    if n_imgs - dshape[0] != 0:
        print(f"- WARNING: {f} has {n_imgs - dshape[0]} trains with empty data.")

    # This parameter is only used for testing.
    if limit_images > 0:
        n_imgs = min(n_imgs, limit_images)

    # NOTE(review): the output arrays always use the full `dshape`, also
    # when `limit_images` reduces the number of processed trains.
    data = context.alloc(shape=dshape, dtype=np.float32)

    if pattern_classification:
        data_clu = context.alloc(shape=dshape, dtype=np.float32)
        data_patterns = context.alloc(shape=dshape, dtype=np.int32)

    step_timer.start()

    # Run `correct_train` over the first `n_imgs` trains of the selection.
    context.map(
        correct_train, seq_dc.select(
            *pixel_data, require_all=True).select_trains(np.s_[:n_imgs])
    )
    step_timer.done_step(f'Correcting {n_imgs} trains.')

    # Store detector h5 information in the corrected file
    # and deselect data to correct and store later.
    step_timer.start()

    out_file = out_folder / f.name.replace("RAW", "CORR")
    data_path = "INSTRUMENT/"+instrument_src+"/data/image"
    pixels_path = f"{data_path}/pixels"

    # First copy all raw data source to the corrected file,
    # while excluding the raw data image /data/image/pixels.
    with h5py.File(out_file, 'w') as ofile:
        # Copy RAW non-calibrated sources.
        with h5py.File(f, 'r') as sfile:
            h5_copy_except.h5_copy_except_paths(
                sfile, ofile,
                [pixels_path])

        # Create dataset in CORR h5 file and add corrected images.
        dataset = ofile.create_dataset(
            pixels_path,
            data=data,
            chunks=dataset_chunk,
            dtype=np.float32)

        if pattern_classification:
            # Save /data/image/pixels_classified in corrected file.
            datasetc = ofile.create_dataset(
                f"{data_path}/pixels_classified",
                data=data_clu,
                chunks=dataset_chunk,
                dtype=np.float32)

            # Save /data/image/patterns in corrected file.
            datasetp = ofile.create_dataset(
                f"{data_path}/patterns",
                data=data_patterns,
                chunks=dataset_chunk,
                dtype=np.int32)

        # The step is reported inside the `with` block, i.e. before the
        # output file is closed.
        step_timer.done_step('Storing data.')
 ```

 %% Cell type:code id: tags:

 ``` python
 # Histogram calculators to overlay, paired with their legend labels.
 # Optional correction steps are only listed when they were enabled.
 hist_sources = [
    (histCalCor, 'Total corr.'),
    (histCalOffsetCor, 'Offset corr.'),
 ]
 if common_mode:
    hist_sources.append((histCalCMCor, 'CM corr.'))
 if relative_gain:
    hist_sources.append((histCalRelGainCor, 'Relative gain corr.'))
 if pattern_classification:
    hist_sources.append((histCalCSCor, 'Charge sharing corr.'))

 d = []
 for hist_calc, hist_label in hist_sources:
    ho, eo, co, so = hist_calc.get()
    d.append({
        'x': co,
        'y': ho,
        'y_err': np.sqrt(ho[:]),
        'drawstyle': 'steps-mid',
        'errorstyle': 'bars',
        'errorcoarsing': 2,
        'label': hist_label,
    })

 fig = xana.simplePlot(
    d,
    aspect=1,
    x_label='Energy (ADU)',
    y_label='Number of occurrences',
    figsize='2col',
    y_log=True,
    x_range=(-50, 500),
    legend='top-center-frame-2col',
 )
 plt.title(f'run {run} - {karabo_da}')
 plt.grid()
 ```

 %% Cell type:code id: tags:

 ``` python
 if absolute_gain:
    # Histogram calculators to overlay, paired with their legend labels.
    gain_hist_sources = [(histCalAbsGainCor, 'Absolute gain corr.')]
    if pattern_classification:
        gain_hist_sources.append(
            (histCalGainCorClusters, 'Charge sharing corr.'))
        gain_hist_sources.append(
            (histCalGainCorSingles, 'Isolated photons (singles)'))

    d = []
    for hist_calc, hist_label in gain_hist_sources:
        ho, eo, co, so = hist_calc.get()
        d.append({
            'x': co,
            'y': ho,
            'y_err': np.sqrt(ho[:]),
            'drawstyle': 'steps-mid',
            'errorstyle': 'bars',
            'errorcoarsing': 2,
            'label': hist_label,
        })

    fig = xana.simplePlot(
        d,
        aspect=1,
        x_label=f'Energy ({plot_unit})',
        y_label='Number of occurrences',
        figsize='2col',
        y_log=True,
        x_range=np.array((-50, 500))*hscale,
        legend='top-center-frame-2col',
    )
    plt.grid()
    plt.title(f'run {run} - {karabo_da}')
 ```

 %% Cell type:markdown id: tags:

 ## Mean Image of the corrected data

 %% Cell type:code id: tags:

 ``` python
 step_timer.start()
 # `data` holds the corrected images of the last processed sequence file only.
 # NOTE(review): the image shown is the NaN-ignoring median over the first
 # axis, although the heading and the step message call it a mean - confirm
 # which one is intended.
 fig = xana.heatmapPlot(
    np.nanmedian(data, axis=0),
    x_label='Columns', y_label='Rows',
    lut_label=f'Signal ({plot_unit})',
    x_range=(0, y),
    y_range=(0, x),
    vmin=-50, vmax=50)
 step_timer.done_step(f'Plotting mean image of {data.shape[0]} trains.')
 ```

 %% Cell type:markdown id: tags:

 ## Single Shot of the corrected data

 %% Cell type:code id: tags:

 ``` python
 step_timer.start()
 # First corrected image held in `data`.
 first_image = data[0]
 fig = xana.heatmapPlot(
    first_image,
    x_label='Columns',
    y_label='Rows',
    lut_label=f'Signal ({plot_unit})',
    x_range=(0, y),
    y_range=(0, x),
    vmin=-50,
    vmax=50,
 )
 step_timer.done_step('Plotting single shot of corrected data.')
 ```