Compare revisions

Changes are shown as if the source revision was being merged into the target revision.
Target project: calibration/pycalibration
Commits on Source (261), showing 2930 additions and 1194 deletions
......@@ -32,6 +32,7 @@ docs/source/test_results.rst
docs/source/test_rsts
reportservice/*.log
slurm_tmp*
src/cal_tools/agipdalgs.c
src/cal_tools/*.c
src/cal_tools/*/*.c
webservice/*.log
webservice/*sqlite
......@@ -57,4 +57,4 @@ cython-editable-install-test:
<<: *before_script
script:
- python3 -m pip install -e ".[test]"
- python3 -m pytest --color yes --verbose ./tests/test_agipdalgs.py
- python3 -m pytest --color yes --verbose ./tests/test_cythonalgs.py
iCalibrationDB @ git+https://xcalgitlab:${GITHUB_TOKEN}@git.xfel.eu/gitlab/detectors/cal_db_interactive.git@2.0.9
\ No newline at end of file
iCalibrationDB @ git+https://xcalgitlab:${GITHUB_TOKEN}@git.xfel.eu/gitlab/detectors/cal_db_interactive.git@2.2.0
cal_tools
=========
.. module:: cal_tools.agipdlib
.. class:: AgipdCorrections
.. attribute:: read_file
.. attribute:: write_file
.. attribute:: cm_correction
.. attribute:: mask_zero_std
.. attribute:: offset_correction
.. attribute:: baseline_correction
.. attribute:: gain_correction
.. attribute:: get_valid_image_idx
\ No newline at end of file
Release Notes
=============
3.5.5
-----
15-06-2022
- [AGIPD][CORRECT] Expose max tasks per pool worker.
3.5.4
-----
13-06-2022
- [AGIPD] Convert bias_voltage parameter condition to integer in cal_tools.
- [LPD] Fix correcting a single pulse.
- [LPD] VCXI require 4 modules.
3.5.3
-----
19-05-2022
- [LPD][CORRECT] Optionally create virtual CXI files
- [LPD][CORRECT] Expose max-nodes parameter
- [AGIPD] Replace gain_choose_int by fused types
- Fix missing install of restful_config.yaml
- Fix use of xfel-calibrate --skip-report
3.5.2
-----
16.05.2022
- [LPD][CORRECT] New correction notebook for LPD
- New `files` module to write European XFEL HDF5 corrected data files.
3.5.1
-----
05-04-2022
- New `Variant` file attribute for Calibration Constant versions, indicating the method of handling the constant post retrieval. https://git.xfel.eu/detectors/pycalibration/-/merge_requests/619
- Epix100 dark Badpixels Map. https://git.xfel.eu/detectors/pycalibration/-/merge_requests/637
- `skip-plots` flag to finish correction before plotting. https://git.xfel.eu/detectors/pycalibration/-/merge_requests/635
- First trainId's timestamp as RAW data creation_time, if there is myMDC connection. https://git.xfel.eu/detectors/pycalibration/-/merge_requests/630
- AGIPD correction can correct one cellId without plotting errors. https://git.xfel.eu/detectors/pycalibration/-/merge_requests/642
- Fixed mode relative gain constants in Jungfrau can be retrieved. https://git.xfel.eu/detectors/pycalibration/-/merge_requests/639
- Only the instrument source is selected to check the number of trains to dark-process. https://git.xfel.eu/detectors/pycalibration/-/merge_requests/636
- AGIPD trains for dark processing are selected for each module individually. https://git.xfel.eu/detectors/pycalibration/-/merge_requests/653
- Produce report after trying to correct AGIPD run with no images. https://git.xfel.eu/detectors/pycalibration/-/merge_requests/641
- AGIPD's bias voltage for AGIPD1M is read from slow data. https://git.xfel.eu/detectors/pycalibration/-/merge_requests/647
- Removed psutil dependency. https://git.xfel.eu/detectors/pycalibration/-/merge_requests/653
- Update Pasha to 0.1.1 https://git.xfel.eu/detectors/pycalibration/-/merge_requests/638
3.5.0
-----
01-03-2022
- Updating Correction and dark notebooks for JUNGFRAU: https://git.xfel.eu/detectors/pycalibration/-/merge_requests/518
- Updating Correction and dark notebooks for AGIPD: https://git.xfel.eu/detectors/pycalibration/-/merge_requests/535
- Updating Correction and dark notebooks for PnCCD: https://git.xfel.eu/detectors/pycalibration/-/merge_requests/559
- Updating Correction and dark notebooks for ePix100: https://git.xfel.eu/detectors/pycalibration/-/merge_requests/500
  * EXtra-data is now used to read files in pycalibration for the AGIPD, JUNGFRAU, ePix100, and PnCCD dark and correction notebooks.
* Pasha is now used for processing data for JUNGFRAU, ePix100 and PnCCD.
* pyDetLib correction functions were removed (except for common-mode correction).
  * `db-module` is no longer needed for JUNGFRAU, ePix100 and PnCCD. Some parameters were updated in the dark and correction notebooks for these detectors.
- `gain_mode` and burst mode are now available for JUNGFRAU. https://git.xfel.eu/detectors/pycalibration/-/merge_requests/591
- JUNGFRAU has now a new badpixel value, `WRONG_GAIN_VALUE`. https://git.xfel.eu/detectors/pycalibration/-/merge_requests/591
- Pass through available for testing in-progress ORCA service. https://git.xfel.eu/detectors/pycalibration/-/merge_requests?scope=all&state=merged&search=orca
- Non-calibrated RAW h5files are no longer copied.
- High priority partitions (`upex-high` and `upex-middle`) are used only for runs from ACTIVE and READY proposals. https://git.xfel.eu/detectors/pycalibration/-/merge_requests/628
- Support disabling LPD correction through the webservice. https://git.xfel.eu/detectors/pycalibration/-/merge_requests/629
- Compatibility for old DAQ files for REMI is added. https://git.xfel.eu/detectors/pycalibration/-/merge_requests/607
- server-overview refactors. https://git.xfel.eu/detectors/pycalibration/-/merge_requests/593 https://git.xfel.eu/detectors/pycalibration/-/merge_requests/589
- AGIPD correction notebook supports the AgipdLitFrameFinder device. https://git.xfel.eu/detectors/pycalibration/-/merge_requests/603
- Parsing code arguments in xfel-calibrate is refactored. https://git.xfel.eu/detectors/pycalibration/-/merge_requests/575
- skip-plots option for AGIPD. https://git.xfel.eu/detectors/pycalibration/-/merge_requests/581
- Native implementation for transposition of constants AGIPD. https://git.xfel.eu/detectors/pycalibration/-/merge_requests/580
- Trains for AGIPD can be selected for correction. https://git.xfel.eu/detectors/pycalibration/-/merge_requests/574
- Skip report flag in xfel-calibrate. https://git.xfel.eu/detectors/pycalibration/-/merge_requests/579
- Fix ReadTheDocs. https://git.xfel.eu/detectors/pycalibration/-/merge_requests/448
- Fix error reporting for re-injecting the same CCV. https://git.xfel.eu/detectors/pycalibration/-/merge_requests/627
- Fix AGIPD for legacy runs without `gain_mode`. https://git.xfel.eu/detectors/pycalibration/-/merge_requests/617 https://git.xfel.eu/detectors/pycalibration/-/merge_requests/626
- Pinning markupsafe version 2.0.1 https://git.xfel.eu/detectors/pycalibration/-/merge_requests/631
- Pinning psutil 5.9.0 https://git.xfel.eu/detectors/pycalibration/-/merge_requests/535
- Updating Extra-data to 1.9.1 https://git.xfel.eu/detectors/pycalibration/-/merge_requests/535
- Updating h5py to 3.5.0 https://git.xfel.eu/detectors/pycalibration/-/merge_requests/602
3.4.3
-----
20-10-2021
- Update pyDetLib tag.
- Add explicit dependencies on matplotlib, scipy.
- Remove outdated matplotlib rcParams setting.
- Update EXtra-geom to 1.6.
- Remove cluster_profile parameter from notebooks which don't use it.
- Fix checking availability for the concurrency parameter.
- Fix launching work directly (not via Slurm).
- Fix `sphinx-rep` temp folder recreation, if sphinx-rep already existed.
- Fix missing string conversion for slurm-scheduling argument.
- Fix title reports for multiple detectors per run folder.
- Append to .out files for preemptable finalize job.
- [AGIPD] [Correct] Reuse previously found constants.
- [AGIPD] Fix missing memory cell index in SlopesPC constant sanitization.
- [AGIPD] Only use bad pixels from darks in agipdutils.baseline_correct_via_stripes.
- [AGIPD] [Dark] Use function to get list of karabo_da from run for making Slurm jobs.
- [EPIX100][CORRECT] Set absolute_gain to false if relative gain was not retrieved.
- [JUNGFRAU] Fix running for multiple modules and flip logic for do_relative_gain.
- [JUNGFRAU] Style changes for Dark and Correct notebooks.
- [REMI] Add notebook to reconstruct detector hits from raw data.
- [webservice] Check run migration status using MyMDC.
- Resolve "Skip ZMQ tests if zmq connection for calibration DB not available".
- Reproducibility, step 1.
3.4.2
-----
17-09-2021
- Remove driver=core from all notebooks
- [webservice] Make use of Dynaconf for managing secrets.
- [webservice] Make use of dedicated slurm partitions.
- [webservice] Handle missing migration information (missing user.status fattr).
- [webservice] Implement, raise, and catch migration errors to send MDC messages.
- [webservice] Simplify handling of user notebook paths.
- [webservice] Update princess to 0.4 (use Unix sockets).
- [webservice] Update MyMDC with begin and end times.
- [webservice] create output folder before copying slow data.
- [AGIPD] [CORRECT] read acq_rate from slow data.
- [AGIPD][CORRECT] Set default memory cells to 352.
- [AGIPD] [CORRECT] Set maximum pulses to correct based on file content.
- [AGIPD] [FF] Correctly label legends in figures.
- [AGIPD] [FF] Add HIBEF AGIPD500K and fix some issue with retrieval of conditions.
- [Jungfrau] Add Gain setting to Jungfrau notebooks.
- [Jungfrau] Fix max gain plot in LPD correct notebook
- [JUNGFRAU] [DARK] Clearer error message for Jungfrau dark notebooks when no suitable files are found
- [LPD] [CORRECT] Fix max gain plot.
- [EPIX100] [CORRECT] Solve conflict between gain correction and clustering
3.4.1
-----
16-07-2021
- Update h5py to 3.3
- Stop execution on notebook errors
- [AGIPD] Add integration time as operating condition to all notebooks
- [webservice] Add blocklist pattern when copying untouched files in webservice.
- [webservice] Expose dark configurations in update_config.py
- Fix MetadataClient.get_proposal_runs arguments call.
- Fix: use snapshot for injecting constants for old PDU mappings
- Fix the old time-summary (creation time for retrieved constants)
- Update documentation notes on venv installation
- Ignore all .so files in gitignore
3.4.0
-----
28-06-2021
- Update to Python 3.8.
- Bump numpy to 1.20.3 and remove fabio.
- remove PyQT dependency.
- Disable dark requests from serve overview.
- Update report upload parameter key.
- Override locale to always use UTF-8.
- Assorted cleanup of xfel-calibrate.
- Fix pre-commit.
- Use argparse only if name is main, call main with args dict.
- [webservice] Use full hostname for webservice overview.
- [webservice] Show clearer messages when running webservice in sim mode.
- [webservice] Fix filename lineno and typos in webservice logs.
- [webservice] Fix creating an extra run folder in run output folder.
- [AGIPD] Parallelize gain/mask compression for writing corrected AGIPD files.
- [AGIPD][DARK] Fix processing empty sequence files.
- [AGIPD][PC][FF] Update notebooks with new CALCAT mapping.
- [AGIPD][JUNGFRAU] Use all available sequences for processing darks for AGIPD and Jungfrau.
- [AGIPD][LPD][DSSC] Fix retrieve old constants for comparison for modular detectors.
- [LPD] Fix data paths in LPD notebook.
- [REMI] Fix user notebook path for REMI correct notebook provisionally.
- [EPIX][CORRECT] Add Common mode correction.
- Fix plotting-related warnings.
- Test update config.
- Test get_from_db and send_to_db.
......@@ -150,7 +150,7 @@ todo_include_todos = True
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
# html_theme = ''
html_theme = 'sphinx_rtd_theme'
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
......@@ -403,6 +403,9 @@ with open("available_notebooks.rst", "w") as f:
"""))
for detector in sorted(notebooks.notebooks.keys()):
# Avoid having test notebooks in detector notebooks documentations.
if "TEST" in detector.upper():
continue
values = notebooks.notebooks[detector]
f.write("{}\n".format(detector))
f.write("{}\n".format("-"*len(detector)))
......@@ -410,8 +413,6 @@ with open("available_notebooks.rst", "w") as f:
for caltype in sorted(values.keys()):
data = values[caltype]
if data.get("notebook", None) is None:
continue
nbpath = os.path.abspath("{}/../../../{}".format(__file__, data["notebook"]))
with open(nbpath, "r") as nf:
nb = nbformat.read(nf, as_version=4)
......@@ -442,7 +443,6 @@ with open("available_notebooks.rst", "w") as f:
output = check_output(nb_help).decode('utf8')
f.write(indent(output.replace("DETECTOR", detector).replace("TYPE", caltype), " "*4))
f.write("\n\n")
# add test results
test_artefact_dir = os.path.realpath("../../tests/legacy/artefacts")
......
......@@ -3,16 +3,14 @@
Configuration
=============
The European XFEL Offline Calibration tools are configure using the `settings.py`
and `notebooks.py` files. Both can be found in the root directory. The `settings.py`
file configures the tools to the environment. The `notebook.py` file configures the
notebooks which should be exposed to the command line.
The European XFEL Offline Calibration is configured through the `settings.py`
and `notebooks.py` files. Both can be found in the `xfel_calibrate` source directory. The `notebooks.py` file exposes and configures the
notebooks available through the `xfel-calibrate` command line interface.
Settings
--------
The `settings.py` file configures the enviroment the tools are run in. It is a normal
python file of the form::
The `settings.py` file is a Python configuration file which sets up the tool's environment::
# path into which temporary files from each run are placed
temp_path = "{}/temp/".format(os.getcwd())
......@@ -35,9 +33,9 @@ A comment is given for the meaning of each configuration parameter.
Notebooks
---------
The `xfel-calibrate` tool will expose any notebooks that are configured here to the
The `xfel-calibrate` tool exposes configured notebooks to the
command line by automatically parsing the parameters given in the notebooks first cell.
The configuration is to be given in form of a python directory::
The configuration is given in the form of a python dictionary::
notebooks = {
"AGIPD": {
......@@ -63,55 +61,23 @@ The configuration is to be given in form of a python directory::
}
}
The first key is the detector that the calibration may be used for, here AGIPD. The second
key level gives the name of the task being performed (here: DARK and PC). For each of these
entries, a path to the notebook and a concurrency hint should be given. In the concurrency
hint the first entry specifies which parameter of the notebook expects a list whose integer
entries, can be concurrently run (here "modules"). The second parameter state with which range
to fill this parameter if it is not given by the user. In the example a `range(16):=0,1,2,...15`
would be passed onto the notebook, which is run as 16 concurrent jobs, each processing one module.
Finally, a hint for the number of cluster cores to be started should be given. This value should
be derived e.g. by profiling memory usage per core, run times, etc.
The first key is the detector, e.g. AGIPD. The second key is the calibration type name, e.g. DARK or PC.
A dictionary is expected for each calibration type, giving a notebook path and a concurrency configuration.
The concurrency configuration takes three values. The key `parameter` names a list-type parameter defined in the first notebook cell.
The key `default concurrency` defines the range of values for that parameter across the concurrent notebooks if the user does not supply one;
e.g. `"default concurrency": 16` leads to 16 concurrent jobs, each processing one module with values `[0, 1, 2, ..., 15]`.
Finally, `cluster cores` gives a hint for the number of cores, which is only used if the notebook uses ipcluster parallelization.
This value should be derived e.g. by profiling memory usage per core, run times, etc.
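A minimal sketch of such an entry, using the keys described above (the notebook path and concrete values are illustrative only)::

    notebooks = {
        "AGIPD": {
            "DARK": {
                "notebook": "notebooks/AGIPD/Characterize_AGIPD_Dark_NBC.ipynb",
                "concurrency": {
                    "parameter": "modules",
                    "default concurrency": 16,
                    "cluster cores": 8,
                },
            },
        },
    }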
.. note::
It is good practice to name command line enabled notebooks with an `_NBC` suffix as shown in
the above example.
The `CORRECT` notebook (last notebook in the example) makes use of a concurrency generating function
by setting the `use function` parameter. This function must be defined in a code cell in the notebook,
its parameters should be named like other exposed parameters. It should return a list of of parameters
to be inserted into the concurrently run notebooks. The example given e.g. defines the `balance_sequences`
function::
def balance_sequences(in_folder, run, sequences, sequences_per_node):
import glob
import re
import numpy as np
if sequences_per_node != 0:
sequence_files = glob.glob("{}/r{:04d}/*-S*.h5".format(in_folder, run))
seq_nums = set()
for sf in sequence_files:
seqnum = re.findall(r".*-S([0-9]*).h5", sf)[0]
seq_nums.add(int(seqnum))
seq_nums -= set(sequences)
return [l.tolist() for l in np.array_split(list(seq_nums),
len(seq_nums)//sequences_per_node+1)]
else:
return sequences
.. note::
Note how imports are inlined in the definition. This is necessary, as only the function code,
not the entire notebook is executed.
which requires as exposed parameters e.g. ::
in_folder = "/gpfs/exfel/exp/SPB/201701/p002038/raw/" # the folder to read data from, required
run = 239 # runs to process, required
sequences = [-1] # sequences to correct, set to -1 for all, range allowed
sequences_per_node = 2 # number of sequence files per cluster node if run as slurm job, set to 0 to not run SLURM parallel
The AGIPD `CORRECT` notebook (the last notebook in the example) makes use of a concurrency-generating function
by setting the `use function` parameter. This function must be defined in the first cell of the notebook,
and its arguments should be named like the notebook's first-cell parameters. It is expected to return a list of parameter values
for the concurrently run notebooks. In the example above, the function used is :func:`xfel_calibrate.calibrate.balance_sequences`.
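As an illustration, such a notebook exposes matching parameters in its first cell and can simply wrap the library function (the values below mirror the AGIPD correction notebook and are examples only)::

    in_folder = "/gpfs/exfel/exp/SPB/201701/p002038/raw/"  # the folder to read data from, required
    run = 239  # runs to process, required
    sequences = [-1]  # sequences to correct, set to -1 for all
    sequences_per_node = 2  # number of sequence files per cluster node, set to 0 to not run SLURM parallel
    karabo_da = ['-1']  # data aggregator names, default [-1] for all
    max_nodes = 8  # maximum number of SLURM jobs to split correction work into

    def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da, max_nodes):
        from xfel_calibrate.calibrate import balance_sequences as bs
        return bs(in_folder, run, sequences, sequences_per_node, karabo_da, max_nodes=max_nodes)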
.. note::
......
.. _how_it_works:
How it Works
============
The European XFEL Offline Calibration utilizes the tools nbconvert_ and nbparameterise_
to expose Jupyter_ notebooks to a command line interface. In the process reports are generated
from these notebooks. The general interface is::
% xfel-calibrate DETECTOR TYPE
where `DETECTOR` and `TYPE` specify the task to be performed.
Additionally, it leverages the DESY/XFEL Maxwell cluster to run these jobs in parallel
via SLURM_.
Here is a list of :ref:`available_notebooks`. See the :ref:`advanced_topics` if you are
for details on how to use as detector group staff.
If you would like to integrate additional notebooks please see the :ref:`development_workflow`.
.. _nbparameterise: https://github.com/takluyver/nbparameterise
.. _nbconver: https://github.com/jupyter/nbconvert
.. _jupyter: http://jupyter.org/
.. _SLURM: https://slurm.schedmd.com
......@@ -3,15 +3,45 @@
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
Welcome to European XFEL Offline Calibration's documentation!
=============================================================
European XFEL Offline Calibration
=================================
Contents:
The European XFEL Offline Calibration (pyCalibration) is a Python package consisting of
different services responsible for most of the offline calibration
and characterization of the detectors.
Running a calibration
---------------------
It utilizes tools such as nbconvert_ and nbparameterise_
to expose Jupyter_ notebooks to a command line interface.
In the process reports are generated from these notebooks.
The general interface is::
% xfel-calibrate DETECTOR TYPE
where `DETECTOR` and `TYPE` specify the task to be performed.
Additionally, it leverages the DESY/XFEL Maxwell cluster to run these jobs in parallel
via SLURM_.
Here is a list of :ref:`available_notebooks`. See the :ref:`advanced_topics` if you are looking
for details on usage as detector group staff.
If you would like to integrate additional notebooks please see the :ref:`development_workflow`.
.. _nbparameterise: https://github.com/takluyver/nbparameterise
.. _nbconvert: https://github.com/jupyter/nbconvert
.. _jupyter: http://jupyter.org/
.. _SLURM: https://slurm.schedmd.com
Documentation contents:
.. toctree::
:maxdepth: 2
how_it_works
installation
configuration
workflow
......@@ -20,3 +50,23 @@ Contents:
tutorial
_notebooks/index
testing
.. toctree::
:caption: Reference
:maxdepth: 2
xfel_calibrate_conf
cal_tools_algorithms
.. toctree::
:caption: Development
:maxdepth: 2
changelog
architecture
Indices and tables
==================
* :ref:`genindex`
* :ref:`search`
\ No newline at end of file
Installation
============
Installation of the European XFEL Offline Calibration tools is best
done into an anaconda/3 environment on the Maxwell_ cluster. However, there are no explicit
dependencies, so any Python3 environment is capable of running the tool.
A more detailed step-by-step instruction can be found in the :ref:`tutorial`.
Package Requirements
--------------------
Requirements can be categorized into three types:
1. Those required to run the general tool chain,
2. Those required by the notebooks themselves,
3. Those required to run notebooks concurrently on an HPC cluster.
Categories 1+2 are usually Python modules, as given in the `requirements.txt`
file found in the root directory. Additionally, PyDetLib_ is required.
It can be installed either as a Karabo dependency or manually.
Parallel execution is currently supported in a `SLURM_` environment.
However, sequential execution is available using the `--no-cluster-job`
parameter when executing `xfel-calibrate`.
.. _Python: http://www.python.org/
.. _Karabo: https://in.xfel.eu/readthedocs/docs/karabo/en/latest/
.. _SLURM: https://slurm.schedmd.com
.. _PyDetLib: https://in.xfel.eu/readthedocs/docs/pydetlib/en/latest/
.. _Maxwell: https://confluence.desy.de/display/IS/Running+Jobs+on+Maxwell
Installation using Anaconda
---------------------------
First you need to load the anaconda/3 environment through::
.. _installation:
1. module load anaconda/3
If installing into other Python environments, this step can be skipped.
Then the package for the offline calibration can be obtained from the git repository::
2. git clone https://git.xfel.eu/gitlab/detectors/pycalibration.git
Home directory
++++++++++++++
You can then install all requirements of this tool chain in your home directory by running::
3. pip install -r requirements.txt . --user
in pycalibration's root directory.
After installation, you should make sure that the home directory is in the PATH environment variable::
4. PATH=/home/<username>/.local/bin:$PATH
************
Installation
************
Preferred directory
+++++++++++++++++++
It's recommended to install the offline calibration (pycalibration) package on
maxwell, using the anaconda/3 environment.
Alternatively, you can install all requirements in a directory of your preference by::
The following instructions clone from the EuXFEL GitLab instance using SSH
remote URLs; this assumes that you have already set up SSH keys for use with GitLab.
If you have not, read the appendix section on `SSH Key Setup for
GitLab`_ for instructions on how to do this.
3. mkdir /gpfs/exfel/data/scratch/<username>/<directory-name>
pip install --target=/gpfs/exfel/data/scratch/<username>/<directory-name> -r requirements.txt .
and it is important to make sure that the installed requirements are in the PATH environment::
Installation using python virtual environment - recommended
===========================================================
4. PATH=/gpfs/exfel/data/scratch/<username>/<directory-name>/bin:$PATH
`pycalibration` uses the same version of Python as Karabo, which in June 2021
updated to use Python 3.8. Currently the default python installation on Maxwell
is still Python 3.6.8, so Python 3.8 needs to be loaded from a different
location.
One option is to use the Maxwell Spack installation: running `module load
maxwell` activates the test Spack instance from DESY, after which you can use
`module load python-3.8.6-gcc-10.2.0-622qtxd` to load Python 3.8. Note that this Spack
instance is currently in a trial phase and may not be stable.
After this make sure that you adjust the :ref:`configuration` and settings in the xfel-calibrate
folder to match your environment.
Another option is to use `pyenv`. We provide a pyenv installation at
`/gpfs/exfel/sw/calsoft/.pyenv`, which we use to manage different versions of
Python. It can be activated with ``source /gpfs/exfel/sw/calsoft/.pyenv/bin/activate``.
The tool-chain is then available via the::
A quick setup would be:
xfel-calibrate
1. ``source /gpfs/exfel/sw/calsoft/.pyenv/bin/activate``
2. ``git clone ssh://git@git.xfel.eu:10022/detectors/pycalibration.git && cd pycalibration`` - clone the offline calibration package from EuXFEL GitLab
3. ``pyenv shell 3.8.11`` - load required version of python
4. ``python3 -m venv .venv`` - create the virtual environment
5. ``source .venv/bin/activate`` - activate the virtual environment
6. ``python3 -m pip install --upgrade pip`` - upgrade version of pip
7. ``python3 -m pip install .`` - install the pycalibration package (add ``-e`` flag for editable development installation)
command.
Copy/paste script:
.. code:: bash
Installation using karabo
+++++++++++++++++++++++++
source /gpfs/exfel/sw/calsoft/.pyenv/bin/activate
git clone ssh://git@git.xfel.eu:10022/detectors/pycalibration.git
cd pycalibration
pyenv shell 3.8.11
python3 -m venv .venv
source .venv/bin/activate
python3 -m pip install --upgrade pip
python3 -m pip install . # `-e` flag for editable install, e.g. `pip install -e .`
If required, one can install into the karabo environment. The difference would be to
first source-activate the karabo environment::
1. source karabo/activate
Creating an ipython kernel for virtual environments
===================================================
then after cloning the offline calibration package from git, the requirements can be installed through::
To create an ipython kernel with pycalibration available you should (if using a
venv) activate the virtual environment first, and then run:
3. pip install -r requirements.txt .
.. code:: bash
Development Installation
------------------------
python3 -m pip install ipykernel # If not using a venv add `--user` flag
python3 -m ipykernel install --user --name pycalibration --display-name "pycalibration" # If not using a venv pick different name
For a development installation in your home directory, which automatically
picks up (most) changes, first install the dependencies as above,
but then install the tool-chain separately in development mode::
This can be useful for Jupyter notebook tools such as https://max-jhub.desy.de/hub/login
pip install -e . --user
.. note:: Using "- -target" for development installation in a preferred directory can lead to errors.
SSH Key Setup for GitLab
========================
.. note:: For development installation in karabo environment "- -user" is not needed.
It is highly recommended to set up SSH keys for access to GitLab as this
simplifies the setup process for all of our internal software present on GitLab.
Installation of New Notebooks
-----------------------------
To set up the keys:
To install new, previously untracked notebooks in the home directory,
repeat the installation of the tool-chain, without requirements,
from the package base directory::
1. Connect to Maxwell
2. Generate a new keypair with ``ssh-keygen -o -a 100 -t ed25519``; you can
either leave it in the default location (``~/.ssh/id_ed25519``) or place it
into a separate directory to make management of keys easier if you already
have multiple ones. If you are using a password for your keys, please check
this page to learn how to manage them: https://docs.github.com/en/github/authenticating-to-github/generating-a-new-ssh-key-and-adding-it-to-the-ssh-agent#adding-your-ssh-key-to-the-ssh-agent
3. Add the public key (``id_ed25519.pub``) to your account on GitLab: https://git.xfel.eu/gitlab/profile/keys
4. Add the following to your ``~/.ssh/config`` file
pip install --upgrade . --user
.. code::
Or, in case you are actively developing::
# Special flags for gitlab over SSH
Host git.xfel.eu
User git
Port 10022
ForwardX11 no
IdentityFile ~/.ssh/id_ed25519
pip install -e . --user
Once this is done you can clone repositories you have access to from GitLab
without having to enter your password each time. As ``pycalibration``
requirements are installed from SSH remote URLs, having SSH keys set up is a
requirement for installing pycalibration.
......@@ -25,41 +25,7 @@ This will open a jupyter kernel running in your browser where you can then open
ipcluster start --n=4 --profile=tutorial
you can step through the cells and run them.
If you run this notebook using the xfel-calibrate command as explaind at the end of this tutorial you do not need to start the cluster yourself, it will be done by the framework.
Installation and Configuration
------------------------------
The offline calibration tool-chain is optimised to run on the maxwell cluster.
For more information refer to the Maxwell_ documentation.
.. _Maxwell: https://confluence.desy.de/display/IS/Running+Jobs+on+Maxwell
In order to use the offline calibration tool a few steps need to be carried out
to install the necessary packages and setup the environment:
1. Log into max-exfl with you own user name/account.
2. Install karabo in your home directory or under /gpfs/exfel/data/scratch/username
by typing the following commands on you shell::
wget http://exflserv05.desy.de/karabo/karaboFramework/tags/2.2.4/karabo-2.2.4-Release-CentOS-7-x86_64.sh
chmod +x karabo-2.2.4-Release-CentOS-7-x86_64.sh
./karabo-2.2.4-Release-CentOS-7-x86_64.sh
source karabo/activate
3. Get the package pycalibration which contains the offline calibration tool-chain::
git clone https://git.xfel.eu/gitlab/detectors/pycalibration.git
4. Install the necessary requirements and the package itself::
cd pycalibration
pip install -r requirements.txt .
If you run this notebook using the xfel-calibrate command as explained at the end of this tutorial you do not need to start the cluster yourself, it will be done by the framework.
Create your own notebook
......@@ -87,7 +53,7 @@ Running the notebook
You can see your job in the queue with::
squeue -u username
squeue --me
3. Look at the generated report in the chosen output folder.
4. More information on the job run on the cluster can be found in the temp folder.
......@@ -4,7 +4,7 @@ Development Workflow
====================
The following walkthrough will guide you through a possible workflow
when developing new offline calibration tools.
when developing new notebooks for offline calibration.
Fresh Start
-----------
......@@ -12,7 +12,7 @@ Fresh Start
If you are starting a blank notebook from scratch you should first
think about a few preconsiderations:
* Will the notebook performan a headless task, or will it also be
* Will the notebook perform a headless task, or will it also be
an important interface for evaluating the results in form of a
report.
* Do you need to run concurrently? Is concurrency handled internally,
......@@ -25,7 +25,7 @@ cells in the notebook. You should also structure it into appropriate
subsections.
If you plan on running concurrently on the cluster, identify which variable
should be mapped to concurent runs. For autofilling it an integer list is
should be mapped to concurrent runs. For autofilling it an integer list is
needed.
Once you've clarified the above points, you should create a new notebook,
......@@ -139,7 +139,7 @@ to the following parameters being exposed via the command line::
.. note::
Nbparameterise can only parse the mentioned subset of variable types. An expression
nbparameterise_ can only parse the mentioned subset of variable types. An expression
that evaluates to such a type will not be recognized: e.g. `a = list(range(3))` will
not work!
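For illustration (the parameter name is arbitrary), a plain literal is exposed while an equivalent expression is not::

    sequences = [0, 1, 2]       # recognized: a plain list literal
    sequences = list(range(3))  # not recognized: an expression, even though it yields the same list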
......@@ -170,59 +170,41 @@ Best Coding Practices
In principle there are no restrictions other than that parameters that are exposed to the
command line need to be defined in the first code cell of the notebook.
However, a few guidelines should be observered to make notebook useful for display as
reports and usage by other.
However, a few guidelines should be observed to make notebook useful for display as
reports and usage by others.
External Libraries
~~~~~~~~~~~~~~~~~~
You may use a wide variaty of libraries available in Python, but keep in mind that others
You may use a wide variety of libraries available in Python, but keep in mind that others
wanting to run the tool will need to install these requirements as well. Thus,
* do not use a specialized tool if an accepted alternative exists. Plots e.g. should usually
* Do not use a specialized tool if an accepted alternative exists. Plots e.g. should usually
be created using matplotlib_ and numerical processing should be done in numpy_.
* keep runtimes and library requirements in mind. A library doing its own parallelism either
needs to programatically be able to set this up, or automatically do so. If you need to
* Keep runtime and library requirements in mind. A library doing its own parallelism either
needs to programmatically be able to set this up, or automatically do so. If you need to
start something from the command line first, things might be tricky as you will likely
need to run this via `Popen` commands with the appropriate environment variables.
* Reading out RAW data should be done using extra_data_. It helps in accessing the HDF5 data
structures efficiently. It reduces the complexity of accessing the RAW or CORRECTED datasets,
and it provides different methods to select and filter the trains, cells, or pixels of interest; a minimal sketch of reading a run follows below.
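The run path and detector source name below are purely illustrative::

    from extra_data import RunDirectory

    run = RunDirectory("/gpfs/exfel/exp/MID/202201/p002834/raw/r0225")
    sel = run.select("MID_DET_AGIPD1M-1/DET/0CH0:xtdf", "image.data")
    # read the image data of the first train that contains this source
    train_id, data = sel.train_from_index(0)
    images = data["MID_DET_AGIPD1M-1/DET/0CH0:xtdf"]["image.data"]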
Writing out data
~~~~~~~~~~~~~~~~
If your notebook produces output data, consider writing data out as early as possible,
such that it is available as soon as possible. Detailed plotting and inspection can
possibly done later on in a notebook.
be done later on in the notebook.
Also consider using HDF5 via h5py_ as your output format. If you correct or calibrated
input data, which adhears to the XFEL naming convention, you should maintain the convention
Also use HDF5 via h5py_ as your output format. If you correct or calibrate
input data, which adheres to the XFEL naming convention, you should maintain the convention
in your output data. You should not touch any data that you do not actively work on and
should assure that the `INDEX` and identifier entries are syncronized with respect to
should assure that the `INDEX` and identifier entries are synchronized with respect to
your output data. E.g. if you remove pulses from a train, the `INDEX/.../count` section
should reflect this.
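As a hypothetical sketch only (the source name, file name, and counts are illustrative, and assume the usual ``INDEX/<source>/image/first`` and ``count`` datasets)::

    import h5py
    import numpy as np

    index_grp = "INDEX/SPB_DET_AGIPD1M-1/DET/0CH0:xtdf/image"   # illustrative source
    kept_per_train = np.array([176, 176, 176])                  # frames kept per train after selection

    with h5py.File("CORR-R0239-AGIPD00-S00000.h5", "r+") as f:  # illustrative file name
        n = len(kept_per_train)
        f[index_grp + "/count"][:n] = kept_per_train
        # keep `first` consistent with the updated counts
        f[index_grp + "/first"][:n] = np.concatenate(([0], np.cumsum(kept_per_train[:-1])))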
Finally, XFEL RAW data can contain filler data from the DAQ. One possible way of identifying
this data is the following::
datapath = "/INSTRUMENT/FXE_DET_LPD1M-1/DET/{}CH0:xtdf/image/cellId".format(channel)
count = np.squeeze(infile[datapath])
first = np.squeeze(infile[datapath])
if np.count_nonzero(count != 0) == 0: # filler data has counts of 0
print("File {} has no valid counts".format(infile))
return
valid = count != 0
idxtrains = np.squeeze(infile["/INDEX/trainId"])
medianTrain = np.nanmedian(idxtrains) # protect against freak train ids
valid &= (idxtrains > medianTrain - 1e4) & (idxtrains < medianTrain + 1e4)
# index ranges in which non-filler data exists
last_index = int(first[valid][-1]+count[valid][-1])
first_index = int(first[valid][0])
# access these indices
cellIds = np.squeeze(np.array(infile[datapath][first_index:last_index, ...]))
Plotting
~~~~~~~~
......@@ -233,10 +215,10 @@ a context. Make sure to label your axes.
Also make sure the plots are readable on an A4-sized PDF page; this is the format the notebook
will be rendered to for report outputs. Specifically, this means that figure sizes should not
exeed approx 15x15 inches.
exceed approx 15x15 inches.
The report will contain 150 dpi png images of your plots. If you need higher quality output
of individual plot files you should save these separetly, e.g. via `fig.savefig(...)` yourself.
of individual plot files you should save these separately, e.g. via `fig.savefig(...)` yourself.
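A small illustrative sketch (figure size and file name are arbitrary)::

    import matplotlib.pyplot as plt
    import numpy as np

    fig, ax = plt.subplots(figsize=(8, 5))   # stays well below ~15x15 inches
    ax.plot(np.arange(100), np.random.default_rng(0).normal(size=100))
    ax.set_xlabel("frame index")
    ax.set_ylabel("signal (ADU)")
    fig.savefig("signal_overview.png", dpi=300)  # separate, higher-quality copy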
Calibration Database Interaction
......@@ -245,7 +227,7 @@ Calibration Database Interaction
Tasks which require calibration constants or produce such should do this by interacting with
the European XFEL calibration database.
In terms of developement workflow it is usually easier to work with file-based I/O first and
In terms of development workflow it is usually easier to work with file-based I/O first and
only switch over to the database after the algorithmic part of the notebook has matured.
Reasons for this include:
......@@ -261,7 +243,7 @@ documentation.
Testing
-------
The most important test is that your notebook completes flawlessy outside any special
The most important test is that your notebook completes flawlessly outside any special
tool chain feature. After all, the tool chain will only replace parameters, and then
launch a concurrent job and generate a report out of notebook. If it fails to run in the
normal Jupyter notebook environment, it will certainly fail in the tool chain environment.
......@@ -274,11 +256,11 @@ Specifically, you should verify that all arguments are parsed correctly, e.g. by
xfel-calibrate DETECTOR NOTEBOOK_TYPE --help
From then on, check include if parallel slurm jobs are exectuted correctly and if a report
From then on, checks include whether parallel slurm jobs are executed correctly and whether a report
is generated at the end.
Finally, you should verify that the report contains the information you'd like to convey and
is inteligable to people other than you.
is intelligible to people other than you.
.. note::
......@@ -298,4 +280,5 @@ documentation.
.. _matplotlib: https://matplotlib.org/
.. _numpy: http://www.numpy.org/
.. _h5py: https://www.h5py.org/
.. _iCalibrationDB: https://in.xfel.eu/readthedocs/docs/icalibrationdb/en/latest/
.. _iCalibrationDB: https://git.xfel.eu/detectors/cal_db_interactive
.. _extra_data: https://extra-data.readthedocs.io/en/latest/
\ No newline at end of file
xfel_calibrate
==============
.. module:: xfel_calibrate.calibrate
.. autofunction:: balance_sequences
%% Cell type:markdown id: tags:
# AGIPD Offline Correction #
Author: European XFEL Detector Group, Version: 2.0
Offline Calibration for the AGIPD Detector
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/exp/SPB/202131/p900230/raw" # the folder to read data from, required
out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/remove/agipd_resolve_conf" # the folder to output to, required
in_folder = "/gpfs/exfel/exp/MID/202201/p002834/raw" # the folder to read data from, required
out_folder = "/gpfs/exfel/data/scratch/esobolev/pycal_litfrm/p002834/r0225" # the folder to output to, required
metadata_folder = "" # Directory containing calibration_metadata.yml when run by xfel-calibrate
sequences = [-1] # sequences to correct, set to -1 for all, range allowed
modules = [-1] # modules to correct, set to -1 for all, range allowed
train_ids = [-1] # train IDs to correct, set to -1 for all, range allowed
run = 275 # runs to process, required
run = 225 # runs to process, required
karabo_id = "SPB_DET_AGIPD1M-1" # karabo karabo_id
karabo_id = "MID_DET_AGIPD1M-1" # karabo karabo_id
karabo_da = ['-1'] # a list of data aggregators names, Default [-1] for selecting all data aggregators
receiver_template = "{}CH0" # inset for receiver devices
path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data
instrument_source_template = '{}/DET/{}:xtdf' # path in the HDF5 file to images
index_source_template = 'INDEX/{}/DET/{}:xtdf/' # path in the HDF5 file to images
ctrl_source_template = '{}/MDL/FPGA_COMP' # path to control information
karabo_id_control = "SPB_IRU_AGIPD1M1" # karabo-id for control device
karabo_id_control = "MID_EXP_AGIPD1M1" # karabo-id for control device
slopes_ff_from_files = "" # Path to locally stored SlopesFF and BadPixelsFF constants
slopes_ff_from_files = "" # Path to locally stored SlopesFF and BadPixelsFF constants, loaded in precorrection notebook
use_dir_creation_date = True # use the creation data of the input dir for database queries
cal_db_interface = "tcp://max-exfl016:8015#8045" # the database interface to use
cal_db_timeout = 30000 # in milliseconds
creation_date_offset = "00:00:00" # add an offset to creation date, e.g. to get different constants
use_ppu_device = '' # Device ID for a pulse picker device to only process picked trains, empty string to disable
ppu_train_offset = 0 # When using the pulse picker, offset between the PPU's sequence start and actually picked train
mem_cells = 0 # Number of memory cells used, set to 0 to automatically infer
bias_voltage = 0 # bias voltage, set to 0 to use stored value in slow data.
acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine
gain_setting = -1 # the gain setting, use -1 to use value stored in slow data.
gain_mode = -1 # gain mode (0: adaptive, 1-3 fixed high/med/low, -1: read from CONTROL data)
photon_energy = 9.2 # photon energy in keV
overwrite = True # set to True if existing data should be overwritten
max_pulses = [0, 352, 1] # range list [st, end, step] of memory cell indices to be processed within a train. 3 allowed maximum list input elements.
mem_cells_db = 0 # set to a value different than 0 to use this value for DB queries
integration_time = -1 # integration time, negative values for auto-detection.
# Correction parameters
blc_noise_threshold = 5000 # above this mean signal intensity now baseline correction via noise is attempted
cm_dark_fraction = 0.66 # threshold for fraction of empty pixels to consider module enough dark to perform CM correction
cm_dark_range = [-50.,30] # range for signal value ADU for pixel to be consider as a dark pixel
cm_n_itr = 4 # number of iterations for common mode correction
hg_hard_threshold = 1000 # threshold to force medium gain offset subtracted pixel to high gain
mg_hard_threshold = 1000 # threshold to force medium gain offset subtracted pixel from low to medium gain
noisy_adc_threshold = 0.25 # threshold to mask complete adc
ff_gain = 7.2 # conversion gain for absolute FlatField constants, while applying xray_gain
# Correction Booleans
only_offset = False # Apply only Offset correction. if False, Offset is applied by Default. if True, Offset is only applied.
rel_gain = False # do relative gain correction based on PC data
xray_gain = False # do relative gain correction based on xray data
blc_noise = False # if set, baseline correction via noise peak location is attempted
blc_stripes = False # if set, baseline corrected via stripes
blc_hmatch = False # if set, base line correction via histogram matching is attempted
match_asics = False # if set, inner ASIC borders are matched to the same signal level
adjust_mg_baseline = False # adjust medium gain baseline to match highest high gain value
zero_nans = False # set NaN values in corrected data to 0
zero_orange = False # set to 0 very negative and very large values in corrected data
blc_set_min = False # Shift to 0 negative medium gain pixels after offset corr
corr_asic_diag = False # if set, diagonal drop offs on ASICs are corrected
force_hg_if_below = False # set high gain if mg offset subtracted value is below hg_hard_threshold
force_mg_if_below = False # set medium gain if mg offset subtracted value is below mg_hard_threshold
mask_noisy_adc = False # Mask entire ADC if they are noise above a relative threshold
common_mode = False # Common mode correction
melt_snow = False # Identify (and optionally interpolate) 'snowy' pixels
mask_zero_std = False # Mask pixels with zero standard deviation across train
low_medium_gap = False # 5 sigma separation in thresholding between low and medium gain
round_photons = False # Round to absolute number of photons, only use with gain corrections
use_litframe_device = '' # Device ID for a lit frame finder device to only process illuminated frames, empty string to disable
# Optional auxiliary devices
use_ppu_device = '' # Device ID for a pulse picker device to only process picked trains, empty string to disable
ppu_train_offset = 0 # When using the pulse picker, offset between the PPU's sequence start and actually picked train
use_litframe_finder = 'off' # Process only illuminated frames: 'off' - disable, 'device' - use online device data, 'offline' - use offline algorithm, 'auto' - choose online/offline source automatically (default)
litframe_device_id = '' # Device ID for a lit frame finder device, empty string to auto detection
energy_threshold = -1000 # The low limit for the energy (uJ) exposed by frames subject to processing. If -1000, selection by pulse energy is disabled
use_xgm_device = '' # DoocsXGM device ID to obtain actual photon energy, operating condition else.
# Output parameters
recast_image_data = '' # Cast data to a different dtype before saving
compress_fields = ['gain', 'mask'] # Datasets in image group to compress.
# Plotting parameters
skip_plots = False # exit after writing corrected files and metadata
cell_id_preview = 1 # cell Id used for preview in single-shot plots
# Paralellization parameters
# Parallelization parameters
chunk_size = 1000 # Size of chunk for image-wise correction
n_cores_correct = 16 # Number of chunks to be processed in parallel
n_cores_files = 4 # Number of files to be processed in parallel
sequences_per_node = 2 # number of sequence files per cluster node if run as SLURM job, set to 0 to not run SLURM parallel
max_nodes = 8 # Maximum number of SLURM jobs to split correction work into
max_tasks_per_worker = -1 # the number of tasks a correction pool worker process can complete before it will exit and be replaced with a fresh worker process. Leave as -1 to keep worker alive as long as pool.
def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da, max_nodes):
from xfel_calibrate.calibrate import balance_sequences as bs
return bs(in_folder, run, sequences, sequences_per_node, karabo_da, max_nodes=max_nodes)
```
%% Cell type:code id: tags:
``` python
import itertools
import os
import math
import multiprocessing
import re
import traceback
import warnings
from datetime import timedelta
from pathlib import Path
from time import perf_counter
import tabulate
from dateutil import parser
from IPython.display import Latex, Markdown, display
warnings.filterwarnings('ignore')
import matplotlib
import matplotlib.pyplot as plt
import yaml
from extra_data import H5File, RunDirectory, stack_detector_data, by_id
from extra_geom import AGIPD_1MGeometry, AGIPD_500K2GGeometry
from matplotlib import cm as colormap
from matplotlib.colors import LogNorm
matplotlib.use("agg")
%matplotlib inline
import numpy as np
import seaborn as sns
sns.set()
sns.set_context("paper", font_scale=1.4)
sns.set_style("ticks")
import cal_tools
import seaborn as sns
from cal_tools import agipdalgs as calgs
from cal_tools.agipdlib import (
AgipdCorrections,
AgipdCtrl,
CellRange,
LitFrameSelection,
)
from cal_tools.ana_tools import get_range
from cal_tools.enums import AgipdGainMode, BadPixels
from cal_tools.step_timing import StepTimer
sns.set()
sns.set_context("paper", font_scale=1.4)
sns.set_style("ticks")
```
%% Cell type:code id: tags:
``` python
in_folder = Path(in_folder)
out_folder = Path(out_folder)
run_folder = in_folder / f'r{run:04d}'
```
%% Cell type:markdown id: tags:
## Evaluated parameters ##
%% Cell type:code id: tags:
``` python
# Fill dictionaries comprising bools and arguments for correction and data analysis
# Here the hierarchy and dependability for correction booleans are defined
corr_bools = {}
# offset is at the bottom of AGIPD correction pyramid.
corr_bools["only_offset"] = only_offset
# Dont apply any corrections if only_offset is requested
if not only_offset:
corr_bools["adjust_mg_baseline"] = adjust_mg_baseline
corr_bools["rel_gain"] = rel_gain
corr_bools["xray_corr"] = xray_gain
corr_bools["blc_noise"] = blc_noise
corr_bools["blc_stripes"] = blc_stripes
corr_bools["blc_hmatch"] = blc_hmatch
corr_bools["blc_set_min"] = blc_set_min
corr_bools["match_asics"] = match_asics
corr_bools["corr_asic_diag"] = corr_asic_diag
corr_bools["zero_nans"] = zero_nans
corr_bools["zero_orange"] = zero_orange
corr_bools["mask_noisy_adc"] = mask_noisy_adc
corr_bools["force_hg_if_below"] = force_hg_if_below
corr_bools["force_mg_if_below"] = force_mg_if_below
corr_bools["common_mode"] = common_mode
corr_bools["melt_snow"] = melt_snow
corr_bools["mask_zero_std"] = mask_zero_std
corr_bools["low_medium_gap"] = low_medium_gap
corr_bools["round_photons"] = round_photons
# Many corrections don't apply to fixed gain mode; will explicitly disable later if detected
disable_for_fixed_gain = [
"adjust_mg_baseline",
"blc_set_min",
"force_hg_if_below",
"force_mg_if_below",
"low_medium_gap",
"melt_snow",
"rel_gain"
]
```
%% Cell type:code id: tags:
``` python
if sequences == [-1]:
sequences = None
dc = RunDirectory(run_folder)
ctrl_src = ctrl_source_template.format(karabo_id_control)
instrument_src = instrument_source_template.format(karabo_id, receiver_template)
index_src = index_source_template.format(karabo_id, receiver_template)
```
%% Cell type:code id: tags:
``` python
# Create output folder
out_folder.mkdir(parents=True, exist_ok=True)
# Evaluate detector instance for mapping
instrument = karabo_id.split("_")[0]
if instrument == "SPB":
dinstance = "AGIPD1M1"
nmods = 16
elif instrument == "MID":
dinstance = "AGIPD1M2"
nmods = 16
elif instrument == "HED":
dinstance = "AGIPD500K"
nmods = 8
# Evaluate requested modules
if karabo_da[0] == '-1':
if modules[0] == -1:
modules = list(range(nmods))
karabo_da = ["AGIPD{:02d}".format(i) for i in modules]
else:
modules = [int(x[-2:]) for x in karabo_da]
print("Process modules:", ', '.join(cal_tools.tools.module_index_to_qm(x) for x in modules))
print(f"Detector in use is {karabo_id}")
print(f"Instrument {instrument}")
print(f"Detector instance {dinstance}")
```
%% Cell type:code id: tags:
``` python
if use_ppu_device:
# Obtain trains to process if using a pulse picker device.
# Will throw an uncaught exception if the device is wrong.
seq_start = dc[use_ppu_device, 'trainTrigger.sequenceStart.value'].ndarray()
# The trains picked are the unique values of trainTrigger.sequenceStart
# minus the first (previous trigger before this run).
train_ids = np.unique(seq_start)[1:] + ppu_train_offset
print(f'PPU device {use_ppu_device} triggered for {len(train_ids)} train(s)')
elif train_ids != [-1]:
# Specific trains passed by parameter, convert to ndarray.
train_ids = np.array(train_ids)
print(f'Processing up to {len(train_ids)} manually selected train(s)')
else:
# Process all trains.
train_ids = None
print(f'Processing all valid trains')
```
%% Cell type:code id: tags:
``` python
# set everything up filewise
mapped_files, _, total_sequences, _, _ = cal_tools.tools.map_modules_from_folder(
str(in_folder), run, path_template, karabo_da, sequences
)
file_list = []
# ToDo: Split table over pages
print(f"Processing a total of {total_sequences} sequence files in chunks of {n_cores_files}")
table = []
ti = 0
for k, files in mapped_files.items():
i = 0
for f in list(files.queue):
file_list.append(f)
if i == 0:
table.append((ti, k, i, f))
else:
table.append((ti, "", i, f))
i += 1
ti += 1
md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
headers=["#", "module", "# module", "file"])))
file_list = sorted(file_list, key=lambda name: name[-10:])
```
%% Cell type:code id: tags:
``` python
first_mod_channel = sorted(modules)[0]
instrument_src_mod = [
s for s in list(dc.all_sources) if f"{first_mod_channel}CH" in s][0]
mod_channel = int(re.findall(rf".*{first_mod_channel}CH([0-9]+):.*", instrument_src_mod)[0])
agipd_cond = AgipdCtrl(
run_dc=dc,
image_src=instrument_src_mod,
ctrl_src=ctrl_src,
raise_error=False, # to be able to process very old data without gain_setting value
)
```
%% Cell type:code id: tags:
``` python
# Evaluate creation time
creation_time = None
if use_dir_creation_date:
creation_time = cal_tools.tools.get_dir_creation_date(str(in_folder), run)
offset = parser.parse(creation_date_offset)
delta = timedelta(hours=offset.hour, minutes=offset.minute, seconds=offset.second)
creation_time += delta
if acq_rate == 0.:
acq_rate = agipd_cond.get_acq_rate()
if mem_cells == 0.:
mem_cells = agipd_cond.get_num_cells()
# TODO: look for alternative for passing creation_time
if gain_setting == -1:
gain_setting = agipd_cond.get_gain_setting(creation_time)
if bias_voltage == 0.:
bias_voltage = agipd_cond.get_bias_voltage(karabo_id_control)
if integration_time == -1:
integration_time = agipd_cond.get_integration_time()
if gain_mode == -1:
gain_mode = agipd_cond.get_gain_mode()
else:
gain_mode = AgipdGainMode(gain_mode)
```
%% Cell type:code id: tags:
``` python
if mem_cells is None:
raise ValueError(f"No raw images found in {filename}")
mem_cells_db = mem_cells if mem_cells_db == 0 else mem_cells_db
print(f"Maximum memory cells to calibrate: {mem_cells}")
```
%% Cell type:code id: tags:
``` python
print(f"Using {creation_time} as creation time")
print("Operating conditions are:")
print(f"• Bias voltage: {bias_voltage}")
print(f"• Memory cells: {mem_cells_db}")
print(f"• Acquisition rate: {acq_rate}")
print(f"• Gain setting: {gain_setting}")
print(f"• Gain mode: {gain_mode.name}")
print(f"• Integration time: {integration_time}")
print(f"• Photon Energy: {photon_energy}")
```
%% Cell type:code id: tags:
``` python
if gain_mode:
for to_disable in disable_for_fixed_gain:
if corr_bools.get(to_disable, False):
print(f"Warning: {to_disable} correction was requested, but does not apply to fixed gain mode")
corr_bools[to_disable] = False
```
%% Cell type:code id: tags:
``` python
if use_litframe_device:
# check run for the AgipdLitFrameFinder device
if use_litframe_finder != 'off':
from extra_redu import make_litframe_finder, LitFrameFinderError
from extra_redu.litfrm.utils import litfrm_run_report
if use_litframe_device + ':output' in dc.instrument_sources:
# Use selection provided by the AgipdLitFrameFinder (if the device is recorded)
cell_sel = LitFrameSelection(use_litframe_device, dc, train_ids, max_pulses, energy_threshold)
train_ids = cell_sel.train_ids
else:
# Use range selection (if the device is not recorded)
print(f"WARNING: LitFrameFinder {use_litframe_device} device is not found.")
if use_litframe_finder not in ['auto', 'offline', 'online']:
raise ValueError("Unexpected value in 'use_litframe_finder'.")
inst = karabo_id_control[:3]
litfrm = make_litframe_finder(inst, dc, litframe_device_id)
try:
if use_litframe_finder == 'auto':
r = litfrm.read_or_process()
elif use_litframe_finder == 'offline':
r = litfrm.process()
elif use_litframe_finder == 'online':
r = litfrm.read()
report = litfrm_run_report(r)
print("Lit-frame patterns:")
print(" # trains Np Nd Nf lit frames")
for rec in report:
frmintf = ', '.join(
[':'.join([str(n) for n in slc]) for slc in rec['frames']]
)
trsintf = ':'.join([str(n) for n in rec['trains']])
print(
("{pattern_no:2d} {trsintf:25s} {npulse:4d} "
"{ndataframe:3d} {nframe:3d} [{frmintf}]"
).format(frmintf=frmintf, trsintf=trsintf, **rec)
)
cell_sel = LitFrameSelection(r, train_ids, max_pulses, energy_threshold)
except LitFrameFinderError as err:
print("Cannot use AgipdLitFrameFinder due to:")
print(err)
cell_sel = CellRange(max_pulses, max_cells=mem_cells)
else:
# Use range selection
cell_sel = CellRange(max_pulses, max_cells=mem_cells)
print(cell_sel.msg())
```
%% Cell type:code id: tags:
``` python
actual_photon_energy = None
if use_xgm_device:
# Try to obtain photon energy from XGM device.
wavelength_data = dc[use_xgm_device, 'pulseEnergy.wavelengthUsed']
try:
from scipy.constants import h, c, e
# Read wavelength as a single value and convert to hv.
actual_photon_energy = (h * c / e) / (wavelength_data.as_single_value(rtol=1e-2) * 1e-6)
print(f'Obtained actual photon energy {actual_photon_energy:.3f} keV from {use_xgm_device}')
except ValueError:
if round_photons:
print('WARNING: XGM source available but actual photon energy varies greater than 1%, '
'photon rounding disabled!')
round_photons = False
if actual_photon_energy is None and round_photons:
print('WARNING: Using operating condition for actual photon energy in photon rounding mode, this is NOT reliable!')
actual_photon_energy = photon_energy
```
%% Cell type:markdown id: tags:
## Data processing ##
%% Cell type:code id: tags:
``` python
agipd_corr = AgipdCorrections(
mem_cells,
cell_sel,
h5_data_path=instrument_src,
h5_index_path=index_src,
corr_bools=corr_bools,
gain_mode=gain_mode,
comp_threads=os.cpu_count() // n_cores_files,
train_ids=train_ids
)
agipd_corr.baseline_corr_noise_threshold = -blc_noise_threshold
agipd_corr.hg_hard_threshold = hg_hard_threshold
agipd_corr.mg_hard_threshold = mg_hard_threshold
agipd_corr.cm_dark_min = cm_dark_range[0]
agipd_corr.cm_dark_max = cm_dark_range[1]
agipd_corr.cm_dark_fraction = cm_dark_fraction
agipd_corr.cm_n_itr = cm_n_itr
agipd_corr.noisy_adc_threshold = noisy_adc_threshold
agipd_corr.ff_gain = ff_gain
agipd_corr.actual_photon_energy = actual_photon_energy
agipd_corr.compress_fields = compress_fields
if recast_image_data:
agipd_corr.recast_image_fields['data'] = np.dtype(recast_image_data)
```
%% Cell type:code id: tags:
``` python
module_index_to_karabo_da = {mod: da for (mod, da) in zip(modules, karabo_da)}
```
%% Cell type:code id: tags:
``` python
# Retrieve calibration constants to RAM
agipd_corr.allocate_constants(modules, (3, mem_cells_db, 512, 128))
metadata = cal_tools.tools.CalibrationMetadata(metadata_folder or out_folder)
# NOTE: this notebook will not overwrite calibration metadata file
const_yaml = metadata.get("retrieved-constants", {})
def retrieve_constants(mod):
"""
Retrieve calibration constants and load them to shared memory
Metadata for constants is taken from yml file or retrieved from the DB
"""
err = ""
k_da = module_index_to_karabo_da[mod]
try:
# check if there is a yaml file in out_folder that has the device constants.
if k_da in const_yaml:
when = agipd_corr.initialize_from_yaml(k_da, const_yaml, mod)
print(f"Found constants for {k_da} in calibration_metadata.yml")
else:
# TODO: replace with proper retrieval (as done in pre-correction)
when = agipd_corr.initialize_from_db(
karabo_id=karabo_id,
karabo_da=k_da,
cal_db_interface=cal_db_interface,
creation_time=creation_time,
memory_cells=mem_cells_db,
bias_voltage=bias_voltage,
photon_energy=photon_energy,
gain_setting=gain_setting,
acquisition_rate=acq_rate,
integration_time=integration_time,
module_idx=mod,
only_dark=False,
)
print(f"Queried CalCat for {k_da}")
except Exception as e:
err = f"Error: {e}\nError traceback: {traceback.format_exc()}"
when = None
return err, mod, when, k_da
print(f'Preparing constants (FF: {agipd_corr.corr_bools.get("xray_corr", False)}, PC: {any(agipd_corr.pc_bools)}, '
f'BLC: {any(agipd_corr.blc_bools)})')
ts = perf_counter()
with multiprocessing.Pool(processes=len(modules)) as pool:
const_out = pool.map(retrieve_constants, modules)
print(f"Constants were loaded in {perf_counter()-ts:.01f}s")
```
%% Cell type:code id: tags:
``` python
# allocate memory for images and hists
n_images_max = mem_cells * 256
data_shape = (n_images_max, 512, 128)
agipd_corr.allocate_images(data_shape, n_cores_files)
```
%% Cell type:code id: tags:
``` python
def batches(l, batch_size):
"""Group a list into batches of (up to) batch_size elements"""
start = 0
while start < len(l):
yield l[start:start + batch_size]
start += batch_size
```
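%% Cell type:markdown id: tags:
For illustration only, how `batches` groups a short list of hypothetical file names with a batch size of 3:
%% Cell type:code id: tags:
``` python
# Seven files are processed in groups of 3, 3 and 1 (file names are made up).
example_files = [f"RAW-R0123-AGIPD00-S{i:05d}.h5" for i in range(7)]
for batch in batches(example_files, 3):
    print(len(batch), batch)
```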
%% Cell type:code id: tags:
``` python
def imagewise_chunks(img_counts):
"""Break up the loaded data into chunks of up to chunk_size
Yields (file data slot, start index, stop index)
"""
for i_proc, n_img in enumerate(img_counts):
n_chunks = math.ceil(n_img / chunk_size)
for i in range(n_chunks):
yield i_proc, i * n_img // n_chunks, (i+1) * n_img // n_chunks
```
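%% Cell type:markdown id: tags:
A self-contained sketch of the chunking above, assuming (for the example only) a `chunk_size` of 4: each loaded file slot is split into roughly equal index ranges that are corrected in parallel.
%% Cell type:code id: tags:
``` python
import math

# Illustration only: a file slot with 10 images and chunk_size 4 gives
# math.ceil(10 / 4) == 3 chunks with index ranges [0:3], [3:6], [6:10].
def example_chunks(n_img, chunk):
    n_chunks = math.ceil(n_img / chunk)
    return [(i * n_img // n_chunks, (i + 1) * n_img // n_chunks) for i in range(n_chunks)]

print(example_chunks(10, 4))
```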
%% Cell type:code id: tags:
``` python
step_timer = StepTimer()
```
%% Cell type:code id: tags:
``` python
step_timer.start()
if max_tasks_per_worker == -1:
max_tasks_per_worker = None
with multiprocessing.Pool(maxtasksperchild=max_tasks_per_worker) as pool:
step_timer.done_step('Started pool')
for file_batch in batches(file_list, n_cores_files):
# TODO: Move some printed output to logging or similar
print(f"Processing next {len(file_batch)} files")
step_timer.start()
img_counts = pool.starmap(
agipd_corr.read_file,
zip(range(len(file_batch)), file_batch, [not common_mode]*len(file_batch))
)
step_timer.done_step(f'Loading data from files')
if not any(img_counts):
# Skip any further processing and output if there are no images to
# correct in this file batch.
continue
if mask_zero_std:
# Evaluate zero-data-std mask
pool.starmap(
agipd_corr.mask_zero_std, itertools.product(
range(len(file_batch)),
np.array_split(np.arange(agipd_corr.max_cells), n_cores_correct)
)
)
step_timer.done_step('Mask 0 std')
# Perform offset image-wise correction
pool.starmap(agipd_corr.offset_correction, imagewise_chunks(img_counts))
step_timer.done_step("Offset correction")
if blc_noise or blc_stripes or blc_hmatch:
# Perform image-wise correction
pool.starmap(agipd_corr.baseline_correction, imagewise_chunks(img_counts))
step_timer.done_step("Base-line shift correction")
if common_mode:
# If common-mode correction is enabled, cell selection is only applied
# after the common-mode correction.
# Perform cross-file correction in parallel over ASICs.
pool.starmap(agipd_corr.cm_correction, itertools.product(
range(len(file_batch)), range(16) # 16 ASICs per module
))
step_timer.done_step("Common-mode correction")
img_counts = pool.map(agipd_corr.apply_selected_pulses, range(len(file_batch)))
step_timer.done_step("Applying selected cells after common mode correction")
# Perform image-wise gain correction
pool.starmap(agipd_corr.gain_correction, imagewise_chunks(img_counts))
step_timer.done_step("Gain corrections")
# Save corrected data
pool.starmap(agipd_corr.write_file, [
(i_proc, file_name, str(out_folder / Path(file_name).name.replace("RAW", "CORR")))
for i_proc, file_name in enumerate(file_batch)
])
step_timer.done_step("Save")
```
%% Cell type:code id: tags:
``` python
print(f"Correction of {len(file_list)} files is finished")
print(f"Total processing time {step_timer.timespan():.01f} s")
print(f"Timing summary per batch of {n_cores_files} files:")
step_timer.print_summary()
```
%% Cell type:code id: tags:
``` python
# If the yml file contains "retrieved-constants", a preceding pre-correction
# notebook was already processed and the report is generated from its metadata.
fst_print = True
timestamps = {}
for i, (error, modno, when, k_da) in enumerate(const_out):
qm = cal_tools.tools.module_index_to_qm(modno)
# expose errors while applying correction
if error:
print("Error: {}".format(error) )
if k_da not in const_yaml:
if fst_print:
print("Constants are retrieved with creation time: ")
fst_print = False
module_timestamps = {}
# Print the constants' timestamps only if the correction did not crash
if not error:
print(f"{qm}:")
for key, item in when.items():
if hasattr(item, 'strftime'):
item = item.strftime('%y-%m-%d %H:%M')
when[key] = item
print('{:.<12s}'.format(key), item)
# Store a few timestamps if they exist; add NA to keep the array structure
for key in ['Offset', 'SlopesPC', 'SlopesFF']:
if when and key in when and when[key]:
module_timestamps[key] = when[key]
else:
if error is not None:
module_timestamps[key] = "Err"
else:
module_timestamps[key] = "NA"
timestamps[qm] = module_timestamps
seq = sequences[0] if sequences else 0
if timestamps:
with open(f"{out_folder}/retrieved_constants_s{seq}.yml","w") as fd:
yaml.safe_dump({"time-summary": {f"S{seq}": timestamps}}, fd)
```
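%% Cell type:markdown id: tags:
For reference, the `retrieved_constants_s{seq}.yml` file written above has a small nested layout; a sketch with hypothetical values (the real timestamps are the constant creation times reported by the database):
%% Cell type:code id: tags:
``` python
import yaml

# Hypothetical example of the time-summary layout written above.
example_summary = {"time-summary": {"S0": {"Q1M1": {"Offset": "22-01-30 12:34",
                                                    "SlopesPC": "NA",
                                                    "SlopesFF": "Err"}}}}
print(yaml.safe_dump(example_summary))
```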
%% Cell type:code id: tags:
``` python
if skip_plots:
print('Skipping plots')
import sys
sys.exit(0)
```
%% Cell type:code id: tags:
``` python
def do_3d_plot(data, edges, x_axis, y_axis):
fig = plt.figure(figsize=(10, 10))
ax = fig.gca(projection='3d')
# Make data.
X = edges[0][:-1]
Y = edges[1][:-1]
X, Y = np.meshgrid(X, Y)
Z = data.T
# Plot the surface.
ax.plot_surface(X, Y, Z, cmap=colormap.coolwarm, linewidth=0, antialiased=False)
ax.set_xlabel(x_axis)
ax.set_ylabel(y_axis)
ax.set_zlabel("Counts")
def do_2d_plot(data, edges, y_axis, x_axis):
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111)
extent = [np.min(edges[1]), np.max(edges[1]),
np.min(edges[0]), np.max(edges[0])]
im = ax.imshow(data[::-1, :], extent=extent, aspect="auto",
norm=LogNorm(vmin=1, vmax=max(10, np.max(data))))
ax.set_xlabel(x_axis)
ax.set_ylabel(y_axis)
cb = fig.colorbar(im)
cb.set_label("Counts")
```
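%% Cell type:markdown id: tags:
Both helpers expect 2D histogram counts plus the bin edges along each axis. A minimal synthetic example of the call signature (random numbers, not detector data; the real cells below use `calgs.histogram2d` on AGIPD images):
%% Cell type:code id: tags:
``` python
# Synthetic demonstration of do_2d_plot with np.histogram2d output.
demo_a = np.random.normal(6000, 300, 10_000)
demo_b = np.random.normal(5500, 200, 10_000)
demo_hist, demo_ax, demo_bx = np.histogram2d(
    demo_a, demo_b, bins=(100, 100), range=[[4000, 8192], [4000, 8192]])
do_2d_plot(demo_hist, (demo_ax, demo_bx), "Demo signal (ADU)", "Demo gain (ADU)")
```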
%% Cell type:code id: tags:
``` python
def get_trains_data(data_folder, source, include, detector_id, tid=None, modules=16, fillvalue=None):
"""Load single train for all module
:param data_folder: Path to folder with data
:param source: Data source to be loaded
:param include: Inset of file name to be considered
:param detector_id: The karabo id of the detector to get data for
:param tid: Train Id to be loaded. First train is considered if None is given
:param path: Path to find image data inside h5 file
"""
run_data = RunDirectory(data_folder, include)
if tid is not None:
tid, data = run_data.select(f'{detector_id}/DET/*', source).train_from_id(tid)
else:
tid, data = next(iter(run_data.select(f'{detector_id}/DET/*', source).trains(require_all=True)))
# TODO: remove and use the keep_dims version after updating Extra-data.
# Avoid using default axis with sources of an expected scalar value per train.
nfrm = cell_sel.get_cells_on_trains([tid]).sum()
if nfrm == 1 and source in ['image.blShift', 'image.cellId', 'image.pulseId']:
axis = 0
else:
axis = -3
stacked_data = stack_detector_data(
train=data, data=source, fillvalue=fillvalue, modules=modules, axis=axis)
# Add the cellId dimension when only one cellId is corrected.
# Avoid adding pulse dims for raw data.
if nfrm == 1 and data_folder != run_folder:
stacked_data = stacked_data[np.newaxis, ...]
return tid, stacked_data
```
%% Cell type:code id: tags:
``` python
if dinstance == "AGIPD500K":
geom = AGIPD_500K2GGeometry.from_origin()
else:
geom = AGIPD_1MGeometry.from_quad_positions(quad_pos=[
(-525, 625),
(-550, -10),
(520, -160),
(542.5, 475),
])
```
%% Cell type:code id: tags:
``` python
include = '*S00000*' if sequences is None else f'*S{sequences[0]:05d}*'
tid, corrected = get_trains_data(out_folder, 'image.data', include, karabo_id, modules=nmods)
_, gains = get_trains_data(out_folder, 'image.gain', include, karabo_id, tid, modules=nmods)
_, mask = get_trains_data(out_folder, 'image.mask', include, karabo_id, tid, modules=nmods)
_, blshift = get_trains_data(out_folder, 'image.blShift', include, karabo_id, tid, modules=nmods)
_, cellId = get_trains_data(out_folder, 'image.cellId', include, karabo_id, tid, modules=nmods)
_, pulseId = get_trains_data(out_folder, 'image.pulseId', include, karabo_id, tid, modules=nmods, fillvalue=0)
_, raw = get_trains_data(run_folder, 'image.data', include, karabo_id, tid, modules=nmods)
```
%% Cell type:code id: tags:
``` python
display(Markdown(f'## Preview and statistics for {gains.shape[0]} images of the train {tid} ##\n'))
```
%% Cell type:markdown id: tags:
### Signal vs. Analogue Gain ###
%% Cell type:code id: tags:
``` python
hist, bins_x, bins_y = calgs.histogram2d(raw[:,0,...].flatten().astype(np.float32),
raw[:,1,...].flatten().astype(np.float32),
bins=(100, 100),
range=[[4000, 8192], [4000, 8192]])
do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Analogue gain (ADU)")
do_3d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Analogue gain (ADU)")
```
%% Cell type:markdown id: tags:
### Signal vs. Digitized Gain ###
The following plot shows the signal vs. digitized gain.
%% Cell type:code id: tags:
``` python
hist, bins_x, bins_y = calgs.histogram2d(corrected.flatten().astype(np.float32),
gains.flatten().astype(np.float32), bins=(100, 3),
range=[[-50, 8192], [0, 3]])
do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Gain bit value")
```
%% Cell type:code id: tags:
``` python
print(f"Gain statistics in %")
table = [[f'{gains[gains==0].size/gains.size*100:.02f}',
f'{gains[gains==1].size/gains.size*100:.03f}',
f'{gains[gains==2].size/gains.size*100:.03f}']]
md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
headers=["High", "Medium", "Low"])))
```
%% Cell type:markdown id: tags:
### Intensity per Pulse ###
%% Cell type:code id: tags:
``` python
pulse_range = [np.min(pulseId[pulseId>=0]), np.max(pulseId[pulseId>=0])]
# Modify pulse_range, if only one pulse is selected.
if pulse_range[0] == pulse_range[1]:
pulse_range = [0, pulse_range[1]+int(acq_rate)]
mean_data = np.nanmean(corrected, axis=(2, 3))
hist, bins_x, bins_y = calgs.histogram2d(mean_data.flatten().astype(np.float32),
pulseId.flatten().astype(np.float32),
bins=(100, int(pulse_range[1])),
range=[[-50, 1000], pulse_range])
do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id")
do_3d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id")
hist, bins_x, bins_y = calgs.histogram2d(mean_data.flatten().astype(np.float32),
pulseId.flatten().astype(np.float32),
bins=(100, int(pulse_range[1])),
range=[[-50, 200000], pulse_range])
do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id")
do_3d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id")
```
%% Cell type:markdown id: tags:
### Baseline shift ###
Estimated baseline shift with respect to the total ADU counts of the corrected image.
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
h = ax.hist(blshift.flatten(), bins=100, log=True)
_ = plt.xlabel('Baseline shift [ADU]')
_ = plt.ylabel('Counts')
_ = ax.grid()
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(10, 10))
corrected_ave = np.nansum(corrected, axis=(2, 3))
plt.scatter(corrected_ave.flatten()/10**6, blshift.flatten(), s=0.9)
plt.xlim(-1, 1000)
plt.grid()
plt.xlabel('Illuminated corrected [MADU] ')
_ = plt.ylabel('Estimated baseline shift [ADU]')
```
%% Cell type:code id: tags:
``` python
if cell_id_preview not in cellId[:, 0]:
print(f"WARNING: The selected cell_id_preview value {cell_id_preview} is not available in the corrected data.")
cell_id_preview = cellId[:, 0][0]
cell_idx_preview = 0
print(f"Previewing the first available cellId: {cell_id_preview}.")
else:
cell_idx_preview = np.where(cellId[:, 0] == cell_id_preview)[0][0]
```
%% Cell type:code id: tags:
``` python
display(Markdown('### Raw preview ###\n'))
if cellId.shape[0] != 1:
display(Markdown(f'Mean over images of the RAW data\n'))
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
data = np.mean(raw[slice(*cell_sel.crange), 0, ...], axis=0)
vmin, vmax = get_range(data, 5)
ax = geom.plot_data_fast(data, ax=ax, cmap="jet", vmin=vmin, vmax=vmax)
else:
print("Skipping mean RAW preview for single memory cell, "
f"see single shot image for selected cell ID {cell_id_preview}.")
```
%% Cell type:code id: tags:
``` python
display(Markdown(f'Single shot of the RAW data from cell {cell_id_preview} \n'))
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
vmin, vmax = get_range(raw[cell_idx_preview, 0, ...], 5)
ax = geom.plot_data_fast(raw[cell_idx_preview, 0, ...], ax=ax, cmap="jet", vmin=vmin, vmax=vmax)
```
%% Cell type:code id: tags:
``` python
display(Markdown('### Corrected preview ###\n'))
if cellId.shape[0] != 1:
display(Markdown('### Mean CORRECTED Preview ###\n'))
display(Markdown(f'A mean across train: {tid}\n'))
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
data = np.mean(corrected, axis=0)
vmin, vmax = get_range(data, 7)
ax = geom.plot_data_fast(data, ax=ax, cmap="jet", vmin=-50, vmax=vmax)
else:
print("Skipping mean CORRECTED preview for single memory cell, "
f"see single shot image for selected cell ID {cell_id_preview}.")
```
%% Cell type:code id: tags:
``` python
display(Markdown(f'A single shot of the CORRECTED image from cell {cell_id_preview} \n'))
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
vmin, vmax = get_range(corrected[cell_idx_preview], 7, -50)
vmin = - 50
ax = geom.plot_data_fast(corrected[cell_idx_preview], ax=ax, cmap="jet", vmin=vmin, vmax=vmax)
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
vmin, vmax = get_range(corrected[cell_idx_preview], 5, -50)
nbins = np.int((vmax + 50) / 2)
h = ax.hist(corrected[cell_idx_preview].flatten(),
bins=nbins, range=(-50, vmax),
histtype='stepfilled', log=True)
plt.xlabel('[ADU]')
plt.ylabel('Counts')
ax.grid()
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
vmin, vmax = get_range(corrected, 10, -100)
vmax = np.nanmax(corrected)
if vmax > 50000:
vmax=50000
nbins = np.int((vmax + 100) / 5)
h = ax.hist(corrected.flatten(), bins=nbins,
range=(-100, vmax), histtype='step', log=True, label = 'All')
ax.hist(corrected[gains == 0].flatten(), bins=nbins, range=(-100, vmax),
alpha=0.5, log=True, label='High gain', color='green')
ax.hist(corrected[gains == 1].flatten(), bins=nbins, range=(-100, vmax),
alpha=0.5, log=True, label='Medium gain', color='red')
ax.hist(corrected[gains == 2].flatten(), bins=nbins, range=(-100, vmax),
alpha=0.5, log=True, label='Low gain', color='yellow')
ax.legend()
ax.grid()
plt.xlabel('[ADU]')
plt.ylabel('Counts')
```
%% Cell type:code id: tags:
``` python
display(Markdown('### Maximum GAIN Preview ###\n'))
display(Markdown(f'The per pixel maximum across one train for the digitized gain'))
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
ax = geom.plot_data_fast(np.max(gains, axis=0), ax=ax,
cmap="jet", vmin=-1, vmax=3)
```
%% Cell type:markdown id: tags:
## Bad Pixels ##
The mask contains dedicated entries for all pixels and memory cells as well as all three gain stages. Each mask entry is encoded in 32 bits as:
%% Cell type:code id: tags:
``` python
table = []
for item in BadPixels:
table.append((item.name, "{:032b}".format(item.value)))
md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
headers=["Bad pixel type", "Bit mask"])))
```
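%% Cell type:markdown id: tags:
Because the mask is a bit field, individual defect classes can be selected with a bitwise AND. A short sketch using the `NO_DARK_DATA` flag that also appears below (sketch only, any other `BadPixels` member works the same way):
%% Cell type:code id: tags:
``` python
# Fraction of mask entries carrying the NO_DARK_DATA bit.
no_dark = (mask.astype(np.uint32) & BadPixels.NO_DARK_DATA.value) != 0
print(f"Entries flagged NO_DARK_DATA: {no_dark.mean() * 100:.3f} %")
```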
%% Cell type:code id: tags:
``` python
display(Markdown(f'### Single Shot Bad Pixels ### \n'))
display(Markdown(f'A single shot bad pixel map from cell {cell_id_preview} \n'))
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
geom.plot_data_fast(np.log2(mask[cell_idx_preview]), ax=ax, vmin=0, vmax=32, cmap="jet")
```
%% Cell type:markdown id: tags:
### Percentage of Bad Pixels across one train ###
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
geom.plot_data_fast(np.mean(mask>0, axis=0), vmin=0, ax=ax, vmax=1, cmap="jet")
```
%% Cell type:markdown id: tags:
### Percentage of Bad Pixels across one train. Only Dark Related ###
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
cm = np.copy(mask)
cm[cm > BadPixels.NO_DARK_DATA.value] = 0
ax = geom.plot_data_fast(np.mean(cm>0, axis=0),
vmin=0, ax=ax, vmax=1, cmap="jet")
```
......
%% Cell type:markdown id: tags:
# Gain Characterization #
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/exp/SPB/202030/p900138/scratch/karnem/r0203_r0204_v01/" # the folder to read histograms from, required
out_folder = "" # the folder to output to, required
hist_file_template = "hists_m{:02d}_sum.h5" # the template to use to access histograms
modules = [10] # modules to correct, set to -1 for all, range allowed
raw_folder = "/gpfs/exfel/exp/MID/202030/p900137/raw" # Path to raw image data used to create histograms
proc_folder = "" # Path to corrected image data used to create histograms
run = 449 # of the run of image data used to create histograms
karabo_id = "MID_DET_AGIPD1M-1" # karabo karabo_id
karabo_da = ['-1'] # a list of data aggregators names, Default [-1] for selecting all data aggregators
receiver_id = "{}CH0" # inset for receiver devices
path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data
h5path = 'INSTRUMENT/{}/DET/{}:xtdf/' # path in the HDF5 file to images
h5path_idx = 'INDEX/{}/DET/{}:xtdf/' # path in the HDF5 file to images
h5path_ctrl = '/CONTROL/{}/MDL/FPGA_COMP' # path to control information
karabo_id_control = "MID_IRU_AGIPD1M1" # karabo-id for control device
karabo_da_control = 'AGIPD1MCTRL00' # karabo DA for control information
use_dir_creation_date = True # use the creation data of the input dir for database queries
cal_db_interface = "tcp://max-exfl016:8015#8045" # the database interface to use
cal_db_timeout = 30000 # in milli seconds
local_output = True # output constants locally
db_output = False # output constants to database
# Fit parameters
peak_range = [-30, 30, 35, 70, 95, 135, 145, 220] # where to look for the peaks, [a0, b0, a1, b1, ...] exactly 8 elements
peak_width_range = [0, 30, 0, 35, 0, 40, 0, 45] # fit limits on the peak widths, [a0, b0, a1, b1, ...] exactly 8 elements
peak_norm_range = [0.0, -1, 0, -1, 0, -1, 0, -1] #
# Bad-pixel thresholds (gain evaluation error). Contribute to BadPixel bit "Gain_Evaluation_Error"
peak_lim = [-30, 30] # Limit of position of noise peak
d0_lim = [10, 80] # hard limits for distance between noise and first peak
peak_width_lim = [0.9, 1.55, 0.95, 1.65] # hard limits on the peak widths for first and second peak, in units of the noise peak. 4 parameters.
chi2_lim = [0, 3.0] # Hard limit on chi2/nDOF value
intensity_lim = 15 # Threshold on standard deviation of a histogram in ADU. Contribute to BadPixel bit "No_Entry"
gain_lim = [0.8, 1.2] # Threshold on gain in relative number. Contribute to BadPixel bit "Gain_deviation"
cell_range = [1, 3] # range of cell to be considered, [0,0] for all
pixel_range = [0, 0, 32, 32] # range of pixels x1,y1,x2,y2 to consider [0,0,512,128] for all
max_bins = 0 # Maximum number of bins to consider, 0 for all bins
batch_size = [1, 8, 8] # batch size: [cell,x,y]
fit_range = [0, 0] # range of a histogram considered for fitting in ADU. Dynamically evaluated in case [0,0]
n_peaks_fit = 4 # Number of gaussian peaks to fit including noise peak
fix_peaks = False # Fix distance between photon peaks
do_minos = False # Use Minuit's additional Minos feature to evaluate errors.
sigma_limit = 0. # If >0, repeat fit keeping only bins within mu +- sigma_limit*sigma
# Detector conditions
max_cells = 0 # number of memory cells used, set to 0 to automatically infer
bias_voltage = 300 # Bias voltage
acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine
gain_setting = 0.1 # the gain setting, use 0.1 to try to auto-determine
photon_energy = 8.05 # photon energy in keV
integration_time = -1 # integration time, negative values for auto-detection.
# NOTE: The below parameters are needed for the summary notebook when running through xfel-calibrate.
mem_cells = -1 # number of memory cells used, negative values for auto-detection.
bias_voltage = 300 # Bias voltage.
acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine.
gain_setting = -1 # the gain setting, negative values for auto-detection.
photon_energy = 8.05 # photon energy in keV.
integration_time = -1 # integration time, negative values for auto-detection.
```
%% Cell type:code id: tags:
``` python
import glob
import os
import traceback
import warnings
from multiprocessing import Pool
import h5py
import matplotlib.pyplot as plt
import numpy as np
import sharedmem
import XFELDetAna.xfelpyanatools as xana
from cal_tools.agipdutils_ff import (
BadPixelsFF,
any_in,
fit_n_peaks,
gaussian,
gaussian_sum,
get_mask,
get_starting_parameters,
set_par_limits,
)
from cal_tools.ana_tools import get_range, save_dict_to_hdf5
from iminuit import Minuit
from XFELDetAna.plotting.heatmap import heatmapPlot
from XFELDetAna.plotting.simpleplot import simplePlot
# %load_ext autotime
%matplotlib inline
warnings.filterwarnings('ignore')
```
%% Cell type:code id: tags:
``` python
peak_range = np.reshape(peak_range,(4,2))
peak_width_range = np.reshape(peak_width_range,(4,2))
peak_width_lim = np.reshape(peak_width_lim,(2,2))
peak_norm_range = [None if x == -1 else x for x in peak_norm_range]
peak_norm_range = np.reshape(peak_norm_range,(4,2))
module = modules[0]
```
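%% Cell type:markdown id: tags:
For illustration, the flat 8-element lists above are reshaped into one `[low, high]` window per fitted peak; a minimal sketch with the default `peak_range` values:
%% Cell type:code id: tags:
``` python
# Sketch only: the flat list [a0, b0, a1, b1, ...] becomes one [low, high]
# search window per peak (noise peak first, then photon peaks).
flat_example = [-30, 30, 35, 70, 95, 135, 145, 220]
print(np.reshape(flat_example, (4, 2)))
```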
%% Cell type:code id: tags:
``` python
def idx_gen(batch_start, batch_size):
"""
This generator iterates across pixels and memory cells, starting
from batch_start until batch_start + batch_size
"""
for c_idx in range(batch_start[0], batch_start[0]+batch_size[0]):
for x_idx in range(batch_start[1], batch_start[1]+batch_size[1]):
for y_idx in range(batch_start[2], batch_start[2]+batch_size[2]):
yield(c_idx, x_idx, y_idx)
```
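%% Cell type:markdown id: tags:
A tiny, illustration-only check of the iteration order produced by `idx_gen` (hypothetical start indices):
%% Cell type:code id: tags:
``` python
# A batch starting at cell 0 and pixel (4, 8) with batch size [1, 2, 2]
# visits the four pixels of that cell in row-major order.
print(list(idx_gen((0, 4, 8), (1, 2, 2))))
# [(0, 4, 8), (0, 4, 9), (0, 5, 8), (0, 5, 9)]
```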
%% Cell type:code id: tags:
``` python
n_pixels_x = pixel_range[2]-pixel_range[0]
n_pixels_y = pixel_range[3]-pixel_range[1]
hist_data = {}
with h5py.File(f"{in_folder}/{hist_file_template.format(module)}", 'r') as hf:
hist_data['cellId'] = np.array(hf['cellId'][()])
hist_data['hRange'] = np.array(hf['hRange'][()])
hist_data['nBins'] = np.array(hf['nBins'][()])
if cell_range == [0,0]:
cell_range[1] = hist_data['cellId'].shape[0]
if max_bins == 0:
max_bins = hist_data['nBins']
hist_data['cellId'] = hist_data['cellId'][cell_range[0]:cell_range[1]]
hist_data['hist'] = np.array(hf['hist'][cell_range[0]:cell_range[1], :max_bins, :])
n_cells = cell_range[1]-cell_range[0]
hist_data['hist'] = hist_data['hist'].reshape(n_cells, max_bins, 512, 128)
hist_data['hist'] = hist_data['hist'][:,:,pixel_range[0]:pixel_range[2],pixel_range[1]:pixel_range[3]]
print(f'Data shape {hist_data["hist"].shape}')
bin_edges = np.linspace(hist_data['hRange'][0], hist_data['hRange'][1], int(hist_data['nBins']+1))
x = (bin_edges[1:] + bin_edges[:-1])[:max_bins] * 0.5
batches = []
for c_idx in range(0, n_cells, batch_size[0]):
for x_idx in range(0, n_pixels_x, batch_size[1]):
for y_idx in range(0, n_pixels_y, batch_size[2]):
batches.append([c_idx,x_idx,y_idx])
print(f'Number of batches {len(batches)}')
```
%% Cell type:code id: tags:
``` python
def fit_batch(batch_start):
current_result = {}
prev = None
for c_idx, x_idx, y_idx in idx_gen(batch_start, batch_size):
try:
y = hist_data['hist'][c_idx, :, x_idx, y_idx]
if prev is None:
prev, _ = get_starting_parameters(x, y, peak_range, n_peaks=n_peaks_fit)
if fit_range == [0, 0]:
frange = (prev[f'g0mean']-2*prev[f'g0sigma'],
prev[f'g{n_peaks_fit-1}mean'] + prev[f'g{n_peaks_fit-1}sigma'])
else:
frange = fit_range
set_par_limits(prev, peak_range, peak_norm_range,
peak_width_range, n_peaks_fit)
minuit = fit_n_peaks(x, y, prev, frange,
do_minos=do_minos, n_peaks=n_peaks_fit,
fix_d01=fix_peaks, sigma_limit=sigma_limit,)
ndof = np.rint(frange[1]-frange[0])-len(minuit.args) ## FIXME: this line is wrong if fix_peaks is True
current_result['chi2_ndof'] = minuit.fval/ndof
res = minuit.fitarg
if fix_peaks : ## set g2 and g3 mean correctly
for i in range(2,n_peaks_fit):
d = res[f'g1mean'] - res[f'g0mean']
res[f'g{i}mean'] = res[f'g0mean'] + d*i
current_result.update(res)
current_result.update(minuit.get_fmin())
fit_result['chi2_ndof'][c_idx, x_idx, y_idx] = current_result['chi2_ndof']
for key in res.keys():
if key in fit_result:
fit_result[key][c_idx, x_idx, y_idx] = res[key]
fit_result['mask'][c_idx, x_idx, y_idx] = get_mask(current_result,
peak_lim,
d0_lim, chi2_lim,
peak_width_lim)
except Exception as e:
fit_result['mask'][c_idx, x_idx,
y_idx] = BadPixelsFF.FIT_FAILED.value
print(c_idx, x_idx, y_idx, e, traceback.format_exc())
if fit_result['mask'][c_idx, x_idx, y_idx] == 0:
prev = res
else:
prev = None
```
%% Cell type:markdown id: tags:
## Single fit ##
Left plot shows starting parameters for fitting. Right plot shows result of the fit. Errors are evaluated with minos.
%% Cell type:code id: tags:
``` python
hist = hist_data['hist'][1,:,1, 1]
prev, shapes = get_starting_parameters(x, hist, peak_range, n_peaks=n_peaks_fit)
if fit_range == [0, 0]:
frange = (prev[f'g0mean']-2*prev[f'g0sigma'],
prev[f'g3mean'] + prev[f'g3sigma'])
else:
frange = fit_range
set_par_limits(prev, peak_range, peak_norm_range,
peak_width_range, n_peaks=n_peaks_fit)
minuit = fit_n_peaks(x, hist, prev, frange,
do_minos=True, n_peaks=n_peaks_fit,
fix_d01=fix_peaks,
sigma_limit=sigma_limit,
)
print (minuit.get_fmin())
minuit.print_matrix()
print(minuit.get_param_states())
```
%% Cell type:code id: tags:
``` python
res = minuit.fitarg
if fix_peaks :
for i in range(2,n_peaks_fit):
d = res[f'g1mean'] - res[f'g0mean']
res[f'g{i}mean'] = res[f'g0mean'] + d*i
err = minuit.errors
p = minuit.args
ya = np.arange(0,1e4)
y = gaussian_sum(x,n_peaks_fit, *p)
peak_colors = ['g', 'y', 'b', 'orange']
peak_hist = hist.copy()
d=[]
if sigma_limit > 0 :
sel2 = (np.abs(x - res['g0mean']) < sigma_limit*res['g0sigma']) | \
(np.abs(x - res['g1mean']) < sigma_limit*res['g1sigma']) | \
(np.abs(x - res['g2mean']) < sigma_limit*res['g2sigma']) | \
(np.abs(x - res['g3mean']) < sigma_limit*res['g3sigma'])
peak_hist[~sel2] = 0
valley_hist = hist.copy()
valley_hist[sel2] = 0
d.append({'x': x,
'y': valley_hist.astype(np.float64),
'y_err': np.sqrt(valley_hist),
'drawstyle': 'bars',
'errorstyle': 'bars',
'transparency': '95%',
'errorcoarsing': 3,
'label': 'X-ray Data'
})
htitle = f'X-ray Data, (μ±{sigma_limit:0.1f}σ)'
else :
htitle = 'X-ray Data'
d.append({'x': x,
'y': peak_hist.astype(np.float64),
'y_err': np.sqrt(peak_hist),
'drawstyle': 'bars',
'errorstyle': 'bars',
'errorcoarsing': 3,
'label': htitle,
}
)
d.append({'x': x,
'y': y,
'y2': (hist-y)/np.sqrt(hist),
'drawstyle':'line',
'drawstyle2': 'steps-mid',
'label': 'Fit'
}
)
for i in range(n_peaks_fit):
d.append({'x': x,
'y': gaussian(x, res[f'g{i}n'], res[f'g{i}mean'], res[f'g{i}sigma']),
'drawstyle':'line',
'color': peak_colors[i],
})
d.append({'x': np.full_like(ya, res[f'g{i}mean']),
'y': ya,
'drawstyle': 'line',
'linestyle': 'dashed',
'color': peak_colors[i],
'label': f'peak {i} = {res[f"g{i}mean"]:0.1f} $ \pm $ {err[f"g{i}mean"]:0.2f} ADU' })
```
%% Cell type:code id: tags:
``` python
fig, (ax1, ax2) = plt.subplots(1, 2)
fig.set_size_inches(16, 7)
for i, shape in enumerate(shapes):
idx = shape[3]
ax1.errorbar(
x[idx], hist[idx],
np.sqrt(hist[idx]),
marker='+', ls='',
)
yg = gaussian(x[idx], *shape[:3])
l = f'Peak {i}: {shape[1]:0.1f} $ \pm $ {shape[2]:0.2f} ADU'
ax1.plot(x[idx], yg, label=l)
ax1.grid(True)
ax1.set_xlabel("Signal [ADU]")
ax1.set_ylabel("Counts")
ax1.legend(ncol=2)
_ = xana.simplePlot(
d,
use_axis=ax2,
x_label='Signal [ADU]',
y_label='Counts',
secondpanel=True, y_log=False,
x_range=(frange[0], frange[1]),
y_range=(1., np.max(hist)*1.6),
legend='top-left-frame-ncol2',
)
plt.show()
```
%% Cell type:markdown id: tags:
## All fits ##
%% Cell type:code id: tags:
``` python
# Allocate memory for fit results
fit_result = {}
keys = list(minuit.fitarg.keys())
keys = [x for x in keys if 'limit_' not in x and 'fix_' not in x]
keys += ['chi2_ndof', 'mask', 'gain']
for key in keys:
dtype = 'f4'
if key == 'mask':
dtype = 'i4'
fit_result[key] = sharedmem.empty([n_cells, n_pixels_x, n_pixels_y], dtype=dtype)
```
%% Cell type:code id: tags:
``` python
# Perform fitting
with Pool() as pool:
const_out = pool.map(fit_batch, batches)
```
%% Cell type:code id: tags:
``` python
# Evaluate bad pixels
fit_result['gain'] = (fit_result['g1mean'] - fit_result['g0mean'])/photon_energy
# Calculate histogram width and evaluate cut
h_sums = np.sum(hist_data['hist'], axis=1)
hist_norm = hist_data['hist'] / h_sums[:, None, :, :]
hist_mean = np.sum(hist_norm[:, :max_bins, ...] *
x[None, :, None, None], axis=1)
hist_sqr = (x[None, :, None, None] - hist_mean[:, None, ...])**2
hist_std = np.sqrt(np.sum(hist_norm[:, :max_bins, ...] * hist_sqr, axis=1))
fit_result['mask'][hist_std<intensity_lim] |= BadPixelsFF.NO_ENTRY.value
# Bad pixel on gain deviation
gains = np.copy(fit_result['gain'])
gains[fit_result['mask']>0] = np.nan
gain_mean = np.nanmean(gains, axis=(1,2))
fit_result['mask'][fit_result['gain'] > gain_mean[:,None,None]*gain_lim[1] ] |= BadPixelsFF.GAIN_DEVIATION.value
fit_result['mask'][fit_result['gain'] < gain_mean[:,None,None]*gain_lim[0] ] |= BadPixelsFF.GAIN_DEVIATION.value
```
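%% Cell type:markdown id: tags:
A note on the width cut above (derived directly from the code): the normalised histogram of each (cell, pixel) is treated as a probability distribution over the signal axis, with $p_i$ the normalised bin content and $x_i$ the bin centre, giving $\mu = \sum_i p_i x_i$ and $\sigma = \sqrt{\sum_i p_i (x_i - \mu)^2}$; pixels with $\sigma$ below `intensity_lim` are flagged as `NO_ENTRY`.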
%% Cell type:code id: tags:
``` python
# Save fit results
os.makedirs(out_folder, exist_ok=True)
out_name = f'{out_folder}/fits_m{module:02d}.h5'
print(f'Save to file: {out_name}')
save_dict_to_hdf5({'data': fit_result}, out_name)
```
%% Cell type:markdown id: tags:
## Summary across cells ##
%% Cell type:code id: tags:
``` python
labels = [
"Noise peak [ADU]",
"First photon peak [ADU]",
f"gain [ADU/keV] $\gamma$={photon_energy} [keV]",
"$\chi^2$/nDOF",
"Fraction of bad pixels",
]
for i, key in enumerate(['g0mean', 'g1mean', 'gain', 'chi2_ndof', 'mask']):
fig = plt.figure(figsize=(20,5))
ax = fig.add_subplot(121)
data = fit_result[key]
if key == 'mask':
data = data > 0
vmin, vmax = [0, 1]
else:
vmin, vmax = get_range(data, 5)
_ = heatmapPlot(
np.mean(data, axis=0).T,
add_panels=False, cmap='viridis', use_axis=ax,
vmin=vmin, vmax=vmax, lut_label=labels[i]
)
if key != 'mask':
vmin, vmax = get_range(data, 7)
ax = fig.add_subplot(122)
_ = xana.histPlot(
ax, data.flatten(),
bins=45,range=[vmin, vmax],
log=True,color='red',histtype='stepfilled'
)
ax.set_xlabel(labels[i])
ax.set_ylabel("Counts")
```
%% Cell type:markdown id: tags:
## Histograms of fit parameters ##
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(10, 5))
ax = fig.add_subplot(111)
a = ax.hist(hist_std.flatten(), bins=100, range=(0,100) )
ax.plot([intensity_lim, intensity_lim], [0, np.nanmax(a[0])], linewidth=1.5, color='red' )
ax.set_xlabel('Histogram width [ADU]', fontsize=14)
ax.set_ylabel('Number of histograms', fontsize=14)
ax.set_title(f'{hist_std[hist_std<intensity_lim].shape[0]} histograms below threshold in {intensity_lim} ADU',
fontsize=14, fontweight='bold')
ax.grid()
ax.set_yscale('log')
```
%% Cell type:code id: tags:
``` python
def plot_par_distr(par):
fig = plt.figure(figsize=(16, 5))
sel = fit_result['mask'] == 0
for i in range(n_peaks_fit) :
data=fit_result[f"g{i}{par}"]
plt_range=(-1,50)
if par =='mean':
plt_range=[peak_range[i][0] ,peak_range[i][1]]
num_bins = int(plt_range[1] - plt_range[0])
ax = fig.add_subplot(1,n_peaks_fit,i+1)
_ = xana.histPlot(ax,data.flatten(),
bins= num_bins,range=plt_range,
log=True,color='red',
label='all fits',)
a = ax.hist(data[sel].flatten(),
bins=num_bins, range=plt_range,
log=True,color='g',
label='good fits only',
)
ax.set_xlabel(f"g{i} {par} [ADU]")
ax.legend()
plot_par_distr('mean')
plot_par_distr('sigma')
```
%% Cell type:code id: tags:
``` python
sel = fit_result['mask'] == 0
dsets = {'d01 [ADU]':fit_result[f"g1mean"]-fit_result[f"g0mean"],
'gain [ADU/keV]':fit_result[f"gain"],
'gain relative to module mean':fit_result[f"gain"]/np.nanmean(gain_mean),
}
fig = plt.figure(figsize=(16,5))
for i, (par, data) in enumerate(dsets.items()):
ax = fig.add_subplot(1, 3, i+1)
plt_range=get_range(data, 10)
num_bins = 100
_ = xana.histPlot(ax,data.flatten(),
bins= num_bins,range=plt_range,
log=True,color='red',
label='all fits',)
a = ax.hist(data[sel].flatten(),
bins=num_bins, range=plt_range,
log=True,color='g',
label='good fits only',
)
ax.set_xlabel(f"{par}")
ax.legend()
if 'd01' in par :
ax.axvline(d0_lim[0])
ax.axvline(d0_lim[1])
if 'rel' in par :
ax.axvline(gain_lim[0])
ax.axvline(gain_lim[1])
```
%% Cell type:markdown id: tags:
## Summary across pixels ##
Mean and median values are calculated across all pixels for each memory cell.
%% Cell type:code id: tags:
``` python
def plot_error_band(key, x, ax):
cdata = np.copy(fit_result[key])
cdata[fit_result['mask']>0] = np.nan
mean = np.nanmean(cdata, axis=(1,2))
median = np.nanmedian(cdata, axis=(1,2))
std = np.nanstd(cdata, axis=(1,2))
mad = np.nanmedian(np.abs(cdata - median[:,None,None]), axis=(1,2))
ax.plot(x, mean, 'k', color='#3F7F4C', label=" mean value ")
ax.plot(x, median, 'o', color='red', label=" median value ")
ax.fill_between(x, mean-std, mean+std,
alpha=0.6, edgecolor='#3F7F4C', facecolor='#7EFF99',
linewidth=1, linestyle='dashdot', antialiased=True,
label=" mean value $ \pm $ std ")
ax.fill_between(x, median-mad, median+mad,
alpha=0.3, edgecolor='red', facecolor='red',
linewidth=1, linestyle='dashdot', antialiased=True,
label=" median value $ \pm $ mad ")
if f'error_{key}' in fit_result:
cerr = np.copy(fit_result[f'error_{key}'])
cerr[fit_result['mask']>0] = np.nan
meanerr = np.nanmean(cerr, axis=(1,2))
ax.fill_between(x, mean-meanerr, mean+meanerr,
alpha=0.6, edgecolor='#089FFF', facecolor='#089FFF',
linewidth=1, linestyle='dashdot', antialiased=True,
label=" mean fit error ")
x = np.linspace(*cell_range, n_cells)
for i, key in enumerate(['g0mean', 'g1mean', 'gain', 'chi2_ndof']):
fig = plt.figure(figsize=(10, 5))
ax = fig.add_subplot(111)
plot_error_band(key, x, ax)
ax.set_xlabel('Memory Cell ID', fontsize=14)
ax.set_ylabel(labels[i], fontsize=14)
ax.grid()
ax.legend()
```
%% Cell type:markdown id: tags:
## Cut flow ##
%% Cell type:code id: tags:
``` python
fig, ax = plt.subplots()
fig.set_size_inches(10, 5)
n_bars = 8
x = np.arange(n_bars)
width = 0.3
msk = fit_result['mask']
n_fits = np.prod(msk.shape)
y = [any_in(msk, BadPixelsFF.FIT_FAILED.value),
any_in(msk, BadPixelsFF.FIT_FAILED.value | BadPixelsFF.ACCURATE_COVAR.value),
any_in(msk, BadPixelsFF.FIT_FAILED.value | BadPixelsFF.ACCURATE_COVAR.value |
BadPixelsFF.CHI2_THRESHOLD.value),
any_in(msk, BadPixelsFF.FIT_FAILED.value | BadPixelsFF.ACCURATE_COVAR.value |
BadPixelsFF.CHI2_THRESHOLD.value | BadPixelsFF.GAIN_THRESHOLD.value),
any_in(msk, BadPixelsFF.FIT_FAILED.value | BadPixelsFF.ACCURATE_COVAR.value |
BadPixelsFF.CHI2_THRESHOLD.value | BadPixelsFF.GAIN_THRESHOLD.value |
BadPixelsFF.NOISE_PEAK_THRESHOLD.value),
any_in(msk, BadPixelsFF.FIT_FAILED.value | BadPixelsFF.ACCURATE_COVAR.value |
BadPixelsFF.CHI2_THRESHOLD.value | BadPixelsFF.GAIN_THRESHOLD.value |
BadPixelsFF.NOISE_PEAK_THRESHOLD.value | BadPixelsFF.PEAK_WIDTH_THRESHOLD.value),
any_in(msk, BadPixelsFF.FIT_FAILED.value | BadPixelsFF.ACCURATE_COVAR.value |
BadPixelsFF.CHI2_THRESHOLD.value | BadPixelsFF.GAIN_THRESHOLD.value |
BadPixelsFF.NOISE_PEAK_THRESHOLD.value | BadPixelsFF.PEAK_WIDTH_THRESHOLD.value
| BadPixelsFF.NO_ENTRY.value),
any_in(msk, BadPixelsFF.FIT_FAILED.value | BadPixelsFF.ACCURATE_COVAR.value |
BadPixelsFF.CHI2_THRESHOLD.value | BadPixelsFF.GAIN_THRESHOLD.value |
BadPixelsFF.NOISE_PEAK_THRESHOLD.value | BadPixelsFF.PEAK_WIDTH_THRESHOLD.value
| BadPixelsFF.NO_ENTRY.value| BadPixelsFF.GAIN_DEVIATION.value)
]
y2 = [any_in(msk, BadPixelsFF.FIT_FAILED.value),
any_in(msk, BadPixelsFF.ACCURATE_COVAR.value),
any_in(msk, BadPixelsFF.CHI2_THRESHOLD.value),
any_in(msk, BadPixelsFF.GAIN_THRESHOLD.value),
any_in(msk, BadPixelsFF.NOISE_PEAK_THRESHOLD.value),
any_in(msk, BadPixelsFF.PEAK_WIDTH_THRESHOLD.value),
any_in(msk, BadPixelsFF.NO_ENTRY.value),
any_in(msk, BadPixelsFF.GAIN_DEVIATION.value)
]
y = (1 - np.sum(y, axis=(1,2,3))/n_fits)*100
y2 = (1 - np.sum(y2, axis=(1,2,3))/n_fits)*100
labels = ['Fit failed',
'Accurate covar',
'Chi2/nDOF',
'Gain',
'Noise peak',
'Peak width',
'No Entry',
'Gain deviation']
ax.bar(x, y2, width, label='Only this cut')
ax.bar(x, y, width, label='Cut flow')
ax.set_xticks(x)
ax.set_xticklabels(labels, rotation=90)
ax.set_ylim(y[5]-0.5, 100)
ax.grid(True)
ax.legend()
plt.show()
```
......
%% Cell type:markdown id: tags:
# Gain Characterization Summary #
%% Cell type:code id: tags:
``` python
in_folder = "" # in this notebook, in_folder is not used as the data source is in the destination folder
out_folder = "" # the folder to output to, required
metadata_folder = "" # Directory containing calibration_metadata.yml when run by xfel-calibrate
hist_file_template = "hists_m{:02d}_sum.h5"
proc_folder = "" # Path to corrected image data used to create histograms and validation plots
raw_folder = "/gpfs/exfel/exp/MID/202030/p900137/raw" # folder of raw data. This is used to save information of source data of generated constants, required
run = 449 # runs of image data used to create histograms
karabo_id = "MID_DET_AGIPD1M-1" # karabo karabo_id
ctrl_source_template = '{}/MDL/FPGA_COMP' # path to control information
karabo_id_control = "MID_EXP_AGIPD1M1" # karabo-id for control device
use_dir_creation_date = True # use the creation data of the input dir for database queries
cal_db_interface = "tcp://max-exfl016:8015#8045" # the database interface to use
cal_db_timeout = 30000 # in milli seconds
local_output = True # output constants locally
db_output = False # output constants to database
# Fit parameters
peak_range = [-30,30,35,65,80,130,145,200] # where to look for the peaks, [a0, b0, a1, b1, ...] exactly 8 elements
peak_width_range = [0, 30, 0, 35, 0, 40, 0, 45] # fit limits on the peak widths, [a0, b0, a1, b1, ...] exactly 8 elements
# Bad-pixel thresholds
d0_lim = [10, 70] # hard limits for d0 value (distance between noise and first peak)
peak_width_lim = [0.97, 1.43, 1.03, 1.57] # hard limits on the peak widths, [a0, b0, a1, b1, ...] in units of the noise peak. 4 parameters.
chi2_lim = [0,3.0] # Hard limit on chi2/nDOF value
gain_lim = [0.80, 1.2] # Threshold on gain in relative number. Contribute to BadPixel bit "Gain_deviation"
cell_range = [1,5] # range of cell to be considered, [0,0] for all
pixel_range = [0,0,512,128] # range of pixels x1,y1,x2,y2 to consider [0,0,512,128] for all
max_bins = 250 # Maximum number of bins to consider
batch_size = [1,8,8] # batch size: [cell,x,y]
n_peaks_fit = 4 # Number of gaussian peaks to fit including noise peak
fix_peaks = True # Fix distance between photon peaks
# Detector conditions
max_cells = 0 # number of memory cells used, set to 0 to automatically infer
bias_voltage = 0. # Bias voltage
acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine
gain_setting = -1 # the gain setting, negative values for auto-detection
photon_energy = 8.05 # photon energy in keV
integration_time = -1 # integration time, negative values for auto-detection.
mem_cells = -1 # number of memory cells used, negative values for auto-detection.
bias_voltage = 0. # Bias voltage
acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine
gain_setting = -1 # the gain setting, negative values for auto-detection.
photon_energy = 8.05 # photon energy in keV
integration_time = -1 # integration time, negative values for auto-detection.
```
%% Cell type:code id: tags:
``` python
import glob
import os
import re
import traceback
import warnings
from multiprocessing import Pool
import h5py
import matplotlib.pyplot as plt
import numpy as np
import tabulate
from cal_tools.agipdlib import AgipdCtrl
from cal_tools.agipdutils_ff import (
BadPixelsFF,
any_in,
fit_n_peaks,
gaussian_sum,
get_starting_parameters,
)
from cal_tools.ana_tools import get_range, save_dict_to_hdf5
from cal_tools.enums import BadPixels
from cal_tools.tools import (
get_dir_creation_date,
get_pdu_from_db,
get_report,
module_index_to_qm,
send_to_db
)
from dateutil import parser
from extra_data import H5File, RunDirectory, stack_detector_data
from extra_geom import AGIPD_1MGeometry, AGIPD_500K2GGeometry
from iCalibrationDB import Conditions, Constants, Detectors
from iminuit import Minuit
from IPython.display import HTML, Latex, Markdown, display
from XFELDetAna.plotting.heatmap import heatmapPlot
from XFELDetAna.plotting.simpleplot import simplePlot
%matplotlib inline
warnings.filterwarnings('ignore')
```
%% Cell type:code id: tags:
``` python
peak_range = np.reshape(peak_range,(4,2))
```
%% Cell type:code id: tags:
``` python
# Get operation conditions
ctrl_source = ctrl_source_template.format(karabo_id_control)
run_folder = f'{raw_folder}/r{run:04d}/'
raw_dc = RunDirectory(run_folder)
# Read operating conditions from AGIPD00 files
instrument_src_mod = [
s for s in list(raw_dc.all_sources) if "0CH" in s][0]
ctrl_src = [
s for s in list(raw_dc.all_sources) if ctrl_source in s][0]
# Evaluate creation time
creation_time = None
if use_dir_creation_date:
creation_time = get_dir_creation_date(raw_folder, run)
agipd_cond = AgipdCtrl(
run_dc=raw_dc,
image_src=instrument_src_mod,
ctrl_src=ctrl_src,
raise_error=False, # to be able to process very old data where some control values are missing
)
if mem_cells < 0:
mem_cells = agipd_cond.get_num_cells()
if mem_cells is None:
raise ValueError(f"No raw images found in {run_folder}")
if acq_rate == 0.:
acq_rate = agipd_cond.get_acq_rate()
if gain_setting < 0:
gain_setting = agipd_cond.get_gain_setting(creation_time)
if bias_voltage == 0.:
bias_voltage = agipd_cond.get_bias_voltage(karabo_id_control)
if integration_time < 0:
integration_time = agipd_cond.get_integration_time()
# Evaluate detector instance for mapping
instrument = karabo_id.split("_")[0]
if instrument == "HED":
nmods = 8
else:
nmods = 16
print(f"Using {creation_time} as creation time")
print(f"Operating conditions are:\n• Bias voltage: {bias_voltage}\n• Memory cells: {mem_cells}\n"
f"• Acquisition rate: {acq_rate}\n• Gain setting: {gain_setting}\n• Integration time: {integration_time}\n"
f"• Photon Energy: {photon_energy}\n")
```
%% Cell type:code id: tags:
``` python
# Load constants for all modules
keys = ['g0mean', 'g1mean', 'gain', 'chi2_ndof', 'mask']
all_keys = set(keys)
for i in range(n_peaks_fit) :
all_keys.add(f'g{i}mean')
all_keys.add(f'g{i}sigma')
fit_data = {}
labels = {'g0mean': 'Noise peak position [ADU]',
'g1mean': 'First photon peak [ADU]',
'gain': f"Gain [ADU/keV], $\gamma$={photon_energy} [keV]",
'chi2_ndof': '$\chi^2$/nDOF',
'mask': 'Fraction of bad pixels over cells' }
modules = []
karabo_da = []
for mod in range(nmods):
qm = module_index_to_qm(mod)
fit_data[mod] = {}
try:
hf = h5py.File(f'{out_folder}/fits_m{mod:02d}.h5', 'r')
shape = hf['data/g0mean'].shape
for key in keys:
fit_data[mod][key] = hf[f'data/{key}'][()]
print(f"{in_folder}/{hist_file_template.format(mod)}")
modules.append(mod)
karabo_da.append(f"AGIPD{mod:02d}")
except Exception as e:
err = f"Error: {e}\nError traceback: {traceback.format_exc()}"
print(f"No fit data available for module {qm}")
```
%% Cell type:code id: tags:
``` python
# Calculate SlopesFF and BadPixels to be send to DB
bpmask = {}
slopesFF = {}
for mod in modules:
bpmask[mod] = np.zeros(fit_data[mod]['mask'].shape).astype(np.int32)
bpmask[mod][ any_in(fit_data[mod]['mask'], BadPixelsFF.NO_ENTRY.value) ] = BadPixels.FF_NO_ENTRIES.value
bpmask[mod][ any_in(fit_data[mod]['mask'],
BadPixelsFF.GAIN_DEVIATION.value) ] |= BadPixels.FF_GAIN_DEVIATION.value
bpmask[mod][ any_in(fit_data[mod]['mask'],
BadPixelsFF.FIT_FAILED.value | BadPixelsFF.ACCURATE_COVAR.value |
BadPixelsFF.CHI2_THRESHOLD.value | BadPixelsFF.GAIN_THRESHOLD.value |
BadPixelsFF.NOISE_PEAK_THRESHOLD.value | BadPixelsFF.PEAK_WIDTH_THRESHOLD.value) ] |= BadPixels.FF_GAIN_EVAL_ERROR.value
# Set the value of bad pixels to the per-cell mean gain across pixels of the module
slopesFF[mod] = np.copy(fit_data[mod]['gain'])
slopesFF[mod][fit_data[mod]['mask']>0] = np.nan
gain_mean = np.nanmean(slopesFF[mod], axis=(1,2))
for i in range(slopesFF[mod].shape[0]):
slopesFF[mod][i][ fit_data[mod]['mask'][i] > 0 ] = gain_mean[i]
```
%% Cell type:code id: tags:
``` python
# Read report path and create file location tuple to add with the injection
proposal = list(filter(None, raw_folder.strip('/').split('/')))[-2]
file_loc = f'Proposal: {proposal}, Run: {run}'
report = get_report(metadata_folder)
```
%% Cell type:code id: tags:
``` python
# set the operating condition
condition = Conditions.Illuminated.AGIPD(mem_cells, bias_voltage, 9.2,
pixels_x=512, pixels_y=128, beam_energy=None,
acquisition_rate=acq_rate, gain_setting=gain_setting,
integration_time=integration_time)
# Modify acceptable deviations for integration time condition if and only if
# the integration time is not using the standard value (12).
if integration_time != 12:
for p in condition.parameters:
if p.name == 'Integration Time':
p.lower_deviation = 5
p.upper_deviation = 5
# Retrieve a list of all modules corresponding to processed karabo_das
db_modules = get_pdu_from_db(karabo_id, karabo_da, Constants.AGIPD.SlopesFF(),
condition, cal_db_interface,
snapshot_at=creation_time)
```
%% Cell type:code id: tags:
``` python
# Send constants to DB
def send_const(mod, pdu):
try:
# gain
constant = Constants.AGIPD.SlopesFF()
constant.data = np.moveaxis(np.moveaxis(slopesFF[mod], 0, 2), 0, 1)
send_to_db(
pdu, karabo_id, constant, condition, file_loc,
report, cal_db_interface, creation_time,
timeout=cal_db_timeout,
)
# bad pixels
constant_bp = Constants.AGIPD.BadPixelsFF()
constant_bp.data = np.moveaxis(np.moveaxis(bpmask[mod], 0, 2), 0, 1)
send_to_db(
pdu, karabo_id, constant_bp, condition, file_loc,
report, cal_db_interface, creation_time,
timeout=cal_db_timeout,
)
except Exception as e:
err = f"Error: {e}\nError traceback: {traceback.format_exc()}"
when = None
# Check whether the constants have the expected shape
if db_output:
if slopesFF[modules[0]].shape == (mem_cells, 512, 128):
with Pool(processes=len(modules)) as pool:
const_out = pool.starmap(send_const, zip(modules, db_modules))
else:
print(f"Constants are not sent to the DB because of the shape mismatsh")
print(f"Expected {(mem_cells, 512, 128)}, observed {slopesFF[modules[0]].shape}")
condition_dict ={}
for entry in condition.to_dict()['parameters']:
key = entry.pop('parameter_name')
del entry['description']
del entry['flg_available']
condition_dict[key] = entry
# Create the same file structure as database constants files, in which
# each constant type has its corresponding condition and data.
if local_output:
for mod, pdu in zip(modules, db_modules):
qm = module_index_to_qm(mod)
file = f"{out_folder}/slopesff_bpmask_module_{qm}.h5"
dic = {
pdu:{
'SlopesFF': {
0:{
'condition': condition_dict,
'data': np.moveaxis(np.moveaxis(slopesFF[mod],0,2),0,1)}
},
'BadPixelsFF':{
0:{
'condition': condition_dict,
'data': np.moveaxis(np.moveaxis(bpmask[mod],0,2),0,1)}
},
}
}
save_dict_to_hdf5(dic, file)
```
%% Cell type:code id: tags:
``` python
# Define the AGIPD geometry
# TODO: find a better way to do this
if instrument == "HED":
geom = AGIPD_500K2GGeometry.from_origin()
else:
geom = AGIPD_1MGeometry.from_quad_positions(quad_pos=[
(-525, 625),
(-550, -10),
(520, -160),
(542.5, 475),
])
```
%% Cell type:code id: tags:
``` python
# Create the arrays that will be used for figures.
# A dictionary contains all the data for each of the processing stages (gains, mean, slopesFF...).
# Each array corresponds to the data for all processed modules.
# These are updated with their fit/slopes data in the following loops.
if cell_range==[0,0]:
cell_range[1] = shape[0]
const_data = {}
for key in keys:
const_data[key] = np.full((nmods, shape[0],512,128), np.nan)
for i in range(nmods):
if key in fit_data[i]:
const_data[key][i,:,pixel_range[0]:pixel_range[2],
pixel_range[1]:pixel_range[3]] = fit_data[i][key]
const_data['slopesFF'] = np.full((nmods, shape[0],512,128), np.nan)
labels['slopesFF'] = f'slopesFF [ADU/keV], $\gamma$={photon_energy} [keV]'
for i in range(nmods):
if i in slopesFF:
const_data['slopesFF'][i,:,pixel_range[0]:pixel_range[2],
pixel_range[1]:pixel_range[3]] = slopesFF[i]
```
%% Cell type:markdown id: tags:
## Summary across pixels ##
%% Cell type:code id: tags:
``` python
for key in const_data.keys():
fig = plt.figure(figsize=(20,20))
ax = fig.add_subplot(111)
if key=='mask':
data = np.nanmean(const_data[key]>0, axis=1)
vmin, vmax = (0,1)
else:
data = np.nanmean(const_data[key], axis=1)
vmin, vmax = get_range(data, 5)
ax = geom.plot_data_fast(data, ax=ax, cmap="jet", vmin=vmin, vmax=vmax, figsize=(20,20))
_ = ax.set_title(labels[key])
```
%% Cell type:markdown id: tags:
## Summary histograms ##
%% Cell type:code id: tags:
``` python
sel = (const_data['mask'] == 0)
module_mean = np.nanmean(const_data[f"gain"],axis=(1,2,3))
module_mean = module_mean[:,np.newaxis,np.newaxis,np.newaxis]
dsets = {'d01 [ADU]':const_data[f"g1mean"]-const_data[f"g0mean"],
'gain [ADU/keV]':const_data[f"gain"],
'gain relative to module mean':const_data[f"gain"]/module_mean,
}
fig = plt.figure(figsize=(16,5))
for i, (par, data) in enumerate(dsets.items()):
ax = fig.add_subplot(1, 3, i+1)
plt_range= np.nanmin(data), np.nanmax(data)
if 'd01' in par :
ax.axvline(d0_lim[0])
ax.axvline(d0_lim[1])
elif 'rel' in par :
ax.axvline(gain_lim[0])
ax.axvline(gain_lim[1])
num_bins = 100
_ = ax.hist(data.flatten(),
bins= num_bins,range=plt_range,
log=True,color='red',
label='all fits',)
a = ax.hist(data[sel].flatten(),
bins=num_bins, range=plt_range,
log=True,color='g',
label='good fits only',
)
ax.set_xlabel(f"{par}")
ax.legend()
```
%% Cell type:markdown id: tags:
## Summary across cells ##
Good pixels only.
%% Cell type:code id: tags:
``` python
for key in const_data.keys():
data = np.copy(const_data[key])
if key=='mask':
data = data>0
else:
data[const_data['mask']>0] = np.nan
d = []
for i in range(nmods):
d.append({'x': np.arange(data[i].shape[0]),
'y': np.nanmean(data[i], axis=(1,2)),
'drawstyle': 'steps-pre',
'label': f'{i}',
'linewidth': 2,
'linestyle': '--' if i>7 else '-'
})
fig = plt.figure(figsize=(15, 6))
ax = fig.add_subplot(111)
_ = simplePlot(d, xrange=(-12, 510),
x_label='Memory Cell ID',
y_label=labels[key],
use_axis=ax,
legend='top-left-frame-ncol8',)
ylim = ax.get_ylim()
ax.set_ylim(ylim[0], ylim[1] + np.abs(ylim[1]-ylim[0])*0.2)
ax.grid()
```
%% Cell type:markdown id: tags:
## Summary table ##
%% Cell type:code id: tags:
``` python
table = []
for i in modules:
table.append((i,
f"{np.nanmean(slopesFF[i]):0.1f} +- {np.nanstd(slopesFF[i]):0.2f}",
f"{np.nanmean(bpmask[i]>0)*100:0.1f} ({np.nansum(bpmask[i]>0)})"
))
all_SFF = np.array([list(sff) for sff in slopesFF.values()])
all_MSK = np.array([list(msk) for msk in bpmask.values()])
table.append(('overall',
f"{np.nanmean(all_SFF):0.1f} +- {np.nanstd(all_SFF):0.2f}",
f"{np.nanmean(all_MSK>0)*100:0.1f} ({np.nansum(all_MSK>0)})"
))
md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
headers=["Module", "Gain [ADU/keV]", "Bad pixels [%(Count)]"])))
```
%% Cell type:markdown id: tags:
## Performance plots
%% Cell type:code id: tags:
``` python
def get_trains_data(run_folder, source, include, tid=None):
"""
Load a single train for all modules
:param run_folder: Path to folder with data
:param source: Data source to be loaded
:param include: Inset of file name to be considered
:param tid: Train Id to be loaded. First train is considered if None is given
"""
run_data = RunDirectory(run_folder, include)
if tid:
tid, data = run_data.select('*/DET/*', source).train_from_id(tid)
return tid, stack_detector_data(data, source, modules=nmods)
else:
for tid, data in run_data.select('*/DET/*', source).trains(require_all=True):
return tid, stack_detector_data(data, source, modules=nmods)
return None, None
include = '*S00000*'
tid, orig = get_trains_data(f'{proc_folder}/r{run:04d}/', 'image.data', include)
orig = orig[cell_range[0]:cell_range[1], ...]
```
%% Cell type:code id: tags:
``` python
# FIXME: mask bad pixels from median
# mask = const_data['BadPixelsFF']
corrections = const_data['slopesFF'] # (16, shape[0], 512, 128), where shape[0] = cell_range[1] - cell_range[0]
corrections = np.moveaxis(corrections, 1, 0) # (shape[0],16,512,128)
rel_corr = corrections/np.nanmedian(corrections)
corrected = orig / rel_corr
```
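%% Cell type:markdown id: tags:
The validation above divides the data by the SlopesFF values normalised to their global median, so pixel-to-pixel gain variations are flattened while the overall ADU scale is preserved. A synthetic sketch of the same operation (hypothetical numbers, not detector data):
%% Cell type:code id: tags:
``` python
# Sketch only: a flat illumination seen through varying per-pixel gains is
# restored to a uniform response by dividing by gain / median(gain).
demo_gain = np.array([0.9, 1.0, 1.1, 1.0])   # hypothetical relative gains
demo_signal = 100.0 * demo_gain              # flat signal modulated by the gains
demo_rel = demo_gain / np.nanmedian(demo_gain)
print(demo_signal / demo_rel)                # -> [100. 100. 100. 100.]
```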
%% Cell type:markdown id: tags:
### Mean value not corrected (train 0)
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20,20))
ax = fig.add_subplot(111)
odata = np.nanmean(orig, axis=0)
vmin, vmax = get_range(odata, 5)
ax = geom.plot_data_fast(odata, ax=ax, cmap="jet", vmin=vmin, vmax=vmax, figsize=(20,20))
_ = ax.set_title("Original data, mean across one train")
```
%% Cell type:markdown id: tags:
### Mean value corrected (train 0)
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20,20))
ax = fig.add_subplot(111)
cdata = np.nanmean(corrected, axis=0)
ax = geom.plot_data_fast(cdata, ax=ax, cmap="jet", vmin=vmin, vmax=vmax, figsize=(20,20))
_ = ax.set_title("Corrected data, mean across one train")
```
%% Cell type:markdown id: tags:
### Laplace transform of mean image
%% Cell type:code id: tags:
``` python
from scipy.ndimage import laplace
cmax = np.max(cdata)
omax = np.max(odata)
clap = np.zeros_like(cdata)
olap = np.zeros_like(odata)
for i in range(nmods) :
clap[i] = np.abs(laplace(cdata[i].astype(float)/cmax))
olap[i] = np.abs(laplace(odata[i].astype(float)/omax))
fig = plt.figure(figsize=(20,10))
vmin, vmax = get_range(olap, 2)
ax = fig.add_subplot(121)
ax = geom.plot_data_fast(olap, ax=ax, cmap="jet", vmin=vmin, vmax=vmax, )
_ = ax.set_title("Laplace (original data)")
ax = fig.add_subplot(122)
ax = geom.plot_data_fast(clap, ax=ax, cmap="jet", vmin=vmin, vmax=vmax, )
_ = ax.set_title("Laplace (gain corrected data)")
```
%% Cell type:markdown id: tags:
### Histogram of corrected and uncorrected spectrum (train 0)
%% Cell type:code id: tags:
``` python
######################################
# FIT PEAKS
######################################
x_range = [peak_range[0][0], peak_range[-1][-1]]
nb = x_range[1] - x_range[0]+1
sel = ~np.isnan(corrected)
fig = plt.figure(figsize=(10, 5))
ax = fig.add_subplot(111)
y,xe, _ = ax.hist(corrected[sel].flatten(), bins=nb, range=x_range, label='corrected', alpha=0.5)
# get the bin centers from the bin edges
xc=xe[:-1]+(xe[1]-xe[0])/2
pars, _ = get_starting_parameters(xc, y, peak_range,4)
minuit = fit_n_peaks(xc, y, pars, x_range,fix_d01=False,sigma_limit=1)
pc = minuit.args
resc=minuit.fitarg
yfc = gaussian_sum(xc,4, *pc)
plt.plot(xc, yfc, label='corrected fit')
y,_, _ = ax.hist(orig[sel].flatten(), bins=nb, range=x_range, label='original',alpha=0.5)
pars, _ = get_starting_parameters(xc, y, peak_range,4)
minuit = fit_n_peaks(xc, y, pars, x_range,fix_d01=False,sigma_limit=1)
po = minuit.args
reso=minuit.fitarg
yfo = gaussian_sum(xc,4, *po)
plt.plot(xc, yfo, label='original fit')
plt.title(f"Signal spectrum, first train")
plt.xlabel('[ADU]')
plt.legend()
plt.show()
```
%% Cell type:markdown id: tags:
### Summary table ###
%% Cell type:code id: tags:
``` python
from scipy.stats import median_absolute_deviation as mad
table = []
headers = ["Parameter",
"Value (original data)",
"Value (gain corrected data)",
"Relative difference"]
for i in range(4):
table.append((f"Sigma{i} (ADU)",
f"{reso[f'g{i}sigma']:0.2f} ",
f"{resc[f'g{i}sigma']:0.2f} ",
f"{(reso[f'g{i}sigma']-resc[f'g{i}sigma'])/reso[f'g{i}sigma']:0.2f} ",
))
ovar = np.std(odata)
cvar = np.std(cdata)
table.append((f"RMS of mean image",
f"{ovar:0.3f} ",
f"{cvar:0.3f} ",
f"{(ovar-cvar)/ovar:0.3f} ",
))
omin, omax = get_range(odata, 5)
cmin, cmax = get_range(cdata, 5)
ovar = np.std(odata[(odata > omin) & (odata<omax)])
cvar = np.std(cdata[(cdata > cmin) & (cdata<cmax)])
table.append((f"RMS of mean image (mu+-5sigma)",
f"{ovar:0.3f} ",
f"{cvar:0.3f} ",
f"{(ovar-cvar)/ovar:0.3f} ",
))
ovar = mad(odata.flatten())
cvar = mad(cdata.flatten())
table.append((f"MAD of mean image",
f"{ovar:0.3f} ",
f"{cvar:0.3f} ",
f"{(ovar-cvar)/ovar:0.3f} ",
))
ovar = np.median(olap)
cvar = np.median(clap)
table.append((f"Median Laplace",
f"{ovar:0.3f} ",
f"{cvar:0.3f} ",
f"{(ovar-cvar)/ovar:0.3f} ",
))
md = display(Latex(tabulate.tabulate(table,
tablefmt='latex',
headers=headers)))
```
......