Update AGIPD correction to use library

4100fcf3 · Steffen Hauf · fe762d92 · 4100fcf3
Commit 4100fcf3 authored 6 years ago by Steffen Hauf
--- a/notebooks/AGIPD/AGIPD_Correct_and_Verify.ipynb
+++ b/notebooks/AGIPD/AGIPD_Correct_and_Verify.ipynb
@@ -13,7 +13,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
@@ -59,7 +59,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
@@ -71,15 +71,6 @@
      "/gpfs/exfel/data/scratch/haufs/clean_cal/karabo/extern/lib/python3.4/importlib/_bootstrap.py:321: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n",
      "  return f(*args, **kwds)\n"
     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Connecting to profile noDB\n",
-      "Working in IL Mode: False. Actual cells in use are: 176\n",
-      "Outputting to /gpfs/exfel/exp/SPB/201831/p900039/proc//r0273\n"
-     ]
    }
   ],
   "source": [
@@ -143,7 +134,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
@@ -199,7 +190,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
@@ -266,48 +257,7 @@
   "metadata": {
    "collapsed": false
   },
-   "outputs": [
+   "outputs": [],
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Processing a total of 16 sequence files in chunks of 16\n"
-     ]
-    },
-    {
-     "data": {
-      "text/latex": [
-       "\\begin{tabular}{rlrl}\n",
-       "\\hline\n",
-       "   \\# & module   &   \\# module & file                                                                     \\\\\n",
-       "\\hline\n",
-       "   0 & Q1M1     &          0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD00-S00000.h5 \\\\\n",
-       "   1 & Q1M2     &          0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD01-S00000.h5 \\\\\n",
-       "   2 & Q1M3     &          0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD02-S00000.h5 \\\\\n",
-       "   3 & Q1M4     &          0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD03-S00000.h5 \\\\\n",
-       "   4 & Q2M1     &          0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD04-S00000.h5 \\\\\n",
-       "   5 & Q2M2     &          0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD05-S00000.h5 \\\\\n",
-       "   6 & Q2M3     &          0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD06-S00000.h5 \\\\\n",
-       "   7 & Q2M4     &          0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD07-S00000.h5 \\\\\n",
-       "   8 & Q3M1     &          0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD08-S00000.h5 \\\\\n",
-       "   9 & Q3M2     &          0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD09-S00000.h5 \\\\\n",
-       "  10 & Q3M3     &          0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD10-S00000.h5 \\\\\n",
-       "  11 & Q3M4     &          0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD11-S00000.h5 \\\\\n",
-       "  12 & Q4M1     &          0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD12-S00000.h5 \\\\\n",
-       "  13 & Q4M2     &          0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD13-S00000.h5 \\\\\n",
-       "  14 & Q4M3     &          0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD14-S00000.h5 \\\\\n",
-       "  15 & Q4M4     &          0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD15-S00000.h5 \\\\\n",
-       "\\hline\n",
-       "\\end{tabular}"
-      ],
-      "text/plain": [
-       "<IPython.core.display.Latex object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
   "source": [
    "import copy\n",
    "from IPython.display import HTML, display, Markdown, Latex\n",
@@ -337,15 +287,7 @@
   "metadata": {
    "collapsed": false
   },
-   "outputs": [
+   "outputs": [],
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Running 16 tasks parallel\n"
-     ]
-    }
-   ],
   "source": [
    "import copy\n",
    "from functools import partial\n",
@@ -500,7 +442,7 @@
    "        \n",
    "        for rr in r:\n",
    "            if rr is not None:\n",
-    "                hl, hh, hg, hdg, low_edges, high_edges, signal_edges, dig_edges = rr\n",
+    "                hl, hh, hg, hdg, low_edges, high_edges, signal_edges, dig_signal_edges = rr\n",
    "                if hl is not None:  # any one being None will also make the others None\n",
    "                    hists_signal_low += hl.astype(np.float64)\n",
    "                    hists_signal_high += hh.astype(np.float64)\n",

 %% Cell type:markdown id: tags:
 # AGIPD Offline Correction #
 Author: European XFEL Detector Group, Version: 1.0
 Offline Calibration for the AGIPD Detector
 %% Cell type:code id: tags:
 ``` python
 in_folder = "/gpfs/exfel/exp/SPB/201831/p900039/raw" # the folder to read data from, required
 run = 273 # run to process, required
 out_folder =  "/gpfs/exfel/exp/SPB/201831/p900039/proc/" # the folder to output to, required
 calfile =  "/gpfs/exfel/exp/SPB/201831/p900039/usr/calibration_constants230818.h5" # path to calibration file. Leave empty if all data should come from DB
 sequences = [0] # sequences to correct, set to -1 for all, range allowed
 mem_cells = 176 # memory cells in data
 interlaced = False # whether data is in interlaced layout
 overwrite = True # set to True if existing data should be overwritten
 no_relative_gain = False # do not do relative gain correction
 cluster_profile = "noDB" # ipyparallel profile name handed to Client(profile=...)
 max_pulses = 500 # forwarded to AgipdCorrections; also the second dimension of the per-pulse signal histograms
 local_input = False # NOTE(review): not referenced in the cells visible here - confirm it is still needed
 bias_voltage = 300 # forwarded to the calibration DB query via dbparms; presumably in volts - TODO confirm
 cal_db_interface = "tcp://max-exfl015:5005" # the database interface to use
 use_dir_creation_date = False # use the creation date of the input dir for database queries
 sequences_per_node = 2 # number of sequence files per cluster node if run as slurm job, set to 0 to not run SLURM parallel
 photon_energy = 9.2 # photon energy in keV
 index_v = 2 # version of RAW index type
 nodb = False # if set only file-based constants will be used
 def balance_sequences(in_folder, run, sequences, sequences_per_node):
    """Split the sequence numbers found on disk into per-node work lists.

    :param in_folder: folder containing the ``rNNNN`` run directories
    :param run: run number, used to build the ``rNNNN`` directory name
    :param sequences: sequence numbers to subtract from those found on disk;
        returned unchanged when ``sequences_per_node`` is 0
    :param sequences_per_node: divisor used to derive the number of chunks;
        0 disables the balancing altogether
    :return: ``sequences`` itself if ``sequences_per_node`` is 0, otherwise a
        list of lists of sequence numbers
    """
    import glob
    import re
    import numpy as np
    # balancing switched off: hand the request back untouched
    if sequences_per_node == 0:
        return sequences
    run_pattern = "{}/r{:04d}/*-S*.h5".format(in_folder, run)
    # sequence number is the digit run between "-S" and the extension
    found = {int(re.findall(r".*-S([0-9]*).h5", path)[0])
             for path in glob.glob(run_pattern)}
    found -= set(sequences)
    n_chunks = len(found)//sequences_per_node+1
    return [chunk.tolist() for chunk in np.array_split(list(found), n_chunks)]
 ```
 %% Cell type:code id: tags:
 ``` python
 import sys
 from collections import OrderedDict
 # make sure a cluster is running with ipcluster start --n=32, give it a while to start
 import os
 import h5py
 import numpy as np
 import matplotlib
 matplotlib.use("agg")
 import matplotlib.pyplot as plt
 from ipyparallel import Client
 print("Connecting to profile {}".format(cluster_profile))
 view = Client(profile=cluster_profile)[:]
 view.use_dill()
 from iCalibrationDB import ConstantMetaData, Constants, Conditions, Detectors, Versions
 from cal_tools.cal_tools import (gain_map_files, parse_runs, run_prop_seq_from_path, get_notebook_name,
                                 get_dir_creation_date, get_constant_from_db)
 il_mode = interlaced
 # interlaced layout halves the number of cells that are actually used
 max_cells = mem_cells//2 if il_mode else mem_cells
 gains = np.arange(3)
 cells = np.arange(max_cells)
 creation_time = None
 if use_dir_creation_date:
    creation_time = get_dir_creation_date(in_folder, run)
    print("Using {} as creation time".format(creation_time))
 # Strip trailing slashes *before* appending the run directory. The previous
 # check ran after the run directory had been appended and therefore could
 # never trigger.
 in_folder = "{}/r{:04d}".format(in_folder.rstrip("/"), run)
 print("Working in IL Mode: {}. Actual cells in use are: {}".format(il_mode, max_cells))
 # -1 as first entry means "all sequences"; downstream code expects None for that
 if sequences[0] == -1:
    sequences = None
 do_rel_gain = not no_relative_gain
 QUADRANTS = 4
 MODULES_PER_QUAD = 4
 DET_FILE_INSET = "AGIPD"
 CHUNK_SIZE = 512
 MAX_PAR = 32
 # os.path.join avoids the doubled separator ("proc//r0273") the string
 # formatting produced whenever out_folder ended in a slash
 out_folder = os.path.join(out_folder, os.path.basename(in_folder))
 print("Outputting to {}".format(out_folder))
 if not os.path.exists(out_folder):
    os.makedirs(out_folder)
 elif not overwrite:
    # exception type kept as-is for compatibility with existing callers
    raise AttributeError("Output path exists! Exiting")
 ```
 %% Output
    /gpfs/exfel/data/scratch/haufs/clean_cal/karabo/extern/lib/python3.4/importlib/_bootstrap.py:321: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
      return f(*args, **kwds)
-    Connecting to profile noDB
-    Working in IL Mode: False. Actual cells in use are: 176
-    Outputting to /gpfs/exfel/exp/SPB/201831/p900039/proc//r0273
 %% Cell type:code id: tags:
 ``` python
 def combine_stack(d, sdim):
    """Paste 16 per-module image stacks into one NaN-padded detector canvas.

    :param d: indexable of 16 arrays, one per module. Each destination window
        is 128 rows x 512 columns and the data is axis-swapped before pasting,
        so each entry presumably has shape (sdim, 512, 128) - TODO confirm
    :param sdim: length of the leading (image) axis of the canvas
    :return: array of shape (sdim, 2048, 2048); pixels not covered by a module
        stay NaN
    """
    combined = np.zeros((sdim, 2048,2048))
    combined[...] = np.nan
    # dy is a running row offset that accumulates as modules are placed
    dy = 0
    for i in range(16):
        if i < 8:
            # modules 0-7: placed at columns starting at 1024 (1049 for 4-7)
            dx = -512
            if i > 3:
                dx -= 25
            mx = 1
            my = i % 8
            # swap the last two axes, then flip along the last axis
            combined[:, my*128+dy:(my+1)*128+dy,
                     mx*512-dx:(mx+1)*512-dx] = np.rollaxis(d[i],2,1)[:,:,::-1]
            dy += 30
            if i == 3:
                # additional offset after the first four modules
                dy += 30
        elif i < 12:
            # modules 8-11: placed at columns starting at 482
            dx = 80 - 50
            if i == 8:
                # restart the running row offset for this group
                dy = 4*30 + 30 +50 -30
            mx = 1
            my = i % 8 +4
            # swap the last two axes, then flip along the middle axis
            combined[:, my*128+dy:(my+1)*128+dy,
                     mx*512-dx:(mx+1)*512-dx] = np.rollaxis(d[i],2,1)[:,::-1,:]
            dy += 30
        else:
            # modules 12-15: placed at columns starting at 462
            dx = 100 - 50
            # NOTE(review): this branch only runs for i >= 12, so the test
            # below is never true and dy keeps its accumulated value -
            # confirm whether `i == 12` was intended
            if i == 11:
                dy = 20
            mx = 1
            # my is negative for i = 12 and 13; the row window still starts at
            # a positive index because of the accumulated dy
            my = i - 14
            combined[:, my*128+dy:(my+1)*128+dy,
                     mx*512-dx:(mx+1)*512-dx] = np.rollaxis(d[i],2,1)[:,::-1,:]
            dy += 30
    return combined
 ```
 %% Cell type:code id: tags:
 ``` python
 # set everything up filewise
 from queue import Queue
 from collections import OrderedDict
 #if not os.path.exists(out_folder):
 #    os.makedirs(out_folder)
 #elif not overwrite:
 #    raise AttributeError("Output path exists! Exiting")
 def map_modules_from_files(filelist):
    """Group input files by detector module.

    A file belongs to a module when its path contains the module infix
    ``DET_FILE_INSET`` followed by the zero-padded module number.

    :param filelist: iterable of file paths
    :return: tuple ``(module_files, mod_ids, total_sequences, sequences_qm)``:
        an OrderedDict mapping module name ("Q1M1" ...) to a Queue of its
        files, an OrderedDict mapping module name to module number, the total
        number of matched files, and a dict of matched files per module
    """
    module_files = OrderedDict()
    mod_ids = OrderedDict()
    total_sequences = 0
    sequences_qm = {}
    for quadrant in range(0, QUADRANTS):
        for module in range(0, MODULES_PER_QUAD):
            name = "Q{}M{}".format(quadrant + 1, module + 1)
            module_files[name] = Queue()
            # use the constant instead of a hard-coded 4 so numbering stays
            # consistent with the loop bounds
            num = quadrant * MODULES_PER_QUAD + module
            mod_ids[name] = num
            file_infix = "{}{:02d}".format(DET_FILE_INSET, num)
            sequences_qm[name] = 0
            for fname in filelist:
                if file_infix in fname:
                    module_files[name].put(fname)
                    total_sequences += 1
                    sequences_qm[name] += 1
    return module_files, mod_ids, total_sequences, sequences_qm
 # collect the .h5 files of the run directory, optionally filtered by sequence
 dirlist = sorted(os.listdir(in_folder))
 file_list = []
 for entry in dirlist:
    abs_entry = "{}/{}".format(in_folder, entry)
    # regular files with an .h5 extension only
    is_h5_file = os.path.isfile(abs_entry) and os.path.splitext(abs_entry)[1] == ".h5"
    if not is_h5_file:
        continue
    if sequences is None:
        # no sequence filter requested: take the file as it is
        file_list.append(abs_entry)
        continue
    # a file is added once for every requested sequence whose suffix it contains
    for seq in sequences:
        if "{:05d}.h5".format(seq) in abs_entry:
            file_list.append(os.path.abspath(abs_entry))
 mapped_files, mod_ids, total_sequences, sequences_qm = map_modules_from_files(file_list)
 # never run more tasks in parallel than there are files
 MAX_PAR = min(MAX_PAR, total_sequences)
 ```
 %% Cell type:markdown id: tags:
 ## Processed Files ##
 %% Cell type:code id: tags:
 ``` python
 import copy
 from IPython.display import HTML, display, Markdown, Latex
 import tabulate
 print("Processing a total of {} sequence files in chunks of {}".format(total_sequences, MAX_PAR))
 table = []
 # copy.copy is shallow: the Queue objects are shared with mapped_files, so
 # draining them here empties the originals as well (restored further down)
 mfc = copy.copy(mapped_files)
 ti = 0
 for k, files in mfc.items():
    i = 0
    while not files.empty():
        f = files.get()
        # the module name is only shown on the first row of each module
        table.append((ti, k if i == 0 else "", i, f))
        i += 1
        ti += 1
 table_latex = tabulate.tabulate(table, tablefmt='latex',
                                 headers=["#", "module", "# module", "file"])
 md = display(Latex(table_latex))
 # restore the queue
 mapped_files, mod_ids, total_sequences, sequences_qm = map_modules_from_files(file_list)
 ```
-%% Output
-    Processing a total of 16 sequence files in chunks of 16
-    \begin{tabular}{rlrl}
-    \hline
-       \# & module   &   \# module & file                                                                     \\
-    \hline
-       0 & Q1M1     &          0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD00-S00000.h5 \\
-       1 & Q1M2     &          0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD01-S00000.h5 \\
-       2 & Q1M3     &          0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD02-S00000.h5 \\
-       3 & Q1M4     &          0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD03-S00000.h5 \\
-       4 & Q2M1     &          0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD04-S00000.h5 \\
-       5 & Q2M2     &          0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD05-S00000.h5 \\
-       6 & Q2M3     &          0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD06-S00000.h5 \\
-       7 & Q2M4     &          0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD07-S00000.h5 \\
-       8 & Q3M1     &          0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD08-S00000.h5 \\
-       9 & Q3M2     &          0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD09-S00000.h5 \\
-      10 & Q3M3     &          0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD10-S00000.h5 \\
-      11 & Q3M4     &          0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD11-S00000.h5 \\
-      12 & Q4M1     &          0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD12-S00000.h5 \\
-      13 & Q4M2     &          0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD13-S00000.h5 \\
-      14 & Q4M3     &          0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD14-S00000.h5 \\
-      15 & Q4M4     &          0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD15-S00000.h5 \\
-    \hline
-    \end{tabular}
 %% Cell type:code id: tags:
 ``` python
 import copy
 from functools import partial
 def correct_module(max_cells, do_rel_gain, index_v, CHUNK_SIZE, total_sequences, sequences_qm,
                   bins_gain_vs_signal, bins_signal_low_range, bins_signal_high_range,
                   bins_dig_gain_vs_signal, max_pulses, dbparms, fileparms, nodb, inp):
    """Correct one AGIPD sequence file and report the outcome to InfluxDB.

    All imports are local to the function; it is bound with
    ``functools.partial`` and mapped over work items elsewhere in this
    notebook.

    :param max_cells: passed to ``AgipdCorrections``; also recorded as the
        ``mem_cells`` tag of the InfluxDB entry
    :param do_rel_gain: forwarded to ``AgipdCorrections(do_rel_gain=...)``
    :param index_v: not used inside this function
    :param CHUNK_SIZE: only recorded as the ``chunksize`` tag
    :param total_sequences: only recorded as the ``total_sequences`` tag
    :param sequences_qm: mapping module name -> number of files, recorded as
        the ``sequences_module`` tag
    :param bins_gain_vs_signal: forwarded to ``AgipdCorrections``
    :param bins_signal_low_range: forwarded to ``AgipdCorrections``
    :param bins_signal_high_range: forwarded to ``AgipdCorrections``
    :param bins_dig_gain_vs_signal: forwarded to ``AgipdCorrections``
    :param max_pulses: forwarded to ``AgipdCorrections``
    :param dbparms: passed to ``initialize_from_db`` unless ``nodb`` is set
    :param fileparms: passed to ``initialize_from_file`` when not ``""``
    :param nodb: if true the database is not queried; it is also passed as
        ``with_dark`` to ``initialize_from_file``
    :param inp: work item ``(filename, filename_out, channel, qm)``
    :return: ``None`` if ``get_valid_image_idx`` raises ``IOError``, otherwise
        the tuple ``(hists_signal_low, hists_signal_high,
        hists_gain_vs_signal, hists_dig_gain_vs_signal, low_edges,
        high_edges, signal_edges, dig_signal_edges)``
    :raises Exception: any error raised during correction is re-raised after
        a failure entry has been written to InfluxDB
    """
    import h5py
    import socket
    from datetime import datetime
    import re
    import os
    from influxdb import InfluxDBClient
    from cal_tools.agipdlib import AgipdCorrections
    # NOTE(review): host and credentials are hard-coded here; consider moving
    # them to configuration
    client = InfluxDBClient('exflqr18318', 8086, 'root', 'root', 'calstats')
    def create_influx_entry(run, proposal, qm, sequence, filesize, chunksize,
                            total_sequences, success, runtime, reason=""):
        return {
            "measurement": "run_correction",
            "tags": {
                "host": socket.gethostname(),
                "run": run,
                "proposal": proposal,
                "mem_cells": max_cells,
                "sequence": sequence,
                "module": qm,
                "filesize": filesize,
                "chunksize": chunksize,
                "total_sequences": total_sequences,
                "sequences_module": sequences_qm[qm],
                "detector": "AGIPD",
                "instrument": "SPB",
            },
            "time": datetime.utcnow().isoformat(),
            "fields": {
                "success": success,
                "reason": reason,
                "runtime": runtime,
            }
        }
    filename, filename_out, channel, qm = inp
    start = datetime.now()
    def report(success, reason):
        # run, proposal and sequence are parsed from the input file path
        run = re.findall(r'.*r([0-9]{4}).*', filename)[0]
        proposal = re.findall(r'.*p([0-9]{6}).*', filename)[0]
        sequence = re.findall(r'.*S([0-9]{5}).*', filename)[0]
        filesize = os.path.getsize(filename)
        duration = (datetime.now()-start).total_seconds()
        influx = create_influx_entry(run, proposal, qm, sequence, filesize, CHUNK_SIZE,
                                     total_sequences, success, duration, reason)
        client.write_points([influx])
    try:
        # context managers close both files on every exit path, including
        # the early return below (outfile is closed first, then infile)
        with h5py.File(filename, "r", driver="core") as infile, \
                h5py.File(filename_out, "w") as outfile:
            agipd_corr = AgipdCorrections(infile, outfile, max_cells, channel, max_pulses,
                                          bins_gain_vs_signal, bins_signal_low_range,
                                          bins_signal_high_range, bins_dig_gain_vs_signal,
                                          do_rel_gain=do_rel_gain)
            try:
                agipd_corr.get_valid_image_idx()
            except IOError:
                # as before: no result and no InfluxDB entry for this file
                return
            if not nodb:
                agipd_corr.initialize_from_db(dbparms, qm, only_dark=(fileparms != ""))
            if fileparms != "":
                agipd_corr.initialize_from_file(fileparms, qm, with_dark=nodb)
            print("Initialized constants")
            for irange in agipd_corr.get_iteration_range():
                agipd_corr.correct_agipd(irange)
                print("Iterated")
            print("All interations finished")
            hists, edges = agipd_corr.get_histograms()
            hists_signal_low, hists_signal_high, hists_gain_vs_signal, hists_dig_gain_vs_signal= hists
            low_edges, high_edges, signal_edges, dig_signal_edges = edges
        print("Closed files")
    except Exception:
        # Previously success/reason were overwritten with the failure values
        # unconditionally, so every entry was recorded as failed. Only real
        # errors are reported as failures now; the error still propagates.
        report(False, "Error")
        raise
    report(True, "")
    return (hists_signal_low, hists_signal_high, hists_gain_vs_signal, hists_dig_gain_vs_signal,
            low_edges, high_edges, signal_edges, dig_signal_edges)
 # bookkeeping for the dispatch loop in this cell
 done = False
 first_files = []
 inp = []
 left = total_sequences
 # histogram binning handed to the correction tasks
 bins_gain_vs_signal = (100, 100)
 bins_signal_low_range = 100
 bins_signal_high_range = 100
 bins_dig_gain_vs_signal = (100, 4)
 # accumulators the per-file histograms are summed into
 hists_signal_low =  np.zeros((bins_signal_low_range, max_pulses), np.float64)
 # sized with the high-range bin count (was bins_signal_low_range, which only
 # worked because both ranges currently use the same number of bins)
 hists_signal_high =  np.zeros((bins_signal_high_range, max_pulses), np.float64)
 hists_gain_vs_signal =  np.zeros((bins_gain_vs_signal), np.float64)
 hists_dig_gain_vs_signal =  np.zeros((bins_dig_gain_vs_signal), np.float64)
 low_edges, high_edges, signal_edges, dig_signal_edges = None, None, None, None
 dbparms = cal_db_interface, creation_time, max_cells, bias_voltage, photon_energy
 fileparms = calfile
 # Each pass takes at most one file per module from its queue, collects the
 # work items in `inp` and dispatches them once enough have accumulated.
 while not done:
    # per module: is its queue empty after this pass?
    dones = []
    # NOTE(review): `first` is reset to True on every pass, so first_files
    # receives the files of all passes, not only the first - confirm intent
    first = True
    for i in range(16):
        qm = "Q{}M{}".format(i//4 +1, i % 4 + 1)
        if qm in mapped_files and not mapped_files[qm].empty():
            fname_in = str(mapped_files[qm].get())
            dones.append(mapped_files[qm].empty())
        else:
            print("Skipping {}".format(qm))
            # placeholder for a module without a file in this pass
            first_files.append((None, None))
            continue
        # output file: same name in out_folder with "RAW" replaced by "CORR"
        fout = os.path.abspath("{}/{}".format(out_folder, (os.path.split(fname_in)[-1]).replace("RAW", "CORR")))
        if first:
            first_files.append((fname_in, fout))
        inp.append((fname_in, fout, i,  qm))
    first = False
    # dispatch once MAX_PAR items, or all that are left, have been collected
    if len(inp) >= min(MAX_PAR, left):
        print("Running {} tasks parallel".format(len(inp)))
        p = partial(correct_module, max_cells, do_rel_gain, index_v, CHUNK_SIZE, total_sequences,
                    sequences_qm, bins_gain_vs_signal, bins_signal_low_range, bins_signal_high_range,
                    bins_dig_gain_vs_signal, max_pulses, dbparms, fileparms, nodb)
        r = view.map_sync(p, inp)
        #r = list(map(p, inp))
        inp = []
        left -= MAX_PAR
        # sum the histograms of all results; the edges are overwritten by
        # each result that is not None
        for rr in r:
            if rr is not None:
-                hl, hh, hg, hdg, low_edges, high_edges, signal_edges, dig_edges = rr
+                hl, hh, hg, hdg, low_edges, high_edges, signal_edges, dig_signal_edges = rr
                if hl is not None:  # any one being None will also make the others None
                    hists_signal_low += hl.astype(np.float64)
                    hists_signal_high += hh.astype(np.float64)
                    hists_gain_vs_signal += hg.astype(np.float64)
                    hists_dig_gain_vs_signal += hdg.astype(np.float64)
    # finished when every queue that was read in this pass is now empty
    done = all(dones)
 ```
-%% Output
-    Running 16 tasks parallel
 %% Cell type:code id: tags:
 ``` python
 from mpl_toolkits.mplot3d import Axes3D
 import matplotlib.pyplot as plt
 from matplotlib import cm
 from matplotlib.ticker import LinearLocator, FormatStrFormatter
 import numpy as np
 %matplotlib inline
 def do_3d_plot(data, edges, x_axis, y_axis):
    """Draw a 2D histogram as a 3D surface.

    :param data: 2D array of counts; plotted transposed
    :param edges: pair of bin-edge sequences; the last edge of each is dropped
        so the grid matches the number of bins
    :param x_axis: label of the x axis
    :param y_axis: label of the y axis
    """
    fig = plt.figure(figsize=(10,10))
    # Figure.gca(projection=...) is deprecated since Matplotlib 3.4 and no
    # longer accepted by later releases; add_subplot creates the same 3D axes
    ax = fig.add_subplot(111, projection='3d')
    # Make data.
    X = edges[0][:-1]
    Y = edges[1][:-1]
    X, Y = np.meshgrid(X, Y)
    Z = data.T
    # Plot the surface.
    ax.plot_surface(X, Y, Z, cmap=cm.coolwarm,
                    linewidth=0, antialiased=False)
    ax.set_xlabel(x_axis)
    ax.set_ylabel(y_axis)
    ax.set_zlabel("Counts")
 ```
 %% Cell type:markdown id: tags:
 ## Signal vs. Analogue Gain ##
 The following plots show signal vs. analogue gain for the first 128 images.
 %% Cell type:code id: tags:
 ``` python
 do_3d_plot(hists_gain_vs_signal, signal_edges, "Signal (ADU)", "Analogue gain (ADU)")
 ```
 %% Cell type:code id: tags:
 ``` python
 def do_2d_plot(data, edges, y_axis, x_axis):
    """Show a 2D histogram as an image with a logarithmic colour scale.

    Note the parameter order: the y-axis label comes before the x-axis label.

    :param data: 2D array of counts; displayed with its first axis reversed
    :param edges: pair of bin-edge sequences; ``edges[1]`` spans the x extent
        and ``edges[0]`` the y extent
    :param y_axis: label of the y axis
    :param x_axis: label of the x axis
    """
    from matplotlib.colors import LogNorm
    fig = plt.figure(figsize=(10,10))
    ax = fig.add_subplot(111)
    # image extent in data coordinates: [left, right, bottom, top]
    x_lo, x_hi = np.min(edges[1]), np.max(edges[1])
    y_lo, y_hi = np.min(edges[0]), np.max(edges[0])
    color_norm = LogNorm(vmin=1, vmax=np.max(data))
    im = ax.imshow(data[::-1,:], extent=[x_lo, x_hi, y_lo, y_hi],
                   aspect="auto", norm=color_norm)
    ax.set_xlabel(x_axis)
    ax.set_ylabel(y_axis)
    cb = fig.colorbar(im)
    cb.set_label("Counts")
 do_2d_plot(hists_gain_vs_signal, signal_edges, "Signal (ADU)", "Gain Value (ADU)")
 ```
 %% Cell type:markdown id: tags:
 ## Signal vs. Digitized Gain ##
 The following plot shows signal vs. digitized gain for the first 128 images.
 %% Cell type:code id: tags:
 ``` python
 do_2d_plot(hists_dig_gain_vs_signal, dig_signal_edges, "Signal (ADU)", "Gain Bit Value")
 ```
 %% Cell type:markdown id: tags:
 ## Mean Intensity per Pulse ##
 The following plots show the mean signal for each pulse in a detailed and expanded intensity region.
 %% Cell type:code id: tags:
 ``` python
 do_3d_plot(hists_signal_low, low_edges, "Signal (ADU)", "Pulse id")
 do_2d_plot(hists_signal_low, low_edges, "Signal (ADU)", "Pulse id")
 do_3d_plot(hists_signal_high, high_edges, "Signal (ADU)", "Pulse id")
 do_2d_plot(hists_signal_high, high_edges, "Signal (ADU)", "Pulse id")
 ```
 %% Cell type:code id: tags:
 ``` python
 ```
 %% Cell type:code id: tags:
 ``` python
 # Per-module preview data. All four lists receive exactly one entry per item
 # of first_files so that they stay index-aligned.
 corrected = []
 raw = []
 gains = []
 mask = []
 for i, ff in enumerate(first_files):
    dset = "/INSTRUMENT/SPB_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/{}"
    raw_i = None
    try:
        rf, cf = ff
        if rf is None:
            raise Exception("File not present")
        # context managers close the files even if a read fails
        with h5py.File(rf, "r") as infile:
            raw_i = np.array(infile[dset.format(i, "data")][:max_cells,0,...])
        with h5py.File(cf, "r") as infile:
            corrected_i = np.array(infile[dset.format(i, "data")][:max_cells,...])
            gains_i = np.array(infile[dset.format(i, "gain")][:max_cells,...])
            mask_i = np.array(infile[dset.format(i, "mask")][:max_cells,...])
    except Exception as e:
        # report the reason instead of swallowing it silently
        print("No preview data for module index {}: {}".format(i, e))
        corrected_i = np.zeros((max_cells, 512, 128))
        gains_i = np.zeros((max_cells, 512, 128))
        mask_i = np.zeros((max_cells, 512, 128))
        if raw_i is None:
            # raw previously got no placeholder, leaving it shorter than the
            # other lists; shape assumed to match them - TODO confirm
            raw_i = np.zeros((max_cells, 512, 128))
    raw.append(raw_i)
    corrected.append(corrected_i)
    gains.append(gains_i)
    mask.append(mask_i)
 ```
 %% Cell type:code id: tags:
 ``` python
 # when set, pixels with a non-zero mask entry are zeroed in the corrected data
 domask = True
 if domask:
    for i, c in enumerate(corrected):
        # modifies the arrays held in `corrected` in place
        c[mask[i] != 0] = 0
 # assemble the per-module stacks into full-detector canvases; the canvas
 # depth is taken from the first module of each list
 combined = combine_stack(corrected, corrected[0].shape[0])
 combined_raw = combine_stack(raw, raw[0].shape[0])
 combined_g = combine_stack(gains, gains[0].shape[0])
 combined_mask = combine_stack(mask, mask[0].shape[0])
 ```
 %% Cell type:markdown id: tags:
 ### Mean RAW Preview ###
 The per pixel mean of the first 128 images of the RAW data
 %% Cell type:code id: tags:
 ``` python
 %matplotlib inline
 fig = plt.figure(figsize=(20,10))
 ax = fig.add_subplot(111)
 # mean over the image axis of a cropped region; the colour limits are derived
 # from the median of all positive entries of the whole stack (NaN gaps never
 # satisfy `> 0`), capped at 4000 from above for vmin and at 7000 from below
 # for vmax
 im = ax.imshow(np.mean(combined_raw[:,:1300,400:1600],axis=0),
               vmin=min(0.75*np.median(combined_raw[combined_raw > 0]), 4000),
               vmax=max(1.5*np.median(combined_raw[combined_raw > 0]), 7000), cmap="jet")
 cb = fig.colorbar(im, ax=ax)
 ```
 %% Cell type:markdown id: tags:
 ### Single Shot Preview ###
 A single shot image from cell 12 of the first train
 %% Cell type:code id: tags:
 ``` python
 fig = plt.figure(figsize=(20,10))
 ax = fig.add_subplot(111)
 # NOTE(review): the image at index 70 is shown, while the heading above
 # speaks of cell 12 - confirm which one is intended
 dim = combined[70,:1300,400:1600]
 # upper colour limit: 10x the median of the positive pixels, but at least 100
 im = ax.imshow(dim, vmin=-5,
               vmax=max(10*np.median(dim[dim > 0]), 100), cmap="jet")
 cb = fig.colorbar(im, ax=ax)
 ```
 %% Cell type:markdown id: tags:
 ### Mean CORRECTED Preview ###
 The per pixel mean of the first 128 images of the CORRECTED data
 %% Cell type:code id: tags:
 ``` python
 fig = plt.figure(figsize=(20,10))
 ax = fig.add_subplot(111)
 # mean over the image axis of a cropped region; the upper colour limit uses
 # the median of the positive entries of the whole stack, not only the crop
 im = ax.imshow(np.mean(combined[:,:1300,400:1600], axis=0), vmin=-5,
               vmax=max(10*np.median(combined[combined > 0]), 100), cmap="jet")
 cb = fig.colorbar(im, ax=ax)
 ```
 %% Cell type:markdown id: tags:
 ### Maximum GAIN Preview ###
 The per pixel maximum of the first 128 images of the digitized GAIN data
 %% Cell type:code id: tags:
 ``` python
 fig = plt.figure(figsize=(20,10))
 ax = fig.add_subplot(111)
 # per-pixel maximum over the image axis, shown on a fixed 0..3 colour scale
 im = ax.imshow(np.max(combined_g[:,:1300,400:1600], axis=0), vmin=0,
               vmax=3, cmap="jet")
 cb = fig.colorbar(im, ax=ax)
 ```
 %% Cell type:markdown id: tags:
 ## Bad Pixels ##
 The mask contains dedicated entries for all pixels and memory cells as well as all three gain stages. Each mask entry is encoded in 32 bits as:
 %% Cell type:code id: tags:
 ``` python
 from cal_tools.enums import BadPixels
 from IPython.display import HTML, display, Markdown, Latex
 import tabulate
 # one row per BadPixels member: its name and its value as a binary string,
 # zero-padded to at least 16 digits
 table = [(item.name, "{:016b}".format(item.value)) for item in BadPixels]
 bad_pixel_latex = tabulate.tabulate(table, tablefmt='latex',
                                     headers=["Bad pixel type", "Bit mask"])
 md = display(Latex(bad_pixel_latex))
 ```
 %% Cell type:markdown id: tags:
 ### Single Shot Bad Pixels ###
 A single shot bad pixel map from cell 4 of the first train
 %% Cell type:code id: tags:
 ``` python
 fig = plt.figure(figsize=(20,10))
 ax = fig.add_subplot(111)
 # log2 of the mask value of image 4; entries equal to 0 evaluate to -inf
 # and NaN gaps stay NaN
 im = ax.imshow(np.log2(combined_mask[4,:1300,400:1600]), vmin=0,
               vmax=32, cmap="jet")
 cb = fig.colorbar(im, ax=ax)
 ```
 %% Cell type:code id: tags:
 ``` python
 ```
 %% Cell type:code id: tags:
 ``` python
 ```