From 54344a2b8c1fd35ca141d81ef34c202f99a09e7b Mon Sep 17 00:00:00 2001
From: David Hammer <dhammer@mailbox.org>
Date: Wed, 24 Feb 2021 12:19:21 +0100
Subject: [PATCH] Start refactoring of darks notebook

Applying isort and nbstripout.  Changing from ipyparallel to multiprocessing - and while doing so,
simplifying the call signature of characterize_module.
---
 .../Characterize_AGIPD_Gain_Darks_NBC.ipynb   | 278 ++++++------------
 1 file changed, 89 insertions(+), 189 deletions(-)

diff --git a/notebooks/AGIPD/Characterize_AGIPD_Gain_Darks_NBC.ipynb b/notebooks/AGIPD/Characterize_AGIPD_Gain_Darks_NBC.ipynb
index 8d11bae69..09ae91c62 100644
--- a/notebooks/AGIPD/Characterize_AGIPD_Gain_Darks_NBC.ipynb
+++ b/notebooks/AGIPD/Characterize_AGIPD_Gain_Darks_NBC.ipynb
@@ -16,22 +16,16 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2019-02-20T12:42:51.255184Z",
-     "start_time": "2019-02-20T12:42:51.225500Z"
-    }
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
-    "cluster_profile = \"noDB\" # The ipcluster profile to use\n",
-    "in_folder = \"/gpfs/exfel/d/raw/DETLAB/202031/p900172/\" # path to input data, required\n",
-    "out_folder = \"/gpfs/exfel/data/scratch/ahmedk/test/miniHalfAGIPD\" # path to output to, required\n",
+    "in_folder = \"/gpfs/exfel/d/raw/CALLAB/202031/p900113\" # path to input data, required\n",
+    "out_folder = \"/gpfs/exfel/data/scratch/hammerd/agipd-fixed-gain\" # path to output to, required\n",
     "sequences = [0] # sequence files to evaluate.\n",
     "modules = [-1]  # list of modules to evaluate, RANGE ALLOWED\n",
-    "run_high = 84 # run number in which high gain data was recorded, required\n",
-    "run_med = 87 # run number in which medium gain data was recorded, required\n",
-    "run_low = 88 # run number in which low gain data was recorded, required\n",
+    "run_high = 9985 # run number in which high gain data was recorded, required\n",
+    "run_med = 9984 # run number in which medium gain data was recorded, required\n",
+    "run_low = 9983 # run number in which low gain data was recorded, required\n",
     "operation_mode = 'ADAPTIVE_GAIN'  # Detector operation mode, optional\n",
     "\n",
     "karabo_id = \"HED_DET_AGIPD500K2G\" # karabo karabo_id\n",
@@ -40,9 +34,9 @@
     "path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data\n",
     "h5path = '/INSTRUMENT/{}/DET/{}:xtdf/image' # path in the HDF5 file to images\n",
     "h5path_idx = '/INDEX/{}/DET/{}:xtdf/image' # path in the HDF5 file to images\n",
-    "h5path_ctrl = '/CONTROL/{}/MDL/FPGA_COMP_TEST' # path to control information\n",
-    "karabo_id_control = \"SPB_IRU_AGIPD1M1\" # karabo-id for control device '\n",
-    "karabo_da_control = \"AGIPD1MCTRL00\" # karabo DA for control infromation\n",
+    "h5path_ctrl = '/CONTROL/{}/MDL/FPGA_COMP' # path to control information\n",
+    "karabo_id_control = \"HED_EXP_AGIPD500K2G\" # karabo-id for control device\n",
+    "karabo_da_control = \"AGIPD500K2G00\" # karabo DA for control information\n",
     "\n",
     "use_dir_creation_date = True  # use dir creation date as data production reference date\n",
     "cal_db_interface = \"tcp://max-exfl016:8020\" # the database interface to use\n",
@@ -77,30 +71,24 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2019-02-20T12:42:52.599660Z",
-     "start_time": "2019-02-20T12:42:51.472138Z"
-    }
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
-    "import warnings\n",
-    "\n",
     "# imports and things that do not usually need to be changed\n",
     "from datetime import datetime\n",
     "\n",
     "import dateutil.parser\n",
     "\n",
-    "warnings.filterwarnings('ignore')\n",
-    "import os\n",
     "from collections import OrderedDict\n",
+    "from functools import partial\n",
     "from typing import List, Tuple\n",
     "\n",
     "import h5py\n",
     "import matplotlib\n",
     "import numpy as np\n",
     "import tabulate\n",
+    "from cal_tools.agipdlib import get_acq_rate, get_num_cells\n",
+    "from cal_tools.enums import BadPixels\n",
     "\n",
     "matplotlib.use('agg')\n",
     "import matplotlib.pyplot as plt\n",
@@ -108,36 +96,27 @@
     "\n",
     "%matplotlib inline\n",
     "\n",
+    "import multiprocessing\n",
+    "\n",
     "from cal_tools.agipdlib import get_bias_voltage, get_gain_setting\n",
     "from cal_tools.enums import BadPixels\n",
-    "from cal_tools.plotting import (\n",
-    "    create_constant_overview,\n",
-    "    plot_badpix_3d,\n",
-    "    show_overview,\n",
-    "    show_processed_modules,\n",
-    ")\n",
-    "from cal_tools.tools import (\n",
-    "    get_dir_creation_date,\n",
-    "    get_from_db,\n",
-    "    get_notebook_name,\n",
-    "    get_pdu_from_db,\n",
-    "    get_random_db_interface,\n",
-    "    get_report,\n",
-    "    map_gain_stages,\n",
-    "    parse_runs,\n",
-    "    run_prop_seq_from_path,\n",
-    "    save_const_to_h5,\n",
-    "    send_to_db,\n",
-    ")\n",
-    "\n",
-    "# make sure a cluster is running with ipcluster start --n=32, give it a while to start\n",
-    "from ipyparallel import Client\n",
-    "\n",
-    "view = Client(profile=cluster_profile)[:]\n",
-    "view.use_dill()\n",
-    "\n",
-    "from iCalibrationDB import Conditions, Constants, Detectors, Versions\n",
-    "\n",
+    "from cal_tools.plotting import (create_constant_overview, plot_badpix_3d,\n",
+    "                                show_overview, show_processed_modules)\n",
+    "from cal_tools.tools import (get_dir_creation_date, get_from_db,\n",
+    "                             get_notebook_name, get_pdu_from_db,\n",
+    "                             get_random_db_interface, get_report,\n",
+    "                             map_gain_stages, parse_runs,\n",
+    "                             run_prop_seq_from_path, save_const_to_h5,\n",
+    "                             send_to_db)\n",
+    "from iCalibrationDB import Conditions, Constants, Detectors, Versions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
     "gains = np.arange(3)\n",
     "\n",
     "IL_MODE = interlaced\n",
@@ -218,12 +197,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2019-02-20T12:42:52.608214Z",
-     "start_time": "2019-02-20T12:42:52.601257Z"
-    }
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "if karabo_da[0] == '-1':\n",
@@ -264,12 +238,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2019-02-20T12:42:54.024731Z",
-     "start_time": "2019-02-20T12:42:53.901555Z"
-    }
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "# set everything up filewise\n",
@@ -291,74 +260,50 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2019-02-20T10:50:55.839958Z",
-     "start_time": "2019-02-20T10:50:55.468134Z"
-    },
-    "scrolled": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
-    "import copy\n",
-    "from functools import partial\n",
-    "\n",
+    "def characterize_module(inp: Tuple[str, int, int]) -> Tuple[np.array, np.array, np.array, np.array, int, np.array, int, float]:\n",
+    "    fast_data_filename, channel, gg = inp\n",
     "\n",
-    "def characterize_module(il_mode: bool,\n",
-    "                        cells: int,\n",
-    "                        bp_thresh: Tuple[List[int], float, List[int], float], \n",
-    "                        rawversion: int,\n",
-    "                        loc: str, \n",
-    "                        acq_rate: float,\n",
-    "                        h5path: str,\n",
-    "                        h5path_idx: str,\n",
-    "                        control_names: List[str],\n",
-    "                        karabo_id_control: str,\n",
-    "                        inp: Tuple[str, int, int]) -> Tuple[np.array, np.array, np.array, np.array, int, np.array, int, float]:\n",
-    "    import copy\n",
-    "\n",
-    "    import h5py\n",
-    "    import numpy as np\n",
-    "    from cal_tools.agipdlib import get_acq_rate, get_num_cells\n",
-    "    from cal_tools.enums import BadPixels\n",
+    "    if max_cells == 0:\n",
+    "        num_cells = get_num_cells(fast_data_filename, karabo_id, channel)\n",
+    "    else:\n",
+    "        num_cells = max_cells\n",
     "\n",
-    "    fast_data_filename, channel, gg = inp\n",
-    "    \n",
-    "    if cells == 0:\n",
-    "        cells = get_num_cells(fast_data_filename, loc, channel)\n",
+    "    print(f\"Using {num_cells} memory cells\")\n",
     "\n",
-    "    print(f\"Using {cells} memory cells\")\n",
-    "    \n",
     "    if acq_rate == 0.:\n",
     "        slow_paths = control_names[gg], karabo_id_control\n",
-    "        fast_paths = fast_data_filename, loc, channel\n",
-    "        acq_rate = get_acq_rate(fast_paths, slow_paths)\n",
+    "        fast_paths = fast_data_filename, karabo_id, channel\n",
+    "        local_acq_rate = get_acq_rate(fast_paths, slow_paths)\n",
+    "    else:\n",
+    "        local_acq_rate = acq_rate\n",
+    "\n",
+    "    local_thresholds_offset_hard = thresholds_offset_hard[gg]\n",
+    "    local_thresholds_noise_hard = thresholds_noise_hard[gg]\n",
+    "\n",
+    "    h5path_f = h5path.format(channel)\n",
+    "    h5path_idx_f = h5path_idx.format(channel)\n",
     "\n",
-    "    thresholds_offset, thresholds_offset_sigma, thresholds_noise, thresholds_noise_sigma = bp_thresh \n",
-    "    thresholds_offset_hard = thresholds_offset[gg]\n",
-    "    thresholds_noise_hard = thresholds_noise[gg]\n",
-    "    \n",
-    "    h5path = h5path.format(channel)\n",
-    "    h5path_idx = h5path_idx.format(channel)\n",
-    "    \n",
     "    with h5py.File(fast_data_filename, \"r\", driver=\"core\") as infile:\n",
     "        if rawversion == 2:\n",
-    "            count = np.squeeze(infile[f\"{h5path_idx}/count\"])\n",
-    "            first = np.squeeze(infile[f\"{h5path_idx}/first\"])\n",
+    "            count = np.squeeze(infile[f\"{h5path_idx_f}/count\"])\n",
+    "            first = np.squeeze(infile[f\"{h5path_idx_f}/first\"])\n",
     "            last_index = int(first[count != 0][-1]+count[count != 0][-1])\n",
     "            first_index = int(first[count != 0][0])\n",
     "        else:\n",
-    "            status = np.squeeze(infile[f\"{h5path_idx}/status\"])\n",
+    "            status = np.squeeze(infile[f\"{h5path_idx_f}/status\"])\n",
     "            if np.count_nonzero(status != 0) == 0:\n",
     "                return\n",
-    "            last = np.squeeze(infile[f\"{h5path_idx}/last\"])\n",
-    "            first = np.squeeze(infile[f\"{h5path_idx}/first\"])\n",
+    "            last = np.squeeze(infile[f\"{h5path_idx_f}/last\"])\n",
+    "            first = np.squeeze(infile[f\"{h5path_idx_f}/first\"])\n",
     "            last_index = int(last[status != 0][-1]) + 1\n",
     "            first_index = int(first[status != 0][0])\n",
-    "        im = np.array(infile[f\"{h5path}/data\"][first_index:last_index,...])    \n",
-    "        cellIds = np.squeeze(infile[f\"{h5path}/cellId\"][first_index:last_index,...]) \n",
+    "        im = np.array(infile[f\"{h5path_f}/data\"][first_index:last_index,...])\n",
+    "        cellIds = np.squeeze(infile[f\"{h5path_f}/cellId\"][first_index:last_index,...])\n",
     "\n",
-    "    if il_mode:\n",
+    "    if IL_MODE:\n",
     "        ga = im[1::2, 0, ...]\n",
     "        im = im[0::2, 0, ...].astype(np.float32)\n",
     "        cellIds = cellIds[::2]\n",
@@ -372,13 +317,12 @@
     "    ga = np.rollaxis(ga, 2)\n",
     "    ga = np.rollaxis(ga, 2, 1)\n",
     "\n",
-    "    mcells = cells #max(cells, np.max(cellIds)+1)\n",
-    "    offset = np.zeros((im.shape[0], im.shape[1], mcells))\n",
-    "    gains = np.zeros((im.shape[0], im.shape[1], mcells))\n",
-    "    noise = np.zeros((im.shape[0], im.shape[1], mcells))\n",
-    "    gains_std = np.zeros((im.shape[0], im.shape[1], mcells))\n",
-    "    \n",
-    "    for cc in np.unique(cellIds[cellIds < mcells]):\n",
+    "    offset = np.zeros((im.shape[0], im.shape[1], num_cells))\n",
+    "    gains = np.zeros((im.shape[0], im.shape[1], num_cells))\n",
+    "    noise = np.zeros((im.shape[0], im.shape[1], num_cells))\n",
+    "    gains_std = np.zeros((im.shape[0], im.shape[1], num_cells))\n",
+    "\n",
+    "    for cc in np.unique(cellIds[cellIds < num_cells]):\n",
     "        cellidx = cellIds == cc\n",
     "        offset[...,cc] = np.median(im[..., cellidx], axis=2)\n",
     "        noise[...,cc] = np.std(im[..., cellidx], axis=2)\n",
@@ -393,19 +337,19 @@
     "\n",
     "    bp[(offset < offset_mn-thresholds_offset_sigma*offset_std) |\n",
     "       (offset > offset_mn+thresholds_offset_sigma*offset_std)] |= BadPixels.OFFSET_OUT_OF_THRESHOLD.value\n",
-    "    bp[(offset < thresholds_offset_hard[0]) | (\n",
-    "        offset > thresholds_offset_hard[1])] |= BadPixels.OFFSET_OUT_OF_THRESHOLD.value\n",
+    "    bp[(offset < local_thresholds_offset_hard[0]) | (\n",
+    "        offset > local_thresholds_offset_hard[1])] |= BadPixels.OFFSET_OUT_OF_THRESHOLD.value\n",
     "    bp[~np.isfinite(offset)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR.value\n",
     "\n",
     "    # noise related bad pixels\n",
     "    noise_mn = np.nanmedian(noise, axis=(0,1))\n",
-    "    noise_std = np.nanstd(noise, axis=(0,1))    \n",
+    "    noise_std = np.nanstd(noise, axis=(0,1))\n",
     "    bp[(noise < noise_mn-thresholds_noise_sigma*noise_std) |\n",
     "       (noise > noise_mn+thresholds_noise_sigma*noise_std)] |= BadPixels.NOISE_OUT_OF_THRESHOLD.value\n",
-    "    bp[(noise < thresholds_noise_hard[0]) | (noise > thresholds_noise_hard[1])] |= BadPixels.NOISE_OUT_OF_THRESHOLD.value\n",
+    "    bp[(noise < local_thresholds_noise_hard[0]) | (noise > local_thresholds_noise_hard[1])] |= BadPixels.NOISE_OUT_OF_THRESHOLD.value\n",
     "    bp[~np.isfinite(noise)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR.value\n",
     "\n",
-    "    return offset, noise, gains, gains_std, gg, bp, cells, acq_rate\n",
+    "    return offset, noise, gains, gains_std, gg, bp, num_cells, local_acq_rate\n",
     "\n",
     "offset_g = OrderedDict()\n",
     "noise_g = OrderedDict()\n",
@@ -428,7 +372,7 @@
     "else:\n",
     "    thresholds_noise_hard = [thresholds_noise_hard] * 3\n",
     "\n",
-    "    \n",
+    "\n",
     "inp = []\n",
     "for gain, mapped_files in gain_mapped_files.items():\n",
     "    dones = []\n",
@@ -441,18 +385,11 @@
     "        else:\n",
     "            continue\n",
     "        inp.append((fname_in, i, gg))\n",
-    "        \n",
-    "    gg += 1\n",
     "\n",
-    "p = partial(characterize_module, IL_MODE, max_cells,\n",
-    "           (thresholds_offset_hard, thresholds_offset_sigma,\n",
-    "            thresholds_noise_hard, thresholds_noise_sigma),\n",
-    "            rawversion, karabo_id, acq_rate, h5path, h5path_idx,\n",
-    "           control_names, karabo_id_control)\n",
+    "    gg += 1\n",
     "\n",
-    "# Don't remove. Used for Debugging.\n",
-    "#results = list(map(p, inp))\n",
-    "results = view.map_sync(p, inp)\n",
+    "with multiprocessing.Pool() as pool:\n",
+    "    results = pool.map(characterize_module, inp)  # each item is a (file, channel, gain index) tuple\n",
     "\n",
     "for ii, r in enumerate(results):\n",
     "    offset, noise, gains, gains_std, gg, bp, thiscell, thisacq = r\n",
@@ -472,7 +409,7 @@
     "        gain_g[qm][...,gg] = gains\n",
     "        gainstd_g[qm][..., gg] = gains_std\n",
     "        badpix_g[qm][...,gg] = bp\n",
-    "    \n",
+    "\n",
     "\n",
     "duration = (datetime.now() - start).total_seconds()\n",
     "\n",
@@ -506,12 +443,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2018-12-06T09:38:18.220833Z",
-     "start_time": "2018-12-06T09:38:17.926616Z"
-    }
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "thresholds_g = {}\n",
@@ -526,12 +458,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2018-12-06T09:38:18.234582Z",
-     "start_time": "2018-12-06T09:38:18.222838Z"
-    }
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "res = OrderedDict()\n",
@@ -575,9 +502,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "scrolled": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "# Retrieve existing constants for comparison\n",
@@ -632,12 +557,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2018-12-06T09:49:32.449330Z",
-     "start_time": "2018-12-06T09:49:20.231607Z"
-    }
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "md = None\n",
@@ -673,9 +593,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "scrolled": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "mnames=[]\n",
@@ -705,13 +623,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2018-12-06T09:49:14.540552Z",
-     "start_time": "2018-12-06T09:49:13.009674Z"
-    },
-    "scrolled": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "cell = 3\n",
@@ -729,9 +641,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "scrolled": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "cell = 3\n",
@@ -749,9 +659,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "scrolled": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "cell = 3\n",
@@ -762,9 +670,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "scrolled": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "cols = {BadPixels.NOISE_OUT_OF_THRESHOLD.value: (BadPixels.NOISE_OUT_OF_THRESHOLD.name, '#FF000080'),\n",
@@ -808,9 +714,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "scrolled": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "create_constant_overview(offset_g, \"Offset (ADU)\", max_cells, 4000, 8000,\n",
@@ -820,9 +724,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "scrolled": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "create_constant_overview(noise_g, \"Noise (ADU)\", max_cells, 0, 100,\n",
@@ -832,9 +734,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "scrolled": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "# Plot only three gain threshold maps.\n",
@@ -1015,5 +915,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 1
+ "nbformat_minor": 4
 }
-- 
GitLab