From 90877c15a0b2899df577eca379a7d8c1d87cfe3d Mon Sep 17 00:00:00 2001
From: David Hammer <dhammer@mailbox.org>
Date: Mon, 19 Apr 2021 18:09:49 +0200
Subject: [PATCH] Restructure, fix issue #49

---
 .../Characterize_AGIPD_Gain_Darks_NBC.ipynb   | 232 ++++++++++--------
 1 file changed, 132 insertions(+), 100 deletions(-)

diff --git a/notebooks/AGIPD/Characterize_AGIPD_Gain_Darks_NBC.ipynb b/notebooks/AGIPD/Characterize_AGIPD_Gain_Darks_NBC.ipynb
index 0679cfd1c..cc42a657d 100644
--- a/notebooks/AGIPD/Characterize_AGIPD_Gain_Darks_NBC.ipynb
+++ b/notebooks/AGIPD/Characterize_AGIPD_Gain_Darks_NBC.ipynb
@@ -255,35 +255,6 @@
     "print(f\"Operation mode is {'fixed' if fixed_gain_mode else 'adaptive'} gain mode\")"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "The following lines will create a queue of files which will the be executed module-parallel. Distiguishing between different gains."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# set everything up filewise\n",
-    "os.makedirs(out_folder, exist_ok=True)\n",
-    "gmf = map_gain_stages(in_folder, offset_runs, path_template, karabo_da, sequences)\n",
-    "gain_mapped_files, total_sequences, total_file_size = gmf\n",
-    "print(f\"Will process a total of {total_sequences} files.\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Calculate Offsets, Noise and Thresholds ##\n",
-    "\n",
-    "The calculation is performed per-pixel and per-memory-cell. Offsets are simply the median value for a set of dark data taken at a given gain, noise the standard deviation, and gain-bit values the medians of the gain array."
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -319,13 +290,59 @@
     "    ]"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The following lines will create a queue of files which will then be executed module-parallel, distinguishing between different gains."
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "def characterize_module(fast_data_filename: str, channel: int, gg: int) -> Tuple[np.array, np.array, np.array, np.array, int, np.array, int, float]:\n",
+    "# set everything up filewise\n",
+    "os.makedirs(out_folder, exist_ok=True)\n",
+    "gain_mapped_files, total_sequences, total_file_size = map_gain_stages(\n",
+    "    in_folder, offset_runs, path_template, karabo_da, sequences\n",
+    ")\n",
+    "print(f\"Will process a total of {total_sequences} files ({total_file_size:.02f} GB).\")\n",
+    "\n",
+    "inp = []\n",
+    "inp_modules = []\n",
+    "for gain_index, (gain, qm_file_map) in enumerate(gain_mapped_files.items()):\n",
+    "    for module_index in modules:\n",
+    "        qm = module_index_to_qm(module_index)\n",
+    "        if qm not in qm_file_map:\n",
+    "            print(f\"Did not find files for {qm}\")\n",
+    "            continue\n",
+    "        file_queue = qm_file_map[qm]\n",
+    "        while not file_queue.empty():\n",
+    "            filename = file_queue.get()\n",
+    "            print(f\"Process {filename} for {qm}\")\n",
+    "            inp.append((filename, module_index, gain_index))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Calculate Offsets, Noise and Thresholds ##\n",
+    "\n",
+    "The calculation is performed per-pixel and per-memory-cell. Offsets are simply the median value for a set of dark data taken at a given gain, noise the standard deviation, and gain-bit values the medians of the gain array."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def characterize_module(\n",
+    "    fast_data_filename: str, channel: int, gain_index: int\n",
+    ") -> Tuple[np.array, np.array, np.array, np.array, int, np.array, int, float]:\n",
     "    if max_cells == 0:\n",
     "        num_cells = get_num_cells(fast_data_filename, karabo_id, channel)\n",
     "    else:\n",
@@ -334,14 +351,14 @@
     "    print(f\"Using {num_cells} memory cells\")\n",
     "\n",
     "    if acq_rate == 0.:\n",
-    "        slow_paths = control_names[gg], karabo_id_control\n",
+    "        slow_paths = control_names[gain_index], karabo_id_control\n",
     "        fast_paths = fast_data_filename, karabo_id, channel\n",
     "        local_acq_rate = get_acq_rate(fast_paths, slow_paths)\n",
     "    else:\n",
     "        local_acq_rate = acq_rate\n",
     "\n",
-    "    local_thresholds_offset_hard = thresholds_offset_hard[gg]\n",
-    "    local_thresholds_noise_hard = thresholds_noise_hard[gg]\n",
+    "    local_thresholds_offset_hard = thresholds_offset_hard[gain_index]\n",
+    "    local_thresholds_noise_hard = thresholds_noise_hard[gain_index]\n",
     "\n",
     "    h5path_f = h5path.format(channel)\n",
     "    h5path_idx_f = h5path_idx.format(channel)\n",
@@ -421,7 +438,7 @@
     "    bp[(noise < local_thresholds_noise_hard[0]) | (noise > local_thresholds_noise_hard[1])] |= BadPixels.NOISE_OUT_OF_THRESHOLD.value\n",
     "    bp[~np.isfinite(noise)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR.value\n",
     "\n",
-    "    return offset, noise, gains, gains_std, gg, bp, num_cells, local_acq_rate"
+    "    return offset, noise, gains, gains_std, bp, num_cells, local_acq_rate"
    ]
   },
   {
@@ -435,6 +452,16 @@
     "psh.set_default_context(\"threads\", num_workers=parallel_num_threads)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "with multiprocessing.Pool(processes=parallel_num_procs) as pool:\n",
+    "    results = pool.starmap(characterize_module, inp)"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -451,39 +478,25 @@
     "all_cells = []\n",
     "all_acq_rate = []\n",
     "\n",
-    "inp = []\n",
-    "for gg, (gain, mapped_files) in enumerate(gain_mapped_files.items()):\n",
-    "    for i in modules:\n",
-    "        qm = module_index_to_qm(i)\n",
-    "        if qm in mapped_files and not mapped_files[qm].empty():\n",
-    "            fname_in = mapped_files[qm].get()\n",
-    "            print(f\"Process file: {fname_in} for {qm}\")\n",
-    "        else:\n",
-    "            continue\n",
-    "        inp.append((fname_in, i, gg))\n",
-    "\n",
-    "with multiprocessing.Pool(processes=parallel_num_procs) as pool:\n",
-    "    results = pool.starmap(characterize_module, inp)\n",
-    "\n",
-    "for offset, noise, gains, gains_std, gg, bp, thiscell, thisacq in results:\n",
+    "for (_, module_index, gain_index), (offset, noise, gains, gains_std, bp,\n",
+    "                                    thiscell, thisacq) in zip(inp, results):\n",
     "    all_cells.append(thiscell)\n",
     "    all_acq_rate.append(thisacq)\n",
-    "    for i in modules:\n",
-    "        qm = module_index_to_qm(i)\n",
-    "        if qm not in offset_g:\n",
-    "            offset_g[qm] = np.zeros((offset.shape[0], offset.shape[1], offset.shape[2], 3))\n",
-    "            noise_g[qm] = np.zeros_like(offset_g[qm])\n",
-    "            badpix_g[qm] = np.zeros_like(offset_g[qm], np.uint32)\n",
-    "            if not fixed_gain_mode:\n",
-    "                gain_g[qm] = np.zeros_like(offset_g[qm])\n",
-    "                gainstd_g[qm] = np.zeros_like(offset_g[qm])\n",
-    "\n",
-    "        offset_g[qm][...,gg] = offset\n",
-    "        noise_g[qm][...,gg] = noise\n",
-    "        badpix_g[qm][...,gg] = bp\n",
+    "    qm = module_index_to_qm(module_index)\n",
+    "    if qm not in offset_g:\n",
+    "        offset_g[qm] = np.zeros((offset.shape[0], offset.shape[1], offset.shape[2], 3))\n",
+    "        noise_g[qm] = np.zeros_like(offset_g[qm])\n",
+    "        badpix_g[qm] = np.zeros_like(offset_g[qm], np.uint32)\n",
     "        if not fixed_gain_mode:\n",
-    "            gain_g[qm][...,gg] = gains\n",
-    "            gainstd_g[qm][..., gg] = gains_std\n",
+    "            gain_g[qm] = np.zeros_like(offset_g[qm])\n",
+    "            gainstd_g[qm] = np.zeros_like(offset_g[qm])\n",
+    "\n",
+    "    offset_g[qm][..., gain_index] = offset\n",
+    "    noise_g[qm][..., gain_index] = noise\n",
+    "    badpix_g[qm][..., gain_index] = bp\n",
+    "    if not fixed_gain_mode:\n",
+    "        gain_g[qm][..., gain_index] = gains\n",
+    "        gainstd_g[qm][..., gain_index] = gains_std\n",
     "\n",
     "\n",
     "max_cells = np.max(all_cells)\n",
@@ -604,11 +617,52 @@
    "source": [
     "# set the operating condition\n",
     "# note: iCalibrationDB only adds gain_mode if it is truthy, so we don't need to handle None\n",
-    "condition = Conditions.Dark.AGIPD(memory_cells=max_cells,\n",
-    "                                  bias_voltage=bias_voltage,\n",
-    "                                  acquisition_rate=acq_rate,\n",
-    "                                  gain_setting=gain_setting,\n",
-    "                                  gain_mode=fixed_gain_mode)"
+    "condition = iCalibrationDB.Conditions.Dark.AGIPD(\n",
+    "    memory_cells=max_cells,\n",
+    "    bias_voltage=bias_voltage,\n",
+    "    acquisition_rate=acq_rate,\n",
+    "    gain_setting=gain_setting,\n",
+    "    gain_mode=fixed_gain_mode\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "md = None\n",
+    "\n",
+    "for qm in res:\n",
+    "    db_module = qm_dict[qm][\"db_module\"]\n",
+    "    for const in res[qm]:\n",
+    "        dconst = getattr(iCalibrationDB.Constants.AGIPD, const)()\n",
+    "        dconst.data = res[qm][const]\n",
+    "\n",
+    "        if db_output:\n",
+    "            md = send_to_db(db_module, karabo_id, dconst, condition, file_loc,\n",
+    "                            report, cal_db_interface, creation_time=creation_time,\n",
+    "                            timeout=cal_db_timeout)\n",
+    "\n",
+    "        if local_output:\n",
+    "            md = save_const_to_h5(db_module, karabo_id, dconst, condition, dconst.data,\n",
+    "                                  file_loc, report, creation_time, out_folder)\n",
+    "            print(f\"Calibration constant {const} for {qm} is stored locally in {file_loc}.\\n\")\n",
+    "\n",
+    "    print(\"Constants parameter conditions are:\\n\")\n",
+    "    print(f\"• memory_cells: {max_cells}\\n• bias_voltage: {bias_voltage}\\n\"\n",
+    "          f\"• acquisition_rate: {acq_rate}\\n• gain_setting: {gain_setting}\\n\"\n",
+    "          f\"• gain_mode: {fixed_gain_mode}\\n\"\n",
+    "          f\"• creation_time: {md.calibration_constant_version.begin_at if md is not None else creation_time}\\n\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
    ]
   },
   {
@@ -668,29 +722,6 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "md = None\n",
-    "\n",
-    "for qm in res:\n",
-    "    db_module = qm_dict[qm][\"db_module\"]\n",
-    "    for const in res[qm]:\n",
-    "        dconst = getattr(iCalibrationDB.Constants.AGIPD, const)()\n",
-    "        dconst.data = res[qm][const]\n",
-    "\n",
-    "        if db_output:\n",
-    "            md = send_to_db(db_module, karabo_id, dconst, condition, file_loc,\n",
-    "                            report, cal_db_interface, creation_time=creation_time,\n",
-    "                            timeout=cal_db_timeout)\n",
-    "\n",
-    "        if local_output:\n",
-    "            md = save_const_to_h5(db_module, karabo_id, dconst, condition, dconst.data,\n",
-    "                                  file_loc, report, creation_time, out_folder)\n",
-    "            print(f\"Calibration constant {const} for {qm} is stored locally in {file_loc}.\\n\")\n",
-    "\n",
-    "    print(\"Constants parameter conditions are:\\n\")\n",
-    "    print(f\"â€¢ memory_cells: {max_cells}\\nâ€¢ bias_voltage: {bias_voltage}\\n\"\n",
-    "          f\"â€¢ acquisition_rate: {acq_rate}\\nâ€¢ gain_setting: {gain_setting}\\n\"\n",
-    "          f\"â€¢ gain_mode: {fixed_gain_mode}\\n\"\n",
-    "          f\"â€¢ creation_time: {md.calibration_constant_version.begin_at if md is not None else creation_time}\\n\")"
    ]
   },
   {
@@ -886,7 +917,11 @@
     "# now we need the old constants\n",
     "old_const = {}\n",
     "old_mdata = {}\n",
-    "old_retrieval_res.wait()"
+    "old_retrieval_res.wait()\n",
+    "\n",
+    "for (qm, const), (data, timestamp) in zip(qm_x_const, old_retrieval_res.get()):\n",
+    "    old_const.setdefault(qm, {})[const] = data\n",
+    "    old_mdata.setdefault(qm, {})[const] = timestamp"
    ]
   },
   {
@@ -895,9 +930,6 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "for (qm, const), (data, timestamp) in zip(qm_x_const, old_retrieval_res.get()):\n",
-    "    old_const.setdefault(qm, {})[const] = data\n",
-    "    old_mdata.setdefault(qm, {})[const] = timestamp"
    ]
   },
   {
@@ -934,12 +966,12 @@
     "        for bit in bits:\n",
     "            l_data.append(np.count_nonzero(badpix_g[qm][:,:,:,gain] & bit.value))\n",
     "\n",
-    "        if old_const['BadPixelsDark'] is not None:\n",
-    "            dataold = np.copy(old_const['BadPixelsDark'][:, :, :, gain])\n",
+    "        if old_const[qm]['BadPixelsDark'] is not None:\n",
+    "            dataold = np.copy(old_const[qm]['BadPixelsDark'][:, :, :, gain])\n",
     "            datau32old = dataold.astype(np.uint32)\n",
     "            l_data_old.append(len(datau32old[datau32old>0].flatten()))\n",
     "            for bit in bits:\n",
-    "                l_data_old.append(np.count_nonzero(old_const['BadPixelsDark'][:, :, :, gain] & bit.value))\n",
+    "                l_data_old.append(np.count_nonzero(old_const[qm]['BadPixelsDark'][:, :, :, gain] & bit.value))\n",
     "\n",
     "        l_data_name = ['All bad pixels', 'NOISE_OUT_OF_THRESHOLD',\n",
     "                       'OFFSET_OUT_OF_THRESHOLD', 'OFFSET_NOISE_EVAL_ERROR', 'GAIN_THRESHOLDING_ERROR']\n",
@@ -951,7 +983,7 @@
     "        for i in range(len(l_data)):\n",
     "            line = [f'{l_data_name[i]}, {gain_names[gain]} gain', l_threshold[i], l_data[i]]\n",
     "\n",
-    "            if old_const['BadPixelsDark'] is not None:\n",
+    "            if old_const[qm]['BadPixelsDark'] is not None:\n",
     "                line += [l_data_old[i]]\n",
     "            else:\n",
     "                line += ['-']\n",
-- 
GitLab