From c0050d03fa5ba9d75c05edd6a080fc53e7c9a76e Mon Sep 17 00:00:00 2001
From: ahmedk <karim.ahmed@xfel.eu>
Date: Wed, 13 Oct 2021 13:10:56 +0200
Subject: [PATCH] move reading cntrl data out of module loop

---
 ...Jungfrau_Gain_Correct_and_Verify_NBC.ipynb |  53 ++++----
 ...rk_analysis_all_gains_burst_mode_NBC.ipynb | 124 +++++++++++-------
 src/cal_tools/tools.py                        |   4 +-
 3 files changed, 103 insertions(+), 78 deletions(-)

diff --git a/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb b/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb
index adee7c933..722ea572f 100644
--- a/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb
+++ b/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb
@@ -20,33 +20,39 @@
     "in_folder = \"/gpfs/exfel/exp/SPB/202130/p900204/raw\"  # the folder to read data from, required\n",
     "out_folder =  \"/gpfs/exfel/data/scratch/ahmedk/test/remove\"  # the folder to output to, required\n",
     "run = 112  # run to process, required\n",
-    "\n",
     "sequences = [-1]  # sequences to correct, set to [-1] for all, range allowed\n",
     "sequences_per_node = 1  # number of sequence files per cluster node if run as slurm job, set to 0 to not run SLURM parallel\n",
     "\n",
+    "# Parameters used to access raw data.\n",
     "karabo_id = \"SPB_IRDA_JF4M\"  # karabo prefix of Jungfrau devices\n",
-    "karabo_da = ['JNGFR01']  # data aggregators\n",
-    "receiver_template = \"JNGFR{:02d}\"  # Detector receiver template for accessing raw data files i.e. \"JNGFR{:02d}\"\n",
+    "karabo_da = ['JNGFR01', 'JNGFR02', 'JNGFR03', 'JNGFR04', 'JNGFR05', 'JNGFR06', 'JNGFR07', 'JNGFR08']  # data aggregators\n",
+    "receiver_template = \"JNGFR{:02d}\"  # Detector receiver template for accessing raw data files. e.g. \"JNGFR{:02d}\"\n",
     "path_template = 'RAW-R{:04d}-{}-S{:05d}.h5'  # template to use for file name\n",
-    "instrument_source_template = '{}/DET/{}:daqOutput'  # template for source name (filled with karabo_id & receiver_id)\n",
+    "instrument_source_template = '{}/DET/{}:daqOutput'  # template for source name (filled with karabo_id & receiver_id). e.g. 'SPB_IRDA_JF4M/DET/JNGFR01:daqOutput'\n",
     "karabo_id_control = \"\"  # if control is on a different ID, set to empty string if it is the same a karabo-id\n",
     "karabo_da_control = \"JNGFRCTRL00\"  # file inset for control data\n",
     "\n",
+    "# Parameters for calibration database.\n",
     "use_dir_creation_date = True  # use the creation data of the input dir for database queries\n",
     "cal_db_interface = \"tcp://max-exfl016:8017#8025\" # the database interface to use\n",
     "cal_db_timeout = 180000  # timeout on caldb requests\n",
     "\n",
-    "\n",
+    "# Parameters affecting corrected data.\n",
     "overwrite = True  # set to True if existing data should be overwritten\n",
     "relative_gain = True  # do relative gain correction\n",
     "limit_imgs = 0  # ONLY FOR TESTING. process only first N images, Use 0 to process all.\n",
     "\n",
+    "plt_imgs = 100  # Number of images to plot after applying selected corrections.\n",
+    "\n",
+    "# Parameters for retrieving calibration constants\n",
+    "manual_slow_data = False  # if true, use manually entered bias_voltage and integration_time values\n",
     "integration_time = 4.96  # integration time in us, will be overwritten by value in file\n",
     "gain_setting = 0  # 0 for dynamic gain, 1 for dynamic HG0, will be overwritten by value in file\n",
     "mem_cells = 0  # leave memory cells equal 0, as it is saved in control information starting 2019.\n",
     "bias_voltage = 180  # will be overwritten by value in file\n",
+    "\n",
+    "# TODO: Remove\n",
     "db_module = \"\"  # ID of module in calibration database, this parameter is ignore in the notebook. TODO: remove from calibration_configurations.\n",
-    "manual_slow_data = False  # if true, use manually entered bias_voltage and integration_time values\n",
     "\n",
     "\n",
     "def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):\n",
@@ -60,7 +66,6 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "import copy\n",
     "import multiprocessing\n",
     "import warnings\n",
     "from functools import partial\n",
@@ -101,8 +106,7 @@
    "source": [
     "in_folder = Path(in_folder)\n",
     "out_folder = Path(out_folder)\n",
-    "ped_dir = in_folder / f'r{run:04d}'\n",
-    "run_dc = RunDirectory(ped_dir)\n",
+    "run_dc = RunDirectory(in_folder / f'r{run:04d}')\n",
     "instrument_src = instrument_source_template.format(karabo_id, receiver_template)\n",
     "\n",
     "if out_folder.exists() and not overwrite:\n",
@@ -110,9 +114,6 @@
     "else:\n",
     "    out_folder.mkdir(parents=True, exist_ok=True)\n",
     "\n",
-    "fp_name_contr = path_template.format(run, karabo_da_control, 0)\n",
-    "fp_path_contr = ped_dir / fp_name_contr\n",
-    "\n",
     "if sequences[0] == -1:\n",
     "    sequences = None\n",
     "\n",
@@ -327,7 +328,7 @@
     "\n",
     "    g[g==3] = 2\n",
     "\n",
-    "    if 0 <= index < 100:\n",
+    "    if 0 <= index < plt_imgs:\n",
     "        r_data[index, ...] = d[0, ...]\n",
     "\n",
     "    # Select memory cells\n",
@@ -356,11 +357,11 @@
     "    msk = np.choose(g, np.moveaxis(mask_cell, -1, 0))\n",
     "\n",
     "    # Store sample of data for plotting\n",
-    "    if 0 <= index < 100:\n",
+    "    if 0 <= index < plt_imgs:\n",
     "        if memory_cells == 1:\n",
-    "            g_data[index, ...] = copy.copy(g)\n",
+    "            g_data[index, ...] = g\n",
     "        else:\n",
-    "            g_data[index, ...] = copy.copy(g[1, ...])\n",
+    "            g_data[index, ...] = g[1, ...]\n",
     "\n",
     "    data_corr[index, ...] = d\n",
     "    mask_corr[index, ...] = msk"
@@ -403,7 +404,6 @@
     "for local_karabo_da, mapped_files_module in mapped_files.items():\n",
     "    instrument_src_kda = instrument_src.format(int(local_karabo_da[-2:]))\n",
     "    data_path = \"INSTRUMENT/\"+instrument_src_kda+\"/data\"\n",
-    "    print(instrument_src, instrument_src_kda)\n",
     "\n",
     "    for sequence_file_number, sequence_file in enumerate(mapped_files_module.queue):  # noqa\n",
     "        sequence_file = Path(sequence_file)\n",
@@ -431,10 +431,11 @@
     "            n_imgs = min(n_imgs, limit_imgs)\n",
     "        print(f\"\\nNumber of images to correct: {n_imgs} for {out_file}\")\n",
     "        if n_trains - dshape[0] != 0:\n",
-    "            print(f\"WARNING: {sequence_file} has {n_trains - dshape[0]} \"\n",
+    "            print(f\"\\tWARNING: {sequence_file} has {n_trains - dshape[0]} \"\n",
     "                  \"trains with empty data.\")\n",
     "\n",
-    "        plt_imgs = min(100, n_imgs)\n",
+    "        # In case n_imgs is less than the chosen plt_imgs.\n",
+    "        plt_imgs = min(plt_imgs, n_imgs)\n",
     "\n",
     "        # load constants from the constants dictionary.\n",
     "        offset_map, mask, gain_map = constants[local_karabo_da]\n",
@@ -490,7 +491,7 @@
     "            write_compressed_frames(\n",
     "                mask_corr,\n",
     "                ofile,\n",
-    "                arr_source=f\"{data_path}/mask\",\n",
+    "                dataset_path=f\"{data_path}/mask\",\n",
     "                comp_threads=n_cpus,\n",
     "            )\n",
     "\n",
@@ -499,15 +500,11 @@
     "        # Prepare plotting arrays\n",
     "        step_timer.start()\n",
     "        if memory_cells == 1:\n",
-    "            fim_data[local_karabo_da] = copy.copy(\n",
-    "                data_corr[:plt_imgs, ...])\n",
-    "            msk_data[local_karabo_da] = copy.copy(\n",
-    "                mask_corr[:plt_imgs, ...])\n",
+    "            fim_data[local_karabo_da] = data_corr[:plt_imgs, ...].copy()\n",
+    "            msk_data[local_karabo_da] = mask_corr[:plt_imgs, ...].copy()\n",
     "        else:\n",
-    "            fim_data[local_karabo_da] = copy.copy(data_corr[\n",
-    "                :plt_imgs, 1, ...])\n",
-    "            msk_data[local_karabo_da] = copy.copy(mask_corr[\n",
-    "                :plt_imgs, 1, ...])\n",
+    "            fim_data[local_karabo_da] = data_corr[:plt_imgs, 1, ...].copy()\n",
+    "            msk_data[local_karabo_da] = mask_corr[:plt_imgs, 1, ...].copy()\n",
     "\n",
     "        gim_data[local_karabo_da] = g_data\n",
     "        rim_data[local_karabo_da] = r_data\n",
diff --git a/notebooks/Jungfrau/Jungfrau_dark_analysis_all_gains_burst_mode_NBC.ipynb b/notebooks/Jungfrau/Jungfrau_dark_analysis_all_gains_burst_mode_NBC.ipynb
index a4c9c4e15..06ff820f0 100644
--- a/notebooks/Jungfrau/Jungfrau_dark_analysis_all_gains_burst_mode_NBC.ipynb
+++ b/notebooks/Jungfrau/Jungfrau_dark_analysis_all_gains_burst_mode_NBC.ipynb
@@ -23,26 +23,24 @@
     "run_med = 142 # run number for G1 dark run, required\n",
     "run_low = 143 # run number for G2 dark run, required\n",
     "\n",
+    "# Parameters used to access raw data.\n",
     "karabo_da = ['JNGFR01', 'JNGFR02','JNGFR03','JNGFR04', 'JNGFR05', 'JNGFR06','JNGFR07','JNGFR08'] # list of data aggregators, which corresponds to different JF modules\n",
     "karabo_id = \"SPB_IRDA_JF4M\"  # karabo_id (detector identifier) prefix of Jungfrau detector to process.\n",
     "karabo_id_control = \"\"  # if control is on a different ID, set to empty string if it is the same a karabo-id\n",
-    "receiver_id = 'JNGFR{:02}' # inset for receiver devices\n",
+    "receiver_template = 'JNGFR{:02}' # template for receiver device names, filled with the module number. e.g. 'JNGFR01'\n",
     "receiver_control_id = \"CONTROL\" # inset for control devices\n",
     "path_template = 'RAW-R{:04d}-{}-S{{:05d}}.h5'  # template to use for file name, double escape sequence number\n",
-    "h5path = '{}/DET/{}:daqOutput'  # template for source name (filled with karabo_id & receiver_id)\n",
+    "instrument_source_template = '{}/DET/{}:daqOutput'  # template for source name (filled with karabo_id & receiver_id). e.g. 'SPB_IRDA_JF4M/DET/JNGFR01:daqOutput'\n",
     "karabo_da_control = \"JNGFRCTRL00\" # file inset for control data\n",
-    "db_module = \"\"  # ID of module in calibration database\n",
     "\n",
+    "# Parameters for calibration database and storing constants.\n",
     "use_dir_creation_date = True  # use dir creation date\n",
     "cal_db_interface = 'tcp://max-exfl016:8016'  # calibrate db interface to connect to\n",
     "cal_db_timeout = 300000 # timeout on caldb requests\n",
     "local_output = True  # output constants locally\n",
     "db_output = False  # output constants to database\n",
     "\n",
-    "integration_time = 1000 # integration time in us, will be overwritten by value in file\n",
-    "gain_setting = 0  # 0 for dynamic, forceswitchg1, forceswitchg2, 1 for dynamichg0, fixedgain1, fixgain2. Will be overwritten by value in file\n",
-    "bias_voltage = 90  # sensor bias voltage in V, will be overwritten by value in file\n",
-    "memory_cells = 16  # number of memory cells\n",
+    "# Parameters affecting creating dark calibration constants.\n",
     "badpixel_threshold_sigma = 5.  # bad pixels defined by values outside n times this std from median\n",
     "offset_abs_threshold_low = [1000, 10000, 10000]  # absolute bad pixel threshold in terms of offset, lower values\n",
     "offset_abs_threshold_high = [8000, 15000, 15000]  # absolute bad pixel threshold in terms of offset, upper values\n",
@@ -50,7 +48,18 @@
     "min_trains = 1  # Minimum number of trains that should be available to process dark constants. Default 1.\n",
     "manual_slow_data = False  # if true, use manually entered bias_voltage and integration_time values\n",
     "time_limits = 0.025  # to find calibration constants later on, the integration time is allowed to vary by 0.5 us\n",
-    "operation_mode = ''  # Detector operation mode, optional"
+    "\n",
+    "# Parameters to be used for injecting dark calibration constants.\n",
+    "integration_time = 1000 # integration time in us, will be overwritten by value in file\n",
+    "gain_setting = 0  # 0 for dynamic, forceswitchg1, forceswitchg2, 1 for dynamichg0, fixedgain1, fixgain2. Will be overwritten by value in file\n",
+    "bias_voltage = 90  # sensor bias voltage in V, will be overwritten by value in file\n",
+    "memory_cells = 16  # number of memory cells\n",
+    "\n",
+    "# Don't remove. myMDC sends this by default.\n",
+    "operation_mode = ''  # Detector operation mode, optional\n",
+    "\n",
+    "# TODO: Remove\n",
+    "db_module = \"\"  # ID of module in calibration database.  TODO: remove from calibration_configurations."
    ]
   },
   {
@@ -103,7 +112,7 @@
    "source": [
     "# Constants relevant for the analysis\n",
     "run_nums = [run_high, run_med, run_low]  # run number for G0/HG0, G1, G2\n",
-    "sensor_size = [1024, 512]\n",
+    "sensor_size = (1024, 512)\n",
     "gains = [0, 1, 2]\n",
     "\n",
     "creation_time = None\n",
@@ -133,70 +142,89 @@
     "report = get_report(out_folder)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Reading control data"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "tags": []
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
+    "gain_runs = dict()\n",
     "noise_map = dict()\n",
     "offset_map = dict()\n",
+    "gain_str = None\n",
     "\n",
-    "for mod in karabo_da:\n",
+    "for gain, run_n in enumerate(run_nums):\n",
+    "    run_dc = RunDirectory(f\"{in_folder}/r{run_n:04d}/\")\n",
+    "    gain_runs[run_n] = [gain, run_dc]\n",
     "\n",
-    "    h5path_f = h5path.format(\n",
-    "        karabo_id, receiver_id.format(int(mod[-2:])))\n",
+    "    # Read control data for the first gain only.\n",
+    "    if run_n == run_high:\n",
     "\n",
-    "    print(f\"\\nInstrument data path for {mod} is {h5path_f}.\")\n",
+    "        ctrl_data = jungfraulib.JFCtrl(run_dc, karabo_id_control)\n",
+    "        run_mcells, sc_start = ctrl_data.get_memory_cells()\n",
     "\n",
-    "    for gain, r_n in enumerate(run_nums):\n",
+    "        if not manual_slow_data:\n",
+    "            integration_time = ctrl_data.get_integration_time()\n",
+    "            bias_voltage = ctrl_data.get_bias_voltage()\n",
+    "            gain_str, gain_setting = ctrl_data.get_gain_setting()\n",
     "\n",
-    "        print(f\"Gain stage {gain}, run {r_n}\")\n",
+    "        print(f\"Gain setting is {gain_setting} ({gain_str})\")\n",
+    "        print(f\"Integration time is {integration_time} us\")\n",
+    "        print(f\"Bias voltage is {bias_voltage} V\")\n",
     "\n",
-    "        run_dc = RunDirectory(f\"{in_folder}/r{r_n:04d}/\")\n",
+    "        if run_mcells == 1:\n",
+    "            memory_cells = 1\n",
+    "            print('Dark runs in single cell mode, '\n",
+    "                  f'storage cell start: {sc_start:02d}')\n",
+    "        else:\n",
+    "            memory_cells = 16\n",
+    "            print('Dark runs in burst mode, '\n",
+    "                  f'storage cell start: {sc_start:02d}')\n",
     "\n",
-    "        # Read control data for the first gain only.\n",
-    "        if mod not in noise_map.keys():\n",
-    "            \n",
-    "            ctrl_data = jungfraulib.JFCtrl(run_dc, karabo_id_control)\n",
-    "            run_mcells, sc_start = ctrl_data.get_memory_cells()\n",
+    "# Initialize noise_map and offset_map module arrays.\n",
+    "for mod in karabo_da:\n",
+    "    noise_map[mod] = np.zeros(sensor_size+(memory_cells, 3))\n",
+    "    offset_map[mod] = np.zeros(sensor_size+(memory_cells, 3))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "for mod in karabo_da:\n",
     "\n",
-    "            if not manual_slow_data:\n",
-    "                integration_time = ctrl_data.get_integration_time()\n",
-    "                bias_voltage = ctrl_data.get_bias_voltage()\n",
-    "                if r_n == run_high:\n",
-    "                    gain_str, gain_setting = ctrl_data.get_gain_setting()\n",
+    "    instrument_src = instrument_source_template.format(\n",
+    "        karabo_id, receiver_template.format(int(mod[-2:])))\n",
     "\n",
-    "            print(f\"\\tGain setting is {gain_setting} ({gain_str})\")\n",
-    "            print(f\"\\tIntegration time is {integration_time} us\")\n",
-    "            print(f\"\\tBias voltage is {bias_voltage} V\")\n",
+    "    print(f\"\\n- Instrument data path for {mod} is {instrument_src}.\")\n",
     "\n",
-    "            if run_mcells == 1:\n",
-    "                memory_cells = 1\n",
-    "                print('\\tDark runs in single cell mode, '\n",
-    "                      f'storage cell start: {sc_start:02d}')\n",
-    "            else:\n",
-    "                memory_cells = 16\n",
-    "                print('\\tDark runs in burst mode, '\n",
-    "                      f'storage cell start: {sc_start:02d}')\n",
+    "    for run_n, [gain, run_dc] in gain_runs.items():\n",
     "\n",
-    "            noise_map[mod] = np.zeros(sensor_size+[memory_cells, 3])\n",
-    "            offset_map[mod] = np.zeros(sensor_size+[memory_cells, 3])\n",
+    "        print(f\"Gain stage {gain}, run {run_n}\")\n",
     "\n",
     "        # load shape of data for memory cells, and detector size (imgs, cells, x, y)\n",
-    "        n_imgs = run_dc[h5path_f, \"data.adc\"].shape[0]\n",
+    "        n_imgs = run_dc[instrument_src, \"data.adc\"].shape[0]\n",
     "        \n",
     "        if max_trains > 0:\n",
     "            n_imgs = min(n_imgs, max_trains)\n",
     "        # load number of data available, including trains with empty data.\n",
-    "        n_trains = run_dc.get_data_counts(h5path_f, \"data.adc\").shape[0]\n",
+    "        n_trains = run_dc.get_data_counts(instrument_src, \"data.adc\").shape[0]\n",
     "\n",
-    "        instr_dc = run_dc.select(h5path_f, require_all=True).select_trains(np.s_[:n_imgs])\n",
+    "        instr_dc = run_dc.select(instrument_src, require_all=True).select_trains(np.s_[:n_imgs])\n",
     "\n",
     "        if n_trains-n_imgs != 0:\n",
-    "            print(f\"WARNING: {instr_dc.files[0].filename} has {n_trains-n_imgs} \"\n",
+    "            print(f\"\\tWARNING: {instr_dc.files[0].filename} has {n_trains-n_imgs} \"\n",
     "                  f\"trains with empty data out of {n_trains} trains.\")\n",
     "\n",
     "        if n_imgs < min_trains:\n",
@@ -205,14 +233,14 @@
     "                 \" Not enough data to process darks.\")\n",
     "\n",
     "        images = np.transpose(\n",
-    "            instr_dc[h5path_f, \"data.adc\"].ndarray(), (3, 2, 1, 0))\n",
+    "            instr_dc[instrument_src, \"data.adc\"].ndarray(), (3, 2, 1, 0))\n",
     "\n",
     "        roi=np.s_[:1]\n",
     "        if gain > 0 and memory_cells == 16:\n",
     "            roi=np.s_[:]\n",
     "\n",
     "        acelltable = np.transpose(\n",
-    "            instr_dc[h5path_f, \"data.memoryCell\"].ndarray(roi=roi), (1, 0))\n",
+    "            instr_dc[instrument_src, \"data.memoryCell\"].ndarray(roi=roi), (1, 0))\n",
     "\n",
     "        if memory_cells == 1:\n",
     "            acelltable -= sc_start\n",
diff --git a/src/cal_tools/tools.py b/src/cal_tools/tools.py
index db228eafd..c7c3c249b 100644
--- a/src/cal_tools/tools.py
+++ b/src/cal_tools/tools.py
@@ -770,7 +770,7 @@ class CalibrationMetadata(dict):
 def write_compressed_frames(
         arr: np.ndarray,
         ofile: h5py.File,
-        arr_source: str,
+        dataset_path: str,
         comp_threads: int = 1):
     """Compress gain/mask frames in multiple threads, and save their data
 
@@ -789,7 +789,7 @@ def write_compressed_frames(
     # gain/mask compressed with gzip level 1, but not
     # checksummed as we would have to implement this.
     dataset = ofile.create_dataset(
-        arr_source,
+        dataset_path,
         shape=arr.shape,
         chunks=((1,) + arr.shape[1:]),
         compression="gzip",
-- 
GitLab