From 952e55303740d604d44891df9667610839eced26 Mon Sep 17 00:00:00 2001
From: Karim Ahmed <karim.ahmed@xfel.eu>
Date: Thu, 19 Nov 2020 17:31:27 +0100
Subject: [PATCH] Agipd corr mod

---
 cal_tools/cal_tools/agipdlib.py               | 124 +++++++++++-------
 cal_tools/cal_tools/agipdutils.py             |   2 +-
 .../AGIPD/AGIPD_Correct_and_Verify.ipynb      |  74 +++++++----
 3 files changed, 127 insertions(+), 73 deletions(-)

diff --git a/cal_tools/cal_tools/agipdlib.py b/cal_tools/cal_tools/agipdlib.py
index 1b4b976d6..5451d5b93 100644
--- a/cal_tools/cal_tools/agipdlib.py
+++ b/cal_tools/cal_tools/agipdlib.py
@@ -209,6 +209,8 @@ class AgipdCorrections:
         self.baseline_corr_noise_threshold = -1000
         self.snow_resolution = SnowResolution.INTERPOLATE
         self.cm_dark_fraction = 0.66
+        self.cm_dark_min = -25.
+        self.cm_dark_max = 25.
         self.cm_n_itr = 4
         self.mg_hard_threshold = 100
         self.hg_hard_threshold = 100
@@ -271,7 +273,7 @@ class AgipdCorrections:
             f = h5py.File(file_name, 'r')
             group = f[data_path]
 
-            valid, first_index, last_index, valid_trains, valid_indices = \
+            _, first_index, last_index, __, valid_indices = \
                 self.get_valid_image_idx(idx_base, f)
             firange = self.gen_valid_range(first_index, last_index,
                                            self.max_cells, agipd_base, f,
@@ -342,7 +344,8 @@ class AgipdCorrections:
                 arr = data_dict[field][:n_img]
                 kw = {'fletcher32': True}
                 if field in compress_fields:
-                    kw.update(compression='gzip', compression_opts=1, shuffle=True)
+                    kw.update(compression='gzip', compression_opts=1,
+                              shuffle=True)
                 if arr.ndim > 1:
                     kw['chunks'] = (1,) + arr.shape[1:]  # 1 chunk = 1 image
 
@@ -364,17 +367,18 @@ class AgipdCorrections:
 
         if not self.corr_bools.get("common_mode"):
             return
-
+        dark_min = self.cm_dark_min
+        dark_max = self.cm_dark_max
         fraction = self.cm_dark_fraction
         n_itr = self.cm_n_itr
-
+        
         cell_id = self.shared_dict[i_proc]['cellId']
         train_id = self.shared_dict[i_proc]['trainId']
         n_img = self.shared_dict[i_proc]['nImg'][0]
         cell_ids = cell_id[train_id == train_id[0]]
         n_cells = cell_ids.size
-        data = self.shared_dict[i_proc]['data'][:n_img].reshape(-1, n_cells, 8,
-                                                                64, 2, 64)
+        data = self.shared_dict[i_proc]['data'][:n_img].reshape(-1, n_cells,
+                                                                8, 64, 2, 64)
 
         # Loop over iterations
         for i in range(n_itr):
@@ -388,7 +392,8 @@ class AgipdCorrections:
 
                 # Cell common mode
                 cell_cm_sum, cell_cm_count = \
-                    calgs.sum_and_count_in_range_cell(asic_data, -25., 25.)
+                    calgs.sum_and_count_in_range_cell(asic_data, dark_min,
+                                                      dark_max)
                 cell_cm = cell_cm_sum / cell_cm_count
 
                 cell_cm[cell_cm_count < fraction * 32 * 256] = 0
@@ -396,7 +401,8 @@ class AgipdCorrections:
 
                 # Asics common mode
                 asic_cm_sum, asic_cm_count = \
-                    calgs.sum_and_count_in_range_asic(asic_data, -25., 25.)
+                    calgs.sum_and_count_in_range_asic(asic_data, dark_min,
+                                                      dark_max)
                 asic_cm = asic_cm_sum / asic_cm_count
 
                 asic_cm[asic_cm_count < fraction * 64 * 64] = 0
@@ -513,6 +519,7 @@ class AgipdCorrections:
                 # if not we continue with initial data
                 else:
                     dd = data[i]
+                    sh = 0
 
                 # if we have enough pixels in medium or low gain and
                 # correction via hist matching is requested to this now
@@ -560,9 +567,12 @@ class AgipdCorrections:
             data[gain == 1] += mgbc[gain == 1]
             del mgbc
 
-        # Do xray correction if requested
+        # Do xray correction if requested 
+        # The slopes we have in our constants are already relative
+        # slopeFF = slopeFFpix/avarege(slopeFFpix)
+        # To apply them we have to / not *
         if self.corr_bools.get("xray_corr"):
-            data *= self.xray_cor[module_idx]
+            data /= self.xray_cor[module_idx]
 
         if self.corr_bools.get('melt_snow'):
             melt_snowy_pixels(raw_data, data, gain, rgain,
@@ -751,8 +761,8 @@ class AgipdCorrections:
             uq, fidxv, cntsv = np.unique(trains, return_index=True,
                                          return_counts=True)
 
-            # Validate calculated CORR INDEX contents by checking difference between
-            # trainId stored in RAW data and trains from
+            # Validate calculated CORR INDEX contents by checking 
+            # difference between trainId stored in RAW data and trains from
             train_diff = np.isin(np.array(infile["/INDEX/trainId"]), uq,
                                  invert=True)
 
@@ -837,43 +847,38 @@ class AgipdCorrections:
         to medium gain is used, as no reliable CI data for all memory cells
         exists of the current AGIPD instances.
 
-        Relative gain is derived both from pulse capacitor as well as flat
-        field data:
-
-        * from the pulse capacitor data we get the relative slopes of a
-          given pixel's memory cells with respect to all memory cells of that
-          pixel:
+        Relative gain is derived both from pulse capacitor as well as low
+        intensity flat field data, information from flat field data is 
+        needed to 'calibrate' pulse capacitor data, if there is no 
+        available FF data, relative gain for High Gain stage is set to 1:
 
-             rpch = m_h / median(m_h)
+        * Relative gain for High gain stage - from the FF data we get 
+          the relative slopes of a given pixel and memory cells with 
+          respect to all memory cells and all pixels in the module,
+          Please note: Current slopesFF avaialble in calibibration
+          constants are created per pixel only, not per memory cell:
 
-          where m_h is the high gain slope m of each memory cell of the pixel.
+             rel_high_gain = 1 if only PC data is available
+             rel_high_gain = rel_slopesFF if FF data is also available
 
-        * we also derive the factor between high and medium gain in a
-          similar way and scale it to be relative to the pixels memory cells:
+          
 
-             fpc = m_m/m_h
-             rfpc = fpc/ median(fpc)
+        * Relative gain for Medium gain stage: we derive the factor
+          between high and medium gain using slope information from
+          fits to the linear part of high and medium gain:
 
-          where m_m is the medium gain slope of all memory cells of a given
-          pixel and m_h is the high gain slope as above
+             rfpc_high_medium = m_h/m_m          
 
-        * finally, we get the relative X-ray gain of all memory cells for a
-          given pixel from flat field data:
-
-             ff = median(m_ff)
-             ff /= median(ff)
-
-          where m_ff is the flat field derived (high gain) slope of all
-          memory cells of a given pixel. The pixel values are then scaled to
-          the complete module_idx. Note that the first 32 memory cells are known
-          to exhibit differing behaviour and are skipped if possible.
+          where m_h and m_m is the medium gain slope of given memory cells 
+          and pixel and m_h is the high gain slope as above
+           rel_gain_medium = rel_high_gain * rfpc_high_medium
 
         With this data the relative gain for the three gain stages evaluates
         to:
 
-            high gain = ff * rpch
-            medium gain = ff * rfpc
-            low gain = medium gain / 4.48
+            rel_high gain = 1 or rel_slopesFF
+            rel_medium gain = rel_high_gain * rfpc_high_medium
+            rel_low gain = _rel_medium gain * 4.48
 
         :param cons_data: A dictionary for each retrieved constant value.
         :param module_idx: A module_idx index
@@ -910,8 +915,16 @@ class AgipdCorrections:
             else:
                 xray_cor = np.squeeze(slopesFF[..., 0])
 
-            # relative X-ray correction is normalized by the median of all pixels
-            xray_cor /= np.nanmedian(xray_cor)
+            # relative X-ray correction is normalized by the median
+            # of all pixels
+
+            # TODO: A check is required to know why it is again divided by 
+            # median. If we have relative slopes in the constants
+            # and (we have!) 
+            # xray cor = (slopeFF/avarege_slopeFF)/avarege_slopeFF.
+            # It didn't not make sense and was removed. 
+            # xray_cor /= np.nanmedian(xray_cor)
+
             self.xray_cor[module_idx][...] = xray_cor.transpose()[...]
 
         # add additional bad pixel information
@@ -921,7 +934,7 @@ class AgipdCorrections:
 
             slopesPC = cons_data["SlopesPC"].astype(np.float32)
 
-            # this will handle some historical data in a different format
+            # This will handle some historical data in a different format
             # constant dimension injected first
             if slopesPC.shape[0] == 10 or slopesPC.shape[0] == 11:
                 slopesPC = np.moveaxis(slopesPC, 0, 3)
@@ -936,23 +949,38 @@ class AgipdCorrections:
             pc_med_m = slopesPC[..., :self.max_cells, 3]
             pc_med_l = slopesPC[..., :self.max_cells, 4]
 
-            # calculate ratio high to medium gain over memory cells
+            # calculate ratio high to medium gain 
             pc_high_ave = np.nanmean(pc_high_m, axis=(0,1))
             pc_med_ave = np.nanmean(pc_med_m, axis=(0,1))
+            # ration between HG and MG per pixel per mem cell 
+            # used for rel gain calculation
+            frac_high_med_pix = pc_high_m / pc_med_m
+            # avarage ration between HG and MG as a function of 
+            # mem cell (needed for bls_stripes)
+            # TODO: Per pixel would be more optimal correction
             frac_high_med = pc_high_ave / pc_med_ave
-
             # calculate additional medium-gain offset
             md_additional_offset = pc_high_l - pc_med_l * pc_high_m / pc_med_m
 
-            # Calculate relative gain
-            rel_gain[..., 0] = pc_high_m / pc_high_ave
-            rel_gain[..., 1] = pc_med_m / pc_med_ave * frac_high_med
+            # Calculate relative gain. If FF constants are available,
+            # use them for high gain
+            # if not rel_gain is calculated using PC data only
+            # if self.corr_bools.get("xray_corr"):
+            #     rel_gain[..., :self.max_cells, 0] /= xray_corr 
+
+            # PC data should be 'calibrated with X-ray data, 
+            # if it is not done, it is better to use 1 instead of bias 
+            # the results with PC arteffacts. 
+            # rel_gain[..., 0] = 1./(pc_high_m / pc_high_ave)
+
+            # High-gain (rel_gain[..., 0]) stays the same
+            rel_gain[..., 1] = rel_gain[..., 0] * frac_high_med_pix
             rel_gain[..., 2] = rel_gain[..., 1] * 4.48
 
             self.md_additional_offset[module_idx][...] = md_additional_offset.transpose()[...]  # noqa
             self.rel_gain[module_idx][...] = rel_gain[...].transpose()
             self.frac_high_med[module_idx][...] = frac_high_med
-
+            
         self.mask[module_idx][...] = bpixels.transpose()[...]
 
         return
diff --git a/cal_tools/cal_tools/agipdutils.py b/cal_tools/cal_tools/agipdutils.py
index daad14eb3..c6a7ae4a6 100644
--- a/cal_tools/cal_tools/agipdutils.py
+++ b/cal_tools/cal_tools/agipdutils.py
@@ -141,7 +141,7 @@ def baseline_correct_via_stripe(d, g, m, frac_high_med):
     if len(idx) < 3:
         return d, 0
 
-    shift = np.nanmean(dd[idx, :])
+    shift = np.nanmedian(dd[idx, :])
     d[g == 0] -= shift
     d[g == 1] -= shift / frac_high_med
     return d, shift
diff --git a/notebooks/AGIPD/AGIPD_Correct_and_Verify.ipynb b/notebooks/AGIPD/AGIPD_Correct_and_Verify.ipynb
index e59571212..6bc8f66da 100644
--- a/notebooks/AGIPD/AGIPD_Correct_and_Verify.ipynb
+++ b/notebooks/AGIPD/AGIPD_Correct_and_Verify.ipynb
@@ -55,12 +55,12 @@
     "\n",
     "# Correction parameters\n",
     "blc_noise_threshold = 5000 # above this mean signal intensity now baseline correction via noise is attempted\n",
-    "chunk_size_idim = 1  # chunking size of imaging dimension, adjust if user software is sensitive to this.\n",
+    "cm_dark_fraction = 0.66 # threshold for fraction of  empty pixels to consider module enough dark to perform CM correction\n",
+    "cm_dark_range = [-50.,30] # range for signal value ADU for pixel to be consider as a dark pixel\n",
+    "cm_n_itr = 4 # number of iterations for common mode correction\n",
     "hg_hard_threshold = 1000 # threshold to force medium gain offset subtracted pixel to high gain\n",
     "mg_hard_threshold = 1000 # threshold to force medium gain offset subtracted pixel from low to medium gain\n",
     "noisy_adc_threshold = 0.25 # threshold to mask complete adc\n",
-    "cm_dark_fraction = 0.66 # threshold for empty pixels to consider module enough dark to perform CM correction\n",
-    "cm_n_itr = 4 # number of iterations for common mode correction\n",
     "\n",
     "# Correction Booleans\n",
     "only_offset = False # Apply only Offset correction. if False, Offset is applied by Default. if True, Offset is only applied.\n",
@@ -75,19 +75,20 @@
     "zero_orange = False # set to 0 very negative and very large values in corrected data\n",
     "blc_set_min = False # Shift to 0 negative medium gain pixels after offset corr\n",
     "corr_asic_diag = False # if set, diagonal drop offs on ASICs are correted\n",
-    "force_hg_if_below = True # set high gain if mg offset subtracted value is below hg_hard_threshold\n",
-    "force_mg_if_below = True # set medium gain if mg offset subtracted value is below mg_hard_threshold\n",
+    "force_hg_if_below = False # set high gain if mg offset subtracted value is below hg_hard_threshold\n",
+    "force_mg_if_below = False # set medium gain if mg offset subtracted value is below mg_hard_threshold\n",
     "mask_noisy_adc = False # Mask entire ADC if they are noise above a relative threshold\n",
-    "common_mode = True # Common mode correction\n",
+    "common_mode = False # Common mode correction\n",
     "melt_snow = False # Identify (and optionally interpolate) 'snowy' pixels\n",
     "mask_zero_std = False # Mask pixels with zero standard deviation across train\n",
-    "low_medium_gap = True # 5% separation in thresholding between low and medium gain\n",
+    "low_medium_gap = False # 5 sigma separation in thresholding between low and medium gain\n",
     "\n",
     "# Paralellization parameters\n",
-    "sequences_per_node = 2 # number of sequence files per cluster node if run as slurm job, set to 0 to not run SLURM parallel\n",
     "chunk_size = 1000 # Size of chunk for image-weise correction\n",
+    "chunk_size_idim = 1  # chunking size of imaging dimension, adjust if user software is sensitive to this.\n",
     "n_cores_correct = 16 # Number of chunks to be processed in parallel\n",
     "n_cores_files = 4 # Number of files to be processed in parallel\n",
+    "sequences_per_node = 2 # number of sequence files per cluster node if run as slurm job, set to 0 to not run SLURM parallel\n",
     "\n",
     "def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):\n",
     "    from xfel_calibrate.calibrate import balance_sequences as bs\n",
@@ -131,13 +132,18 @@
     "matplotlib.use(\"agg\")\n",
     "%matplotlib inline\n",
     "import numpy as np\n",
+    "import seaborn as sns\n",
+    "sns.set()\n",
+    "sns.set_context(\"paper\", font_scale=1.4)\n",
+    "sns.set_style(\"ticks\")\n",
     "\n",
-    "from cal_tools.agipdlib import (AgipdCorrections, get_num_cells, get_acq_rate, get_gain_setting)\n",
+    "from cal_tools.agipdlib import (AgipdCorrections, get_acq_rate,\n",
+    "                                get_gain_setting, get_num_cells)\n",
     "from cal_tools.cython import agipdalgs as calgs\n",
     "from cal_tools.ana_tools import get_range\n",
     "from cal_tools.enums import BadPixels\n",
     "from cal_tools.tools import get_dir_creation_date, map_modules_from_folder\n",
-    "from cal_tools.step_timing import StepTimer\n"
+    "from cal_tools.step_timing import StepTimer"
    ]
   },
   {
@@ -383,6 +389,8 @@
     "agipd_corr.hg_hard_threshold = hg_hard_threshold\n",
     "agipd_corr.mg_hard_threshold = mg_hard_threshold\n",
     "\n",
+    "agipd_corr.cm_dark_min = cm_dark_range[0]\n",
+    "agipd_corr.cm_dark_max = cm_dark_range[1]\n",
     "agipd_corr.cm_dark_fraction = cm_dark_fraction\n",
     "agipd_corr.cm_n_itr = cm_n_itr\n",
     "agipd_corr.noisy_adc_threshold = noisy_adc_threshold\n"
@@ -811,7 +819,10 @@
    "source": [
     "fig = plt.figure(figsize=(20, 10))\n",
     "ax = fig.add_subplot(111)\n",
-    "h = ax.hist(blshift.flatten(), bins=100, log=True)"
+    "h = ax.hist(blshift.flatten(), bins=100, log=True)\n",
+    "_ = plt.xlabel('Baseline shift [ADU]')\n",
+    "_ = plt.ylabel('Counts')\n",
+    "_ = ax.grid()"
    ]
   },
   {
@@ -823,7 +834,8 @@
     "fig = plt.figure(figsize=(10, 10))\n",
     "corrected_ave = np.nansum(corrected, axis=(2, 3))\n",
     "plt.scatter(corrected_ave.flatten()/10**6, blshift.flatten(), s=0.9)\n",
-    "\n",
+    "plt.xlim(-1, 1000)\n",
+    "plt.grid()\n",
     "plt.xlabel('Illuminated corrected [MADU] ')\n",
     "_ = plt.ylabel('Estimated baseline shift [ADU]')"
    ]
@@ -892,7 +904,8 @@
    "source": [
     "fig = plt.figure(figsize=(20, 10))\n",
     "ax = fig.add_subplot(111)\n",
-    "vmin, vmax = get_range(corrected[cell_id_preview], 7)\n",
+    "vmin, vmax = get_range(corrected[cell_id_preview], 7, -50)\n",
+    "vmin = - 50\n",
     "ax = geom.plot_data_fast(corrected[cell_id_preview], ax=ax, cmap=\"jet\", vmin=vmin, vmax=vmax)"
    ]
   },
@@ -909,8 +922,14 @@
    "source": [
     "fig = plt.figure(figsize=(20, 10))\n",
     "ax = fig.add_subplot(111)\n",
-    "h = ax.hist(corrected[cell_id_preview].flatten(), bins=1000, range=(-50, 2000), log=True)\n",
-    "_ = plt.xlabel('[ADU]')"
+    "vmin, vmax = get_range(corrected[cell_id_preview], 5, -50)\n",
+    "nbins = np.int((vmax + 50) / 2)\n",
+    "h = ax.hist(corrected[cell_id_preview].flatten(), \n",
+    "            bins=nbins, range=(-50, vmax), \n",
+    "            histtype='stepfilled', log=True)\n",
+    "_ = plt.xlabel('[ADU]')\n",
+    "_ = plt.ylabel('Counts')\n",
+    "_ = ax.grid()"
    ]
   },
   {
@@ -937,8 +956,8 @@
     "fig = plt.figure(figsize=(20, 10))\n",
     "ax = fig.add_subplot(111)\n",
     "data = np.mean(corrected, axis=0)\n",
-    "vmin, vmax = get_range(data, 5)\n",
-    "ax = geom.plot_data_fast(data, ax=ax, cmap=\"jet\", vmin=vmin, vmax=vmax)"
+    "vmin, vmax = get_range(data, 7)\n",
+    "ax = geom.plot_data_fast(data, ax=ax, cmap=\"jet\", vmin=-50, vmax=vmax)"
    ]
   },
   {
@@ -954,16 +973,23 @@
    "source": [
     "fig = plt.figure(figsize=(20, 10))\n",
     "ax = fig.add_subplot(111)\n",
-    "h = ax.hist(corrected.flatten(), bins=1200,\n",
-    "            range=(-200, 20000), histtype='step', log=True, label = 'All')\n",
-    "_ = ax.hist(corrected[gains == 0].flatten(), bins=1200, range=(-200, 20000),\n",
+    "vmin, vmax = get_range(corrected, 10, -100)\n",
+    "vmax = np.nanmax(corrected)\n",
+    "if vmax > 50000:\n",
+    "    vmax=50000\n",
+    "nbins = np.int((vmax + 100) / 5)\n",
+    "h = ax.hist(corrected.flatten(), bins=nbins,\n",
+    "            range=(-100, vmax), histtype='step', log=True, label = 'All')\n",
+    "_ = ax.hist(corrected[gains == 0].flatten(), bins=nbins, range=(-100, vmax),\n",
     "            alpha=0.5, log=True, label='High gain', color='green')\n",
-    "_ = ax.hist(corrected[gains == 1].flatten(), bins=1200, range=(-200, 20000),\n",
+    "_ = ax.hist(corrected[gains == 1].flatten(), bins=nbins, range=(-100, vmax),\n",
     "            alpha=0.5, log=True, label='Medium gain', color='red')\n",
-    "_ = ax.hist(corrected[gains == 2].flatten(), bins=1200,\n",
-    "            range=(-200, 20000), alpha=0.5, log=True, label='Low gain', color='yellow')\n",
+    "_ = ax.hist(corrected[gains == 2].flatten(), bins=nbins,\n",
+    "            range=(-100, vmax), alpha=0.5, log=True, label='Low gain', color='yellow')\n",
     "_ = ax.legend()\n",
-    "_ = plt.xlabel('[ADU]')"
+    "_ = ax.grid()\n",
+    "_ = plt.xlabel('[ADU]')\n",
+    "_ = plt.ylabel('Counts')\n"
    ]
   },
   {
-- 
GitLab