From 711f8de79df5eadc663527904b7576dd428adf23 Mon Sep 17 00:00:00 2001
From: karnem <mikhail.karnevskiy@desy.de>
Date: Thu, 19 Sep 2019 14:19:32 +0200
Subject: [PATCH] enable shift correction and detector choise

---
 ...aracterize_AGIPD_Gain_FlatFields_NBC.ipynb | 369 ++++++++++++++----
 1 file changed, 301 insertions(+), 68 deletions(-)

diff --git a/notebooks/AGIPD/Characterize_AGIPD_Gain_FlatFields_NBC.ipynb b/notebooks/AGIPD/Characterize_AGIPD_Gain_FlatFields_NBC.ipynb
index 1095c9e50..e4eebca38 100644
--- a/notebooks/AGIPD/Characterize_AGIPD_Gain_FlatFields_NBC.ipynb
+++ b/notebooks/AGIPD/Characterize_AGIPD_Gain_FlatFields_NBC.ipynb
@@ -24,21 +24,21 @@
    "execution_count": null,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2019-08-13T12:16:46.606616Z",
-     "start_time": "2019-08-13T12:16:46.597295Z"
+     "end_time": "2019-09-13T07:34:24.467244Z",
+     "start_time": "2019-09-13T07:34:24.457261Z"
     }
    },
    "outputs": [],
    "source": [
     "# the following lines should be adjusted depending on data\n",
-    "in_folder = '/gpfs/exfel/exp/MID/201931/p900090/raw/' # path to input data, required\n",
-    "modules = [2] # modules to work on, required, range allowed\n",
-    "out_folder = \"/gpfs/exfel/exp/MID/201931/p900091/usr/FF/\" # path to output to, required\n",
-    "runs = [201] # runs to use, required, range allowed\n",
-    "sequences = [0,] #,5,6,7,8,9,10] # sequences files to use, range allowed\n",
+    "in_folder = '/gpfs/exfel/exp/MID/201931/p900091/raw/' # path to input data, required\n",
+    "modules = [3] # modules to work on, required, range allowed\n",
+    "out_folder = \"/gpfs/exfel/exp/MID/201931/p900091/usr/FF/2.2\" # path to output to, required\n",
+    "runs = [484, 485,] # runs to use, required, range allowed\n",
+    "sequences = [0,1,2,3]#,4,5,6,7,8] #,5,6,7,8,9,10] # sequences files to use, range allowed\n",
     "cluster_profile = \"noDB\" # The ipcluster profile to use\n",
     "local_output = True # output constants locally\n",
-    "db_output = False # output constants to database\n",
+    "db_output = True # output constants to database\n",
     "bias_voltage = 300 # detector bias voltage\n",
     "cal_db_interface = \"tcp://max-exfl016:8026#8035\"  # the database interface to use\n",
     "mem_cells = 250  # number of memory cells used\n",
@@ -51,7 +51,7 @@
     "high_res_badpix_3d = False # set this to True if you need high-resolution 3d bad pixel plots. Runtime: ~ 1h\n",
     "db_input = True # retreive data from calibration database, setting offset-store will overwrite this\n",
     "deviation_threshold = 0.75 # peaks with an absolute location deviation larger than the medium are are considere bad\n",
-    "acqrate = 2.2\n",
+    "acqrate = 2.2 # acquisition rate\n",
     "dont_use_dir_creation_date = False"
    ]
   },
@@ -60,8 +60,8 @@
    "execution_count": null,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2019-08-13T12:16:47.951367Z",
-     "start_time": "2019-08-13T12:16:47.459504Z"
+     "end_time": "2019-09-13T07:34:24.830847Z",
+     "start_time": "2019-09-13T07:34:24.745094Z"
     }
    },
    "outputs": [],
@@ -181,8 +181,8 @@
    "execution_count": null,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2019-08-13T08:36:02.240191Z",
-     "start_time": "2019-08-13T08:35:43.917385Z"
+     "end_time": "2019-09-13T07:34:45.876476Z",
+     "start_time": "2019-09-13T07:34:25.656979Z"
     }
    },
    "outputs": [],
@@ -297,8 +297,8 @@
    "execution_count": null,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2019-08-13T08:27:32.034645Z",
-     "start_time": "2019-08-13T08:26:28.231221Z"
+     "end_time": "2019-09-13T07:51:37.815384Z",
+     "start_time": "2019-09-13T07:34:45.879121Z"
     }
    },
    "outputs": [],
@@ -309,13 +309,77 @@
     "    \n",
     "    Runs and sequences give the data to calculate histogram from\n",
     "    \"\"\"\n",
-    "    channel, offset, thresholds = inp\n",
+    "    channel, offset, thresholds, pc, noise = inp\n",
     "    \n",
     "    import XFELDetAna.xfelpycaltools as xcal\n",
     "    import numpy as np\n",
     "    import h5py\n",
     "    from XFELDetAna.util import env\n",
+    "    \n",
+    "    \n",
     "    env.iprofile = profile\n",
+    "    \n",
+    "    def baseline_correct_via_noise(d, noise, g, baseline_corr_noise_threshold=1000):\n",
+    "        \"\"\" Correct baseline shifts by evaluating position of the noise peak\n",
+    "\n",
+    "        :param: d: the data to correct, should be a single image\n",
+    "        :param noise: the mean noise for the cell id of `d`\n",
+    "        :param g: gain array matching `d` array\n",
+    "\n",
+    "        Correction is done by identifying the left-most (significant) peak\n",
+    "        in the histogram of `d` and shifting it to be centered on 0.\n",
+    "        This is done via a continous wavelet transform (CWT), using a Ricker\n",
+    "        wavelet.\n",
+    "\n",
+    "        Only high gain data is evaulated, and data larger than 50 ADU,\n",
+    "        equivalent of roughly a 9 keV photon is ignored.\n",
+    "\n",
+    "        Two passes are executed: the first shift is accurate to 10 ADU and\n",
+    "        will catch baseline shifts smaller then -2000 ADU. CWT is evaluated\n",
+    "        for peaks of widths one, three and five time the noise.\n",
+    "        The baseline is then shifted by the position of the summed CWT maximum.\n",
+    "\n",
+    "        In a second pass histogram is evaluated within a range of\n",
+    "        +- 5*noise of the initial shift value, for peak of width noise.\n",
+    "\n",
+    "        Baseline shifts larger than the maximum threshold or positive shifts\n",
+    "        larger 10 are discarded, and the original data is returned, otherwise\n",
+    "        the shift corrected data is returned.\n",
+    "\n",
+    "        \"\"\"\n",
+    "        import copy\n",
+    "        from scipy.signal import cwt, ricker\n",
+    "        # we work on a copy to be able to filter values by setting them to\n",
+    "        # np.nan\n",
+    "        dd = copy.copy(d)\n",
+    "        dd[g != 0] = np.nan  # only high gain data\n",
+    "        dd[dd > 50] = np.nan  # only noise data\n",
+    "        h, e = np.histogram(dd, bins=210, range=(-2000, 100))\n",
+    "        c = (e[1:] + e[:-1]) / 2\n",
+    "\n",
+    "        try:\n",
+    "            cwtmatr = cwt(h, ricker, [noise, 3. * noise, 5. * noise])\n",
+    "        except:\n",
+    "            return d\n",
+    "        cwtall = np.sum(cwtmatr, axis=0)\n",
+    "        marg = np.argmax(cwtall)\n",
+    "        pc = c[marg]\n",
+    "\n",
+    "        high_res_range = (dd > pc - 5 * noise) & (dd < pc + 5 * noise)\n",
+    "        dd[~high_res_range] = np.nan\n",
+    "        h, e = np.histogram(dd, bins=200,\n",
+    "                            range=(pc - 5 * noise, pc + 5 * noise))\n",
+    "        c = (e[1:] + e[:-1]) / 2\n",
+    "        try:\n",
+    "            cwtmatr = cwt(h, ricker, [noise, ])\n",
+    "        except:\n",
+    "            return d\n",
+    "        marg = np.argmax(cwtmatr)\n",
+    "        pc = c[marg]\n",
+    "        # too large shift to be easily decernable via noise\n",
+    "        if pc > 10 or pc < -baseline_corr_noise_threshold:\n",
+    "            return d\n",
+    "        return d - pc\n",
     "\n",
     "    \n",
     "    # function needs to be inline for parallell processing\n",
@@ -385,6 +449,14 @@
     "            g[ga < thresholds[...,c,1]] = 1\n",
     "            g[ga < thresholds[...,c,0]] = 0\n",
     "            d = offset_cor.correct(d, cellTable=c, gainMap=g)\n",
+    "            for i in range(d.shape[2]):\n",
+    "                mn_noise = np.nanmean(noise[...,c[i]])\n",
+    "                d[...,i] = baseline_correct_via_noise(d[...,i],\n",
+    "                                                      mn_noise,\n",
+    "                                                      g[..., i])\n",
+    "            \n",
+    "            d *= np.moveaxis(pc[c,...], 0, 2)\n",
+    "            \n",
     "            hist_calc.fill(d)\n",
     "    h, e, c, _ = hist_calc.get()\n",
     "    return h, e, c\n",
@@ -392,11 +464,11 @@
     "inp = []\n",
     "for i in modules:\n",
     "    qm = \"Q{}M{}\".format(i//4+1, i%4+1)\n",
-    "    inp.append((i, offset_g[qm], thresholds_g[qm]))\n",
+    "    inp.append((i, offset_g[qm], thresholds_g[qm], pc_g[qm][0,...], noise_g[qm][...,0]))\n",
     "    \n",
     "p = partial(hist_single_module, fbase, runs, sequences,\n",
     "            sensor_size, memory_cells, block_size, IL_MODE, limit_trains, profile, rawversion, instrument)\n",
-    "res_uncorr = view.map_sync(p, inp)\n"
+    "res_uncorr = list(map(p, inp))\n"
    ]
   },
   {
@@ -404,8 +476,8 @@
    "execution_count": null,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2019-08-13T08:27:32.417828Z",
-     "start_time": "2019-08-13T08:27:32.037880Z"
+     "end_time": "2019-09-13T07:51:38.024035Z",
+     "start_time": "2019-09-13T07:51:37.818276Z"
     },
     "scrolled": false
    },
@@ -439,8 +511,8 @@
    "execution_count": null,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2019-08-13T08:05:30.880972Z",
-     "start_time": "2019-08-13T08:05:30.874317Z"
+     "end_time": "2019-09-13T07:51:38.029704Z",
+     "start_time": "2019-09-13T07:51:38.026111Z"
     }
    },
    "outputs": [],
@@ -470,8 +542,8 @@
    "execution_count": null,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2019-07-23T21:06:07.234742Z",
-     "start_time": "2019-07-23T20:51:32.090875Z"
+     "end_time": "2019-09-13T08:16:50.652492Z",
+     "start_time": "2019-09-13T07:51:38.031787Z"
     }
    },
    "outputs": [],
@@ -479,7 +551,7 @@
     "block_size = [64, 64]\n",
     "def relgain_single_module(fbase, runs, sequences, peak_estimates,\n",
     "                          peak_heights, peak_sigma, memory_cells, sensor_size,\n",
-    "                          block_size, il_mode, profile, limit_trains_eval, rawversion, inp):\n",
+    "                          block_size, il_mode, profile, limit_trains_eval, rawversion, instrument, inp):\n",
     "    \"\"\" A function for calculated the relative gain of a single AGIPD module\n",
     "    \"\"\"\n",
     "    \n",
@@ -492,27 +564,90 @@
     "    \n",
     "    channel, offset, thresholds, noise, pc = inp\n",
     "    \n",
+    "    def baseline_correct_via_noise(d, noise, g, baseline_corr_noise_threshold=1000):\n",
+    "        \"\"\" Correct baseline shifts by evaluating position of the noise peak\n",
+    "\n",
+    "        :param: d: the data to correct, should be a single image\n",
+    "        :param noise: the mean noise for the cell id of `d`\n",
+    "        :param g: gain array matching `d` array\n",
+    "\n",
+    "        Correction is done by identifying the left-most (significant) peak\n",
+    "        in the histogram of `d` and shifting it to be centered on 0.\n",
+    "        This is done via a continous wavelet transform (CWT), using a Ricker\n",
+    "        wavelet.\n",
+    "\n",
+    "        Only high gain data is evaulated, and data larger than 50 ADU,\n",
+    "        equivalent of roughly a 9 keV photon is ignored.\n",
+    "\n",
+    "        Two passes are executed: the first shift is accurate to 10 ADU and\n",
+    "        will catch baseline shifts smaller then -2000 ADU. CWT is evaluated\n",
+    "        for peaks of widths one, three and five time the noise.\n",
+    "        The baseline is then shifted by the position of the summed CWT maximum.\n",
+    "\n",
+    "        In a second pass histogram is evaluated within a range of\n",
+    "        +- 5*noise of the initial shift value, for peak of width noise.\n",
+    "\n",
+    "        Baseline shifts larger than the maximum threshold or positive shifts\n",
+    "        larger 10 are discarded, and the original data is returned, otherwise\n",
+    "        the shift corrected data is returned.\n",
+    "\n",
+    "        \"\"\"\n",
+    "        import copy\n",
+    "        from scipy.signal import cwt, ricker\n",
+    "        # we work on a copy to be able to filter values by setting them to\n",
+    "        # np.nan\n",
+    "        dd = copy.copy(d)\n",
+    "        dd[g != 0] = np.nan  # only high gain data\n",
+    "        dd[dd > 50] = np.nan  # only noise data\n",
+    "        h, e = np.histogram(dd, bins=210, range=(-2000, 100))\n",
+    "        c = (e[1:] + e[:-1]) / 2\n",
+    "\n",
+    "        try:\n",
+    "            cwtmatr = cwt(h, ricker, [noise, 3. * noise, 5. * noise])\n",
+    "        except:\n",
+    "            return d\n",
+    "        cwtall = np.sum(cwtmatr, axis=0)\n",
+    "        marg = np.argmax(cwtall)\n",
+    "        pc = c[marg]\n",
+    "\n",
+    "        high_res_range = (dd > pc - 5 * noise) & (dd < pc + 5 * noise)\n",
+    "        dd[~high_res_range] = np.nan\n",
+    "        h, e = np.histogram(dd, bins=200,\n",
+    "                            range=(pc - 5 * noise, pc + 5 * noise))\n",
+    "        c = (e[1:] + e[:-1]) / 2\n",
+    "        try:\n",
+    "            cwtmatr = cwt(h, ricker, [noise, ])\n",
+    "        except:\n",
+    "            return d\n",
+    "        marg = np.argmax(cwtmatr)\n",
+    "        pc = c[marg]\n",
+    "        # too large shift to be easily decernable via noise\n",
+    "        if pc > 10 or pc < -baseline_corr_noise_threshold:\n",
+    "            return d\n",
+    "        return d - pc\n",
+    "\n",
+    "    \n",
     "    # function needs to be inline for parallell processing\n",
     "    def read_fun(filename, channel):\n",
     "        \"\"\" A reader function used by pyDetLib\n",
     "        \"\"\"\n",
     "        infile = h5py.File(filename, \"r\", driver=\"core\")\n",
     "        if rawversion == 2:\n",
-    "            count = np.squeeze(infile[\"/INDEX/SPB_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/count\".format(channel)])\n",
-    "            first = np.squeeze(infile[\"/INDEX/SPB_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/first\".format(channel)])\n",
+    "            count = np.squeeze(infile[\"/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/count\".format(instrument, channel)])\n",
+    "            first = np.squeeze(infile[\"/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/first\".format(instrument, channel)])\n",
     "            last_index = int(first[count != 0][-1]+count[count != 0][-1])\n",
     "            first_index = int(first[count != 0][0])\n",
     "        else:\n",
-    "            status = np.squeeze(infile[\"/INDEX/SPB_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/status\".format(channel)])\n",
+    "            status = np.squeeze(infile[\"/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/status\".format(instrument, channel)])\n",
     "            if np.count_nonzero(status != 0) == 0:\n",
     "                return\n",
-    "            last = np.squeeze(infile[\"/INDEX/SPB_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/last\".format(channel)])\n",
+    "            last = np.squeeze(infile[\"/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/last\".format(instrument, channel)])\n",
     "            last_index = int(last[status != 0][-1])\n",
     "            first_index = int(last[status != 0][0])\n",
     "        if limit_trains is not None:\n",
     "            last_index = min(limit_trains*memory_cells+first_index, last_index)\n",
-    "        im = np.array(infile[\"/INSTRUMENT/SPB_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/data\".format(channel)][first_index:last_index,...])    \n",
-    "        carr = infile[\"/INSTRUMENT/SPB_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/cellId\".format(channel)][first_index:last_index]\n",
+    "        im = np.array(infile[\"/INSTRUMENT/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/data\".format(instrument, channel)][first_index:last_index,...])    \n",
+    "        carr = infile[\"/INSTRUMENT/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/cellId\".format(instrument, channel)][first_index:last_index]\n",
     "        cells = np.squeeze(np.array(carr))\n",
     "        infile.close()\n",
     "\n",
@@ -558,6 +693,13 @@
     "            g[ga < thresholds[...,c,1]] = 1\n",
     "            g[ga < thresholds[...,c,0]] = 0\n",
     "            d = offset_cor.correct(d, cellTable=c, gainMap=g)\n",
+    "            \n",
+    "            for i in range(d.shape[2]):\n",
+    "                mn_noise = np.nanmean(noise[...,c[i]])\n",
+    "                d[...,i] = baseline_correct_via_noise(d[...,i],\n",
+    "                                                      mn_noise,\n",
+    "                                                      g[..., i])\n",
+    "            \n",
     "            d *= np.moveaxis(pc[c,...], 0, 2)\n",
     "            rel_gain.fill(d, cellTable=c)\n",
     "    \n",
@@ -574,7 +716,7 @@
     "start = datetime.now()\n",
     "p = partial(relgain_single_module, fbase, runs, sequences,\n",
     "            peak_estimates, peak_heights, peak_sigma, memory_cells,\n",
-    "            sensor_size, block_size, IL_MODE, profile, limit_trains_eval, rawversion)\n",
+    "            sensor_size, block_size, IL_MODE, profile, limit_trains_eval, rawversion, instrument)\n",
     "res_gain = list(map(p, inp)) # don't run concurently as inner function are parelllized\n",
     "duration = (datetime.now()-start).total_seconds()\n",
     "logger.runtime_summary_entry(success=True, runtime=duration)\n",
@@ -593,8 +735,8 @@
    "execution_count": null,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2019-07-23T21:06:10.339513Z",
-     "start_time": "2019-07-23T21:06:07.238846Z"
+     "end_time": "2019-09-13T08:16:54.261782Z",
+     "start_time": "2019-09-13T08:16:50.657594Z"
     }
    },
    "outputs": [],
@@ -635,8 +777,8 @@
     "        mask_db[(np.abs(1-gain_mdb/np.nanmedian(gain_mdb) > deviation_threshold) )] |= BadPixels.FF_GAIN_DEVIATION.value\n",
     "    # second bit set if entries are 0 for first peak\n",
     "    mask_db[entries[...,1] == 0] |= BadPixels.FF_NO_ENTRIES.value\n",
-    "    #zero_oc = np.count_nonzero((entries > 0), axis=3)\n",
-    "    #mask_db[zero_oc <= len(peak_estimates)-2] |= BadPixels.FF_NO_ENTRIES.value \n",
+    "    zero_oc = np.count_nonzero((entries > 0).astype(np.int), axis=3)\n",
+    "    mask_db[zero_oc <= len(peak_estimates)-2] |= BadPixels.FF_NO_ENTRIES.value \n",
     "    # third bit set if entries of a given adc show significant noise\n",
     "    stds = []\n",
     "    for ii in range(8):\n",
@@ -679,9 +821,10 @@
    "execution_count": null,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2019-07-23T21:06:11.067080Z",
-     "start_time": "2019-07-23T21:06:10.341783Z"
-    }
+     "end_time": "2019-09-13T08:16:55.562017Z",
+     "start_time": "2019-09-13T08:16:54.264843Z"
+    },
+    "scrolled": false
    },
    "outputs": [],
    "source": [
@@ -704,6 +847,7 @@
     "                    \n",
     "    for j in range(data.shape[-1]):\n",
     "        d = data[...,cell_to_preview,j]\n",
+    "        d[~np.isfinite(d)] = 0\n",
     "        \n",
     "        im = grid[j].imshow(d, interpolation=\"nearest\", vmin=0.9*np.nanmedian(d), vmax=max(1.1*np.nanmedian(d), 50))\n",
     "        cb = grid.cbar_axes[j].colorbar(im)\n",
@@ -724,8 +868,8 @@
    "execution_count": null,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2019-07-23T20:46:57.594731Z",
-     "start_time": "2019-07-23T20:46:56.722623Z"
+     "end_time": "2019-09-13T08:16:56.879582Z",
+     "start_time": "2019-09-13T08:16:55.564572Z"
     },
     "scrolled": false
    },
@@ -764,8 +908,8 @@
    "execution_count": null,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2019-07-23T18:02:41.742205Z",
-     "start_time": "2019-07-23T18:02:40.843607Z"
+     "end_time": "2019-09-13T08:16:58.113910Z",
+     "start_time": "2019-09-13T08:16:56.881383Z"
     }
    },
    "outputs": [],
@@ -810,8 +954,8 @@
    "execution_count": null,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2019-07-23T18:02:49.569476Z",
-     "start_time": "2019-07-23T18:02:49.563826Z"
+     "end_time": "2019-09-13T08:18:08.277881Z",
+     "start_time": "2019-09-13T08:18:08.272390Z"
     }
    },
    "outputs": [],
@@ -824,8 +968,8 @@
    "execution_count": null,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2019-07-23T18:02:51.379222Z",
-     "start_time": "2019-07-23T18:02:50.620711Z"
+     "end_time": "2019-09-13T08:18:10.840520Z",
+     "start_time": "2019-09-13T08:18:09.769451Z"
     }
    },
    "outputs": [],
@@ -854,8 +998,8 @@
    "execution_count": null,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2019-07-23T18:02:55.928543Z",
-     "start_time": "2019-07-23T18:02:53.702938Z"
+     "end_time": "2019-09-13T08:18:14.349244Z",
+     "start_time": "2019-09-13T08:18:11.849053Z"
     }
    },
    "outputs": [],
@@ -913,6 +1057,21 @@
    "execution_count": null,
    "metadata": {},
    "outputs": [],
+   "source": [
+    "proposal = list(filter(None, in_folder.strip('/').split('/')))[-2]\n",
+    "file_loc = proposal + ' ' + ' '.join(list(map(str,runs)))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-09-12T14:49:28.355922Z",
+     "start_time": "2019-09-12T14:49:28.340063Z"
+    }
+   },
+   "outputs": [],
    "source": [
     "if db_output:\n",
     "    for i, r in enumerate(res_gain):\n",
@@ -934,7 +1093,7 @@
     "        # set the operating condition\n",
     "        condition = Conditions.Illuminated.AGIPD(memory_cells, bias_voltage, 9.2,\n",
     "                                                 pixels_x=512, pixels_y=128, beam_energy=None,\n",
-    "                                                 acquisition_rate=acq_rate)\n",
+    "                                                 acquisition_rate=acqrate)\n",
     "        \n",
     "        metadata.detector_condition = condition\n",
     "\n",
@@ -943,6 +1102,8 @@
     "            metadata.calibration_constant_version = Versions.Now(device=getattr(det, qm))\n",
     "        else:\n",
     "            metadata.calibration_constant_version = Versions.Timespan(device=getattr(det, qm), start=creation_time)\n",
+    "\n",
+    "        metadata.calibration_constant_version.raw_data_location = file_loc\n",
     "        metadata.send(cal_db_interface, timeout=300000)\n",
     "        \n",
     "        \n",
@@ -955,7 +1116,7 @@
     "        # set the operating condition\n",
     "        condition = Conditions.Illuminated.AGIPD(memory_cells, bias_voltage, 9.2,\n",
     "                                                 pixels_x=512, pixels_y=128, beam_energy=None,\n",
-    "                                                 acquisition_rate=acq_rate)\n",
+    "                                                 acquisition_rate=acqrate)\n",
     "        \n",
     "        metadata.detector_condition = condition\n",
     "\n",
@@ -964,6 +1125,7 @@
     "            metadata.calibration_constant_version = Versions.Now(device=getattr(det, qm))\n",
     "        else:\n",
     "            metadata.calibration_constant_version = Versions.Timespan(device=getattr(det, qm), start=creation_time)\n",
+    "        metadata.calibration_constant_version.raw_data_location = file_loc\n",
     "        metadata.send(cal_db_interface, timeout=300000)"
    ]
   },
@@ -981,15 +1143,15 @@
    "execution_count": null,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2019-07-23T18:38:30.491771Z",
-     "start_time": "2019-07-23T18:33:15.648702Z"
+     "end_time": "2019-09-13T09:23:54.999086Z",
+     "start_time": "2019-09-13T09:16:57.293565Z"
     },
-    "scrolled": true
+    "scrolled": false
    },
    "outputs": [],
    "source": [
-    "def hist_single_module(fbase, runs, sequences, il_mode, profile, limit_trains, memory_cells, rawversion, inp):\n",
-    "    channel, offset, thresholds, relgain = inp\n",
+    "def hist_single_module(fbase, runs, sequences, il_mode, profile, limit_trains, memory_cells, rawversion, instrument, inp):\n",
+    "    channel, offset, thresholds, relgain, noise, pc = inp\n",
     "    gain, pks, std, gfunc = relgain\n",
     "    gains, errors, chisq, valid, max_dev, stats = gfunc\n",
     "    \n",
@@ -1002,27 +1164,91 @@
     "    sensor_size = [128, 512]\n",
     "    \n",
     "    block_size = sensor_size\n",
+    "    \n",
+    "    \n",
+    "    def baseline_correct_via_noise(d, noise, g, baseline_corr_noise_threshold=1000):\n",
+    "        \"\"\" Correct baseline shifts by evaluating position of the noise peak\n",
+    "\n",
+    "        :param: d: the data to correct, should be a single image\n",
+    "        :param noise: the mean noise for the cell id of `d`\n",
+    "        :param g: gain array matching `d` array\n",
+    "\n",
+    "        Correction is done by identifying the left-most (significant) peak\n",
+    "        in the histogram of `d` and shifting it to be centered on 0.\n",
+    "        This is done via a continous wavelet transform (CWT), using a Ricker\n",
+    "        wavelet.\n",
+    "\n",
+    "        Only high gain data is evaulated, and data larger than 50 ADU,\n",
+    "        equivalent of roughly a 9 keV photon is ignored.\n",
+    "\n",
+    "        Two passes are executed: the first shift is accurate to 10 ADU and\n",
+    "        will catch baseline shifts smaller then -2000 ADU. CWT is evaluated\n",
+    "        for peaks of widths one, three and five time the noise.\n",
+    "        The baseline is then shifted by the position of the summed CWT maximum.\n",
+    "\n",
+    "        In a second pass histogram is evaluated within a range of\n",
+    "        +- 5*noise of the initial shift value, for peak of width noise.\n",
+    "\n",
+    "        Baseline shifts larger than the maximum threshold or positive shifts\n",
+    "        larger 10 are discarded, and the original data is returned, otherwise\n",
+    "        the shift corrected data is returned.\n",
+    "\n",
+    "        \"\"\"\n",
+    "        import copy\n",
+    "        from scipy.signal import cwt, ricker\n",
+    "        # we work on a copy to be able to filter values by setting them to\n",
+    "        # np.nan\n",
+    "        dd = copy.copy(d)\n",
+    "        dd[g != 0] = np.nan  # only high gain data\n",
+    "        dd[dd > 50] = np.nan  # only noise data\n",
+    "        h, e = np.histogram(dd, bins=210, range=(-2000, 100))\n",
+    "        c = (e[1:] + e[:-1]) / 2\n",
+    "\n",
+    "        try:\n",
+    "            cwtmatr = cwt(h, ricker, [noise, 3. * noise, 5. * noise])\n",
+    "        except:\n",
+    "            return d\n",
+    "        cwtall = np.sum(cwtmatr, axis=0)\n",
+    "        marg = np.argmax(cwtall)\n",
+    "        pc = c[marg]\n",
+    "\n",
+    "        high_res_range = (dd > pc - 5 * noise) & (dd < pc + 5 * noise)\n",
+    "        dd[~high_res_range] = np.nan\n",
+    "        h, e = np.histogram(dd, bins=200,\n",
+    "                            range=(pc - 5 * noise, pc + 5 * noise))\n",
+    "        c = (e[1:] + e[:-1]) / 2\n",
+    "        try:\n",
+    "            cwtmatr = cwt(h, ricker, [noise, ])\n",
+    "        except:\n",
+    "            return d\n",
+    "        marg = np.argmax(cwtmatr)\n",
+    "        pc = c[marg]\n",
+    "        # too large shift to be easily decernable via noise\n",
+    "        if pc > 10 or pc < -baseline_corr_noise_threshold:\n",
+    "            return d\n",
+    "        return d - pc\n",
+    "    \n",
     "    def read_fun(filename, channel):\n",
     "        \n",
     "        infile = h5py.File(filename, \"r\", driver=\"core\")\n",
     "        if rawversion == 2:\n",
-    "            count = np.squeeze(infile[\"/INDEX/SPB_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/count\".format(channel)])\n",
-    "            first = np.squeeze(infile[\"/INDEX/SPB_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/first\".format(channel)])\n",
+    "            count = np.squeeze(infile[\"/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/count\".format(instrument, channel)])\n",
+    "            first = np.squeeze(infile[\"/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/first\".format(instrument, channel)])\n",
     "            last_index = int(first[count != 0][-1]+count[count != 0][-1])\n",
     "            first_index = int(first[count != 0][0])\n",
     "        else:\n",
-    "            status = np.squeeze(infile[\"/INDEX/SPB_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/status\".format(channel)])\n",
+    "            status = np.squeeze(infile[\"/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/status\".format(instrument, channel)])\n",
     "            if np.count_nonzero(status != 0) == 0:\n",
     "                return\n",
-    "            last = np.squeeze(infile[\"/INDEX/SPB_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/last\".format(channel)])\n",
+    "            last = np.squeeze(infile[\"/INDEX/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/last\".format(instrument, channel)])\n",
     "            last_index = int(last[status != 0][-1])\n",
     "            first_index = int(last[status != 0][0])\n",
     "        \n",
     "        if limit_trains is not None:\n",
     "            last_index = min(limit_trains*memory_cells+first_index, last_index)\n",
     "        \n",
-    "        im = np.array(infile[\"/INSTRUMENT/SPB_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/data\".format(channel)][first_index:last_index,...])    \n",
-    "        carr = infile[\"/INSTRUMENT/SPB_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/cellId\".format(channel)][first_index:last_index]\n",
+    "        im = np.array(infile[\"/INSTRUMENT/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/data\".format(instrument, channel)][first_index:last_index,...])    \n",
+    "        carr = infile[\"/INSTRUMENT/{}_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/cellId\".format(instrument, channel)][first_index:last_index]\n",
     "        cells = np.squeeze(np.array(carr))\n",
     "        infile.close()\n",
     "        \n",
@@ -1070,10 +1296,17 @@
     "            g[ga < thresholds[...,c,1]] = 1\n",
     "            g[ga < thresholds[...,c,0]] = 0\n",
     "            d = offset_cor.correct(d, cellTable=c, gainMap=g)\n",
+    "            for i in range(d.shape[2]):\n",
+    "                mn_noise = np.nanmean(noise[...,c[i]])\n",
+    "                d[...,i] = baseline_correct_via_noise(d[...,i],\n",
+    "                                                      mn_noise,\n",
+    "                                                      g[..., i])\n",
+    "            \n",
+    "            d *= np.moveaxis(pc[c,...], 0, 2)\n",
     "            \n",
     "            hist_calc_uncorr.fill(d)\n",
-    "            #d = (d-gains[..., c, 1])/gains[..., c, 0]\n",
-    "            d = d/gains[..., 0][...,None]\n",
+    "            d = (d-gains[..., 1][...,None])/gains[..., 0][...,None]\n",
+    "            #d = d/gains[..., 0][...,None]\n",
     "            hist_calc.fill(d)    \n",
     "    \n",
     "    h, e, c, _ = hist_calc.get()\n",
@@ -1085,10 +1318,10 @@
     "for i in modules:\n",
     "    qm = \"Q{}M{}\".format(i//4+1, i%4+1)\n",
     "    \n",
-    "    inp.append((i, offset_g[qm], thresholds_g[qm], res_gain[idx]))\n",
+    "    inp.append((i, offset_g[qm], thresholds_g[qm], res_gain[idx], noise_g[qm][...,0], pc_g[qm][0,...]))\n",
     "    idx += 1\n",
     "    \n",
-    "p = partial(hist_single_module, fbase, runs, sequences, IL_MODE, profile, limit_trains, memory_cells, rawversion)\n",
+    "p = partial(hist_single_module, fbase, runs, sequences, IL_MODE, profile, limit_trains, memory_cells, rawversion, instrument)\n",
     "#res = view.map_sync(p, inp)\n",
     "res = list(map(p, inp))\n"
    ]
@@ -1098,8 +1331,8 @@
    "execution_count": null,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2019-07-23T18:38:31.138496Z",
-     "start_time": "2019-07-23T18:38:30.494144Z"
+     "end_time": "2019-09-13T09:23:56.023378Z",
+     "start_time": "2019-09-13T09:23:55.002226Z"
     }
    },
    "outputs": [],
-- 
GitLab