From 70ef401ec2e5c92fce376ab1c6b40ad4198f17ed Mon Sep 17 00:00:00 2001
From: karnem <mikhail.karnevskiy@desy.de>
Date: Mon, 9 Mar 2020 12:51:47 +0100
Subject: [PATCH] Add notebook to create histograms for FF analysis

---
 .../playground/AGIPD_FF_Prepare_data.ipynb    | 313 ++++++++++++++++++
 xfel_calibrate/notebooks.py                   |   7 +
 2 files changed, 320 insertions(+)
 create mode 100644 notebooks/AGIPD/playground/AGIPD_FF_Prepare_data.ipynb

diff --git a/notebooks/AGIPD/playground/AGIPD_FF_Prepare_data.ipynb b/notebooks/AGIPD/playground/AGIPD_FF_Prepare_data.ipynb
new file mode 100644
index 000000000..19794465d
--- /dev/null
+++ b/notebooks/AGIPD/playground/AGIPD_FF_Prepare_data.ipynb
@@ -0,0 +1,313 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# AGIPD Analysis\n",
+    "\n",
+    "Author: European XFEL Detector Group, Version: 1.0\n",
+    "\n",
+    "Offline Calibration for the AGIPD Detector"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "in_folder = \"/gpfs/exfel/exp/SPB/202030/p900138/proc/\" # the folder to read data from, required\n",
+    "modules = [2] # module to consider, range allowed\n",
+    "out_folder =  \"/gpfs/exfel/exp/MID/202030/p900137/scratch/karnem/r0319_0322_0342_v02\"  # the folder to output to, required\n",
+    "cluster_profile = \"noDB\"\n",
+    "fname = '{}/CORR-R{:04d}-AGIPD{:02d}-S{:05d}.h5'\n",
+    "sequences = [-1] # sequences to consider, set to -1 for all, range allowed\n",
+    "cells = 'range(0,0)' # number of cells, expression should be equivalent to list\n",
+    "n_bins = 500 # number of bins of the histogram\n",
+    "h_range = [-50, 450] # range of the histogram\n",
+    "chunk_size = 5 # Number of memory cells to be processed at once and stored in one file\n",
+    "n_cells = 202 # total number of memory cells (used to create summary file)\n",
+    "run = 204 # run number, required\n",
+    "karabo_id = 'MID_DET_AGIPD1M-1' # karabo_id \n",
+    "templ = \"{out_folder}/hists_m{module:02d}_c*.h5\" # Template to concatenate histograms"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from functools import partial\n",
+    "from ipyparallel import Client\n",
+    "import warnings\n",
+    "import glob\n",
+    "import h5py\n",
+    "import numpy as np\n",
+    "from time import sleep, time\n",
+    "import os\n",
+    "import gc\n",
+    "import matplotlib.pyplot as plt\n",
+    "import matplotlib as mpl\n",
+    "from cal_tools.ana_tools import save_dict_to_hdf5\n",
+    "%matplotlib inline\n",
+    "warnings.filterwarnings('ignore')\n",
+    "client = Client(profile=cluster_profile)\n",
+    "view = client[:]\n",
+    "view.use_dill()\n",
+    "\n",
+    "in_folder = \"{}/r{:04d}/\".format(in_folder, run)\n",
+    "cells = list(eval(cells))  # NOTE(review): eval() of a parameter string - pass trusted input only\n",
+    "if cells:\n",
+    "    print(f'List of cells {cells[0]}-{cells[-1]}: {cells}')\n",
+    "print(f'Input folder {in_folder}')\n",
+    "n_pix = 128*512  # flattened pixel count used for the histogram arrays\n",
+    "module = modules[0]  # only the first entry of `modules` is processed here"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "def process_file(cells, n_bins, h_range, n_pix, module, karabo_id, file_name):\n",
+    "    \"\"\"Histogram each pixel of one file for the requested memory cells.\n",
+    "\n",
+    "    Returns (hists, err, sequence); err is '' on success, else the error text.\n",
+    "    \"\"\"\n",
+    "    # Local imports: the function is mapped onto the ipyparallel engines.\n",
+    "    import h5py\n",
+    "    import numpy as np\n",
+    "    import gc\n",
+    "\n",
+    "    print('Read file {}'.format(file_name))\n",
+    "    sequence = int(file_name.split('/')[-1].split('-')[3][1:-3])\n",
+    "    # Allocate uint32 directly; zeros().astype() built a float64 array first.\n",
+    "    res_all = np.zeros((len(cells), n_bins, n_pix), dtype=np.uint32)\n",
+    "    err = ''\n",
+    "    try:\n",
+    "        with h5py.File(file_name, \"r\") as f:\n",
+    "            path = f'/INSTRUMENT/{karabo_id}/DET/{module}CH0:xtdf/image'\n",
+    "            print(path)\n",
+    "            data_h = f[f'{path}/data']\n",
+    "            cellId_h = f[f'{path}/cellId']\n",
+    "            cell_id = np.array(cellId_h[()])\n",
+    "            for ic, cell in enumerate(cells):\n",
+    "                print(cell)\n",
+    "                cell_sel = np.where(cell_id == cell)\n",
+    "                data = np.array(data_h[cell_sel]).astype(np.float32)\n",
+    "                print(data.shape)\n",
+    "                res_all[ic] = np.apply_along_axis(lambda a: np.histogram(a, bins=n_bins, range=h_range)[0],\n",
+    "                                                  0, data.reshape(data.shape[0], n_pix))\n",
+    "            gc.collect()\n",
+    "    except Exception as e:\n",
+    "        err = str(e)\n",
+    "        gc.collect()\n",
+    "    return res_all, err, sequence\n",
+    "\n",
+    "\n",
+    "# Resolve the input files: every sequence found on disk, or only the requested ones.\n",
+    "if sequences[0] == -1:\n",
+    "    fnames = glob.glob(fname.format(\n",
+    "        in_folder, run, module, 99999).replace('99999', '*'))\n",
+    "else:\n",
+    "    fnames = [fname.format(in_folder, run, module, x) for x in sequences]\n",
+    "\n",
+    "#proposal = int(in_folder.split('/')[6][1:])\n",
+    "sequences = [int(x.split('/')[-1].split('-')[3][1:-3]) for x in fnames]\n",
+    "\n",
+    "print(f\"List of sequences: {sorted(sequences)}\")\n",
+    "# Bookkeeping map (sequence x cell): NaN = not attempted, 1 = processed, 0 = failed.\n",
+    "processed = np.full((max(sequences+[1])+1, max(cells+[1])+1), np.nan)\n",
+    "\n",
+    "cell_list = []\n",
+    "for cell in cells:\n",
+    "    cell_list.append(cell)\n",
+    "    # Flush a chunk once it is full, or when the last requested cell is reached.\n",
+    "    if len(cell_list) >= chunk_size or cell == cells[-1]:\n",
+    "        inp = list(fnames)\n",
+    "\n",
+    "        print(f'Process cells: {cell_list}')\n",
+    "        p = partial(process_file, cell_list, n_bins, h_range, n_pix, module, karabo_id)\n",
+    "        results = view.map_sync(p, inp)\n",
+    "        #results = list(map(p, inp))\n",
+    "\n",
+    "        # Accumulate in uint32 directly; zeros().astype() allocated float64 first.\n",
+    "        all_hists = np.zeros((len(cell_list), n_bins, n_pix), dtype=np.uint32)\n",
+    "        for ir, r in enumerate(results):\n",
+    "            data, msg, s = r\n",
+    "            if msg == '':\n",
+    "                processed[s, np.array(cell_list)] = 1\n",
+    "                all_hists += data\n",
+    "            else:\n",
+    "                processed[s, np.array(cell_list)] = 0\n",
+    "                print(f'Error in {ir}: {msg}')\n",
+    "\n",
+    "        out_name = '{}/hists_m{:02d}_c{:03d}-{:03d}.h5'.format(out_folder, module,\n",
+    "                                                               cell_list[0], cell_list[-1])\n",
+    "\n",
+    "        save_dict_to_hdf5({'hist': all_hists,\n",
+    "                           'cellId': np.array(cell_list),\n",
+    "                           # 'proposal': proposal,\n",
+    "                           # 'sequences': sequences,\n",
+    "                           # 'run_': [int(run)],\n",
+    "                           'nBins': n_bins,\n",
+    "                           'hRange': np.array(h_range)}, out_name)\n",
+    "\n",
+    "        cell_list = []\n",
+    "        gc.collect()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fig, ax = plt.subplots(figsize=(10, 10))\n",
+    "ax.imshow(processed, cmap=plt.cm.jet, vmin=0, vmax=1)\n",
+    "ax.set_title(f\"Processed files and cells for module {module}\")\n",
+    "ax.set_ylabel(\"Sequences\", fontsize=18)\n",
+    "_ = ax.set_xlabel(\"Cells\", fontsize=18)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Concatenate all per-chunk histogram files of the given module\n",
+    "fnames = glob.glob(templ.format(out_folder=out_folder, module=module))\n",
+    "# Allocate uint32 directly; zeros().astype() built a float64 array twice the size first.\n",
+    "total_hist = np.zeros((n_cells, n_bins, n_pix), dtype=np.uint32)\n",
+    "for file_name in fnames:\n",
+    "    with h5py.File(file_name, \"r\") as f:\n",
+    "        # Histograms of one chunk together with the binning stored alongside them.\n",
+    "        f_hist = np.array(f['hist'][()])\n",
+    "        f_cell_id = np.array(f['cellId'][()])\n",
+    "        f_n_bins = f['nBins'][()]\n",
+    "        f_h_range = np.array(f['hRange'][()])\n",
+    "        #f_proposal = np.array(f['proposal'][()])\n",
+    "        #f_sequences = np.array(f['sequences'][()])\n",
+    "        #f_runs = np.array(f['runs'][()])\n",
+    "        # Only merge files whose binning matches the current parameters.\n",
+    "        if n_bins != f_n_bins or f_h_range[0] != h_range[0] or f_h_range[1] != h_range[1]:\n",
+    "            print(f'file {file_name} is incompatible to be merged')\n",
+    "            continue\n",
+    "\n",
+    "        print(f'Add file {file_name} with cells {f_cell_id}')\n",
+    "        total_hist[f_cell_id] += f_hist\n",
+    "\n",
+    "\n",
+    "out_name = '{}/hists_m{:02d}_sum.h5'.format(out_folder, module)\n",
+    "print(f'Save to file: {out_name}')\n",
+    "\n",
+    "save_dict_to_hdf5({'hist': total_hist,\n",
+    "                   'cellId': np.arange(n_cells),\n",
+    "                   'nBins': n_bins,\n",
+    "                   'hRange': np.array(h_range)}, out_name)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "rshist = np.reshape(total_hist, (n_cells, n_bins, 512, 128))\n",
+    "\n",
+    "# Sanity check per memory cell; looping keeps temporaries to one cell at a time.\n",
+    "mean_hist = np.zeros(n_cells)\n",
+    "std_hist = np.zeros(n_cells)\n",
+    "sum_hist = np.zeros(n_cells)\n",
+    "for i in range(0, n_cells):\n",
+    "    mean_hist[i] = np.mean(rshist[i, :, :, :])\n",
+    "    std_hist[i] = np.std(rshist[i, :, :, :])\n",
+    "    sum_hist[i] = np.sum(rshist[i, :, :, :])/(128*512)\n",
+    "\n",
+    "x = np.arange(n_cells)  # integer cell indices; linspace(0, n_cells, n_cells) was off-grid\n",
+    "fig = plt.figure(figsize=(10, 10))\n",
+    "\n",
+    "ax0 = fig.add_subplot(211)\n",
+    "ax0.plot(x, mean_hist, color='#3F7F4C')\n",
+    "ax0.fill_between(x, mean_hist-std_hist, mean_hist+std_hist,\n",
+    "                 alpha=0.6, edgecolor='#3F7F4C', facecolor='#7EFF99',\n",
+    "                 linewidth=1, linestyle='dashdot', antialiased=True,\n",
+    "                 label=r\" mean value $ \\pm $ std \")\n",
+    "\n",
+    "ax0.set_xlabel('Cell', fontsize=14)\n",
+    "ax0.set_ylabel('Mean over module [ADU]', fontsize=14)\n",
+    "ax0.set_title(f'Module {module}', fontsize=16, fontweight='bold')\n",
+    "ax0.grid()\n",
+    "# ax0.set_ylim(-100,100)\n",
+    "_ = ax0.legend()\n",
+    "\n",
+    "ax1 = fig.add_subplot(212)\n",
+    "ax1.plot(x, sum_hist, color='#3F7F4C')\n",
+    "\n",
+    "ax1.set_xlabel('Cell', fontsize=14)\n",
+    "ax1.set_ylabel('Average statistics', fontsize=14)\n",
+    "ax1.set_title(f'Module {module}', fontsize=16, fontweight='bold')\n",
+    "_ = ax1.legend()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Plot for single pixel and all memory cells.\n",
+    "xpix = 23\n",
+    "ypix = 44\n",
+    "\n",
+    "# Left bin edges derived from the histogram definition, so x always has n_bins entries.\n",
+    "x = np.linspace(h_range[0], h_range[1], n_bins, endpoint=False)\n",
+    "n, _ = rshist[:, :, xpix, ypix].shape\n",
+    "colors = mpl.cm.rainbow(np.linspace(0, 1, n))\n",
+    "\n",
+    "\n",
+    "fig = plt.figure(figsize=(10,5))\n",
+    "fig.suptitle(f'Module {module} ', fontsize=14, fontweight='bold')\n",
+    "\n",
+    "ax = fig.add_subplot(111)\n",
+    "fig.subplots_adjust(top=0.85)\n",
+    "ax.set_title(f'single pixel [{xpix},{ypix}], all ({n_cells}) memory cells')\n",
+    "\n",
+    "ax.set_xlabel('Signal [ADU]')\n",
+    "ax.set_ylabel('Counts')\n",
+    "ax.set_xlim(-50,300)\n",
+    "for color, y in zip(colors, rshist[:,:,xpix,ypix]):\n",
+    "    ax.plot(x, y, color=color,linewidth=0.2)\n",
+    "plt.grid()\n",
+    "plt.show()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/xfel_calibrate/notebooks.py b/xfel_calibrate/notebooks.py
index b814077b5..c3a19cdd9 100644
--- a/xfel_calibrate/notebooks.py
+++ b/xfel_calibrate/notebooks.py
@@ -41,6 +41,12 @@ notebooks = {
                             "default concurrency": None,
                             "cluster cores": 8},
         },
+        "FF_HISTS": {
+            "notebook": "notebooks/AGIPD/playground/AGIPD_FF_Prepare_data.ipynb",
+            "concurrency": {"parameter": "modules",
+                            "default concurrency": list(range(16)),
+                            "cluster cores": 30},
+        },
     },
 
     "AGIPD64K": {
@@ -235,3 +241,4 @@ notebooks = {
         },
     },
 }
+
-- 
GitLab