separate histogram from update agipd FF notebook

ac0bdeca · Karim Ahmed · 1d545612 · 1d545612 · ac0bdeca
Commit ac0bdeca authored 4 years ago by Karim Ahmed
--- a/notebooks/AGIPD/playground/AGIPD_FF_Prepare_data.ipynb
+++ b/notebooks/AGIPD/playground/AGIPD_FF_Prepare_data.ipynb
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# AGIPD Analysis\n",
-    "\n",
-    "Author: European XFEL Detector Group, Version: 1.0\n",
-    "\n",
-    "Offline Calibration for the AGIPD Detector"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "in_folder = \"/gpfs/exfel/exp/SPB/202030/p900138/proc/\" # the folder to read data from, required\n",
-    "modules = [2] # module to consider, range allowed\n",
-    "out_folder =  \"/gpfs/exfel/exp/MID/202030/p900137/scratch/karnem/r0319_0322_0342_v02\"  # the folder to output to, required\n",
-    "cluster_profile = \"noDB\"\n",
-    "fname = '{}/CORR-R{:04d}-AGIPD{:02d}-S{:05d}.h5'\n",
-    "sequences = [-1] # module to consider, set to -1 for all, range allowed\n",
-    "cells = 'range(0,0)' # number of cells, expression should be equivalent to list\n",
-    "n_bins = 500 # number of bins of the histogram\n",
-    "h_range = [-50, 450] # range of the histogram\n",
-    "chunk_size = 5 # Number of memory cells to be processed at ones and be stored in a file\n",
-    "n_cells = 202 # total number of memory cells (used to create summary file)\n",
-    "run = 204 # run number, required\n",
-    "karabo_id = 'MID_DET_AGIPD1M-1' # karabo_id \n",
-    "templ = \"{out_folder}/hists_m{module:02d}_c*.h5\" # Template to concatinate histograms"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from functools import partial\n",
-    "from ipyparallel import Client\n",
-    "import warnings\n",
-    "import glob\n",
-    "import h5py\n",
-    "import numpy as np\n",
-    "from time import sleep, time\n",
-    "import os\n",
-    "import gc\n",
-    "import matplotlib.pyplot as plt\n",
-    "import matplotlib as mpl\n",
-    "from cal_tools.ana_tools import save_dict_to_hdf5\n",
-    "%matplotlib inline\n",
-    "warnings.filterwarnings('ignore')\n",
-    "client = Client(profile=cluster_profile)\n",
-    "view = client[:]\n",
-    "view.use_dill()\n",
-    "\n",
-    "in_folder = \"{}/r{:04d}/\".format(in_folder, run)\n",
-    "cells = list(eval(cells))\n",
-    "if len(cells)>0:\n",
-    "    print(f'List of cells {cells[0]}-{cells[-1]}: {cells}')\n",
-    "print(f'Input folder {in_folder}')\n",
-    "n_pix = 128*512\n",
-    "module = modules[0]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "scrolled": false
-   },
-   "outputs": [],
-   "source": [
-    "def process_file(cells, n_bins, h_range, n_pix, module, karabo_id, file_name):\n",
-    "    import glob\n",
-    "    import h5py\n",
-    "    import numpy as np\n",
-    "    import gc\n",
-    "\n",
-    "    print('Read file {}'.format(file_name))\n",
-    "    sequence = int(file_name.split('/')[-1].split('-')[3][1:-3])\n",
-    "    res_all = np.zeros((len(cells), n_bins, n_pix)).astype(np.uint32)\n",
-    "    err = ''\n",
-    "    try:\n",
-    "        with h5py.File(file_name, \"r\") as f:\n",
-    "            path = f'/INSTRUMENT/{karabo_id}/DET/{module}CH0:xtdf/image'\n",
-    "            print(path)\n",
-    "\n",
-    "            data_h = f[f'{path}/data']\n",
-    "            cellId_h = f[f'{path}/cellId']\n",
-    "\n",
-    "            cell_id = np.array(cellId_h[()])\n",
-    "            \n",
-    "            for ic, cell in enumerate(cells):\n",
-    "                print(cell)\n",
-    "                cell_sel = np.where(cell_id == cell)\n",
-    "                data = np.array(data_h[cell_sel]).astype(np.float32)\n",
-    "\n",
-    "                print(data.shape)\n",
-    "                res_all[ic] = np.apply_along_axis(lambda a: np.histogram(a, bins=n_bins, range=h_range)[0],\n",
-    "                                          0,\n",
-    "                                          data.reshape(data.shape[0], n_pix))\n",
-    "            gc.collect()\n",
-    "    except Exception as e:\n",
-    "        err = str(e)\n",
-    "        gc.collect()\n",
-    "    return res_all, err, sequence\n",
-    "\n",
-    "\n",
-    "if sequences[0] == -1:\n",
-    "    fnames = glob.glob(fname.format(\n",
-    "        in_folder, run, module, 99999).replace('99999', '*'))\n",
-    "else:\n",
-    "    fnames = [fname.format(in_folder, run, module, x) for x in sequences]\n",
-    "\n",
-    "#proposal = int(in_folder.split('/')[6][1:])\n",
-    "sequences = [int(x.split('/')[-1].split('-')[3][1:-3]) for x in fnames]\n",
-    "\n",
-    "print(f\"List of sequences: {sorted(sequences)}\")\n",
-    "processed = np.zeros((max(sequences+[1])+1, max(cells+[1])+1))\n",
-    "processed[:,:] = np.nan\n",
-    "\n",
-    "cell_list = []\n",
-    "for cell in cells:\n",
-    "    cell_list.append(cell)\n",
-    "    if len(cell_list) >= chunk_size or cell == cells[-1]:\n",
-    "        inp = []\n",
-    "        for file_name in fnames:\n",
-    "            inp.append(file_name)\n",
-    "\n",
-    "        print(f'Process cells: {cell_list}')\n",
-    "        p = partial(process_file, cell_list, n_bins, h_range, n_pix, module, karabo_id)\n",
-    "        results = view.map_sync(p, inp)\n",
-    "        #results = list(map(p, inp))\n",
-    "\n",
-    "        all_hists = np.zeros((len(cell_list), n_bins, n_pix)).astype(np.uint32)\n",
-    "        for ir, r in enumerate(results):\n",
-    "            data, msg, s = r\n",
-    "            if msg == '':\n",
-    "                processed[s, np.array(cell_list)] = 1\n",
-    "                all_hists += data\n",
-    "            else:\n",
-    "                processed[s, np.array(cell_list)] = 0\n",
-    "                print(f'Error in {ir}: {msg}')\n",
-    "\n",
-    "\n",
-    "        out_name = '{}/hists_m{:02d}_c{:03d}-{:03d}.h5'.format(out_folder, module,\n",
-    "                                                               cell_list[0], cell_list[-1])\n",
-    "\n",
-    "        save_dict_to_hdf5({'hist': all_hists,\n",
-    "                           'cellId': np.array(cell_list),\n",
-    "                           # 'proposal': proposal,\n",
-    "                           # 'sequences': sequences,\n",
-    "                           # 'run_': [int(run)],\n",
-    "                           'nBins': n_bins,\n",
-    "                           'hRange': np.array(h_range)}, out_name)\n",
-    "\n",
-    "        cell_list = []\n",
-    "        gc.collect()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "fig = plt.figure(figsize=(10,10))\n",
-    "plt.imshow(processed, cmap=plt.cm.jet, vmin = 0, vmax = 1)\n",
-    "plt.title(f\"Processed files and cells for module {module}\")\n",
-    "plt.ylabel(\"Sequences\", fontsize=18)\n",
-    "_ = plt.xlabel(\"Cells\", fontsize=18)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Concatinate all files for given module\n",
-    "fnames = glob.glob(templ.format(out_folder=out_folder, module=module))\n",
-    "total_hist = np.zeros((n_cells, n_bins, n_pix)).astype(np.uint32)\n",
-    "\n",
-    "for file_name in fnames:\n",
-    "    with h5py.File(file_name, \"r\") as f:\n",
-    "\n",
-    "        f_hist = np.array(f['hist'][()])\n",
-    "        f_cell_id = np.array(f['cellId'][()])\n",
-    "        f_n_bins = f['nBins'][()]\n",
-    "        f_h_range = np.array(f['hRange'][()])\n",
-    "        #f_proposal = np.array(f['proposal'][()])\n",
-    "        #f_sequences = np.array(f['sequences'][()])\n",
-    "        #f_runs = np.array(f['runs'][()])\n",
-    "\n",
-    "        if n_bins != f_n_bins or f_h_range[0] != h_range[0] or f_h_range[1] != h_range[1]:\n",
-    "            print(f'file {file_name} is incompatible to be merged')\n",
-    "            continue\n",
-    "\n",
-    "        print(f'Add file {file_name} with cells {f_cell_id}')\n",
-    "        total_hist[f_cell_id] += f_hist\n",
-    "\n",
-    "\n",
-    "out_name = '{}/hists_m{:02d}_sum.h5'.format(out_folder, module)\n",
-    "print(f'Save to file: {out_name}')\n",
-    "\n",
-    "save_dict_to_hdf5({'hist': total_hist,\n",
-    "                   'cellId': np.arange(n_cells),\n",
-    "                   'nBins': n_bins,\n",
-    "                   'hRange': np.array(h_range)}, out_name)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "rshist = np.reshape(total_hist, (n_cells, n_bins, 512, 128))\n",
-    "\n",
-    "# some sanity check per mem cell\n",
-    "mean_hist = np.zeros(n_cells)\n",
-    "std_hist = np.zeros(n_cells)\n",
-    "sum_hist = np.zeros(n_cells)\n",
-    "for i in range(0, n_cells):\n",
-    "    mean_hist[i] = np.mean(rshist[i, :, :, :])\n",
-    "    std_hist[i] = np.std(rshist[i, :, :, :])\n",
-    "    sum_hist[i] = np.sum(rshist[i, :, :, :])/(128*512)\n",
-    "\n",
-    "x = np.linspace(0, n_cells, n_cells)\n",
-    "fig = plt.figure(figsize=(10, 10))\n",
-    "\n",
-    "ax0 = fig.add_subplot(211)\n",
-    "ax0.plot(x, mean_hist, 'k', color='#3F7F4C')\n",
-    "ax0.fill_between(x, mean_hist-std_hist, mean_hist+std_hist,\n",
-    "                 alpha=0.6, edgecolor='#3F7F4C', facecolor='#7EFF99',\n",
-    "                 linewidth=1, linestyle='dashdot', antialiased=True,\n",
-    "                 label=\" mean value $ \\pm $ std \")\n",
-    "\n",
-    "ax0.set_xlabel('Cell', fontsize=14)\n",
-    "ax0.set_ylabel('Mean over module [ADU]', fontsize=14)\n",
-    "ax0.set_title(f'Module {module}', fontsize=16, fontweight='bold')\n",
-    "ax0.grid()\n",
-    "# ax0.set_ylim(-100,100)\n",
-    "_ = ax0.legend()\n",
-    "\n",
-    "ax1 = fig.add_subplot(212)\n",
-    "ax1.plot(x, sum_hist, 'k', color='#3F7F4C')\n",
-    "\n",
-    "ax1.set_xlabel('Cell', fontsize=14)\n",
-    "ax1.set_ylabel('Average statistics', fontsize=14)\n",
-    "ax1.set_title(f'Module {module}', fontsize=16, fontweight='bold')\n",
-    "_ = ax1.legend()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Plot for single pixel and all memory cells.\n",
-    "xpix= 23\n",
-    "ypix= 44\n",
-    "\n",
-    "x = np.arange(h_range[0],h_range[1] , 1)\n",
-    "n,_ = rshist[:,:,xpix,ypix].shape\n",
-    "\n",
-    "colors = mpl.cm.rainbow(np.linspace(0, 1, n))\n",
-    "\n",
-    "\n",
-    "fig = plt.figure(figsize=(10,5))\n",
-    "fig.suptitle(f'Module {module} ', fontsize=14, fontweight='bold')\n",
-    "\n",
-    "ax = fig.add_subplot(111)\n",
-    "fig.subplots_adjust(top=0.85)\n",
-    "ax.set_title(f'single pixel [{xpix},{ypix}], all ({n_cells}) memory cells')\n",
-    "\n",
-    "ax.set_xlabel('Signal [ADU]')\n",
-    "ax.set_ylabel('Counts')\n",
-    "ax.set_xlim(-50,300)\n",
-    "for color, y in zip(colors, rshist[:,:,xpix,ypix]):\n",
-    "    ax.plot(x, y, color=color,linewidth=0.2)\n",
-    "plt.grid()  \n",
-    "plt.show()"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.6.7"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
-%% Cell type:markdown id: tags:
-# AGIPD Analysis
-Author: European XFEL Detector Group, Version: 1.0
-Offline Calibration for the AGIPD Detector
-%% Cell type:code id: tags:
-``` python
-in_folder = "/gpfs/exfel/exp/SPB/202030/p900138/proc/" # the folder to read data from, required
-modules = [2] # module to consider, range allowed
-out_folder =  "/gpfs/exfel/exp/MID/202030/p900137/scratch/karnem/r0319_0322_0342_v02"  # the folder to output to, required
-cluster_profile = "noDB"
-fname = '{}/CORR-R{:04d}-AGIPD{:02d}-S{:05d}.h5'
-sequences = [-1] # module to consider, set to -1 for all, range allowed
-cells = 'range(0,0)' # number of cells, expression should be equivalent to list
-n_bins = 500 # number of bins of the histogram
-h_range = [-50, 450] # range of the histogram
-chunk_size = 5 # Number of memory cells to be processed at ones and be stored in a file
-n_cells = 202 # total number of memory cells (used to create summary file)
-run = 204 # run number, required
-karabo_id = 'MID_DET_AGIPD1M-1' # karabo_id
-templ = "{out_folder}/hists_m{module:02d}_c*.h5" # Template to concatinate histograms
-```
-%% Cell type:code id: tags:
-``` python
-from functools import partial
-from ipyparallel import Client
-import warnings
-import glob
-import h5py
-import numpy as np
-from time import sleep, time
-import os
-import gc
-import matplotlib.pyplot as plt
-import matplotlib as mpl
-from cal_tools.ana_tools import save_dict_to_hdf5
-%matplotlib inline
-warnings.filterwarnings('ignore')
-client = Client(profile=cluster_profile)
-view = client[:]
-view.use_dill()
-in_folder = "{}/r{:04d}/".format(in_folder, run)
-cells = list(eval(cells))
-if len(cells)>0:
-    print(f'List of cells {cells[0]}-{cells[-1]}: {cells}')
-print(f'Input folder {in_folder}')
-n_pix = 128*512
-module = modules[0]
-```
-%% Cell type:code id: tags:
-``` python
-def process_file(cells, n_bins, h_range, n_pix, module, karabo_id, file_name):
-    import glob
-    import h5py
-    import numpy as np
-    import gc
-    print('Read file {}'.format(file_name))
-    sequence = int(file_name.split('/')[-1].split('-')[3][1:-3])
-    res_all = np.zeros((len(cells), n_bins, n_pix)).astype(np.uint32)
-    err = ''
-    try:
-        with h5py.File(file_name, "r") as f:
-            path = f'/INSTRUMENT/{karabo_id}/DET/{module}CH0:xtdf/image'
-            print(path)
-            data_h = f[f'{path}/data']
-            cellId_h = f[f'{path}/cellId']
-            cell_id = np.array(cellId_h[()])
-            for ic, cell in enumerate(cells):
-                print(cell)
-                cell_sel = np.where(cell_id == cell)
-                data = np.array(data_h[cell_sel]).astype(np.float32)
-                print(data.shape)
-                res_all[ic] = np.apply_along_axis(lambda a: np.histogram(a, bins=n_bins, range=h_range)[0],
-                                          0,
-                                          data.reshape(data.shape[0], n_pix))
-            gc.collect()
-    except Exception as e:
-        err = str(e)
-        gc.collect()
-    return res_all, err, sequence
-if sequences[0] == -1:
-    fnames = glob.glob(fname.format(
-        in_folder, run, module, 99999).replace('99999', '*'))
-else:
-    fnames = [fname.format(in_folder, run, module, x) for x in sequences]
-#proposal = int(in_folder.split('/')[6][1:])
-sequences = [int(x.split('/')[-1].split('-')[3][1:-3]) for x in fnames]
-print(f"List of sequences: {sorted(sequences)}")
-processed = np.zeros((max(sequences+[1])+1, max(cells+[1])+1))
-processed[:,:] = np.nan
-cell_list = []
-for cell in cells:
-    cell_list.append(cell)
-    if len(cell_list) >= chunk_size or cell == cells[-1]:
-        inp = []
-        for file_name in fnames:
-            inp.append(file_name)
-        print(f'Process cells: {cell_list}')
-        p = partial(process_file, cell_list, n_bins, h_range, n_pix, module, karabo_id)
-        results = view.map_sync(p, inp)
-        #results = list(map(p, inp))
-        all_hists = np.zeros((len(cell_list), n_bins, n_pix)).astype(np.uint32)
-        for ir, r in enumerate(results):
-            data, msg, s = r
-            if msg == '':
-                processed[s, np.array(cell_list)] = 1
-                all_hists += data
-            else:
-                processed[s, np.array(cell_list)] = 0
-                print(f'Error in {ir}: {msg}')
-        out_name = '{}/hists_m{:02d}_c{:03d}-{:03d}.h5'.format(out_folder, module,
-                                                               cell_list[0], cell_list[-1])
-        save_dict_to_hdf5({'hist': all_hists,
-                           'cellId': np.array(cell_list),
-                           # 'proposal': proposal,
-                           # 'sequences': sequences,
-                           # 'run_': [int(run)],
-                           'nBins': n_bins,
-                           'hRange': np.array(h_range)}, out_name)
-        cell_list = []
-        gc.collect()
-```
-%% Cell type:code id: tags:
-``` python
-fig = plt.figure(figsize=(10,10))
-plt.imshow(processed, cmap=plt.cm.jet, vmin = 0, vmax = 1)
-plt.title(f"Processed files and cells for module {module}")
-plt.ylabel("Sequences", fontsize=18)
-_ = plt.xlabel("Cells", fontsize=18)
-```
-%% Cell type:code id: tags:
-``` python
-# Concatinate all files for given module
-fnames = glob.glob(templ.format(out_folder=out_folder, module=module))
-total_hist = np.zeros((n_cells, n_bins, n_pix)).astype(np.uint32)
-for file_name in fnames:
-    with h5py.File(file_name, "r") as f:
-        f_hist = np.array(f['hist'][()])
-        f_cell_id = np.array(f['cellId'][()])
-        f_n_bins = f['nBins'][()]
-        f_h_range = np.array(f['hRange'][()])
-        #f_proposal = np.array(f['proposal'][()])
-        #f_sequences = np.array(f['sequences'][()])
-        #f_runs = np.array(f['runs'][()])
-        if n_bins != f_n_bins or f_h_range[0] != h_range[0] or f_h_range[1] != h_range[1]:
-            print(f'file {file_name} is incompatible to be merged')
-            continue
-        print(f'Add file {file_name} with cells {f_cell_id}')
-        total_hist[f_cell_id] += f_hist
-out_name = '{}/hists_m{:02d}_sum.h5'.format(out_folder, module)
-print(f'Save to file: {out_name}')
-save_dict_to_hdf5({'hist': total_hist,
-                   'cellId': np.arange(n_cells),
-                   'nBins': n_bins,
-                   'hRange': np.array(h_range)}, out_name)
-```
-%% Cell type:code id: tags:
-``` python
-rshist = np.reshape(total_hist, (n_cells, n_bins, 512, 128))
-# some sanity check per mem cell
-mean_hist = np.zeros(n_cells)
-std_hist = np.zeros(n_cells)
-sum_hist = np.zeros(n_cells)
-for i in range(0, n_cells):
-    mean_hist[i] = np.mean(rshist[i, :, :, :])
-    std_hist[i] = np.std(rshist[i, :, :, :])
-    sum_hist[i] = np.sum(rshist[i, :, :, :])/(128*512)
-x = np.linspace(0, n_cells, n_cells)
-fig = plt.figure(figsize=(10, 10))
-ax0 = fig.add_subplot(211)
-ax0.plot(x, mean_hist, 'k', color='#3F7F4C')
-ax0.fill_between(x, mean_hist-std_hist, mean_hist+std_hist,
-                 alpha=0.6, edgecolor='#3F7F4C', facecolor='#7EFF99',
-                 linewidth=1, linestyle='dashdot', antialiased=True,
-                 label=" mean value $ \pm $ std ")
-ax0.set_xlabel('Cell', fontsize=14)
-ax0.set_ylabel('Mean over module [ADU]', fontsize=14)
-ax0.set_title(f'Module {module}', fontsize=16, fontweight='bold')
-ax0.grid()
-# ax0.set_ylim(-100,100)
-_ = ax0.legend()
-ax1 = fig.add_subplot(212)
-ax1.plot(x, sum_hist, 'k', color='#3F7F4C')
-ax1.set_xlabel('Cell', fontsize=14)
-ax1.set_ylabel('Average statistics', fontsize=14)
-ax1.set_title(f'Module {module}', fontsize=16, fontweight='bold')
-_ = ax1.legend()
-```
-%% Cell type:code id: tags:
-``` python
-# Plot for single pixel and all memory cells.
-xpix= 23
-ypix= 44
-x = np.arange(h_range[0],h_range[1] , 1)
-n,_ = rshist[:,:,xpix,ypix].shape
-colors = mpl.cm.rainbow(np.linspace(0, 1, n))
-fig = plt.figure(figsize=(10,5))
-fig.suptitle(f'Module {module} ', fontsize=14, fontweight='bold')
-ax = fig.add_subplot(111)
-fig.subplots_adjust(top=0.85)
-ax.set_title(f'single pixel [{xpix},{ypix}], all ({n_cells}) memory cells')
-ax.set_xlabel('Signal [ADU]')
-ax.set_ylabel('Counts')
-ax.set_xlim(-50,300)
-for color, y in zip(colors, rshist[:,:,xpix,ypix]):
-    ax.plot(x, y, color=color,linewidth=0.2)
-plt.grid()
-plt.show()
-```
--- a/xfel_calibrate/notebooks.py
+++ b/xfel_calibrate/notebooks.py
@@ -41,12 +41,6 @@ notebooks = {
                            "default concurrency": None,
                            "cluster cores": 8},
        },
-       "FF_HISTS": {
-               "notebook": "notebooks/AGIPD/playground/AGIPD_FF_Prepare_data.ipynb",
-               "concurrency": {"parameter": "modules",
-                               "default concurrency": list(range(16)),
-                               "cluster cores": 30},
-               },
    },
    "AGIPD64K": {
@@ -241,4 +235,3 @@ notebooks = {
        },
    },
 }