Dask assisted DSSC data binning

Merged Loïc Le Guyader requested to merge dask-assisted-binning into master
Files changed: 5 (+253, -0)
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"dask: 2.11.0\n"
]
}
],
"source": [
"import numpy as np\n",
"%matplotlib notebook\n",
"import matplotlib.pyplot as plt\n",
"plt.rcParams['figure.constrained_layout.use'] = True\n",
"\n",
"import dask\n",
"print(f'dask: {dask.__version__}')\n",
"import dask.array as da\n",
"\n",
"import xarray as xr"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Physical memory: 504 Gb\n"
]
}
],
"source": [
"from psutil import virtual_memory\n",
"import gc\n",
"# gc.collect() # run garbage collection to free possible memory\n",
"\n",
"mem = virtual_memory()\n",
"print(f'Physical memory: {mem.total/1024/1024/1024:.0f} Gb') # total physical memory available"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import logging\n",
"logging.basicConfig(filename='example.log', level=logging.DEBUG)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/home/lleguy/notebooks/ToolBox/src/toolbox_scs/__init__.py\n"
]
}
],
"source": [
"%load_ext autoreload\n",
"\n",
"%autoreload 2\n",
"\n",
"import toolbox_scs as tb\n",
"print(tb.__file__)\n",
"from toolbox_scs.routines.boz import load_dssc_module\n",
"\n",
"from extra_data import open_run"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Parameters"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"parameters"
]
},
"outputs": [],
"source": [
"proposalNB = 2719\n",
"dark_runNB = 180\n",
"runNB = 179\n",
"module_group = 0\n",
"pulse_pattern = ['pumped', 'intradark', 'unpumped', 'intradark']*6 + ['pumped', 'intradark']\n",
"xaxis = 'delay' # 'nrj'\n",
"bin_width = 0.1 # [ps]\n",
"path = f'/gpfs/exfel/exp/SCS/202002/p002719/scratch/tests/r{runNB}/'"
]
},
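{
"cell_type": "markdown",
"metadata": {},
"source": [
"The cell above is tagged `parameters` so that the notebook can be parametrized, e.g. with papermill. Injected values arrive as strings, hence the casts in the next cell. Each `module_group` covers 4 DSSC modules, i.e. group 0 processes modules 0 to 3."
]
},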
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"proposalNB = int(proposalNB)\n",
"dark_runNB = int(dark_runNB)\n",
"runNB = int(runNB)\n",
"module_group = int(module_group)\n",
"bin_width = float(bin_width)\n",
"\n",
"moduleNB = list(range(module_group*4, (module_group+1)*4))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Processing function"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [],
"source": [
"def process(module):\n",
" # Load dark\n",
" arr_dark, tid_dark = load_dssc_module(proposalNB, dark_runNB, module, drop_intra_darks=False)\n",
" arr_dark = arr_dark.rechunk((100, -1, -1, -1))\n",
" dark_img = arr_dark.mean(axis=0).compute()\n",
" \n",
" # Load module data\n",
" arr, tid = load_dssc_module(proposalNB, runNB, module, drop_intra_darks=False)\n",
" arr = arr.rechunk((100, -1, -1, -1))\n",
" \n",
" # dark and intra dark correction\n",
" arr = arr - dark_img\n",
" arr = arr[:, ::2, :, :] - arr[:, 1::2, :, :]\n",
" \n",
" # Load slow data against which to bin\n",
" if xaxis == 'delay':\n",
" run, v = tb.load(proposalNB, runNB, ['PP800_DelayLine', 'BAM1932M', 'SCS_XGM'])\n",
" else:\n",
" run, v = tb.load(proposalNB, runNB, [xaxis, 'SCS_XGM'])\n",
" \n",
" # select part of the run\n",
" # v = v.isel({'trainId':slice(0,3000)})\n",
" \n",
" # combine slow and DSSC module data\n",
" xr_data = xr.DataArray(arr,\n",
" coords={'trainId': tid,\n",
" 'sa3_pId': v['sa3_pId'].values},\n",
" dims = ['trainId', 'sa3_pId', 'y', 'x'])\n",
" xr_data = xr_data.expand_dims(module=[module], axis=2)\n",
" r = xr.merge([xr_data.to_dataset(name='DSSC'), v], join='inner')\n",
" \n",
" # calculate bins\n",
" if xaxis == 'delay':\n",
" r['delay'] = tb.misc.positionToDelay(r['PP800_DelayLine'])\n",
" bam = r['BAM1932M'] - r['BAM1932M'].mean()\n",
" r['bin_delay'] = ((r['delay'] - bam)/bin_width).round()*bin_width\n",
" else:\n",
" r['bin_' + xaxis] = (r[xaxis]/bin_width).round()*bin_width\n",
" \n",
" # add the pulse pattern coordinates\n",
" Nrepeats = int(len(v['sa3_pId'].values)/len(pulse_pattern))\n",
" pp = pulse_pattern*Nrepeats\n",
" pp = np.array(pp)\n",
" r = r.assign_coords(pp=(\"sa3_pId\", pp))\n",
" \n",
" # select pattern and bin data\n",
" bin_data = None\n",
" for p in np.unique(pp):\n",
" # slice using non-index coordinates\n",
" # https://github.com/pydata/xarray/issues/2028\n",
" sub_r = r.sel(sa3_pId=(r.pp == p))\n",
" \n",
" res = sub_r.groupby('bin_'+xaxis).mean()\n",
"\n",
" if bin_data is None:\n",
" bin_data = res\n",
" bin_data['DSSC'] = res['DSSC'].expand_dims(pp=[p])\n",
" bin_data['SCS_SA3'] = res['SCS_SA3'].expand_dims(pp=[p])\n",
" else:\n",
" bin_data = xr.merge([bin_data,\n",
" res['DSSC'].expand_dims(pp=[p]),\n",
" res['SCS_SA3'].expand_dims(pp=[p])])\n",
" \n",
" # save the result\n",
" fname = path + f'run{runNB}-darkrun{dark_runNB}-module{module}.h5'\n",
" print(fname)\n",
" bin_data.to_netcdf(fname, format='NETCDF4', engine='h5netcdf')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Processing"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for m in moduleNB:\n",
" process(m)"
]
}
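,
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Inspecting the results\n",
"\n",
"A minimal sketch of reading the per-module files written by `process` back and stacking them along the `module` dimension. The `SCS_SA3` data, which has no `module` dimension, gets replicated by the concatenation."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"mod_data = []\n",
"for module in moduleNB:\n",
"    fname = path + f'run{runNB}-darkrun{dark_runNB}-module{module}.h5'\n",
"    mod_data.append(xr.open_dataset(fname, engine='h5netcdf'))\n",
"\n",
"# concatenate the single-module datasets along the 'module' dimension\n",
"binned = xr.concat(mod_data, dim='module')\n",
"binned"
]
}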
],
"metadata": {
"celltoolbar": "Tags",
"kernelspec": {
"display_name": "xfel",
"language": "python",
"name": "xfel"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
},
"nbsphinx": {
"execute": "never"
}
},
"nbformat": 4,
"nbformat_minor": 2
}