diff --git a/doc/Dask DSSC module binning.ipynb b/doc/Dask DSSC module binning.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..74b9d0b191277b990f6e34fd10fd7e17bb57eb8f
--- /dev/null
+++ b/doc/Dask DSSC module binning.ipynb
@@ -0,0 +1,253 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "dask: 2.11.0\n"
+     ]
+    }
+   ],
+   "source": [
+    "import numpy as np\n",
+    "%matplotlib notebook\n",
+    "import matplotlib.pyplot as plt\n",
+    "plt.rcParams['figure.constrained_layout.use'] = True\n",
+    "\n",
+    "import dask\n",
+    "print(f'dask: {dask.__version__}')\n",
+    "import dask.array as da\n",
+    "\n",
+    "import xarray as xr"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Physical memory: 504 GB\n"
+     ]
+    }
+   ],
+   "source": [
+    "from psutil import virtual_memory\n",
+    "import gc\n",
+    "# gc.collect() # run garbage collection to free possible memory\n",
+    "\n",
+    "mem = virtual_memory()\n",
+    "print(f'Physical memory: {mem.total/1024/1024/1024:.0f} GB') # total physical memory available"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import logging\n",
+    "logging.basicConfig(filename='example.log', level=logging.DEBUG)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "/home/lleguy/notebooks/ToolBox/src/toolbox_scs/__init__.py\n"
+     ]
+    }
+   ],
+   "source": [
+    "%load_ext autoreload\n",
+    "\n",
+    "%autoreload 2\n",
+    "\n",
+    "import toolbox_scs as tb\n",
+    "print(tb.__file__)\n",
+    "from toolbox_scs.routines.boz import load_dssc_module\n",
+    "\n",
+    "from extra_data import open_run"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Parameters"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "tags": [
+     "parameters"
+    ]
+   },
+   "outputs": [],
+   "source": [
+    "proposalNB = 2719\n",
+    "dark_runNB = 180\n",
+    "runNB = 179\n",
+    "module_group = 0\n",
+    "pulse_pattern = ['pumped', 'intradark', 'unpumped', 'intradark']*6 + ['pumped', 'intradark']\n",
+    "xaxis = 'delay' # 'nrj'\n",
+    "bin_width = 0.1 # [ps]\n",
+    "path = f'/gpfs/exfel/exp/SCS/202002/p002719/scratch/tests/r{runNB}/'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "proposalNB = int(proposalNB)\n",
+    "dark_runNB = int(dark_runNB)\n",
+    "runNB = int(runNB)\n",
+    "module_group = int(module_group)\n",
+    "bin_width = float(bin_width)\n",
+    "\n",
+    "moduleNB = list(range(module_group*4, (module_group+1)*4))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Processing function"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 44,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def process(module):\n",
+    "    # Load dark\n",
+    "    arr_dark, tid_dark = load_dssc_module(proposalNB, dark_runNB, module, drop_intra_darks=False)\n",
+    "    arr_dark = arr_dark.rechunk((100, -1, -1, -1))\n",
+    "    dark_img = arr_dark.mean(axis=0).compute()\n",
+    "    \n",
+    "    # Load module data\n",
+    "    arr, tid = load_dssc_module(proposalNB, runNB, module, drop_intra_darks=False)\n",
+    "    arr = arr.rechunk((100, -1, -1, -1))\n",
+    "    \n",
+    "    # dark and intra dark correction\n",
+    "    arr = arr - dark_img\n",
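+    "    # assumption: frames alternate signal / intra-dark along the pulse\n",
+    "    # axis (see pulse_pattern), so subtracting odd (intra-dark) frames\n",
+    "    # from even ones pairwise removes residual detector background\n",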
+    "    arr = arr[:, ::2, :, :] - arr[:, 1::2, :, :]\n",
+    "    \n",
+    "    # Load slow data against which to bin\n",
+    "    if xaxis == 'delay':\n",
+    "        run, v = tb.load(proposalNB, runNB, ['PP800_DelayLine', 'BAM1932M', 'SCS_XGM'])\n",
+    "    else:\n",
+    "        run, v = tb.load(proposalNB, runNB, [xaxis, 'SCS_XGM'])\n",
+    "    \n",
+    "    # select part of the run\n",
+    "    # v = v.isel({'trainId':slice(0,3000)})\n",
+    "    \n",
+    "    # combine slow and DSSC module data\n",
+    "    xr_data = xr.DataArray(arr,\n",
+    "                           coords={'trainId': tid,\n",
+    "                                   'sa3_pId': v['sa3_pId'].values},\n",
+    "                           dims=['trainId', 'sa3_pId', 'y', 'x'])\n",
+    "    xr_data = xr_data.expand_dims(module=[module], axis=2)\n",
+    "    r = xr.merge([xr_data.to_dataset(name='DSSC'), v], join='inner')\n",
+    "    \n",
+    "    # calculate bins\n",
+    "    if xaxis == 'delay':\n",
+    "        r['delay'] = tb.misc.positionToDelay(r['PP800_DelayLine'])\n",
+    "        bam = r['BAM1932M'] - r['BAM1932M'].mean()\n",
+    "        r['bin_delay'] = ((r['delay'] - bam)/bin_width).round()*bin_width\n",
+    "    else:\n",
+    "        r['bin_' + xaxis] = (r[xaxis]/bin_width).round()*bin_width\n",
+    "    \n",
+    "    # add the pulse pattern coordinates\n",
+    "    Nrepeats = int(len(v['sa3_pId'].values)/len(pulse_pattern))\n",
+    "    pp = pulse_pattern*Nrepeats\n",
+    "    pp = np.array(pp)\n",
+    "    r = r.assign_coords(pp=(\"sa3_pId\", pp))\n",
+    "    \n",
+    "    # select pattern and bin data\n",
+    "    bin_data = None\n",
+    "    for p in np.unique(pp):\n",
+    "        # slice using non-index coordinates\n",
+    "        # https://github.com/pydata/xarray/issues/2028\n",
+    "        sub_r = r.sel(sa3_pId=(r.pp == p))\n",
+    "        \n",
+    "        res = sub_r.groupby('bin_' + xaxis).mean()\n",
+    "\n",
+    "        if bin_data is None:\n",
+    "            bin_data = res\n",
+    "            bin_data['DSSC'] = res['DSSC'].expand_dims(pp=[p])\n",
+    "            bin_data['SCS_SA3'] = res['SCS_SA3'].expand_dims(pp=[p])\n",
+    "        else:\n",
+    "            bin_data = xr.merge([bin_data,\n",
+    "                                 res['DSSC'].expand_dims(pp=[p]),\n",
+    "                                 res['SCS_SA3'].expand_dims(pp=[p])])\n",
+    "    \n",
+    "    # save the result\n",
+    "    fname = path + f'run{runNB}-darkrun{dark_runNB}-module{module}.h5'\n",
+    "    print(fname)\n",
+    "    bin_data.to_netcdf(fname, format='NETCDF4', engine='h5netcdf')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Processing"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for m in moduleNB:\n",
+    "    process(m)"
+   ]
+  }
+ ],
+ "metadata": {
+  "celltoolbar": "Tags",
+  "kernelspec": {
+   "display_name": "xfel",
+   "language": "python",
+   "name": "xfel"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.3"
+  },
+  "nbsphinx": {
+   "execute": "never"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/doc/howtos.rst b/doc/howtos.rst
index 9ca17baa83962966860c209495270afc96fa28f7..0d65eba4f49ba8bda1ab35bd34aed21b28069623
--- a/doc/howtos.rst
+++ b/doc/howtos.rst
@@ -20,6 +20,60 @@ detectors
 DSSC
 ++++
+
+DSSC data binning
+#################
+
+In a scattering experiment one typically wants to bin DSSC image data versus
+the time delay between pump and probe, or versus photon energy. After this
+first data reduction step, azimuthal integration can be performed on a much
+smaller amount of data.
+
+The DSSC data binning procedure is based on the notebook
+:doc:`Dask DSSC module binning <Dask DSSC module binning>`. It bins the
+DSSC data against a coordinate specified by *xaxis*, which can be *nrj*
+for the photon energy, *delay*, in which case the delay stage position is
+converted to picoseconds and corrected by the BAM, or another slow data
+channel. A specific pulse pattern can be defined, such as:
+
+.. code:: python
+
+    ['pumped', 'intradark', 'unpumped', 'intradark']
+
+which will be repeated to cover all pulses in a train. XGM data will also
+be binned similarly to the DSSC data.
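+
+The binning itself amounts to rounding the (BAM-corrected) coordinate to
+the nearest multiple of *bin_width* and averaging within each bin. For
+*xaxis* equal to *delay*, the notebook does essentially:
+
+.. code:: python
+
+    # r is the merged dataset, bam the mean-subtracted BAM values
+    r['bin_delay'] = ((r['delay'] - bam)/bin_width).round()*bin_width
+    binned = r.groupby('bin_delay').mean()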
+
+Since this data reduction step can be quite time consuming for large
+datasets, it is recommended to launch the notebook via a SLURM script:
+
+.. code:: bash
+
+    sbatch ./bin_dssc_module_job.sh 180 179 0
+    sbatch ./bin_dssc_module_job.sh 180 179 1
+    sbatch ./bin_dssc_module_job.sh 180 179 2
+    sbatch ./bin_dssc_module_job.sh 180 179 3
+
+where 180 is the dark run number, 179 is the run number, and 0, 1, 2 and 3
+are the four module groups, each job processing a set of 4 DSSC modules.
+
+The result will be 16 \*.h5 files, one per module, saved in the folder
+specified in the script, a copy of which can be found in the *scripts*
+folder of the toolbox source. These files can then be loaded and combined
+with:
+
+.. code:: python
+
+    import xarray as xr
+    data = xr.open_mfdataset(path + '/*.h5', parallel=True, join='inner')
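+
+From there, the *pp* dimension added by the notebook can be used to form
+pump-probe differences. A minimal sketch, assuming the pulse pattern shown
+above and binning in *delay*:
+
+.. code:: python
+
+    # pumped-unpumped difference of the binned DSSC images
+    diff = data['DSSC'].sel(pp='pumped') - data['DSSC'].sel(pp='unpumped')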
+
+DSSC azimuthal integration
+##########################
+
+*To be documented*.
+
+Legacy DSSC binning procedure
+#############################
+
 Most of the functions within toolbox_scs.detectors can be accessed directly.
 This is useful during development, or when working in a non-standardized way,
 which is often necessary during data evaluation. For frequent routines there
 is the possibility to use dssc objects that guarantee consistent data
 structure, and reduce the amount of recurring code within the notebook.

 * bin data using toolbox_scs.tbdet -> *to be documented*.
diff --git a/scripts/bin_dssc_module_job.sh b/scripts/bin_dssc_module_job.sh
new file mode 100644
index 0000000000000000000000000000000000000000..247cfe67d304cae6a02eb4f0f7f74eff5c4e566f
--- /dev/null
+++ b/scripts/bin_dssc_module_job.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+#SBATCH -N 1
+#SBATCH --partition=exfel
+#SBATCH --time=12:00:00
+#SBATCH --mail-type=END,FAIL
+#SBATCH --output=logs/%j-%x.out
+
+PROPOSAL=2719
+DARK=$1
+RUN=$2
+MODULE_GROUP=$3
+
+source /etc/profile.d/modules.sh
+module load exfel
+module load exfel_anaconda3/1.1
+
+echo processing run $RUN
+OUTPATH=/gpfs/exfel/exp/SCS/202002/p002719/scratch/tests
+mkdir -p $OUTPATH/r$RUN
+
+# Run papermill with the xfel kernel, injecting the notebook parameters
+python3 -c "import papermill as pm; pm.execute_notebook('Dask DSSC module binning.ipynb', \
+'$OUTPATH/r$RUN/output$MODULE_GROUP.ipynb', \
+parameters=dict(proposalNB='$PROPOSAL', dark_runNB='$DARK', runNB='$RUN', module_group='$MODULE_GROUP', \
+path='$OUTPATH/r$RUN/'), kernel_name='xfel')"
diff --git a/scripts/start_job_single.sh b/scripts/start_job_single.sh
index 68e97d6486ca5f90d19b986458523d8aac4c884c..d3934e9a0b522432006b7c8147d6768b2e948d09 100755
--- a/scripts/start_job_single.sh
+++ b/scripts/start_job_single.sh
@@ -9,5 +9,9 @@
 RUN=$1
 MODULES=$2
 RUNTYPE=$3
+source /etc/profile.d/modules.sh
+module load exfel
+module load exfel_anaconda3/1.1
+
 echo processing modules $MODULES of run $RUN
 python process_data_201007_23h.py --run-number $RUN --module ${MODULES} --runtype $RUNTYPE
diff --git a/scripts/start_processing_all.sh b/scripts/start_processing_all.sh
index 681315e8372a482598a079512a3795b78a4c94f9..f0fba41a6f57b748d30194f5e60769e288dba364 100755
--- a/scripts/start_processing_all.sh
+++ b/scripts/start_processing_all.sh
@@ -4,10 +4,12 @@
 RUN=$1
 RUNTYPE=$2
 if [ $RUN ] && [ $RUNTYPE ]
-    then
+    then
     echo processing run $RUN
-#    module load exfel
-#    module load exfel_anaconda3/1.1
+    source /etc/profile.d/modules.sh
+    module load exfel
+    module load exfel_anaconda3/1.1
+
     sbatch ./start_job_single.sh $RUN '0 1 2 3' $RUNTYPE
     sbatch ./start_job_single.sh $RUN '4 5 6 7' $RUNTYPE
     sbatch ./start_job_single.sh $RUN '8 9 10 11' $RUNTYPE
@@ -16,4 +18,4 @@
   echo please specify a run number and type
   echo available runtypes:
   echo energyscan, energyscan_pumped, static, static_IR, delayscan, timescan
-fi
\ No newline at end of file
+fi