
Dask-assisted DSSC data binning

Merged: Loïc Le Guyader requested to merge dask-assisted-binning into master
 
%% Cell type:code id: tags:
 
``` python
 
import numpy as np
 
%matplotlib notebook
 
import matplotlib.pyplot as plt
 
plt.rcParams['figure.constrained_layout.use'] = True
 
 
import dask
 
print(f'dask: {dask.__version__}')
 
import dask.array as da
 
 
import xarray as xr
```
 
%% Output
 
dask: 2.11.0
 
%% Cell type:code id: tags:
 
``` python
 
from psutil import virtual_memory
 
import gc
 
# gc.collect() # run garbage collection to free up memory if needed
 
 
mem = virtual_memory()
 
print(f'Physical memory: {mem.total/1024/1024/1024:.0f} GiB') # total physical memory available
```
 
%% Output
 
Physical memory: 504 GiB
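
%% Cell type:markdown id: tags:

No distributed client is created in this notebook, so the `compute()` calls below run on dask's default threaded scheduler inside this single process. If the binning ever approaches the physical memory printed above, one option is a local `dask.distributed` cluster with an explicit per-worker memory budget. This is only a sketch, not part of the original workflow; the worker count, thread count and memory limit are placeholder values.

%% Cell type:code id: tags:

``` python
# Optional alternative to the default threaded scheduler: a local distributed
# cluster with an explicit per-worker memory budget (placeholder values).
from dask.distributed import Client

client = Client(n_workers=8, threads_per_worker=4, memory_limit='32GB')
client
```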
 
%% Cell type:code id: tags:
 
``` python
 
import logging
 
logging.basicConfig(filename='example.log', level=logging.DEBUG)
```
 
%% Cell type:code id: tags:
 
``` python
 
%load_ext autoreload
 
 
%autoreload 2
 
 
import toolbox_scs as tb
 
print(tb.__file__)
 
from toolbox_scs.routines.boz import load_dssc_module
 
 
from extra_data import open_run
```
 
%% Output
 
/home/lleguy/notebooks/ToolBox/src/toolbox_scs/__init__.py
 
%% Cell type:markdown id: tags:
 
# Parameters
 
%% Cell type:code id: tags:parameters
 
``` python
 
proposalNB = 2719
 
dark_runNB = 180
 
runNB = 179
 
module_group = 0
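
# Laser pumping sequence of the DSSC frames within a train; the pattern is tiled
# over all frames, and the interleaved 'intradark' frames are subtracted during processing.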
 
pulse_pattern = ['pumped', 'intradark', 'unpumped', 'intradark']*6 + ['pumped', 'intradark']
 
xaxis = 'delay' # 'nrj'
 
bin_width = 0.1 # [ps]
 
path = f'/gpfs/exfel/exp/SCS/202002/p002719/scratch/tests/r{runNB}/'
```
 
%% Cell type:code id: tags:
 
``` python
 
proposalNB = int(proposalNB)
 
dark_runNB = int(dark_runNB)
 
runNB = int(runNB)
 
module_group = int(module_group)
 
bin_width = float(bin_width)
 
 
moduleNB = list(range(module_group*4, (module_group+1)*4))
```
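
%% Cell type:markdown id: tags:

The first code cell under "Parameters" is tagged `parameters`, so the notebook can be executed non-interactively with values injected from outside, for example with papermill; the cell above casts the injected values back to `int`/`float` in case they arrive as strings. A minimal sketch, assuming papermill is the execution tool; the notebook file names are placeholders and the actual launch mechanism may differ.

%% Cell type:code id: tags:

``` python
import papermill as pm

# Execute this notebook for one module group of one run; the injected values
# override the defaults in the cell tagged 'parameters'.
pm.execute_notebook(
    'BinningExample.ipynb',          # placeholder input notebook name
    'BinningExample-r179-g0.ipynb',  # placeholder output notebook name
    parameters=dict(proposalNB=2719, dark_runNB=180, runNB=179, module_group=0),
)
```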
 
%% Cell type:markdown id: tags:
 
# Processing function
 
%% Cell type:code id: tags:
 
``` python
 
def process(module):
    # Load dark
    arr_dark, tid_dark = load_dssc_module(proposalNB, dark_runNB, module, drop_intra_darks=False)
    arr_dark = arr_dark.rechunk((100, -1, -1, -1))
    dark_img = arr_dark.mean(axis=0).compute()

    # Load module data
    arr, tid = load_dssc_module(proposalNB, runNB, module, drop_intra_darks=False)
    arr = arr.rechunk((100, -1, -1, -1))

    # dark and intra dark correction
    arr = arr - dark_img
    arr = arr[:, ::2, :, :] - arr[:, 1::2, :, :]

    # Load slow data against which to bin
    if xaxis == 'delay':
        run, v = tb.load(proposalNB, runNB, ['PP800_DelayLine', 'BAM1932M', 'SCS_XGM'])
    else:
        run, v = tb.load(proposalNB, runNB, [xaxis, 'SCS_XGM'])

    # select part of the run
    # v = v.isel({'trainId': slice(0, 3000)})

    # combine slow and DSSC module data
    xr_data = xr.DataArray(arr,
                           coords={'trainId': tid,
                                   'sa3_pId': v['sa3_pId'].values},
                           dims=['trainId', 'sa3_pId', 'y', 'x'])
    xr_data = xr_data.expand_dims(module=[module], axis=2)
    r = xr.merge([xr_data.to_dataset(name='DSSC'), v], join='inner')

    # calculate bins
    if xaxis == 'delay':
        r['delay'] = tb.misc.positionToDelay(r['PP800_DelayLine'])
        bam = r['BAM1932M'] - r['BAM1932M'].mean()
        r['bin_delay'] = ((r['delay'] - bam)/bin_width).round()*bin_width
    else:
        r['bin_' + xaxis] = (r[xaxis]/bin_width).round()*bin_width

    # add the pulse pattern coordinates
    Nrepeats = int(len(v['sa3_pId'].values)/len(pulse_pattern))
    pp = pulse_pattern*Nrepeats
    pp = np.array(pp)
    r = r.assign_coords(pp=("sa3_pId", pp))

    # select pattern and bin data
    bin_data = None
    for p in np.unique(pp):
        # slice using non-index coordinates
        # https://github.com/pydata/xarray/issues/2028
        sub_r = r.sel(sa3_pId=(r.pp == p))

        res = sub_r.groupby('bin_' + xaxis).mean()

        if bin_data is None:
            bin_data = res
            bin_data['DSSC'] = res['DSSC'].expand_dims(pp=[p])
            bin_data['SCS_SA3'] = res['SCS_SA3'].expand_dims(pp=[p])
        else:
            bin_data = xr.merge([bin_data,
                                 res['DSSC'].expand_dims(pp=[p]),
                                 res['SCS_SA3'].expand_dims(pp=[p])])

    # save the result
    fname = path + f'run{runNB}-darkrun{dark_runNB}-module{module}.h5'
    print(fname)
    bin_data.to_netcdf(fname, format='NETCDF4', engine='h5netcdf')
```
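
%% Cell type:markdown id: tags:

The binning above maps each BAM-corrected delay (or the chosen `xaxis` value) onto the centre of its bin by rounding `value / bin_width` to the nearest integer and scaling back. A small worked example with illustrative numbers, independent of the function above:

%% Cell type:code id: tags:

``` python
# With bin_width = 0.1 ps, a corrected delay of 1.234 ps falls into the 1.2 ps bin
# (up to floating point rounding of the bin centre).
example_delay = 1.234
example_bin_width = 0.1
print(round(example_delay / example_bin_width) * example_bin_width)
```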
 
%% Cell type:markdown id: tags:
 
# Processing
 
%% Cell type:code id: tags:
 
``` python
 
for m in moduleNB:
    process(m)
```
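
%% Cell type:markdown id: tags:

The loop above writes one netCDF file per module. A sketch of how the four files of this module group could be read back and combined along the `module` dimension for further analysis; the file name pattern mirrors the `fname` used in `process`, the rest is an assumption about the follow-up analysis.

%% Cell type:code id: tags:

``` python
# Reload the per-module results and concatenate them along 'module'.
datasets = [xr.open_dataset(path + f'run{runNB}-darkrun{dark_runNB}-module{m}.h5',
                            engine='h5netcdf') for m in moduleNB]
binned = xr.concat(datasets, dim='module')
binned
```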