Merge branch 'DSSC1module' into 'master'

Dssc1module See merge request SCS/ToolBox!54

Merge branch 'DSSC1module' into 'master'
Dssc1module See merge request SCS/ToolBox!54
c8d0095a · Loïc Le Guyader · 58ea0a39 · 30f19660 · c8d0095a · c8d0095a
Commit c8d0095a authored 5 years ago by Loïc Le Guyader
--- a/DSSC1module.py
+++ b/DSSC1module.py
+import multiprocessing
+from time import strftime
+from tqdm.auto import tqdm
+import os
+import warnings
+import psutil
+import karabo_data as kd
+from karabo_data.read_machinery import find_proposal
+import ToolBox as tb
+import matplotlib.pyplot as plt
+from mpl_toolkits.axes_grid1 import ImageGrid
+import matplotlib.patches as patches
+import numpy as np
+import xarray as xr
+import h5py
+from glob import glob
+from imageio import imread
+class DSSC1module:
+    def __init__(self, module, proposal):
+        """ Create a DSSC object to process 1 module of DSSC data.
+            inputs:
+                module: module number to process
+                proposal: (int,str) proposal number string
+        """
+        self.module = module
+        if isinstance(proposal,int):
+            proposal = 'p{:06d}'.format(proposal)
+        self.runFolder = find_proposal(proposal)
+        self.semester = self.runFolder.split('/')[-2]
+        self.proposal = proposal
+        self.topic = self.runFolder.split('/')[-3]
+        self.save_folder = os.path.join(self.runFolder, 'usr/condensed_runs/')
+        self.px_pitch_h = 236 # horizontal pitch in microns
+        self.px_pitch_v = 204 # vertical pitch in microns
+        self.aspect = self.px_pitch_v/self.px_pitch_h # aspect ratio of the DSSC images
+        print('DSSC configuration')
+        print(f'DSSC module: {self.module}')
+        print(f'Topic: {self.topic}')
+        print(f'Semester: {self.semester}')
+        print(f'Proposal: {self.proposal}')
+        print(f'Default save folder: {self.save_folder}')
+        if not os.path.exists(self.save_folder):
+            warnings.warn(f'Default save folder does not exist: {self.save_folder}')
+        self.dark_data = 0
+        self.max_fraction_memory = 0.8
+        self.Nworker = 10
+        self.rois = None
+    def open_run(self, run_nr, t0=0.0):
+        """ Open a run with karabo-data and prepare the virtual dataset for multiprocessing
+            inputs:
+                run_nr: the run number
+                t0: optional t0 in mm
+        """
+        print('Opening run data with karabo-data')
+        self.run_nr = run_nr
+        self.xgm = None
+        self.run = kd.open_run(self.proposal, self.run_nr)
+        self.plot_title = f'{self.proposal} run: {self.run_nr}'
+        self.fpt = self.run.detector_info(f'SCS_DET_DSSC1M-1/DET/{self.module}CH0:xtdf')['frames_per_train']
+        self.nbunches = self.run.get_array('SCS_RR_UTC/MDL/BUNCH_DECODER', 'sase3.nPulses.value')
+        self.nbunches = np.unique(self.nbunches)
+        if len(self.nbunches) == 1:
+            self.nbunches = self.nbunches[0]
+        else:
+            warnings.warn('not all trains have same length DSSC data')
+            print(f'nbunches: {self.nbunches}')
+            self.nbunches = self.nbunches[-1]
+        print(f'DSSC frames per train: {self.fpt}')
+        print(f'SA3 bunches per train: {self.nbunches}')
+        print('Collecting DSSC module files')
+        self.collect_dssc_module_file()
+        print(f'Loading XGM data')
+        self.xgm = self.run.get_array(tb.mnemonics['SCS_SA3']['source'],
+                                      tb.mnemonics['SCS_SA3']['key'],
+                                      roi=kd.by_index[:self.nbunches])
+        self.xgm = self.xgm.rename({'dim_0':'pulseId'})
+        self.xgm['pulseId'] = np.arange(0, 2*self.nbunches, 2)
+        print(f'Loading mono nrj data')
+        self.nrj = self.run.get_array(tb.mnemonics['nrj']['source'],
+                                      tb.mnemonics['nrj']['key'])
+        print(f'Loading daly line data')
+        try:
+            self.delay_mm = self.run.get_array(tb.mnemonics['PP800_DelayLine']['source'],
+                                               tb.mnemonics['PP800_DelayLine']['key'])
+        except:
+            self.delay_mm = 0*self.nrj
+        self.t0 = t0
+        self.delay_ps = tb.positionToDelay(self.delay_mm, origin=self.t0)
+    def collect_dssc_module_file(self):
+        """ Collect the raw DSSC module h5 files.
+        """
+        pattern = self.runFolder + f'/raw/r{self.run_nr:04d}/RAW-R{self.run_nr:04d}-DSSC{self.module:02d}-S*.h5'
+        self.h5list = glob(pattern)
+    def process(self, dark_pass=None):
+        """ Process DSSC data from one module using multiprocessing
+            dark_pass: if None, process data, if 'mean', compute the mean, if 'std', compute the std
+        """
+        # get available memory in GB, we will try to use 80 % of it
+        max_GB = psutil.virtual_memory().available/1024**3
+        print(f'max available memory: {max_GB} GB')
+        # max_GB / (8byte * Nworker * 128px * 512px * N_pulses)
+        self.chunksize = int(self.max_fraction_memory*max_GB * 1024**3 // (8 * self.Nworker * 128 * 512 * self.fpt))
+        print('processing', self.chunksize, 'trains per chunk')
+        if dark_pass == 'mean':
+            rois = None
+            dark = 0
+            mask = 1
+        elif dark_pass == 'std':
+            dark = self.dark_data['dark_mean']
+            rois = None
+            mask = 1
+        elif dark_pass is None:
+            dark = self.dark_data['dark_mean']
+            rois = self.rois
+            mask = self.dark_data['mask']
+        else:
+            raise ValueError(f"dark_pass should be either None or 'mean' or 'std' but not {dark_pass}")
+        jobs = []
+        for m,h5fname in enumerate(self.h5list):
+            jobs.append(dict(
+            fpt=self.fpt,
+            module=self.module,
+            h5fname=h5fname,
+            chunksize=self.chunksize,
+            nbunches=self.nbunches,
+            workerId=m,
+            Nworker=self.Nworker,
+            dark_data=dark,
+            rois=rois,
+            mask=mask
+            ))
+        timestamp = strftime('%X')
+        print(f'start time: {timestamp}')
+        with multiprocessing.Pool(self.Nworker) as pool:
+            res = pool.map(process_one_module, jobs)
+        print('finished:', strftime('%X'))
+        # rearange the multiprocessed data
+        # this is to get rid of the worker dimension, there is no sum over worker really involved
+        self.module_data = xr.concat(res, dim='worker').sum(dim='worker')
+        # reorder the dimension
+        if 'trainId' in self.module_data.dims:
+            self.module_data = self.module_data.transpose('trainId', 'pulseId', 'x', 'y')
+        else:
+            self.module_data = self.module_data.transpose('pulseId', 'x', 'y')
+        # fix some computation now that we have everything
+        self.module_data['std_data'] = np.sqrt(self.module_data['std_data']/(self.module_data['counts'] - 1))
+        self.module_data['dark_corrected_data'] = self.module_data['dark_corrected_data']/self.module_data['counts']
+        self.module_data['run'] = self.run_nr
+        if dark_pass == 'mean':
+            self.dark_data = self.module_data['dark_corrected_data'].to_dataset('dark_mean')
+            self.dark_data['run'] = self.run_nr
+        elif dark_pass == 'std':
+            self.dark_data['dark_std'] = self.module_data['std_data']
+            assert self.dark_data['run'] == self.run_nr, "noise map computed from different darks"
+        else:
+            self.module_data['xgm'] = self.xgm
+            self.module_data['nrj'] = self.nrj
+            self.module_data['delay_mm'] = self.delay_mm
+            self.module_data['delay_ps'] = self.delay_ps
+            self.module_data['t0'] = self.t0
+        self.plot_title = f"{self.proposal} run: {self.module_data['run'].values} dark: {self.dark_data['run'].values}"
+        self.module_data.attrs['plot_title'] = self.plot_title
+    def compute_mask(self, low=0.01, high=0.8):
+        """ Compute a DSSC module mask from the noise map of a dark run.
+        """
+        if self.dark_data['dark_std'] is None:
+            raise ValueError('Cannot compute from from a missing dark noise map')
+        self.dark_data['mask_low'] = low
+        self.dark_data['mask_high'] = high
+        m_std = self.dark_data['dark_std'].mean('pulseId')
+        self.dark_data['mask'] = 1 - ((m_std > self.dark_data['mask_high']) + (m_std < self.dark_data['mask_low']))
+    def plot_module(self, plot_dark=False, low=1, high=98, vmin=None, vmax=None):
+        """ Plot a module.
+            inputs:
+                plot_dark: if true, plot dark instead of run data.
+                low: low percentile fraction of the display scale
+                high: high percentile fraction of the display scale
+                vmin: low value of the display scale, overwrites vmin computed from low
+                vmax: max value of the display scale, overwrites vmax computed from high
+        """
+        if plot_dark:
+            mean = self.dark_data['dark_mean'].mean('pulseId')
+            std = self.dark_data['dark_std']
+            title = f"{self.proposal} dark: {self.dark_data['run'].values}"
+        else:
+            mean = self.module_data['dark_corrected_data'].mean('pulseId')
+            std = self.module_data['std_data']
+            title = self.plot_title
+        fig, (ax1, ax2, ax3, ax4) = plt.subplots(nrows=4, figsize=[5, 4*2.5])
+        _vmin, _vmax = np.percentile((mean.values[~self.dark_data['mask']]).flatten(), [low, high])
+        if vmin is None:
+            vmin = _vmin
+        if vmax is None:
+            vmax = _vmax
+        im = ax1.imshow(mean, vmin=vmin, vmax=vmax)
+        fig.colorbar(im, ax=ax1)
+        ax1.set_title('mean')
+        fig.suptitle(title)
+        im = ax2.imshow(std.mean('pulseId'), vmin=0, vmax=2)
+        fig.colorbar(im, ax=ax2)
+        ax2.set_title('std')
+        ax3.hist(std.values.flatten(), bins=200, range=[0, 2], density=True)
+        ax3.axvline(self.dark_data['mask_low'], ls='--', c='k')
+        ax3.axvline(self.dark_data['mask_high'], ls='--', c='k')
+        ax3.set_yscale('log')
+        ax3.set_ylabel('density')
+        ax3.set_xlabel('std values')
+        im = ax4.imshow(self.dark_data['mask'])
+        fig.colorbar(im, ax=ax4)
+    def save(self, save_folder=None, overwrite=False, isDark=False):
+        """ Save the crunched data.
+            inputs:
+                save_folder: string of the fodler where to save the data.
+                overwrite: boolean whether or not to overwrite existing files.
+                isDark: save the dark or the process data
+        """
+        if save_folder is None:
+            save_folder = self.save_folder
+        if isDark:
+            fname = f'run{self.run_nr}_dark.h5'  # no scan
+            data = self.dark_data
+        else:
+            fname = f'run{self.run_nr}.h5'  # run with delay scan (change for other scan types!)
+            data = self.module_data
+        save_path = os.path.join(save_folder, fname)
+        file_exists = os.path.isfile(save_path)
+        if not file_exists or (file_exists and overwrite):
+            if file_exists:
+                warnings.warn(f'Overwriting file: {save_path}')
+                os.remove(save_path)
+            data.to_netcdf(save_path, group='data')
+            os.chmod(save_path, 0o664)
+            print('saving: ', save_path)
+        else:
+            print('file', save_path, 'exists and overwrite is False')
+    def load_dark(self, dark_runNB, save_folder=None):
+        """ Load dark data.
+            inputs:
+                save_folder: string of the folder where the data were saved.
+        """
+        if save_folder is None:
+            save_folder = self.save_folder
+        self.run_nr = dark_runNB
+        self.dark_data = xr.open_dataset(os.path.join(save_folder, f'run{dark_runNB}_dark.h5'), group='data')
+        self.plot_title = f"{self.proposal} dark: {self.dark_data['run'].values}"
+    def show_rois(self):
+        fig, ax1 = plt.subplots(nrows=1, figsize=[5, 2.5])
+        try:
+            ax1.imshow(self.module_data['dark_corrected_data'].mean('pulseId') * self.dark_data['mask'])
+        except:
+            ax1.imshow(self.dark_data['dark_mean'].mean('pulseId') * self.dark_data['mask'])            
+        for r,v in self.rois.items():
+            rect = patches.Rectangle((v['y'][0], v['x'][0]),
+                                     v['y'][1] - v['y'][0],
+                                     v['x'][1] - v['x'][0],
+                                     linewidth=1, edgecolor='r', facecolor='none')
+            ax1.add_patch(rect)
+        fig.suptitle(self.plot_title)
+# since 'self' is not pickable, this function has to be outside the DSSC class so that it can be used
+# by the multiprocessing pool.map function
+def process_one_module(job):
+    chunksize = job['chunksize']
+    Nworker = job['Nworker']
+    workerId = job['workerId']
+    dark_data = job['dark_data']
+    fpt = job['fpt']
+    module = job['module']
+    rois = job['rois']
+    mask = job['mask']
+    h5fname = job['h5fname']
+    image_path = f"INSTRUMENT/SCS_DET_DSSC1M-1/DET/{module}CH0:xtdf/image/data"
+    npulse_path = f"INDEX/SCS_DET_DSSC1M-1/DET/{module}CH0:xtdf/image/count"
+    with h5py.File(h5fname, 'r') as m:
+        all_trainIds = m['INDEX/trainId'][()]
+    n_trains = len(all_trainIds)
+    n_chunk = np.ceil(n_trains/chunksize) + 1
+    chunks = np.linspace(0, n_trains, n_chunk, endpoint=True, dtype=int)
+    # create empty dataset to add actual data to
+    module_data = xr.DataArray(np.zeros([fpt, 128, 512], dtype=np.float64),
+                               dims=['pulseId', 'x', 'y'],
+                               coords={'pulseId':np.arange(fpt)}).to_dataset(name='dark_corrected_data')
+    module_data['std_data'] = xr.DataArray(np.zeros([fpt, 128, 512], dtype=np.float64),
+                               dims=['pulseId', 'x', 'y'])
+    if rois is not None:
+        for k in rois.keys():
+            module_data[k] = xr.DataArray(np.empty([n_trains], dtype=np.float64),
+                                   dims=['trainId'], coords = {'trainId': all_trainIds})
+    module_data['counts'] = 0
+    # crunching
+    with h5py.File(h5fname, 'r') as m:
+        #chunk_start = np.arange(len(all_trainIds), step=job['chunksize'], dtype=int)
+        trains_start = 0
+        # This line is the strange hack from https://github.com/tqdm/tqdm/issues/485
+        print(' ', end='', flush=True)
+        for k,v in enumerate(tqdm(chunks[:-1], desc=f"pool.map#{workerId:02d}")):              
+            chunk_dssc = np.s_[int(chunks[k] * fpt):int(chunks[k+1] * fpt)]  # for dssc data
+            data = m[image_path][chunk_dssc].squeeze()
+            trains = m['INDEX/trainId'][np.s_[int(chunks[k]):int(chunks[k+1])]]
+            n_trains = len(trains)                   
+            data = data.astype(np.float64)
+            data = xr.DataArray(np.reshape(data, [n_trains, fpt, 128, 512]),
+                                dims=['trainId', 'pulseId', 'x', 'y'],
+                                coords={'trainId': trains})
+            temp = data - dark_data
+            if rois is not None:
+                temp2 = temp.where(mask)
+                for k,v in rois.items():
+                    val = temp2.isel({'x':slice(v['x'][0], v['x'][1]),
+                                     'y':slice(v['y'][0], v['y'][1])}).sum(dim=['x','y'])
+                    module_data[k] = val
+            module_data['dark_corrected_data'] += temp.sum(dim='trainId')
+            module_data['std_data'] += (temp**2).sum(dim='trainId')
+            module_data['counts'] += n_trains
+    return module_data
\ No newline at end of file
--- a/XAS.py
+++ b/XAS.py
@@ -135,10 +135,22 @@ def xas(nrun, bins=None, Iokey='SCS_SA3', Itkey='MCP3apd', nrjkey='nrj', Iooffse
    def whichIo(data):
        """ Select which fields to use as I0 and which to use as I1
        """
+        if 'mcp' in Iokey.lower():
+            Io_sign = -1
+        else:
+            Io_sign = 1
+        if 'mcp' in Itkey.lower():
+            It_sign = -1
+        else:
+            It_sign = 1
        if len(data) == 0:
            return absorption([], [])
        else:
-            return absorption(-data['It'], data['Io'])
+            return absorption(It_sign*data['It'], Io_sign*data['Io'])
    if bins is None:
        num_bins = 80
@@ -169,9 +181,12 @@ def xas(nrun, bins=None, Iokey='SCS_SA3', Itkey='MCP3apd', nrjkey='nrj', Iooffse
        ax1_twin.bar(bins_c, nosample['muIo'], width=0.80*(bins_c[1]-bins_c[0]),
                color='C1', alpha=0.2)
        ax1_twin.set_ylabel('Io')
-        proposalNB=int(nrun.attrs['runFolder'].split('/')[-4][1:])
+        try:
-        runNB=int(nrun.attrs['runFolder'].split('/')[-2][1:])
+            proposalNB=int(nrun.attrs['runFolder'].split('/')[-4][1:])
-        ax1.set_title('run {:d} p{:}'.format(runNB, proposalNB))
+            runNB=int(nrun.attrs['runFolder'].split('/')[-2][1:])
+            ax1.set_title('run {:d} p{:}'.format(runNB, proposalNB))
+        except:
+            f.suptitle(nrun.attrs['plot_title'])
        ax2 = plt.subplot(gs[1])
        ax2.bar(bins_c, nosample['counts'], width=0.80*(bins_c[1]-bins_c[0]),

--- a/__init__.py
+++ b/__init__.py
@@ -5,3 +5,4 @@ from ToolBox.knife_edge import *
 from ToolBox.Laser_utils import *
 from ToolBox.DSSC import DSSC
 from ToolBox.azimuthal_integrator import *
+from ToolBox.DSSC1module import *