From 1b7d88d92d90fa1213ec92727b20b64c287f6f69 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Le=20Guyader?= <loic.le.guyader@xfel.eu>
Date: Thu, 14 Nov 2019 17:03:55 +0100
Subject: [PATCH] Fast ccd ai

---
 FastCCD.py  | 639 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 __init__.py |   1 +
 2 files changed, 640 insertions(+)
 create mode 100644 FastCCD.py

diff --git a/FastCCD.py b/FastCCD.py
new file mode 100644
index 0000000..1b8ca48
--- /dev/null
+++ b/FastCCD.py
@@ -0,0 +1,639 @@
+import multiprocessing
+from time import strftime
+import tempfile
+import shutil
+from tqdm.auto import tqdm
+import os
+import warnings
+import psutil
+
+import karabo_data as kd
+from karabo_data.read_machinery import find_proposal
+import ToolBox as tb
+import matplotlib.pyplot as plt
+from mpl_toolkits.axes_grid1 import ImageGrid
+import numpy as np
+import xarray as xr
+import h5py
+from glob import glob
+
+from imageio import imread
+
+class FastCCD:
+    
+    def __init__(self, proposal, distance=1, raw=False):
+        """ Create a FastCCD object to process FaStCCD data.
+        
+            inputs:
+                proposal: (int,str) proposal number string
+                distance: (float) distance sample to FastCCD detector in meter
+                raw: use processed data from the calibration pipeline or raw files
+            
+        """
+        if isinstance(proposal,int):
+            proposal = 'p{:06d}'.format(proposal)
+            
+        self.runFolder = find_proposal(proposal)
+        self.semester = self.runFolder.split('/')[-2]
+        self.proposal = proposal
+        self.topic = self.runFolder.split('/')[-3]
+        self.tempdir = None
+        self.save_folder = os.path.join(self.runFolder, 'usr/condensed_runs/')
+        self.distance = distance
+        self.px_pitch_h = 30 # pitch in microns
+        self.px_pitch_v = 30 # pitch in microns      
+        
+        self.aspect = 1 # aspect ratio of the FastCCD images
+        self.mask = None
+        self.max_fraction_memory = 0.8
+        self.filter_mask = None
+        self.raw = raw
+        self.gain = 1
+        
+        print('FastCCD configuration')
+        print(f'Topic: {self.topic}')
+        print(f'Semester: {self.semester}')
+        print(f'Proposal: {self.proposal}')
+        print(f'Default save folder: {self.save_folder}')
+        print(f'Sample to FastCCD distance: {self.distance} m')
+        print(f'Using raw files: {self.raw}')
+        
+        if not os.path.exists(self.save_folder):
+            warnings.warn(f'Default save folder does not exist: {self.save_folder}')
+            
+        self.max_fraction_memory = 0.8
+        self.Nworker = 10
+        self.maxSaturatedPixel = 1
+        
+        
+    def __del__(self):
+        # deleting temporay folder
+        if self.tempdir:
+            shutil.rmtree(self.tempdir)
+    
+    def open_run(self, run_nr, isDark=False, t0=0.0):
+        """ Open a run with karabo-data and prepare the virtual dataset for multiprocessing
+        
+            inputs:
+                run_nr: the run number
+                isDark: True if the run is a dark run
+                t0: optional t0 in mm
+        """
+        
+        print('Opening run data with karabo-data')
+        self.run_nr = run_nr
+        self.xgm = None
+        
+        self.run = kd.open_run(self.proposal, self.run_nr)
+        self.plot_title = f'{self.proposal} run: {self.run_nr}'
+        self.isDark = isDark
+        self.fpt = 1
+        #self.nbunches = self.run.get_array('SCS_RR_UTC/MDL/BUNCH_DECODER', 'sase3.nPulses.value')
+        #self.nbunches = np.unique(self.nbunches)
+        self.nbunches = 1
+        #if len(self.nbunches) == 1:
+        #    self.nbunches = self.nbunches[0]
+        #else:
+        #    warnings.warn('not all trains have same length FastCCD data')
+        #    print(f'nbunches: {self.nbunches}')
+        #    self.nbunches = self.nbunches[-1]
+            
+        print(f'FastCCD frames per train: {self.fpt}')
+        print(f'SA3 bunches per train: {self.nbunches}')
+        
+        print('Collecting FastCCD module files')
+        self.collect_fastccd_file()
+        
+        print(f'Loading XGM data')
+        try:
+            self.xgm = self.run.get_array(tb.mnemonics['SCS_SA3']['source'],
+                                      tb.mnemonics['SCS_SA3']['key'],
+                                      roi=kd.by_index[:self.nbunches])
+            self.xgm = self.xgm.squeeze() # remove the pulseId dimension since XGM should have only 1 value per train
+        except:
+            self.xgm = xr.DataArray(np.zeros_like(self.run.train_ids),dims = 'trainId', coords = {"trainId":self.run.train_ids})
+            
+        print(f'Loading mono nrj data')
+        try:
+            self.nrj = self.run.get_array(tb.mnemonics['nrj']['source'],
+                                      tb.mnemonics['nrj']['key'])
+               
+        except:
+            self.nrj = xr.DataArray(np.zeros_like(self.run.train_ids),dims = 'trainId', coords = {"trainId":self.run.train_ids})    
+                
+                
+        print(f'Loading delay line data')
+        try:
+            self.delay_mm = self.run.get_array(tb.mnemonics['PP800_DelayLine']['source'],
+                                               tb.mnemonics['PP800_DelayLine']['key'])
+        except:
+            self.delay_mm = xr.DataArray(np.zeros_like(self.run.train_ids),dims = 'trainId', coords = {"trainId":self.run.train_ids})    
+        self.t0 = t0
+        self.delay_ps = tb.positionToDelay(self.delay_mm, origin=self.t0, invert=True)
+
+        print(f'Loading Fast ADC5 data')
+        try:
+            self.FastADC5 = self.run.get_array(tb.mnemonics['FastADC5raw']['source'], tb.mnemonics['FastADC5raw']['key']).max('dim_0')
+            self.FastADC5[self.FastADC5<35000] = 0
+            self.FastADC5[self.FastADC5>=35000] = 1
+        except:
+            self.FastADC5 = xr.DataArray(np.zeros_like(self.run.train_ids),dims = 'trainId', coords = {"trainId":self.run.train_ids})    
+        
+        # create a dummy scan variable for dark run
+        # for other type or run, use FastCCD.define_run function to overwrite it
+        self.scan = xr.DataArray(np.ones_like(self.run.train_ids), dims=['trainId'],coords={'trainId': self.run.train_ids})
+        self.scan = self.scan.to_dataset(name='scan_variable')
+        self.scan_vname = 'dummy'
+        
+    def load_gain(self, fname):
+        """ Load a gain map by giving the filename
+        """
+        self.gain = np.load(fname)['arr_0'][:,:,0]
+        
+    def collect_fastccd_file(self):
+        """ Collect the raw fastCCD h5 files.
+        """
+        
+        if self.raw:
+            folder = 'raw'
+        else:
+            folder = 'proc'
+            
+        pattern = self.runFolder + f'/{folder}/r{self.run_nr:04d}/RAW-R{self.run_nr:04d}-DA05-S*.h5'
+        self.h5list = glob(pattern)                
+        
+    def define_scan(self, vname, bins):
+        """
+            Prepare the binning of the FastCCD data.
+            
+            inputs:
+                vname: variable name for the scan, can be a mnemonic string from ToolBox
+                    or a dictionnary with ['source', 'key'] fields
+                bins: step size (or bins_edge but not yet implemented)
+        """
+
+        if vname == 'delay_ps':
+            self.scan = self.delay_ps
+        else:
+            if type(vname) is dict:
+                self.scan = self.run.get_array(vname['source'], vname['key'])
+            elif type(vname) is str:
+                if vname not in tb.mnemonics:
+                    raise ValueError(f'{vname} not found in the ToolBox mnemonics table')
+                self.scan = self.run.get_array(tb.mnemonics[vname]['source'], tb.mnemonics[vname]['key'])
+            else:
+                raise ValueError(f'vname should be a string or a dict. We got {type(vname)}')
+            
+        if (type(bins) is int) or (type(bins) is float):
+            self.scan = bins * np.round(self.scan / bins)
+        else:
+            # TODO: digitize the data
+            raise ValueError(f'To be implemented')
+        self.scan_vname = vname
+                   
+        self.scan = self.scan.to_dataset(name='scan_variable')
+        #self.scan['xgm_pumped'] = self.xgm[:, :self.nbunches:2].mean('dim_0')
+        #self.scan['xgm_unpumped'] = self.xgm[:, 1:self.nbunches:2].mean('dim_0')
+        #self.scan.to_netcdf(self.vds_scan, group='data')
+
+        self.scan_counts = xr.DataArray(np.ones(len(self.scan['scan_variable'])),
+                                        dims=['scan_variable'],
+                                        coords={'scan_variable': self.scan['scan_variable'].values},
+                                        name='counts')
+        self.scan_points = self.scan.groupby('scan_variable').mean('trainId').coords['scan_variable'].values
+        self.scan_points_counts = self.scan_counts.groupby('scan_variable').sum()
+        self.plot_scan()
+        
+    def plot_scan(self):
+        """ Plot a previously defined scan to see the scan range and the statistics.
+        """
+        if self.scan:
+            fig, (ax1, ax2) = plt.subplots(nrows=2, figsize=[5, 5])
+        else:
+            fig, ax1 = plt.subplots(nrows=1, figsize=[5, 2.5])
+            
+        ax1.plot(self.scan_points, self.scan_points_counts, 'o-', ms=2)
+        ax1.set_xlabel(f'{self.scan_vname}')
+        ax1.set_ylabel('# trains')
+        ax1.set_title(self.plot_title)
+        
+        if self.scan:
+            ax2.plot(self.scan['scan_variable'])
+            ax2.set_xlabel('train #')
+            ax2.set_ylabel(f'{self.scan_vname}')
+                      
+    def plot_xgm_hist(self, nbins=100):
+        """ Plots an histogram of the SCS XGM dedicated SAS3 data.
+        
+            inputs:
+                nbins: number of the bins for the histogram.
+        """
+        
+        hist, bins_edges = np.histogram(self.xgm, nbins, density=True)
+        width = 1.0 * (bins_edges[1] - bins_edges[0])
+        bins_center = 0.5*(bins_edges[:-1] + bins_edges[1:])
+        
+        plt.figure(figsize=(5,3))
+        plt.bar(bins_center, hist, align='center', width=width)
+        plt.xlabel(f"{tb.mnemonics['SCS_SA3']['source']}{tb.mnemonics['SCS_SA3']['key']}")
+        plt.ylabel('density')
+        plt.title(self.plot_title)
+        
+    def xgm_filter(self, xgm_low=-np.inf, xgm_high=np.inf):
+        """ Filters the data by train. If one pulse within a train has an SASE3 SCS XGM value below
+            xgm_low or above xgm_high, that train will be dropped from the dataset.
+            
+            inputs:
+                xgm_low: low threshold value
+                xgm_high: high threshold value
+        """
+                   
+        if self.isDark:
+            warnings.warn(f'This run was loaded as dark. Filtering on xgm makes no sense. Aborting')
+            return
+        
+        self.xgm_low = xgm_low
+        self.xgm_high = xgm_high
+        
+        filter_mask = (self.xgm > self.xgm_low) * (self.xgm < self.xgm_high)
+                   
+        if self.filter_mask is None:
+            self.filter_mask = filter_mask
+        else:
+            self.filter_mask = self.filter_mask*filter_mask
+                   
+        valid = filter_mask.astype(bool)
+        self.xgm = self.xgm.where(valid).dropna('trainId')
+        self.scan = self.scan.sel({'trainId': self.xgm.trainId})
+        nrejected = len(self.run.train_ids) - len(self.xgm.trainId)
+        print((f'Rejecting {nrejected} out of {len(self.run.train_ids)} trains due to xgm '
+               f'thresholds: [{self.xgm_low}, {self.xgm_high}]'))
+
+    def load_mask(self, fname, plot=True):
+        """ Load a FastCCD mask file.
+            
+            input:
+                fname: string of the filename of the mask file
+                plot: if True, the loaded mask is plotted
+        """
+                   
+
+        fccd_mask = imread(fname)
+        fccd_mask = fccd_mask.astype(float)[..., 0] // 255
+        fccd_mask[fccd_mask==0] = np.nan
+        self.mask = fccd_mask
+        
+        if plot:
+            plt.figure()
+            plt.imshow(self.mask)
+
+    def binning(self):
+        """ Bin the FastCCD data by the predifined scan type (FastCCD.define()) using multiprocessing
+        
+        """
+      
+        # get available memory in GB, we will try to use 80 % of it
+        max_GB = psutil.virtual_memory().available/1024**3
+        print(f'max available memory: {max_GB} GB')
+        
+        # max_GB / (8byte * 16modules * 128px * 512px * N_pulses)
+        self.chunksize = int(self.max_fraction_memory*max_GB * 1024**3 // (self.Nworker * 16 * 1934 * 960 * self.fpt))
+        
+        print('processing', self.chunksize, 'trains per chunk')
+        assert self.chunksize > 500, "cannot load FastCCD h5 files in memory"
+                   
+        jobs = []
+                   
+        for m,h5fname in enumerate(self.h5list):
+            jobs.append(dict(
+            fpt=self.fpt,
+            h5fname=h5fname,
+            chunksize=self.chunksize,
+            nbunches=self.nbunches,
+            workerId=m,
+            Nworker=self.Nworker,
+            scan = self.scan,
+            xgm = self.xgm,
+            FastADC5 = self.FastADC5
+            #maxSaturatedPixel=self.maxSaturatedPixel
+            ))
+            
+        timestamp = strftime('%X')
+        print(f'start time: {timestamp}')
+                                      
+        with multiprocessing.Pool(self.Nworker) as pool:
+            res = list(tqdm(pool.imap(process_one_module, jobs), total=len(self.h5list)))
+        
+        print('finished:', strftime('%X'))
+        
+        # rearange the multiprocessed data
+        # this is to get rid of the worker dimension, there is no sum over worker really involved
+        self.module_data = xr.concat(res, dim='workerId').sum(dim='workerId')
+                
+        self.module_data['pumped'] = self.module_data['pumped'] / self.module_data['sum_count_pumped']
+        self.module_data['unpumped'] = self.module_data['unpumped'] / self.module_data['sum_count_unpumped']           
+        self.module_data['xgm_pumped'] = self.module_data['xgm_pumped'] / self.module_data['sum_count_pumped']
+        self.module_data['xgm_unpumped'] = self.module_data['xgm_unpumped'] / self.module_data['sum_count_unpumped']           
+
+        self.module_data['run'] = self.run_nr
+        self.module_data['t0'] = self.t0
+            
+        self.plot_title = f"{self.proposal} run: {self.module_data['run'].values}"
+        self.module_data.attrs['plot_title'] = self.plot_title
+        self.module_data.attrs['scan_variable'] = self.scan_vname
+
+
+    def save(self, save_folder=None, overwrite=False):
+        """ Save the crunched data.
+        
+            inputs:
+                save_folder: string of the fodler where to save the data.
+                overwrite: boolean whether or not to overwrite existing files.
+        """
+        if save_folder is None:
+            save_folder = self.save_folder
+
+        if self.isDark:
+            fname = f'run{self.run_nr}_dark.h5'  # no scan
+        else:
+            fname = f'run{self.run_nr}.h5'  # run with delay scan (change for other scan types!)
+
+
+        save_path = os.path.join(save_folder, fname)
+        file_exists = os.path.isfile(save_path)
+
+        if not file_exists or (file_exists and overwrite):
+            if file_exists:
+                warnings.warn(f'Overwriting file: {save_path}')
+                os.remove(save_path)
+            self.module_data.to_netcdf(save_path, group='data')
+            os.chmod(save_path, 0o664)
+            print('saving: ', save_path)
+        else:
+            print('file', save_path, 'exists and overwrite is False')
+                   
+    def load_binned(self, runNB, dark_runNB=None, xgm_norm = True, save_folder=None):
+        """ load previously binned (crunched) FastCCD data by FastCCD.crunch() and FastCCD.save()
+        
+            inputs:
+                runNB: run number to load
+                dark_runNB: run number of the corresponding dark
+                xgm_norm: normlize by XGM data if True
+                save_folder: path string  where the crunched data are saved
+        """
+
+        if save_folder is None:
+            save_folder = self.save_folder
+
+        self.plot_title = f'{self.proposal} run: {runNB} dark: {dark_runNB}'
+                   
+        binned = xr.open_dataset(os.path.join(save_folder, f'run{runNB}.h5'), group='data', cache=False)
+                   
+        if dark_runNB is not None:
+            dark = xr.open_dataset(os.path.join(save_folder, f'run{dark_runNB}_dark.h5'), group='data', cache=False)
+            binned['pumped'] = self.gain*(binned['pumped'] - dark['pumped'].squeeze(drop=True))
+            binned['unpumped'] = self.gain*(binned['unpumped'] - dark['unpumped'].squeeze(drop=True))
+                   
+        if xgm_norm:
+            binned['pumped'] = binned['pumped']/binned['xgm_pumped']
+            binned['unpumped'] = binned['unpumped']/binned['xgm_unpumped']
+                   
+        self.scan_points = binned['scan_variable']
+        self.scan_points_counts = binned['sum_count_pumped'] + binned['sum_count_unpumped']
+        self.scan_vname = binned.attrs['scan_variable']
+        self.scan = None
+
+        self.binned = binned
+                   
+    def plot_FastCCD(self, use_mask = True, p_low = 1, p_high = 98, vmin = None, vmax = None):
+        """ Plot pumped and unpumped FastCCD images.
+        
+            inputs:
+                use_mask: if True, a mask is applied on the FastCCD.
+                p_low: low percentile value to adjust the contrast scale on the unpumped and pumped image
+                p_high: high percentile value to adjust the contrast scale on the unpumped and pumped image
+                vmin: low value of the image scale
+                vmax: high value of the image scale
+        """
+                   
+        if use_mask:
+            if self.mask is None:
+                raise ValueError('No mask was loaded !')
+                   
+            mask = self.mask
+            mask_txt = ' masked'
+        else:
+            mask = 1
+            mask_txt = ''
+        
+                   
+        im_pump_mean = self.binned['pumped'].mean('scan_variable')
+        im_unpump_mean = self.binned['unpumped'].mean('scan_variable')
+        
+        self.im_pump_mean = mask*im_pump_mean
+        self.im_unpump_mean = mask*im_unpump_mean
+                           
+        fig = plt.figure(figsize=(9, 4))
+        grid = ImageGrid(fig, 111,
+                 nrows_ncols=(1,2),
+                 axes_pad=0.15,
+                 share_all=True,
+                 cbar_location="right",
+                 cbar_mode="single",
+                 cbar_size="7%",
+                 cbar_pad=0.15,
+                 )
+
+        tmp = self.im_pump_mean.values.flatten()
+        try:
+            _vmin, _vmax = np.percentile(tmp[~np.isnan(tmp)], [p_low, p_high])
+        except:
+            _vmin, _vmax = (None, None)
+                   
+        if vmin is None:
+            vmin = _vmin
+        if vmax is None:
+            vmax = _vmax
+                         
+        im = grid[0].imshow(self.im_pump_mean, vmin=vmin, vmax=vmax, aspect=self.aspect)
+        grid[0].set_title('pumped' + mask_txt)
+
+        im = grid[1].imshow(self.im_unpump_mean, vmin=vmin, vmax=vmax, aspect=self.aspect)
+        grid[1].set_title('unpumped' + mask_txt)
+                   
+        grid[-1].cax.colorbar(im)
+        grid[-1].cax.toggle_label(True)
+        
+        fig.suptitle(self.plot_title)
+                   
+    def azimuthal_int(self, wl, center=None, angle_range=[0, 180-1e-6], dr=1, use_mask=True):
+        """ Perform azimuthal integration of 1D binned FastCCD run.
+        
+            inputs:
+                wl: photon wavelength
+                center: center of integration
+                angle_range: angles of integration
+                dr: dr
+                use_mask: if True, use the loaded mask
+        """
+
+        if use_mask:
+            if self.mask is None:
+                raise ValueError('No mask was loaded !')
+
+            mask = self.mask
+            mask_txt = ' masked'
+        else:
+            mask = 1
+            mask_txt = ''
+
+        im_pumped_arranged = self.binned['pumped'].values
+        im_unpumped_arranged = self.binned['unpumped'].values
+
+        im_pumped_arranged *= mask
+        im_unpumped_arranged *= mask
+
+        im_pumped_mean = im_pumped_arranged.mean(axis=0)
+        im_unpumped_mean = im_unpumped_arranged.mean(axis=0)
+
+        ai = tb.azimuthal_integrator(im_pumped_mean.shape, center, angle_range, dr=dr)
+        norm = ai(~np.isnan(im_pumped_mean))
+
+        az_pump = []
+        az_unpump = []
+
+        for i in tqdm(range(len(self.binned['scan_variable']))):
+            az_pump.append(ai(im_pumped_arranged[i]) / norm)
+            az_unpump.append(ai(im_unpumped_arranged[i]) / norm)
+
+        az_pump = np.stack(az_pump)
+        az_unpump = np.stack(az_unpump)
+
+        coords = {'scan_variable': self.binned['scan_variable'], 'distance': ai.distance}
+        azimuthal = xr.DataArray(az_pump, dims=['scan_variable', 'distance'], coords=coords)
+        azimuthal = azimuthal.to_dataset(name='pumped')
+        azimuthal['unpumped'] = xr.DataArray(az_unpump, dims=['scan_variable', 'distance'], coords=coords)
+        azimuthal = azimuthal.transpose('distance', 'scan_variable')
+
+        #t0 = 225.5
+        #azimuthal['delay'] = (t0 - azimuthal.delay)*6.6
+        #azimuthal['delay'] = azimuthal.delay
+
+        azimuthal['delta_q (1/nm)'] = 2e-9 * np.pi * np.sin(
+            np.arctan(azimuthal.distance *  self.px_pitch_v*1e-6 / self.distance)) / wl
+        
+        azimuthal.attrs = self.binned.attrs
+
+        self.azimuthal = azimuthal.swap_dims({'distance': 'delta_q (1/nm)'})
+                   
+    def plot_azimuthal_int(self, kind='difference', lim=None):
+        """ Plot a computed azimuthal integration.
+        
+            inputs:
+                kind: (str) either 'difference' or 'relative' to change the type of plot.
+        """
+        fig, [ax1, ax2, ax3] = plt.subplots(nrows=3, sharex=True, sharey=True)
+
+        xr.plot.imshow(self.azimuthal.pumped, ax=ax1, vmin=0, robust=True)
+        ax1.set_title('pumped')
+
+        xr.plot.imshow(self.azimuthal.unpumped, ax=ax2, vmin=0, robust=True)
+        ax2.set_title('unpumped')
+                   
+        if kind == 'difference':
+            val = self.azimuthal.pumped - self.azimuthal.unpumped
+            ax3.set_title('pumped - unpumped')
+        elif kind == 'relative':
+            val = (self.azimuthal.pumped - self.azimuthal.unpumped)/self.azimuthal.unpumped
+            ax3.set_title('(pumped - unpumped)/unpumped')
+        else:
+            raise ValueError('kind should be either difference or relative')
+
+        if lim is None:
+            xr.plot.imshow(val, ax=ax3, robust=True)
+        else:
+            xr.plot.imshow(val, ax=ax3, vmin=lim[0], vmax=lim[1])
+                   
+        ax3.set_xlabel(self.scan_vname)
+        fig.suptitle(f'{self.plot_title}')
+
+    def plot_azimuthal_line_cut(self, data, qranges, qwidths):
+        """ Plot line scans on top of the data.
+        
+            inputs:
+                data: an azimuthal integrated xarray DataArray with 'delta_q (1/nm)' as one of its dimension.
+                qranges: a list of q-range
+                qwidth: a list of q-width, same length as qranges
+        """
+                   
+        fig, [ax1, ax2] = plt.subplots(nrows=2, sharex=True, figsize=[8, 7])
+
+        xr.plot.imshow(data, ax=ax1, robust=True)
+
+        # attributes are not propagated during xarray mathematical operation https://github.com/pydata/xarray/issues/988
+        # so we might not have in data the scan vaiable name anymore
+        ax1.set_xlabel(self.scan_vname) 
+        fig.suptitle(f'{self.plot_title}')
+    
+        for i, (qr, qw) in enumerate(zip(qranges, qwidths)):
+            sel = (data['delta_q (1/nm)'] > (qr - qw/2)) * (data['delta_q (1/nm)'] < (qr + qw/2))
+            val = data.where(sel).mean('delta_q (1/nm)')
+            ax2.plot(data.scan_variable, val, c=f'C{i}', label=f'q = {qr:.2f}')
+        
+            ax1.axhline(qr - qw/2, c=f'C{i}', lw=1)
+            ax1.axhline(qr + qw/2, c=f'C{i}', lw=1)
+        ax2.legend()
+        ax2.set_xlabel(self.scan_vname)
+        
+                   
+# since 'self' is not pickable, this function has to be outside the FastCCD class so that it can be used
+# by the multiprocessing pool.map function
+def process_one_module(job):
+    fpt = job['fpt']
+    Nworker = job['Nworker']
+    workerId = job['workerId']
+    scan = job['scan']
+    chunksize = job['chunksize']
+    nbunches = job['nbunches']
+    h5fname = job['h5fname']
+    xgm = job['xgm']
+    FastADC5 = job['FastADC5']                   
+    #maxSaturatedPixel = job['maxSaturatedPixel']
+
+    image_path = f'/INSTRUMENT/SCS_CDIDET_FCCD2M/DAQ/FCCD:daqOutput/data/image/pixels'
+
+    # crunching
+    with h5py.File(h5fname, 'r') as m:
+        fastccd_trains = m['/INSTRUMENT/SCS_CDIDET_FCCD2M/DAQ/FCCD:daqOutput/data/trainId'][()]
+        data = m[image_path][()].squeeze().astype(np.float64)
+
+        unique_trainIds, unique_list = np.unique(fastccd_trains, return_index = True)                  
+        unique_nz_list = np.nonzero(unique_trainIds)[0]
+        
+        fastccd_trains = unique_trainIds[unique_nz_list]
+        coords = {'trainId': fastccd_trains}
+
+        fastccd = xr.DataArray(data[unique_nz_list, :, :], dims=['trainId', 'x', 'y'], coords=coords)
+        fastccd = fastccd.where(fastccd.sum(('x','y'), skipna=True) > 0)
+                   
+        aligned_vals = xr.align(*[fastccd, xgm, FastADC5], join='inner')
+        ds = xr.Dataset(dict(zip(['fastccd', 'xgm', 'FastADC5'], aligned_vals)))
+        ds['sum_count'] = xr.full_like(ds['fastccd'][..., 0, 0], fill_value=1)
+                   
+        # grouping and summing
+        ds['scan_variable'] = scan['scan_variable'] # this only adds scan data for matching trainIds
+        ds = ds.dropna('trainId')
+        #print(ds)
+                   
+        data_pumped = ds.where(ds['FastADC5'] > 0, drop=True).groupby('scan_variable').sum('trainId')
+        data_unpumped = ds.where(ds['FastADC5'] < 1, drop=True).groupby('scan_variable').sum('trainId')
+                   
+        module_data = data_pumped['fastccd'].to_dataset('pumped')
+        module_data['unpumped'] = data_unpumped['fastccd']   
+        module_data['sum_count_pumped'] = data_pumped['sum_count']
+        module_data['sum_count_unpumped'] = data_unpumped['sum_count']
+        module_data['xgm_pumped'] = data_pumped['xgm']
+        module_data['xgm_unpumped'] = data_unpumped['xgm']
+        module_data['workerId'] = workerId
+        
+    return module_data
\ No newline at end of file
diff --git a/__init__.py b/__init__.py
index 1ee52fb..4af2bd2 100644
--- a/__init__.py
+++ b/__init__.py
@@ -6,3 +6,4 @@ from ToolBox.Laser_utils import *
 from ToolBox.DSSC import DSSC
 from ToolBox.azimuthal_integrator import *
 from ToolBox.DSSC1module import *
+from ToolBox.FastCCD import *
-- 
GitLab