Compare revisions

Changes are shown as if the source revision was being merged into the target revision.

Showing 2715 additions and 0 deletions
doc/metadata.png (136 KiB)
sphinx
sphinx_rtd_theme
autoapi
sphinx-autoapi
nbsphinx
urllib3<2.0.0
#!/bin/bash
#SBATCH -N 1
#SBATCH --partition=exfel
#SBATCH --time=12:00:00
#SBATCH --mail-type=END,FAIL
#SBATCH --output=logs/%j-%x.out
while getopts ":p:d:r:k:m:x:b:" option
do
    case $option in
        p) PROPOSAL="$OPTARG";;
        d) DARK="$OPTARG";;
        r) RUN="$OPTARG";;
        k) KERNEL="$OPTARG";;
        m) MODULE_GROUP="$OPTARG";;
        x) XAXIS="$OPTARG";;
        b) BINWIDTH="$OPTARG";;
        \?) echo "Unknown option"
            exit 1;;
        :) echo "Missing option for input flag"
            exit 1;;
    esac
done
# Load xfel environment
source /etc/profile.d/modules.sh
module load exfel exfel-python
echo processing run $RUN
PDIR=$(findxfel $PROPOSAL)
PPROPOSAL="p$(printf '%06d' $PROPOSAL)"
RDIR="$PDIR/usr/processed_runs/r$(printf '%04d' $RUN)"
mkdir -p "$RDIR"
NB='Dask DSSC module binning.ipynb'
# kernel list can be seen from 'jupyter kernelspec list'
if [ -z "${KERNEL}" ]; then
    KERNEL="toolbox_$PPROPOSAL"
fi
python -c "import papermill as pm; pm.execute_notebook(\
    '$NB', \
    '$RDIR/output$MODULE_GROUP.ipynb', \
    kernel_name='$KERNEL', \
    parameters=dict(proposalNB=int('$PROPOSAL'), \
                    dark_runNB=int('$DARK'), \
                    runNB=int('$RUN'), \
                    module_group=int('$MODULE_GROUP'), \
                    path='$RDIR/', \
                    xaxis='$XAXIS', \
                    bin_width=float('$BINWIDTH')))"
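
# Example submission (flag values are illustrative; the actual script file
# name is not shown in this diff):
#   sbatch dssc_module_binning.sh -p 2599 -d 178 -r 180 -m 0 -x nrj -b 0.1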
#!/bin/bash
#SBATCH -N 1
#SBATCH --partition=allgpu
#SBATCH --constraint=V100
#SBATCH --time=2:00:00
#SBATCH --mail-type=END,FAIL
#SBATCH --output=logs/%j-%x.out
ROISTH='1'
SATLEVEL='500'
MODULE='15'
while getopts ":p:d:r:k:g:t:s:m:" option
do
    case $option in
        p) PROPOSAL="$OPTARG";;
        d) DARK="$OPTARG";;
        r) RUN="$OPTARG";;
        k) KERNEL="$OPTARG";;
        g) GAIN="$OPTARG";;
        t) ROISTH="$OPTARG";;
        s) SATLEVEL="$OPTARG";;
        m) MODULE="$OPTARG";;
        \?) echo "Unknown option"
            exit 1;;
        :) echo "Missing option for input flag"
            exit 1;;
    esac
done
# Load xfel environment
source /etc/profile.d/modules.sh
module load exfel exfel-python
echo processing run $RUN
PDIR=$(findxfel $PROPOSAL)
PPROPOSAL="p$(printf '%06d' $PROPOSAL)"
RDIR="$PDIR/usr/processed_runs/r$(printf '%04d' $RUN)"
mkdir -p "$RDIR"
NB='BOZ analysis part I.a Correction determination.ipynb'
# kernel list can be seen from 'jupyter kernelspec list'
if [ -z "${KERNEL}" ]; then
    KERNEL="toolbox_$PPROPOSAL"
fi
python -c "import papermill as pm; pm.execute_notebook(\
    '$NB', \
    '$RDIR/output.ipynb', \
    kernel_name='$KERNEL', \
    parameters=dict(proposal=int('$PROPOSAL'), \
                    darkrun=int('$DARK'), \
                    run=int('$RUN'), \
                    module=int('$MODULE'), \
                    gain=float('$GAIN'), \
                    rois_th=float('$ROISTH'), \
                    sat_level=int('$SATLEVEL')))"
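
# Example submission (flag values are illustrative; the actual script file
# name is not shown in this diff; -t and -s fall back to the defaults above):
#   sbatch boz_correction.sh -p 2599 -d 615 -r 614 -g 0.5 -m 15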
import os
import logging
import argparse
import numpy as np
import toolbox_scs as tb
import toolbox_scs.detectors as tbdet
logging.basicConfig(level=logging.INFO)
log_root = logging.getLogger(__name__)
# -----------------------------------------------------------------------------
# user input:
# -----------------------------------------------------------------------------
run_type = 'static, delay, .....'
description = 'useful description or comment .....'
# Add xgm data to the formatted file if save_xgm_binned was set to True
metadata = ['binner1', 'binner2', 'xgm_binned']  # or ['binner1', 'binner2']
# -----------------------------------------------------------------------------
def formatting(run_number, run_folder):
    log_root.debug("Collect, combine and format files in run folder")
    run_formatted = tbdet.DSSCFormatter(run_folder)
    run_formatted.combine_files()
    run_formatted.add_dataArray(metadata)
    attrs = {'run_type': run_type,
             'description': description,
             'run_number': run_number}
    run_formatted.add_attributes(attrs)
    run_formatted.save_formatted_data(
        f'{run_folder}run_{run_number}_formatted.h5')
    log_root.debug("Formatting finished successfully.")
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--run-number', metavar='S',
                        action='store',
                        help='run number')
    parser.add_argument('--run-folder', metavar='S',
                        action='store',
                        help='the run folder containing fractional data')
    args = parser.parse_args()
    formatting(str(args.run_number), str(args.run_folder))
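
# Example invocation (run number illustrative; folder pattern as used by the
# shell wrapper below):
#   python format_data.py --run-number 89 --run-folder ../processed_runs/r_89/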
#!/bin/bash
RUN_NR=${1}
RUN_DIR="../processed_runs/r_${RUN_NR}/"
if [ -d "$RUN_DIR" ]
then
    echo creating formatted .h5 file for run $RUN_NR in $RUN_DIR
    source /etc/profile.d/modules.sh
    module load exfel exfel-python
    python format_data.py --run-number $RUN_NR --run-folder $RUN_DIR
    #chgrp -R 60002711-part $RUN_DIR
    chmod -R 777 $RUN_DIR
else
    echo run folder $RUN_DIR does not exist
    echo please provide a valid run number
fi
import os
import logging
import argparse
import h5py
import numpy as np
import extra_data as ed
import toolbox_scs as tb
import toolbox_scs.detectors as tbdet
logging.basicConfig(level=logging.INFO)
log_root = logging.getLogger(__name__)
# -----------------------------------------------------------------------------
# user input: run-type specific
# -----------------------------------------------------------------------------
proposal_nb = 2599
output_filepath = "../processed_runs/"
# these get set by the shell script now! (e.g. "--runtype static")
# runtype = 'energyscan'
# runtype = 'energyscan_pumped'
# runtype = 'static'
# runtype = 'static_IR'
# runtype = 'delayscan'
# runtype = 'timescan'
# useful metadata to be added to h5 files
scriptname = os.path.basename(__file__)
save_xgm_binned = True
# optional prebinning methods for DSSC data
normevery = 2  # 2 if intradark frames are used, 1 otherwise
xgm_mask = True  # if True, drop DSSC frames whose XGM value lies outside xgm_threshold
xgm_threshold = (1000, np.inf)  # (min, max) XGM intensity range defining valid pulses
filename_dark = None # 200
xgm_normalization = False
# -----------------------------------------------------------------------------
def process(run_nb, runtype, modules=None):
    run_description = f'{runtype}; script {scriptname}'
    print(run_description)
    # default to all 16 DSSC modules if none were specified
    mod_list = modules if modules else list(range(16))
    path = f'{output_filepath}r_{run_nb}/'
    log_root.info("create run objects")
    run_info = tbdet.load_dssc_info(proposal_nb, run_nb)
    fpt = run_info['frames_per_train']
    n_trains = run_info['number_of_trains']
    trainIds = run_info['trainIds']

    # -------------------------------------------------------------------------
    # user input: run specific
    # -------------------------------------------------------------------------
    run_obj = ed.open_run(proposal_nb, run_nb)
    if runtype == 'static':
        buckets_train = np.zeros(n_trains)
        pulsepattern = ['image', 'intradark']
        buckets_pulse = pulsepattern * (fpt // len(pulsepattern))
    elif runtype == 'energyscan':
        buckets_train = tb.get_array(run_obj, 'nrj', 0.1).values
        pulsepattern = ['image', 'intradark']
        buckets_pulse = pulsepattern * (fpt // len(pulsepattern))
    elif runtype == 'static_IR':
        buckets_train = np.zeros(n_trains)
        pulsepattern = ['unpumped', 'unpumped_intradark',
                        'pumped', 'pumped_intradark']
        buckets_pulse = pulsepattern * (fpt // len(pulsepattern))
    elif runtype == 'energyscan_pumped':
        buckets_train = tb.get_array(run_obj, 'nrj', 0.1).values
        pulsepattern = ['unpumped', 'unpumped_intradark',
                        'pumped', 'pumped_intradark']
        buckets_pulse = pulsepattern * (fpt // len(pulsepattern))
    elif runtype == 'delayscan':
        buckets_train = tb.get_array(run_obj, 'PP800_DelayLine', 0.03).values
        pulsepattern = ['unpumped', 'unpumped_intradark',
                        'pumped', 'pumped_intradark']
        buckets_pulse = pulsepattern * (fpt // len(pulsepattern))
    elif runtype == 'timescan':  # 10 s bins (timestamp is in ns)
        bin_nsec = 10 * 1e9
        tstamp = run_obj.get_array('SCS_RR_UTC/TSYS/TIMESERVER', 'id.timestamp')
        buckets_train = (bin_nsec * np.round(tstamp / bin_nsec) - tstamp.min()) / 1e9
        pulsepattern = ['unpumped', 'unpumped_intradark',
                        'pumped', 'pumped_intradark']
        buckets_pulse = pulsepattern * (fpt // len(pulsepattern))
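
    # Example: with fpt = 40 and the four-entry pumped/unpumped pattern above,
    # buckets_pulse is a 40-entry list cycling through the four labels, so all
    # frames sharing a label end up grouped in the same pulse bin.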
    # -------------------------------------------------------------------------
    # create binner
    binner1 = tbdet.create_dssc_bins("trainId", trainIds, buckets_train)
    binner2 = tbdet.create_dssc_bins("pulse",
                                     np.linspace(0, fpt-1, fpt, dtype=int),
                                     buckets_pulse)
    binners = {'trainId': binner1, 'pulse': binner2}
    bin_obj = tbdet.DSSCBinner(proposal_nb, run_nb,
                               binners=binners,
                               dssc_coords_stride=normevery)
    if xgm_mask:
        bin_obj.create_pulsemask('xgm', xgm_threshold)

    dark = None
    if filename_dark:
        dark = tbdet.load_xarray(filename_dark)
        dark = dark['data']

    bin_params = {'modules': mod_list,
                  'chunksize': 248,
                  'filepath': path,
                  'xgm_normalization': xgm_normalization,
                  'normevery': normevery,
                  'dark_image': dark}
    log_root.info("start binning routine")
    bin_obj.process_data(**bin_params)

    log_root.info("Add additional data to module files")
    if save_xgm_binned:
        bin_obj.load_xgm()
        xgm_binned = bin_obj.get_xgm_binned()
    if not os.path.isdir(path):
        os.mkdir(path)
    for m in mod_list:
        fname = f'run_{run_nb}_module{m}.h5'
        if save_xgm_binned:
            tbdet.save_xarray(
                path+fname, xgm_binned, group='xgm_binned', mode='a')
        tbdet.save_xarray(path+fname, binner1, group='binner1', mode='a')
        tbdet.save_xarray(path+fname, binner2, group='binner2', mode='a')
        metadata = {'run_number': run_nb,
                    'module': m,
                    'run_description': run_description}
        tbdet.save_attributes_h5(path+fname, metadata)
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--run-number', metavar='S',
                        action='store',
                        help='the run to be processed')
    parser.add_argument('--module', metavar='S',
                        nargs='+', action='store',
                        help='modules to be processed')
    parser.add_argument('--runtype', metavar='S',
                        nargs='+', action='store',
                        help=('type of run (static, static_IR, energyscan, '
                              'energyscan_pumped, delayscan, timescan)'))
    args = parser.parse_args()

    runtype = args.runtype[0]
    if args.run_number:
        if args.module is not None:
            if len(args.module) == 1:
                args.module = args.module[0].split(" ")
            modules = list(map(int, args.module))
            process(str(args.run_number), runtype, modules)
        else:
            process(str(args.run_number), runtype)
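
# Example invocation (values are illustrative):
#   python process_data_201007_23h.py --run-number 89 --module 0 1 2 3 --runtype static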
#!/bin/bash
#SBATCH -N 1
#SBATCH --partition=upex
#SBATCH --time=00:30:00
#SBATCH --mail-type=END,FAIL
#SBATCH --output=../logs/%j-%x.out
RUN=$1
MODULES=$2
RUNTYPE=$3
source /etc/profile.d/modules.sh
module load exfel exfel-python
echo processing modules $MODULES of run $RUN
python process_data_201007_23h.py --run-number $RUN --module ${MODULES} --runtype $RUNTYPE
#!/bin/bash
RUN=$1
RUNTYPE=$2
if [ $RUN ] && [ $RUNTYPE ]
then
    echo processing run $RUN
    source /etc/profile.d/modules.sh
    module load exfel exfel-python
    sbatch ./start_job_single.sh $RUN '0 1 2 3' $RUNTYPE
    sbatch ./start_job_single.sh $RUN '4 5 6 7' $RUNTYPE
    sbatch ./start_job_single.sh $RUN '8 9 10 11' $RUNTYPE
    sbatch ./start_job_single.sh $RUN '12 13 14 15' $RUNTYPE
else
    echo please specify a run number and type
    echo available runtypes:
    echo energyscan, energyscan_pumped, static, static_IR, delayscan, timescan
fi
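
# Example (script name assumed, run number illustrative; this submits four
# jobs of four DSSC modules each):
#   ./start_job_all.sh 89 static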
doc/sview.png (47.1 KiB)
Finding time overlap by transient reflectivity
----------------------------------------------
Transient reflectivity of the optical laser, measured on a large-bandgap material pumped by the FEL, is often used at SCS to find the time overlap between the two beams. The example notebook
* :doc:`Transient reflectivity measurement <Transient reflectivity measurement>`
shows how to analyze such data, including correcting the delay with the bunch arrival monitor (BAM).
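
A minimal sketch of the correction step (illustrative only, not the notebook's
actual code; the sign convention depends on the setup)::

    # nominal delay and BAM reading as arrays in the same units (e.g. ps)
    delay_corrected = delay - bam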
from setuptools import setup, find_packages
with open('README.rst') as f:
readme = f.read()
with open('VERSION') as f:
_version = f.read()
_version = _version.strip("\n")
basic_analysis_reqs = ['numpy', 'scipy']  # these are readily available in Karabo
advanced_analysis_reqs = [
'pandas', 'imageio', 'xarray>=0.13.0', 'psutil', 'h5py', 'h5netcdf',]
interactive_reqs = ['ipykernel', 'matplotlib', 'tqdm',]
maxwell_reqs = ['joblib', 'papermill', 'dask[diagnostics]',
'extra_data', 'extra_geom', 'euxfel_bunch_pattern>=0.6',
'pyFAI',]
docs_reqs = ['sphinx', 'nbsphinx', 'sphinx-autoapi', 'pydata-sphinx-theme']
setup(name='toolbox_scs',
      version=_version,
      description="A collection of code for the SCS beamline",
      long_description=readme,
      author='SCS team',
      author_email='scs@xfel.eu',
      url="https://git.xfel.eu/gitlab/SCS/ToolBox.git",
      keywords='XAS, xgm, DSSC, FCCD, PPL',
      license="GPL",
      package_dir={'': 'src'},
      packages=find_packages('src'),
      package_data={},
      install_requires=basic_analysis_reqs,
      extras_require={
          'advanced': advanced_analysis_reqs,
          'interactive': interactive_reqs,
          'maxwell': advanced_analysis_reqs + interactive_reqs + maxwell_reqs,
          'docs': docs_reqs,
          'test': ['pytest']
      }
      )
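
# Typical installs (assuming the repository root as the working directory):
#   pip install .               # basic analysis requirements only
#   pip install ".[maxwell]"    # adds the advanced, interactive and Maxwell extras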
from .constants import *
from .detectors import *
# Module name is the same as a child function, we use alias to avoid conflict
import toolbox_scs.load as load_module
from .load import *
from .misc import *
from .mnemonics_machinery import *
from .routines import *
__all__ = (
    # top-level modules
    constants.__all__
    + load_module.__all__
    + mnemonics_machinery.__all__
    # submodules
    + detectors.__all__
    + misc.__all__
    + routines.__all__
)
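
# Note: after the star imports above, the top-level name `load` refers to the
# function from toolbox_scs.load, not to the module (hence the alias
# `load_module`), e.g.:
#   >>> import toolbox_scs as tb
#   >>> callable(tb.load)
#   True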
from . import knife_edge as knife_edge_module
from .knife_edge import *
__all__ = knife_edge_module.__all__
import numpy as np
from scipy import special
from scipy.optimize import curve_fit
__all__ = ['knife_edge', 'knife_edge_base']
def knife_edge(positions, intensities, axisRange=None, p0=None):
    """
    Calculates the beam radius at 1/e^2 from a knife-edge scan by
    fitting with an erfc function: f(a, b, u) = a*erfc(u) + b, where
    u = sqrt(2)*(x - x0)/w0, with w0 the beam radius at 1/e^2 and
    x0 the beam center.

    Parameters
    ----------
    positions : np.ndarray
        Motor position values, typically 1D.
    intensities : np.ndarray
        Intensity values, either 1D or 2D with the number of rows
        equal to the size of positions.
    axisRange : sequence of two floats or None
        Edges of the scanning axis between which to apply the fit.
    p0 : list of floats, numpy 1D array
        Initial parameters used for the fit: x0, w0, a, b. If None, a beam
        radius of 100 um is assumed.

    Returns
    -------
    width : float
        The beam radius at 1/e^2.
    std : float
        The standard deviation of the width.
    """
    popt, pcov = knife_edge_base(positions, intensities,
                                 axisRange=axisRange, p0=p0)
    width, std = 0, 0
    if popt is not None and pcov is not None:
        width, std = np.abs(popt[1]), pcov[1, 1]**0.5
    return width, std
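
# Example (synthetic, noise-free data; values are illustrative):
#   >>> x = np.linspace(-1, 2, 300)
#   >>> y = 0.5 * special.erfc(np.sqrt(2) * (x - 0.5) / 0.2) + 0.01
#   >>> width, _ = knife_edge(x, y)
#   >>> round(width, 2)   # recovers the 1/e^2 radius used above
#   0.2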
def knife_edge_base(positions, intensities, axisRange=None, p0=None):
    """
    The base implementation of the knife-edge scan analysis.

    Calculates the beam radius at 1/e^2 from a knife-edge scan by
    fitting with an erfc function: f(a, b, u) = a*erfc(u) + b, where
    u = sqrt(2)*(x - x0)/w0, with w0 the beam radius at 1/e^2 and
    x0 the beam center.

    Parameters
    ----------
    positions : np.ndarray
        Motor position values, typically 1D.
    intensities : np.ndarray
        Intensity values, either 1D or 2D with the number of rows
        equal to the size of positions.
    axisRange : sequence of two floats or None
        Edges of the scanning axis between which to apply the fit.
    p0 : list of floats, numpy 1D array
        Initial parameters used for the fit: x0, w0, a, b. If None, a beam
        radius of 100 um is assumed.

    Returns
    -------
    popt : sequence of floats or None
        The parameters of the resulting fit.
    pcov : sequence of floats
        The covariance matrix of the resulting fit.
    """
    # Prepare arrays
    positions, intensities = prepare_arrays(positions, intensities,
                                            xRange=axisRange)
    # Estimate initial fitting params
    if p0 is None:
        p0 = [np.mean(positions), 0.1, np.max(intensities) / 2, 0]
    # Fit
    popt, pcov = function_fit(erfc, positions, intensities, p0=p0)
    return popt, pcov
def function_fit(func, x, y, **kwargs):
    """A wrapper around scipy.optimize.curve_fit()."""
    try:
        popt, pcov = curve_fit(func, x, y, **kwargs)
    except (TypeError, RuntimeError) as err:
        print("Fit did not converge:", err)
        popt, pcov = (None, None)
    return popt, pcov
def prepare_arrays(arrX: np.ndarray, arrY: np.ndarray,
                   xRange=None, yRange=None):
    """
    Preprocessing of the input x and y arrays.

    This involves the following steps:
    1. Convert the arrays to 1D of the same size.
    2. Select the ranges from the input x- and y-ranges.
    3. Retrieve only finite values.
    """
    # Convert both arrays to 1D of the same size
    arrX, arrY = arrays_to1d(arrX, arrY)

    # Select ranges
    if xRange is not None:
        low, high = xRange
        if low == high:
            raise ValueError('The supplied xRange is not a valid range.')
        mask_ = range_mask(arrX, low, high)
        arrX = arrX[mask_]
        arrY = arrY[mask_]
    if yRange is not None:
        low, high = yRange
        if low == high:
            raise ValueError('The supplied yRange is not a valid range.')
        mask_ = range_mask(arrY, low, high)
        arrX = arrX[mask_]
        arrY = arrY[mask_]

    # Keep only finite values in both arrays
    finite_idx = np.isfinite(arrX) & np.isfinite(arrY)
    arrX = arrX[finite_idx]
    arrY = arrY[finite_idx]
    return arrX, arrY
def arrays_to1d(arrX: np.ndarray, arrY: np.ndarray):
    """Flatten two arrays and match their sizes."""
    assert arrX.shape[0] == arrY.shape[0]
    arrX, arrY = arrX.flatten(), arrY.flatten()
    if len(arrX) > len(arrY):
        arrY = np.repeat(arrY, len(arrX) // len(arrY))
    else:
        arrX = np.repeat(arrX, len(arrY) // len(arrX))
    return arrX, arrY
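
# Example: one position per row of a 2D intensity array.
#   >>> fx, fy = arrays_to1d(np.array([0., 1., 2.]), np.arange(6.).reshape(3, 2))
#   >>> fx   # each position repeated to match y.flatten() element for element
#   array([0., 0., 1., 1., 2., 2.])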
def range_mask(array, minimum=None, maximum=None):
    """Build a boolean mask of the values within the given minimum and maximum."""
    default = np.ones(array.shape, dtype=bool)
    min_slice, max_slice = default, default
    if minimum is not None:
        if minimum > np.nanmax(array):
            raise ValueError('The range minimum is too large.')
        min_slice = array >= minimum
    if maximum is not None:
        if maximum < np.nanmin(array):
            raise ValueError('The range maximum is too small.')
        max_slice = array <= maximum
    return min_slice & max_slice
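
# Example:
#   >>> range_mask(np.array([1., 2., 3., 4.]), minimum=2, maximum=3)
#   array([False,  True,  True, False])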
def erfc(x, x0, w0, a, b):
    return a * special.erfc(np.sqrt(2) * (x - x0) / w0) + b