diff --git a/VERSION b/VERSION index 96fa45a53be6ba0bae9296c97120fad98eda4799..1e126641c538487d3cf47d9296848e2337e4140d 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.0.0-alpha.1 \ No newline at end of file +1.0.2-alpha.2 \ No newline at end of file diff --git a/src/toolbox_scs/__init__.py b/src/toolbox_scs/__init__.py index 702886fc22da9bd0d1c555a69fd64f65fe8f3751..3e4867d43772f40b337d86e6ac92280a19a7c35e 100644 --- a/src/toolbox_scs/__init__.py +++ b/src/toolbox_scs/__init__.py @@ -1,4 +1,4 @@ -from .load import (load, concatenateRuns, load_scan_variable, +from .load import (load, concatenateRuns, load_binned_array, load_run, run_by_path) from .constants import mnemonics @@ -7,7 +7,7 @@ __all__ = ( # functions "load", "concatenateRuns", - "load_scan_variable", + "load_binned_array", "load_run", "run_by_path", # Classes diff --git a/src/toolbox_scs/detectors/__init__.py b/src/toolbox_scs/detectors/__init__.py index 5bd740ab476bcef83960dc2fc1c56c45fe642e47..a3674192a2581bdf706ac6e96c4ac7a4123ddeef 100644 --- a/src/toolbox_scs/detectors/__init__.py +++ b/src/toolbox_scs/detectors/__init__.py @@ -43,6 +43,7 @@ clean_ns = [ 'dssc_routines', 'dssc_processing', 'dssc_data', + 'dssc_misc', 'dssc_plot', 'azimuthal_integrator', 'FastCCD', diff --git a/src/toolbox_scs/detectors/dssc.py b/src/toolbox_scs/detectors/dssc.py index 52afa89e0df97367214aaab391acb0390f6f90ba..e729c97e8aabda5cdb76a78e2ca31dd21eb39f6a 100644 --- a/src/toolbox_scs/detectors/dssc.py +++ b/src/toolbox_scs/detectors/dssc.py @@ -2,31 +2,112 @@ DSSC-detector class module -------------------------- - The dssc detector class provides a namespace for frequent evaluation - routines. + The dssc detector class. It represents a namespace for frequent evaluation + while implicitly applying/requiring certain structure/naming conventions to + its objects. comments: - DSSC class methods will mostly use functions defined in other - files. 
The redefinition of its names makes it easy to adapt the code in - case we later rename the underlying functions in the course of - development. + files. The redefinition of its names makes it easy to adapt the code + in case we later rename the underlying functions in the course of + development. - contributions should comply with pep8 code structure guidelines. + - Plot routines don't fit into objects since they are rather fluent. + They have been outsourced to dssc_plot.py. Alternatively they could + be accessed as tbdet member functions. """ - +import os +import logging +import joblib +import multiprocessing + +import numpy as np +import xarray as xr + +from ..load import ( + load_run as _load_run, + load_binned_array as _load_binned_array, +) +from .xgm import ( + load_xgm as _load_xgm_data, + ) from .dssc_misc import ( load_dssc_info as _dssc_info, - calc_xgm_frame_indices as _xgm_frame_indices + calc_xgm_frame_indices as _xgm_frame_indices, ) from .dssc_data import ( save_to_file as _save_to_file, load_from_file as _load_from_file, ) +from .dssc_processing import ( + process_intra_train as _process_intra_train, + bin_data_multipr as _bin_data_multipr, + bin_data as _bin_data, + ) + + +log = logging.getLogger(__name__) + + +def _setup_dir(): + for f in ['tmp', 'images', 'processed_runs']: + if not os.path.isdir(f): + os.mkdir(f) class DSSC: - def __init__(self): - # setup_dir() - pass + def __init__(self, proposal_nr, run_nr, + is_dark=False, bin_variable_name=None, bin_size=None, + framepattern=['image'], xgm_threshold=(0, np.inf), + detector_distance=1 + ): + """ + (doc to be done) + """ + _setup_dir() + + # --------------------------------------------------------------------- + # Internal variables + # --------------------------------------------------------------------- + self.proposal_nr = proposal_nr + self.run_nr = run_nr + self.framepattern = framepattern + self.is_dark = is_dark + self.info = _dssc_info(proposal_nr, run_nr) + self.fpt = 
self.info['frames_per_train'] + self.run = _load_run(proposal_nr, run_nr, include='*DA*') + self.xgm_threshold = xgm_threshold + + + # --------------------------------------------------------------------- + # Create file containing bins for later data reduction (bin_data()) + # --------------------------------------------------------------------- + self.scanfile = './tmp/scan.h5' + self.bin_variable = self.get_bin_variable( + self.run, bin_variable_name, bin_size) + _save_to_file(self.bin_variable, self.scanfile, overwrite = True) + + # --------------------------------------------------------------------- + # Additional data reduction through masking + # --------------------------------------------------------------------- + self.filter_mask = None + self.maskfile = './tmp/mask.h5' + self.xgm = None + if not self.is_dark: + self.xgm = self.load_xgm() + self.create_filter_mask() + + # --------------------------------------------------------------------- + # Detector geometry + # --------------------------------------------------------------------- + self.det_distance = detector_distance + self.px_pitch_h = 236 + self.px_pitch_v = 204 + self.aspect = self.px_pitch_v/self.px_pitch_h + self.geom = None + self.mask = None + + log.debug("Constructed DSSC object") def __del__(self): # cleanup @@ -35,60 +116,93 @@ class DSSC: # ------------------------------------------------------------------------- # Data handling # ------------------------------------------------------------------------- - def open_run(self): - pass - def load_geom(self): pass - def load_scan(self): - pass - def load_mask(self): pass def load_xgm(self): - pass - - def xgm_filter(self): - pass - - def load_binned_data(self): - pass + xgm = _load_xgm_data(self.run) + xgm_frame_coords = _xgm_frame_indices(xgm.shape[1], self.framepattern) + xgm['pulse'] = xgm_frame_coords + return xgm + + def create_filter_mask(self): + pulsemask = xr.DataArray( + np.ones([len(self.run.train_ids), self.fpt], dtype=bool), + 
dims=['trainId', 'pulse'], + coords={'trainId': self.run.train_ids, + 'pulse': range(self.fpt)}) + if self.xgm is not None: + n_frames_dark = len([p for p in self.framepattern if 'dark' in p]) + valid = (self.xgm > self.xgm_threshold[0]) * \ + (self.xgm < self.xgm_threshold[1]) + pulsemask = valid.combine_first(pulsemask).astype(bool) + nrejected = int(valid.size - valid.sum()) + percent_rejected = 100 * nrejected / valid.size + log.info(f'rejecting {nrejected} out of {valid.size} pulses' + f'({percent_rejected:.1f}%) due to xgm threshold') + + _save_to_file(pulsemask, self.maskfile, overwrite = True) + + def get_bin_variable(self, run, name, stepsize=None): + data = _load_binned_array(run, name, stepsize) + data.name = 'scan_variable' + return data def save_binned_data(self): pass - + def merge_xgm_binned(self): + #pulses_no_dark = [p for p in framepattern if 'dark' not in p] + #if maskfile is not None: + #xgm = xgm.where(valid) + #xgm = tbdet.split_frames(xgm, pulses_no_dark, prefix='xgm_') + #xgm['scan_variable'] = scan_variable + #xgm = xgm.groupby('scan_variable').mean('trainId') + #module_data = xr.merge([module_data, xgm]) + #module_data = module_data.transpose( + # 'scan_variable', 'module', 'x', 'y') pass + # ------------------------------------------------------------------------- # Data processing # ------------------------------------------------------------------------- - def get_binned_data(self): - - #option 1 - #with multiprocessing.Pool(16) as pool: - # module_data = pool.map(tbdet.bin_data_multipr, jobs) - - #option 2 - #module_data = joblib.Parallel(n_jobs=16) \ - # (joblib.delayed(tbdet.bin_data)(**jobs[i]) for i in range(16)) - - #module_data = xr.concat(module_data, dim='module') - #module_data = module_data.dropna('scan_variable') - #module_data['run'] = run_nr - - pass + def bin_data(self, use_joblib=False, process_modules=[], chunksize = 512): + log.info("Binning data according to bins given in scanfile") + log.info(f'Processing {chunksize} 
trains per chunk') + + mod_list = process_modules + if any(mod_list) is False: + mod_list = [i for i in range(16)] + n_jobs = len(mod_list) + + jobs = [] + for m in mod_list: + jobs.append(dict( + proposal=self.proposal_nr, + run_nr=self.run_nr, + module=m, + chunksize=chunksize, + binfile=self.scanfile, + framepattern=self.framepattern, + maskfile=None if self.is_dark else self.maskfile, + )) + + data = None + if use_joblib: + data = joblib.Parallel(n_jobs=n_jobs) \ + (joblib.delayed(_bin_data)(**jobs[i]) for i in range(n_jobs)) + else: + with multiprocessing.Pool(n_jobs) as pool: + data = pool.map(_bin_data_multipr, jobs) + + data = xr.concat(data, dim='module') + data = data.dropna('scan_variable') + data['run'] = self.run_nr + log.info(f'Binning done') + return data def azimuthal_integration(self): pass - - - # ------------------------------------------------------------------------- - # Data visualization -> tbdet member functions - # ------------------------------------------------------------------------- - #def plot_xgm_threshold(self): - # pass - - #def plot_xgm_hist(self): - # pass \ No newline at end of file diff --git a/src/toolbox_scs/detectors/dssc_bkp.py b/src/toolbox_scs/detectors/dssc_bkp.py index 6bda4b3ba85dc3354672b07eef49d3dbf77ea0e9..843d2cd3bf2595ac9616fa85b59f8e47866e90a9 100644 --- a/src/toolbox_scs/detectors/dssc_bkp.py +++ b/src/toolbox_scs/detectors/dssc_bkp.py @@ -171,7 +171,8 @@ class DSSC: """ if self.xgm is None: self.xgm = self.run.get_array(tb.mnemonics['SCS_SA3']['source'], - tb.mnemonics['SCS_SA3']['key'], roi=ed.by_index[:self.nbunches]) + tb.mnemonics['SCS_SA3']['key'], + roi=ed.by_index[:self.nbunches]) def plot_xgm_hist(self, nbins=100): """ Plots an histogram of the SCS XGM dedicated SAS3 data. 
diff --git a/src/toolbox_scs/detectors/dssc_plot.py b/src/toolbox_scs/detectors/dssc_plot.py index 4cd18d9ff0f571cfa22e7adfa356d87664081a30..9c66a8a567aec7db7a59c43a62c58e1f08914119 100644 --- a/src/toolbox_scs/detectors/dssc_plot.py +++ b/src/toolbox_scs/detectors/dssc_plot.py @@ -15,34 +15,50 @@ import numpy as np import xarray as xr -def plot_xgm_threshold(xgm, scan, - xgm_min = None, xgm_max = None, - run_nr = '', - safe_fig = False): +def plot_xgm_threshold(xgm, + xgm_min = None, xgm_max = None, + run_nr = '', + safe_fig = False): - fig, [ax1, ax2] = plt.subplots(nrows=2, sharex=True) + fig = plt.figure() + ax = fig.add_subplot(111) - ax1.plot(xgm.trainId, xgm, 'o', c='C0', ms=1) - ax1.set_ylabel('xgm') + ax.plot(xgm.trainId, xgm, 'o', c='C0', ms=1) if xgm_min: - ax1.axhline(xgm_min, c='r') + ax.axhline(xgm_min, c='r') if xgm_max: - ax1.axhline(xgm_max, c='r') + ax.axhline(xgm_max, c='r') - ax2.plot(scan.trainId, scan) - ax2.set_ylabel('scan variable') - ax2.set_xlabel('trainId') + ax.set_ylabel('xgm') + ax.set_title(f'run: {run_nr}') - ax1.set_title(f'run: {run_nr}') + if safe_fig == True: + tstamp = strftime('%y%m%d_%H%M') + fig.savefig(f'images/run{run_nr}_scan_{tstamp}.png', dpi=200) + + +def plot_scanned_range(scan_variable, + xgm_min = None, xgm_max = None, + run_nr = '', + safe_fig = False): + + fig = plt.figure() + ax = fig.add_subplot(111) + + ax.plot(scan_variable.trainId, scan_variable) + + ax.set_ylabel('scan variable') + ax.set_xlabel('trainId') + ax.set_title(f'run: {run_nr}') if safe_fig == True: tstamp = strftime('%y%m%d_%H%M') fig.savefig(f'images/run{run_nr}_scan_{tstamp}.png', dpi=200) -def plot_scan_histogram(scan, - run_nr = '', - safe_fig = False): +def plot_hist_binvar(scan, + run_nr = '', + safe_fig = False): counts = xr.DataArray(np.ones(len(scan)), dims=['scan_variable'], @@ -51,8 +67,11 @@ def plot_scan_histogram(scan, counts = counts.groupby('scan_variable').sum() - fig, ax = plt.subplots() + fig = plt.figure() + ax = 
fig.add_subplot(111) + ax.plot(counts.scan_variable, counts, 'o', ms=4) + ax.set_xlabel('scan variable') ax.set_ylabel('number of trains') ax.set_title(f'run {run_nr}') @@ -63,5 +82,5 @@ def plot_scan_histogram(scan, fig.savefig(f'images/run{run_nr}_scan_{tstamp}.png', dpi=200) -def plot_processed_histogram(hist_data): +def plot_hist_processed(hist_data): pass diff --git a/src/toolbox_scs/load.py b/src/toolbox_scs/load.py index 7892c1109d8c30e36f5ea01f26b68bb09d6d5ed0..d8024bcd800c61cb5fea7a24c06263f24805ae3f 100644 --- a/src/toolbox_scs/load.py +++ b/src/toolbox_scs/load.py @@ -240,56 +240,60 @@ def concatenateRuns(runs): return result -def load_scan_variable(run, mnemonic, stepsize=None): +def load_binned_array(run, mnemonic_key=None, binsize=None): """ - Loads the given scan variable and rounds scan positions to integer - multiples of stepsize for consistent grouping (except for - stepsize=None). - Returns a dummy scan if mnemonic is set to None. + Loads the required 1D-data and rounds its values to integer multiples of + binsize for consistent grouping (except for binsize=None). + Returns a dummy array if mnemonic_key is None. Parameters ---------- run: karabo_data.DataCollection path to the run directory - mnemonic: dic - single entry of mnemonics collection. None creates a dummy file - to average over all trains of the run + mnemonic_key: str + Identifier of a single item in the mnemonic collection. None creates a dummy + file to average over all trains in the run stepsize : float - nominal stepsize of the scan - values of scan_variable will be - rounded to integer multiples of this value + nominal stepsize of the array data - values will be rounded to integer + multiples of this value Returns ------- - scan : xarray.DataArray - xarray DataArray containing the specified scan variable using - the trainId as coordinate. + data : xarray.DataArray + xarray DataArray containing the binned array using the trainId as + coordinate. 
+ + Raises + ------ + ToolBoxValueError: Exception + Toolbox specific exception, indicating a non-valid mnemonic entry Example ------- >>> import toolbox_scs as tb >>> run = tb.load_run(2212, 235) >>> mnemonic = 'PP800_PhaseShifter' - >>> scan_variable = tb.load_scan_variable( + >>> binned_array = tb.load_binned_array( self.ed_run, mnemonic, 0.5) """ try: - if mnemonic is None: + if mnemonic_key is None: data = xr.DataArray( np.ones(len(run.train_ids), dtype=np.int16), dims=['trainId'], coords={'trainId': run.train_ids}) - elif mnemonic in _mnemonics_ld: - mnem = _mnemonics_ld[mnemonic] + elif mnemonic_key in _mnemonics_ld: + mnem = _mnemonics_ld[mnemonic_key] data = run.get_array(*mnem.values()) else: - raise ToolBoxValueError("Invalid mnemonic", mnemonic) + raise ToolBoxValueError("Invalid mnemonic", mnemonic_key) - if stepsize is not None: - data = stepsize * np.round(data / stepsize) - data.name = 'scan_variable' - log.debug(f"Constructed scan variable for {mnemonic}") + if binsize is not None: + data = binsize * np.round(data / binsize) + data.name = 'binned_array' + log.debug(f"Constructed binned array for {mnemonic_key}") except ToolBoxValueError as err: log.error(f"{err.message}") raise - + return data \ No newline at end of file diff --git a/src/toolbox_scs/test/test_dssc_cls.py b/src/toolbox_scs/test/test_dssc_cls.py new file mode 100644 index 0000000000000000000000000000000000000000..74c92c3e861a9cae3e741df483495cbd96571e1a --- /dev/null +++ b/src/toolbox_scs/test/test_dssc_cls.py @@ -0,0 +1,141 @@ +import unittest +import logging +import os +import argparse +import shutil +import multiprocessing +from time import strftime + +import numpy as np +import xarray as xr + +import toolbox_scs as tb +import toolbox_scs.detectors as tbdet + +logging.basicConfig(level=logging.DEBUG) +log_root = logging.getLogger(__name__) + + +suites = {"no-processing": ( + "test_create", + "test_tmpfiles", + ), + "processing": ( + "test_binning", + ) + } + + +_temp_dirs = 
['tmp', 'images', 'processed_runs'] + + +def setup_tmp_dir(): + for d in _temp_dirs: + if not os.path.isdir(d): + os.mkdir(d) + + +def cleanup_tmp_dir(): + for d in _temp_dirs: + shutil.rmtree(d, ignore_errors=True) + log_root.info(f'remove {d}') + + +class TestDSSC(unittest.TestCase): + @classmethod + def setUpClass(cls): + log_root.info("Start global setup") + # --------------------------------------------------------------------- + # global test settings + # --------------------------------------------------------------------- + setup_tmp_dir() + log_root.info("Finished global setup, start tests") + + @classmethod + def tearDownClass(cls): + log_root.info("Clean up test environment....") + cleanup_tmp_dir() + + def test_create(self): + params = {'bin_variable_name': 'PP800_PhaseShifter', + 'framepattern': ['pumped', 'unpumped']} + + # normal + run235 = tbdet.DSSC(2212, 235) + del(run235) + run235 = tbdet.DSSC(2212, 235, is_dark=False) + run235 = tbdet.DSSC(2212, 235, is_dark=False, **params) + self.assertEqual(run235.bin_variable.values[0], 7585.5) + + # expected fails + with self.assertRaises(FileNotFoundError) as cm: + run235 = tbdet.DSSC(2212, 2354) + err_msg = "[Errno 2] No such file or directory: " \ + "'/gpfs/exfel/exp/SCS/201901/p002212/raw/r2354'" + self.assertEqual(str(cm.exception), err_msg) + + def test_tmpfiles(self): + params = {'bin_variable_name': 'PP800_PhaseShifter', + 'framepattern': ['pumped', 'unpumped']} + run235 = tbdet.DSSC(2212, 235, is_dark=False, **params) + self.assertEqual(run235.scanfile, './tmp/scan.h5') + self.assertTrue(os.path.isfile('./tmp/mask.h5')) + + def test_binning(self): + params = {'bin_variable_name': 'PP800_PhaseShifter', + 'is_dark': True, + 'framepattern': ['pumped', 'unpumped']} + + testrun = tbdet.DSSC(2212, 232, **params) + mod_list = [0,1] + data = testrun.bin_data(use_joblib=False, process_modules=mod_list) + self.assertEqual(data.run, 232) + data = testrun.bin_data(use_joblib=True, process_modules=mod_list) + 
self.assertEqual(data.run, 232) + + +def list_suites(): + print("\nPossible test suites:\n" + "-" * 79) + for key in suites: + print(key) + print("-" * 79 + "\n") + + +def suite(*tests): + suite = unittest.TestSuite() + for test in tests: + suite.addTest(TestDSSC(test)) + return suite + + +def main(*cliargs): + try: + for test_suite in cliargs: + if test_suite in suites: + runner = unittest.TextTestRunner(verbosity=2) + runner.run(suite(*suites[test_suite])) + else: + log_root.warning( + "Unknown suite: '{}'".format(test_suite)) + pass + except Exception as err: + log_root.error("Unecpected error: {}".format(err), + exc_info=True) + pass + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--list-suites', + action='store_true', + help='list possible test suites') + parser.add_argument('--run-suites', metavar='S', + nargs='+', action='store', + help='a list of valid test suites') + args = parser.parse_args() + + if args.list_suites: + list_suites() + + if args.run_suites: + main(*args.run_suites) diff --git a/src/toolbox_scs/test/test_detectors_dssc.py b/src/toolbox_scs/test/test_dssc_methods.py similarity index 100% rename from src/toolbox_scs/test/test_detectors_dssc.py rename to src/toolbox_scs/test/test_dssc_methods.py