diff --git a/src/toolbox_scs/detectors/dssc.py b/src/toolbox_scs/detectors/dssc.py index a5aa64bb8d28cf2a25430e7c782cbf64eb5af3c2..58cb59b4d6d4510c5d94acddb295ee7afeeeef12 100644 --- a/src/toolbox_scs/detectors/dssc.py +++ b/src/toolbox_scs/detectors/dssc.py @@ -127,6 +127,8 @@ class DSSCBinner: xgm_threshold=(0, np.inf), normevery=1): """ creates a mask for dssc frames according to measured xgm intensity. + Once such a mask has been constructed, it will be used in the data + reduction process to drop out-of-bounds pulses. """ fpt = self.info['frames_per_train'] n_trains = self.info['number_of_trains'] @@ -188,10 +190,20 @@ class DSSCBinner: xgm_normalization=False, normevery=1 ): """ - Load and bin dssc data according to self.bins + Load and bin dssc data according to self.bins. No data is returned by + this method. The condensed data is written to file by the worker + processes directly. Parameters ---------- + modules: list of ints + a list containing the module numbers that should be processed. If + empty, all modules are processed. + filepath: str + the path where the files containing the reduced data should be + stored. + chunksize: int + The number of trains that should be read in one iterative step. backend: str joblib multiprocessing backend to be used. At the moment it can be any of joblibs standard backends: 'loky' (default), @@ -204,20 +216,16 @@ class DSSCBinner: cpu's. Note that when using the default backend there is no need to adjust this parameter with the current implementation. - modules: list of ints - a list containing the module numbers that should be processed. If - empty, all modules are processed. - chunksize: int - The number of trains that should be read in one iterative step. dark_image: xarray.DataArray - DataArray with dimensions compatible with the loaded dssc data. + DataArray with dimensions compatible with the loaded dssc data. If + given, it will be subtracted from the dssc data before the binning. + The dark image needs to be of dimension module, trainId, pulse, x + and y. + xgm_normalization: boolean + if true, the dssc data is normalized by the xgm data before the + binning. normevery: int integer indicating which out of normevery frame will be normalized. - - Returns - ------- - data: xarray.DataArray - DataArray containing the processed data. """ log.info("Bin data according to binners") log.info(f'Process {chunksize} trains per chunk') diff --git a/src/toolbox_scs/detectors/dssc_processing.py b/src/toolbox_scs/detectors/dssc_processing.py index 256e13f4a7d64c439accf6c19dd8fd64e17d28a2..02fb10c069e501a1c03345157bd18d151aa5b19b 100644 --- a/src/toolbox_scs/detectors/dssc_processing.py +++ b/src/toolbox_scs/detectors/dssc_processing.py @@ -140,8 +140,8 @@ def process_dssc_data(proposal, run_nr, module, chunksize, info, dssc_binners, path='./', pulsemask=None, dark_image=None, - xgm_normalization=False, xgm_mnemonic='SCS_SA3', + xgm_normalization=False, normevery=1 ): """ @@ -158,18 +158,21 @@ def process_dssc_data(proposal, run_nr, module, chunksize, info, dssc_binners, module : int DSSC module to process chunksize : int - number of trains to process simultaneously + number of trains to load simultaneously info: dictionary dictionary containing keys 'dims', 'frames_per_train', 'total_frames', - 'trainIds' + 'trainIds', 'number_of_trains'. dssc_binners: dictionary a dictionary containing binner objects created by the tbdet member function "create_binner()" + path : str + location in which the .h5 files, containing the binned data, should + be stored. pulsemask : numpy.ndarray array of booleans to be used to mask dssc data according to xgm data. dark_image: xarray.DataArray an xarray dataarray with matching coordinates with the loaded data. If - dark_image is not None it will be substracted from each individual dssc + dark_image is not None it will be subtracted from each individual dssc frame. xgm_normalization: bool true if the data should be divided by the corresponding xgm value. @@ -209,7 +212,7 @@ def process_dssc_data(proposal, run_nr, module, chunksize, info, dssc_binners, chunk_hist = xr.full_like(chunk_data[:,:,0,0], fill_value=1) # --------------------------------------------------------------------- - # optional blocks -> ToDo: see merge request !89 + # optional blocks -> ToDo: see merge request !87 # --------------------------------------------------------------------- # option 1: prefiltering -> xgm pulse masking if pulsemask is not None: @@ -218,9 +221,9 @@ def process_dssc_data(proposal, run_nr, module, chunksize, info, dssc_binners, chunk_data = chunk_data.where(pulsemask) chunk_hist = chunk_hist.where(pulsemask) - # option 2: substraction of dark image/s + # option 2: subtraction of dark image/s if dark_image is not None: - log.debug(f'Module {module}: substract dark') + log.debug(f'Module {module}: subtract dark') chunk_data.values = chunk_data.values - dark_image.values # slower: using xarray directly #chunk_data = chunk_data - dark_image @@ -267,7 +270,7 @@ def process_dssc_data(proposal, run_nr, module, chunksize, info, dssc_binners, module_data = module_data.transpose('trainId', 'pulse', 'x', 'y') module_data.attrs['module'] = module - log.info(f'saving module {module}') + log.debug(f'saving module {module}') if not os.path.isdir(path): os.mkdir(path) fname = f'run_{run_nr}_module{module}.h5' diff --git a/src/toolbox_scs/test/test_dssc_cls.py b/src/toolbox_scs/test/test_dssc_cls.py index c43b45f5218c2e05abb25d5251ec2257a626939c..e548a5433bee7f7ceb683337c8bb5edccac9adae 100644 --- a/src/toolbox_scs/test/test_dssc_cls.py +++ b/src/toolbox_scs/test/test_dssc_cls.py @@ -20,7 +20,8 @@ suites = {"no-processing": ( "test_create", ), "processing": ( - "test_normalization_all2", + "test_processing_quick", + #"test_normalization_all", ) } @@ -42,13 +43,7 @@ class TestDSSC(unittest.TestCase): @classmethod def setUpClass(cls): log_root.info("Start global setup") - # --------------------------------------------------------------------- - # global test settings - # --------------------------------------------------------------------- - setup_tmp_dir() - - # --------------------------------------------------------------------- log_root.info("Finished global setup, start tests") @classmethod @@ -95,12 +90,12 @@ class TestDSSC(unittest.TestCase): "'/gpfs/exfel/exp/SCS/201901/p002212/raw/r2354'" self.assertEqual(str(cm.exception), err_msg) - - def test_normalization_all2(self): + def test_processing_quick(self): proposal_nb = 2530 - - # dark + module_list=[2] run_nb = 49 + + run_info = tbdet.load_dssc_info(proposal_nb, run_nb) fpt = run_info['frames_per_train'] n_trains = run_info['number_of_trains'] @@ -117,9 +112,58 @@ class TestDSSC(unittest.TestCase): buckets_pulse) binners = {'trainId': binner1, 'pulse': binner2} bin_obj = tbdet.DSSCBinner(proposal_nb, run_nb, binners=binners) - dark = bin_obj.process_data(modules=[15], chunksize=248) + bin_obj.process_data( + modules=module_list, filepath='./tmp/', chunksize=248) + filename = f'./tmp/run_{run_nb}_module{module_list[0]}.h5' + self.assertTrue(os.path.isfile(filename)) + + run_formatted = tbdet.DSSCFormatter('./tmp/') + run_formatted.combine_files() + attrs = {'run_type':'useful description', + 'comment':'blabla', + 'run_number':run_nb} + run_formatted.add_attributes(attrs) + run_formatted.save_formatted_data( + f'./tmp/run_{run_nb}_formatted.h5') + data = tbdet.load_xarray(f'./tmp/run_{run_nb}_formatted.h5') + self.assertIsNotNone(data) + + def test_normalization_all(self): + proposal_nb = 2530 + module_list=[2] + + # dark + run_nb = 49 + run_info = tbdet.load_dssc_info(proposal_nb, run_nb) + fpt = run_info['frames_per_train'] + n_trains = run_info['number_of_trains'] + trainIds = run_info['trainIds'] - # run to normalize + buckets_train = np.zeros(n_trains) + + binner1 = tbdet.create_dssc_bins("trainId", + trainIds, + buckets_train) + binner2 = tbdet.create_dssc_bins("pulse", + np.linspace(0,fpt-1,fpt, dtype=int), + np.linspace(0,fpt-1,fpt, dtype=int)) + binners = {'trainId': binner1, 'pulse': binner2} + bin_obj = tbdet.DSSCBinner(proposal_nb, run_nb, binners=binners) + bin_obj.process_data( + modules=module_list, filepath='./tmp/', chunksize=248) + filename = f'./tmp/run_{run_nb}_module{module_list[0]}.h5' + self.assertTrue(os.path.isfile(filename)) + + run_formatted = tbdet.DSSCFormatter('./tmp/') + run_formatted.combine_files() + attrs = {'run_type':'useful description', + 'comment':'blabla', + 'run_number':run_nb} + run_formatted.add_attributes(attrs) + run_formatted.save_formatted_data( + f'./tmp/run_{run_nb}_formatted.h5') + + # main run run_nb = 50 run_info = tbdet.load_dssc_info(proposal_nb, run_nb) fpt = run_info['frames_per_train'] @@ -138,15 +182,18 @@ class TestDSSC(unittest.TestCase): binners = {'trainId': binner1, 'pulse': binner2} bin_obj = tbdet.DSSCBinner(proposal_nb, run_nb, binners=binners) - bin_params = {'modules':[15], + dark = tbdet.load_xarray('./tmp/run_49_formatted.h5') + bin_params = {'modules':module_list, 'chunksize':248, + 'filepath':'./tmp/', 'xgm_normalization':True, 'normevery':2, - 'dark_image':dark['data'][:,0,0,:,:] + 'dark_image':dark['data'] } - data = bin_obj.process_data(**bin_params) - self.assertIsNotNone(data.data) + bin_obj.process_data(**bin_params) + filename = f'./tmp/run_{run_nb}_module{module_list[0]}.h5' + self.assertTrue(os.path.isfile(filename))