Skip to content
Snippets Groups Projects
Commit d220cc94 authored by Rafael Gort
Browse files

Merge branch 'cherry-pick-4ae0f38e' into 'DSSC_methods_AS'

Updated documentation and adapted test suites

See merge request !93
parents 40d8b717 26a4c163
No related branches found
No related tags found
2 merge requests: !93 Updated documentation and adapted test suites, !91 WIP: Dssc methods as
...@@ -132,6 +132,8 @@ class DSSCBinner: ...@@ -132,6 +132,8 @@ class DSSCBinner:
xgm_threshold=(0, np.inf), normevery=1): xgm_threshold=(0, np.inf), normevery=1):
""" """
creates a mask for dssc frames according to measured xgm intensity. creates a mask for dssc frames according to measured xgm intensity.
Once such a mask has been constructed, it will be used in the data
reduction process to drop out-of-bounds pulses.
""" """
fpt = self.info['frames_per_train'] fpt = self.info['frames_per_train']
n_trains = self.info['number_of_trains'] n_trains = self.info['number_of_trains']
...@@ -193,10 +195,20 @@ class DSSCBinner: ...@@ -193,10 +195,20 @@ class DSSCBinner:
xgm_normalization=False, normevery=1 xgm_normalization=False, normevery=1
): ):
""" """
Load and bin dssc data according to self.bins Load and bin dssc data according to self.bins. No data is returned by
this method. The condensed data is written to file by the worker
processes directly.
Parameters Parameters
---------- ----------
modules: list of ints
a list containing the module numbers that should be processed. If
empty, all modules are processed.
filepath: str
the path where the files containing the reduced data should be
stored.
chunksize: int
The number of trains that should be read in one iterative step.
backend: str backend: str
joblib multiprocessing backend to be used. At the moment it can be joblib multiprocessing backend to be used. At the moment it can be
any of joblibs standard backends: 'loky' (default), any of joblibs standard backends: 'loky' (default),
...@@ -209,20 +221,16 @@ class DSSCBinner: ...@@ -209,20 +221,16 @@ class DSSCBinner:
cpu's. cpu's.
Note that when using the default backend there is no need to adjust Note that when using the default backend there is no need to adjust
this parameter with the current implementation. this parameter with the current implementation.
modules: list of ints
a list containing the module numbers that should be processed. If
empty, all modules are processed.
chunksize: int
The number of trains that should be read in one iterative step.
dark_image: xarray.DataArray dark_image: xarray.DataArray
DataArray with dimensions compatible with the loaded dssc data. DataArray with dimensions compatible with the loaded dssc data. If
given, it will be subtracted from the dssc data before the binning.
The dark image needs to be of dimension module, trainId, pulse, x
and y.
xgm_normalization: boolean
if true, the dssc data is normalized by the xgm data before the
binning.
normevery: int normevery: int
integer indicating which out of normevery frame will be normalized. integer indicating which out of normevery frame will be normalized.
Returns
-------
data: xarray.DataArray
DataArray containing the processed data.
""" """
log.info("Bin data according to binners") log.info("Bin data according to binners")
log.info(f'Process {chunksize} trains per chunk') log.info(f'Process {chunksize} trains per chunk')
......
...@@ -140,8 +140,8 @@ def process_dssc_data(proposal, run_nr, module, chunksize, info, dssc_binners, ...@@ -140,8 +140,8 @@ def process_dssc_data(proposal, run_nr, module, chunksize, info, dssc_binners,
path='./', path='./',
pulsemask=None, pulsemask=None,
dark_image=None, dark_image=None,
xgm_normalization=False,
xgm_mnemonic='SCS_SA3', xgm_mnemonic='SCS_SA3',
xgm_normalization=False,
normevery=1 normevery=1
): ):
""" """
...@@ -158,18 +158,21 @@ def process_dssc_data(proposal, run_nr, module, chunksize, info, dssc_binners, ...@@ -158,18 +158,21 @@ def process_dssc_data(proposal, run_nr, module, chunksize, info, dssc_binners,
module : int module : int
DSSC module to process DSSC module to process
chunksize : int chunksize : int
number of trains to process simultaneously number of trains to load simultaneously
info: dictionary info: dictionary
dictionary containing keys 'dims', 'frames_per_train', 'total_frames', dictionary containing keys 'dims', 'frames_per_train', 'total_frames',
'trainIds' 'trainIds', 'number_of_trains'.
dssc_binners: dictionary dssc_binners: dictionary
a dictionary containing binner objects created by the tbdet member a dictionary containing binner objects created by the tbdet member
function "create_binner()" function "create_binner()"
path : str
location in which the .h5 files, containing the binned data, should
be stored.
pulsemask : numpy.ndarray pulsemask : numpy.ndarray
array of booleans to be used to mask dssc data according to xgm data. array of booleans to be used to mask dssc data according to xgm data.
dark_image: xarray.DataArray dark_image: xarray.DataArray
an xarray dataarray with matching coordinates with the loaded data. If an xarray dataarray with matching coordinates with the loaded data. If
dark_image is not None it will be substracted from each individual dssc dark_image is not None it will be subtracted from each individual dssc
frame. frame.
xgm_normalization: bool xgm_normalization: bool
true if the data should be divided by the corresponding xgm value. true if the data should be divided by the corresponding xgm value.
...@@ -209,7 +212,7 @@ def process_dssc_data(proposal, run_nr, module, chunksize, info, dssc_binners, ...@@ -209,7 +212,7 @@ def process_dssc_data(proposal, run_nr, module, chunksize, info, dssc_binners,
chunk_hist = xr.full_like(chunk_data[:,:,0,0], fill_value=1) chunk_hist = xr.full_like(chunk_data[:,:,0,0], fill_value=1)
# --------------------------------------------------------------------- # ---------------------------------------------------------------------
# optional blocks -> ToDo: see merge request !89 # optional blocks -> ToDo: see merge request !87
# --------------------------------------------------------------------- # ---------------------------------------------------------------------
# option 1: prefiltering -> xgm pulse masking # option 1: prefiltering -> xgm pulse masking
if pulsemask is not None: if pulsemask is not None:
...@@ -218,9 +221,9 @@ def process_dssc_data(proposal, run_nr, module, chunksize, info, dssc_binners, ...@@ -218,9 +221,9 @@ def process_dssc_data(proposal, run_nr, module, chunksize, info, dssc_binners,
chunk_data = chunk_data.where(pulsemask) chunk_data = chunk_data.where(pulsemask)
chunk_hist = chunk_hist.where(pulsemask) chunk_hist = chunk_hist.where(pulsemask)
# option 2: substraction of dark image/s # option 2: subtraction of dark image/s
if dark_image is not None: if dark_image is not None:
log.debug(f'Module {module}: substract dark') log.debug(f'Module {module}: subtract dark')
chunk_data.values = chunk_data.values - dark_image.values chunk_data.values = chunk_data.values - dark_image.values
# slower: using xarray directly # slower: using xarray directly
#chunk_data = chunk_data - dark_image #chunk_data = chunk_data - dark_image
...@@ -267,7 +270,7 @@ def process_dssc_data(proposal, run_nr, module, chunksize, info, dssc_binners, ...@@ -267,7 +270,7 @@ def process_dssc_data(proposal, run_nr, module, chunksize, info, dssc_binners,
module_data = module_data.transpose('trainId', 'pulse', 'x', 'y') module_data = module_data.transpose('trainId', 'pulse', 'x', 'y')
module_data.attrs['module'] = module module_data.attrs['module'] = module
log.info(f'saving module {module}') log.debug(f'saving module {module}')
if not os.path.isdir(path): if not os.path.isdir(path):
os.mkdir(path) os.mkdir(path)
fname = f'run_{run_nr}_module{module}.h5' fname = f'run_{run_nr}_module{module}.h5'
......
...@@ -20,7 +20,8 @@ suites = {"no-processing": ( ...@@ -20,7 +20,8 @@ suites = {"no-processing": (
"test_create", "test_create",
), ),
"processing": ( "processing": (
"test_normalization_all2", "test_processing_quick",
#"test_normalization_all",
) )
} }
...@@ -42,13 +43,7 @@ class TestDSSC(unittest.TestCase): ...@@ -42,13 +43,7 @@ class TestDSSC(unittest.TestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
log_root.info("Start global setup") log_root.info("Start global setup")
# ---------------------------------------------------------------------
# global test settings
# ---------------------------------------------------------------------
setup_tmp_dir() setup_tmp_dir()
# ---------------------------------------------------------------------
log_root.info("Finished global setup, start tests") log_root.info("Finished global setup, start tests")
@classmethod @classmethod
...@@ -95,12 +90,12 @@ class TestDSSC(unittest.TestCase): ...@@ -95,12 +90,12 @@ class TestDSSC(unittest.TestCase):
"'/gpfs/exfel/exp/SCS/201901/p002212/raw/r2354'" "'/gpfs/exfel/exp/SCS/201901/p002212/raw/r2354'"
self.assertEqual(str(cm.exception), err_msg) self.assertEqual(str(cm.exception), err_msg)
def test_processing_quick(self):
def test_normalization_all2(self):
proposal_nb = 2530 proposal_nb = 2530
module_list=[2]
# dark
run_nb = 49 run_nb = 49
run_info = tbdet.load_dssc_info(proposal_nb, run_nb) run_info = tbdet.load_dssc_info(proposal_nb, run_nb)
fpt = run_info['frames_per_train'] fpt = run_info['frames_per_train']
n_trains = run_info['number_of_trains'] n_trains = run_info['number_of_trains']
...@@ -117,9 +112,58 @@ class TestDSSC(unittest.TestCase): ...@@ -117,9 +112,58 @@ class TestDSSC(unittest.TestCase):
buckets_pulse) buckets_pulse)
binners = {'trainId': binner1, 'pulse': binner2} binners = {'trainId': binner1, 'pulse': binner2}
bin_obj = tbdet.DSSCBinner(proposal_nb, run_nb, binners=binners) bin_obj = tbdet.DSSCBinner(proposal_nb, run_nb, binners=binners)
dark = bin_obj.process_data(modules=[15], chunksize=248) bin_obj.process_data(
modules=module_list, filepath='./tmp/', chunksize=248)
filename = f'./tmp/run_{run_nb}_module{module_list[0]}.h5'
self.assertTrue(os.path.isfile(filename))
run_formatted = tbdet.DSSCFormatter('./tmp/')
run_formatted.combine_files()
attrs = {'run_type':'useful description',
'comment':'blabla',
'run_number':run_nb}
run_formatted.add_attributes(attrs)
run_formatted.save_formatted_data(
f'./tmp/run_{run_nb}_formatted.h5')
data = tbdet.load_xarray(f'./tmp/run_{run_nb}_formatted.h5')
self.assertIsNotNone(data)
def test_normalization_all(self):
proposal_nb = 2530
module_list=[2]
# dark
run_nb = 49
run_info = tbdet.load_dssc_info(proposal_nb, run_nb)
fpt = run_info['frames_per_train']
n_trains = run_info['number_of_trains']
trainIds = run_info['trainIds']
# run to normalize buckets_train = np.zeros(n_trains)
binner1 = tbdet.create_dssc_bins("trainId",
trainIds,
buckets_train)
binner2 = tbdet.create_dssc_bins("pulse",
np.linspace(0,fpt-1,fpt, dtype=int),
np.linspace(0,fpt-1,fpt, dtype=int))
binners = {'trainId': binner1, 'pulse': binner2}
bin_obj = tbdet.DSSCBinner(proposal_nb, run_nb, binners=binners)
bin_obj.process_data(
modules=module_list, filepath='./tmp/', chunksize=248)
filename = f'./tmp/run_{run_nb}_module{module_list[0]}.h5'
self.assertTrue(os.path.isfile(filename))
run_formatted = tbdet.DSSCFormatter('./tmp/')
run_formatted.combine_files()
attrs = {'run_type':'useful description',
'comment':'blabla',
'run_number':run_nb}
run_formatted.add_attributes(attrs)
run_formatted.save_formatted_data(
f'./tmp/run_{run_nb}_formatted.h5')
# main run
run_nb = 50 run_nb = 50
run_info = tbdet.load_dssc_info(proposal_nb, run_nb) run_info = tbdet.load_dssc_info(proposal_nb, run_nb)
fpt = run_info['frames_per_train'] fpt = run_info['frames_per_train']
...@@ -138,15 +182,18 @@ class TestDSSC(unittest.TestCase): ...@@ -138,15 +182,18 @@ class TestDSSC(unittest.TestCase):
binners = {'trainId': binner1, 'pulse': binner2} binners = {'trainId': binner1, 'pulse': binner2}
bin_obj = tbdet.DSSCBinner(proposal_nb, run_nb, binners=binners) bin_obj = tbdet.DSSCBinner(proposal_nb, run_nb, binners=binners)
bin_params = {'modules':[15], dark = tbdet.load_xarray('./tmp/run_49_formatted.h5')
bin_params = {'modules':module_list,
'chunksize':248, 'chunksize':248,
'filepath':'./tmp/',
'xgm_normalization':True, 'xgm_normalization':True,
'normevery':2, 'normevery':2,
'dark_image':dark['data'][:,0,0,:,:] 'dark_image':dark['data']
} }
data = bin_obj.process_data(**bin_params) bin_obj.process_data(**bin_params)
self.assertIsNotNone(data.data) filename = f'./tmp/run_{run_nb}_module{module_list[0]}.h5'
self.assertTrue(os.path.isfile(filename))
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment