Skip to content
Snippets Groups Projects
Commit d220cc94 authored by Rafael Gort's avatar Rafael Gort
Browse files

Merge branch 'cherry-pick-4ae0f38e' into 'DSSC_methods_AS'

Updated documentation and adapted test suites

See merge request !93
parents 40d8b717 26a4c163
No related branches found
No related tags found
2 merge requests!93Updated documentation and adapted test suites,!91WIP: Dssc methods as
......@@ -132,6 +132,8 @@ class DSSCBinner:
xgm_threshold=(0, np.inf), normevery=1):
"""
creates a mask for dssc frames according to measured xgm intensity.
Once such a mask has been constructed, it will be used in the data
reduction process to drop out-of-bounds pulses.
"""
fpt = self.info['frames_per_train']
n_trains = self.info['number_of_trains']
......@@ -193,10 +195,20 @@ class DSSCBinner:
xgm_normalization=False, normevery=1
):
"""
Load and bin dssc data according to self.bins
Load and bin dssc data according to self.bins. No data is returned by
this method. The condensed data is written to file by the worker
processes directly.
Parameters
----------
modules: list of ints
a list containing the module numbers that should be processed. If
empty, all modules are processed.
filepath: str
the path where the files containing the reduced data should be
stored.
chunksize: int
The number of trains that should be read in one iterative step.
backend: str
joblib multiprocessing backend to be used. At the moment it can be
any of joblib's standard backends: 'loky' (default),
......@@ -209,20 +221,16 @@ class DSSCBinner:
CPUs.
Note that when using the default backend there is no need to adjust
this parameter with the current implementation.
modules: list of ints
a list containing the module numbers that should be processed. If
empty, all modules are processed.
chunksize: int
The number of trains that should be read in one iterative step.
dark_image: xarray.DataArray
DataArray with dimensions compatible with the loaded dssc data.
DataArray with dimensions compatible with the loaded dssc data. If
given, it will be subtracted from the dssc data before the binning.
The dark image needs to be of dimension module, trainId, pulse, x
and y.
xgm_normalization: boolean
if true, the dssc data is normalized by the xgm data before the
binning.
normevery: int
    integer indicating that only every normevery-th frame will be
    normalized.
Returns
-------
data: xarray.DataArray
DataArray containing the processed data.
"""
log.info("Bin data according to binners")
log.info(f'Process {chunksize} trains per chunk')
......
......@@ -140,8 +140,8 @@ def process_dssc_data(proposal, run_nr, module, chunksize, info, dssc_binners,
path='./',
pulsemask=None,
dark_image=None,
xgm_normalization=False,
xgm_mnemonic='SCS_SA3',
xgm_normalization=False,
normevery=1
):
"""
......@@ -158,18 +158,21 @@ def process_dssc_data(proposal, run_nr, module, chunksize, info, dssc_binners,
module : int
DSSC module to process
chunksize : int
number of trains to process simultaneously
number of trains to load simultaneously
info: dictionary
dictionary containing keys 'dims', 'frames_per_train', 'total_frames',
'trainIds'
'trainIds', 'number_of_trains'.
dssc_binners: dictionary
a dictionary containing binner objects created by the tbdet member
function "create_binner()"
path : str
location in which the .h5 files, containing the binned data, should
be stored.
pulsemask : numpy.ndarray
array of booleans to be used to mask dssc data according to xgm data.
dark_image: xarray.DataArray
an xarray dataarray with matching coordinates with the loaded data. If
dark_image is not None it will be substracted from each individual dssc
dark_image is not None it will be subtracted from each individual dssc
frame.
xgm_normalization: bool
true if the data should be divided by the corresponding xgm value.
......@@ -209,7 +212,7 @@ def process_dssc_data(proposal, run_nr, module, chunksize, info, dssc_binners,
chunk_hist = xr.full_like(chunk_data[:,:,0,0], fill_value=1)
# ---------------------------------------------------------------------
# optional blocks -> ToDo: see merge request !89
# optional blocks -> ToDo: see merge request !87
# ---------------------------------------------------------------------
# option 1: prefiltering -> xgm pulse masking
if pulsemask is not None:
......@@ -218,9 +221,9 @@ def process_dssc_data(proposal, run_nr, module, chunksize, info, dssc_binners,
chunk_data = chunk_data.where(pulsemask)
chunk_hist = chunk_hist.where(pulsemask)
# option 2: substraction of dark image/s
# option 2: subtraction of dark image/s
if dark_image is not None:
log.debug(f'Module {module}: substract dark')
log.debug(f'Module {module}: subtract dark')
chunk_data.values = chunk_data.values - dark_image.values
# slower: using xarray directly
#chunk_data = chunk_data - dark_image
......@@ -267,7 +270,7 @@ def process_dssc_data(proposal, run_nr, module, chunksize, info, dssc_binners,
module_data = module_data.transpose('trainId', 'pulse', 'x', 'y')
module_data.attrs['module'] = module
log.info(f'saving module {module}')
log.debug(f'saving module {module}')
if not os.path.isdir(path):
os.mkdir(path)
fname = f'run_{run_nr}_module{module}.h5'
......
......@@ -20,7 +20,8 @@ suites = {"no-processing": (
"test_create",
),
"processing": (
"test_normalization_all2",
"test_processing_quick",
#"test_normalization_all",
)
}
......@@ -42,13 +43,7 @@ class TestDSSC(unittest.TestCase):
@classmethod
def setUpClass(cls):
log_root.info("Start global setup")
# ---------------------------------------------------------------------
# global test settings
# ---------------------------------------------------------------------
setup_tmp_dir()
# ---------------------------------------------------------------------
log_root.info("Finished global setup, start tests")
@classmethod
......@@ -95,12 +90,12 @@ class TestDSSC(unittest.TestCase):
"'/gpfs/exfel/exp/SCS/201901/p002212/raw/r2354'"
self.assertEqual(str(cm.exception), err_msg)
def test_normalization_all2(self):
def test_processing_quick(self):
proposal_nb = 2530
# dark
module_list=[2]
run_nb = 49
run_info = tbdet.load_dssc_info(proposal_nb, run_nb)
fpt = run_info['frames_per_train']
n_trains = run_info['number_of_trains']
......@@ -117,9 +112,58 @@ class TestDSSC(unittest.TestCase):
buckets_pulse)
binners = {'trainId': binner1, 'pulse': binner2}
bin_obj = tbdet.DSSCBinner(proposal_nb, run_nb, binners=binners)
dark = bin_obj.process_data(modules=[15], chunksize=248)
bin_obj.process_data(
modules=module_list, filepath='./tmp/', chunksize=248)
filename = f'./tmp/run_{run_nb}_module{module_list[0]}.h5'
self.assertTrue(os.path.isfile(filename))
run_formatted = tbdet.DSSCFormatter('./tmp/')
run_formatted.combine_files()
attrs = {'run_type':'useful description',
'comment':'blabla',
'run_number':run_nb}
run_formatted.add_attributes(attrs)
run_formatted.save_formatted_data(
f'./tmp/run_{run_nb}_formatted.h5')
data = tbdet.load_xarray(f'./tmp/run_{run_nb}_formatted.h5')
self.assertIsNotNone(data)
def test_normalization_all(self):
proposal_nb = 2530
module_list=[2]
# dark
run_nb = 49
run_info = tbdet.load_dssc_info(proposal_nb, run_nb)
fpt = run_info['frames_per_train']
n_trains = run_info['number_of_trains']
trainIds = run_info['trainIds']
# run to normalize
buckets_train = np.zeros(n_trains)
binner1 = tbdet.create_dssc_bins("trainId",
trainIds,
buckets_train)
binner2 = tbdet.create_dssc_bins("pulse",
np.linspace(0,fpt-1,fpt, dtype=int),
np.linspace(0,fpt-1,fpt, dtype=int))
binners = {'trainId': binner1, 'pulse': binner2}
bin_obj = tbdet.DSSCBinner(proposal_nb, run_nb, binners=binners)
bin_obj.process_data(
modules=module_list, filepath='./tmp/', chunksize=248)
filename = f'./tmp/run_{run_nb}_module{module_list[0]}.h5'
self.assertTrue(os.path.isfile(filename))
run_formatted = tbdet.DSSCFormatter('./tmp/')
run_formatted.combine_files()
attrs = {'run_type':'useful description',
'comment':'blabla',
'run_number':run_nb}
run_formatted.add_attributes(attrs)
run_formatted.save_formatted_data(
f'./tmp/run_{run_nb}_formatted.h5')
# main run
run_nb = 50
run_info = tbdet.load_dssc_info(proposal_nb, run_nb)
fpt = run_info['frames_per_train']
......@@ -138,15 +182,18 @@ class TestDSSC(unittest.TestCase):
binners = {'trainId': binner1, 'pulse': binner2}
bin_obj = tbdet.DSSCBinner(proposal_nb, run_nb, binners=binners)
bin_params = {'modules':[15],
dark = tbdet.load_xarray('./tmp/run_49_formatted.h5')
bin_params = {'modules':module_list,
'chunksize':248,
'filepath':'./tmp/',
'xgm_normalization':True,
'normevery':2,
'dark_image':dark['data'][:,0,0,:,:]
'dark_image':dark['data']
}
data = bin_obj.process_data(**bin_params)
self.assertIsNotNone(data.data)
bin_obj.process_data(**bin_params)
filename = f'./tmp/run_{run_nb}_module{module_list[0]}.h5'
self.assertTrue(os.path.isfile(filename))
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment