Commit afeee965 authored by Laurent Mercadier

Merge branch 'fix-get-array' into 'master'

fix `get_array`, add wrappers to some of `extra_data` basic functions

See merge request !116
parents 03569509 c471aaa4
@@ -10,7 +10,7 @@
 - contributions should comply with pep8 code structure guidelines.
 - Plot routines don't fit into objects since they are rather fluent.
   They have been outsourced to dssc_plot.py. They can now be accessed
-  as tbdet member functions.
+  as toolbox_scs member functions.
 """
 import os
 import logging
@@ -56,7 +56,7 @@ class DSSCBinner:
         run number
     binners: dictionary
         dictionary containing binners constructed using the
-        'create_dssc_bins' tbdet-method.
+        'create_dssc_bins' toolbox_scs.detectors-method.
     xgm_name: str
         a valid mnemonic key of the XGM data to be used to mask the dssc
         frames. Since the xgm is used in several methods its name can be
@@ -76,8 +76,8 @@
     Example
     -------
     1.) quick -> generic bins, no xgm,
-        >>> import toolbox_scs.detectors as tbdet
-        >>> run235 = tbdet.DSSCBinner(proposal_nb=2212, run_nb=235)
+        >>> import toolbox_scs as tb
+        >>> run235 = tb.DSSCBinner(proposal_nb=2212, run_nb=235)
     2.) detailed -> docs
     """
@@ -75,19 +75,18 @@ def create_dssc_bins(name, coordinates, bins):
     Examples
     --------
     >>> import toolbox_scs as tb
-    >>> import toolbox_scs.detectors as tbdet
-    >>> run = tb.load_run(2212, 235, include='*DA*')
+    >>> run = tb.open_run(2212, 235, include='*DA*')
     1.) binner along 'pulse' dimension. Group data into two bins.
     >>> bins_pulse = ['pumped', 'unpumped'] * 10
-    >>> binner_pulse = tbdet.create_dssc_bins("pulse",
+    >>> binner_pulse = tb.create_dssc_bins("pulse",
                                               np.linspace(0,19,20, dtype=int),
                                               bins_pulse)
     2.) binner along 'train' dimension. Group data into bins corresponding
         to the positions of a delay stage for instance.
     >>> bins_trainId = tb.get_array(run, 'PP800_PhaseShifter', 0.04)
-    >>> binner_train = tbdet.create_dssc_bins("trainId",
+    >>> binner_train = tb.create_dssc_bins("trainId",
                                               run.trainIds,
                                               bins_trainId.values)
     """
@@ -165,7 +165,7 @@ def process_dssc_data(proposal, run_nr, module, chunksize, info, dssc_binners,
         dictionary containing keys 'dims', 'frames_per_train', 'total_frames',
         'trainIds', 'number_of_trains'.
     dssc_binners: dictionary
-        a dictionary containing binner objects created by the tbdet member
+        a dictionary containing binner objects created by the ToolBox member
         function "create_binner()"
     path : str
         location in which the .h5 files, containing the binned data, should
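The `dssc_binners` dictionary is keyed by the binned dimension, matching the binners built in the `create_dssc_bins` examples above; a hedged sketch of its expected shape:

    # illustrative shape only; binner_pulse/binner_train as created above
    dssc_binners = {'pulse': binner_pulse,
                    'trainId': binner_train}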
@@ -23,8 +23,10 @@ import toolbox_scs.detectors as tbdet
 __all__ = [
     'concatenateRuns',
+    'find_run_path',
     'get_array',
     'load',
+    'open_run',
     'run_by_path',
 ]
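With the two names added to `__all__`, both wrappers become part of the public namespace; a minimal sketch of the resulting import:

    # both new wrappers are now importable from the package namespace
    from toolbox_scs import find_run_path, open_run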
@@ -52,9 +54,9 @@ def load(proposalNB=None, runNB=None,
     Parameters
     ----------
-    proposalNB: (str, int)
+    proposalNB: str, int
         proposal number e.g. 'p002252' or 2252
-    runNB: (str, int)
+    runNB: str, int
         run number as integer
     fields: str, list of str, list of dict
         list of mnemonics to load specific data such as "fastccd",
@@ -62,17 +64,18 @@ def load(proposalNB=None, runNB=None,
                {"extra": {'source: 'SCS_CDIFFT_MAG/SUPPLY/CURRENT',
                           'key': 'actual_current.value',
                           'dim': None}}
-    subFolder: (str)
-        sub-folder from which to load the data. Use 'raw' for raw data
-        or 'proc' for processed data.
-    display: (bool)
+    subFolder: str
+        'raw', 'proc' (processed) or 'all' (both 'raw' and 'proc') to access
+        data from either or both of those folders. If 'all' is used, sources
+        present in 'proc' overwrite those in 'raw'. The default is 'raw'.
+    display: bool
         whether to show the run.info or not
-    validate: (bool)
+    validate: bool
         whether to run extra-data-validate or not
     subset:
         a subset of train that can be load with by_index[:5] for the first 5
         trains
-    rois: dictionary
+    rois: dict
         a dictionnary of mnemonics with a list of rois definition and
         the desired names, for example:
         {'fastccd': {'ref': {'roi': by_index[730:890, 535:720],
@@ -112,11 +115,7 @@ def load(proposalNB=None, runNB=None,
     >>> run, data = tb.load(2212, 208, ['SCS_SA3', 'MCP2apd', 'nrj'])
     """
-    if isinstance(runNB, int):
-        runNB = 'r{:04d}'.format(runNB)
-    if isinstance(proposalNB, int):
-        proposalNB = 'p{:06d}'.format(proposalNB)
-    runFolder = os.path.join(find_proposal(proposalNB), subFolder, runNB)
+    runFolder = find_run_path(proposalNB, runNB, subFolder)
     run = ed.RunDirectory(runFolder).select_trains(subset)
     if fields is None:
         return run, xr.Dataset()
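Both changes surface in ordinary use: the docstring now documents subFolder='all', and the path handling is delegated to the new helper. A hedged usage sketch:

    import toolbox_scs as tb

    # 'all' merges 'raw' and 'proc'; sources in 'proc' take precedence
    run, ds = tb.load(2212, 208, ['SCS_SA3'], subFolder='all')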
@@ -241,47 +240,95 @@ def run_by_path(path):
     return ed.RunDirectory(path)
 
 
-def concatenateRuns(runs):
-    """ Sorts and concatenate a list of runs with identical data variables
-        along the trainId dimension.
-        Input:
-            runs: (list) the xarray Datasets to concatenate
-        Output:
-            a concatenated xarray Dataset
-    """
-    firstTid = {i: int(run.trainId[0].values) for i, run in enumerate(runs)}
-    orderedDict = dict(sorted(firstTid.items(), key=lambda t: t[1]))
-    orderedRuns = [runs[i] for i in orderedDict]
-    keys = orderedRuns[0].keys()
-    for run in orderedRuns[1:]:
-        if run.keys() != keys:
-            print('data fields between different runs are not identical. '
-                  'Cannot combine runs.')
-            return
-    result = xr.concat(orderedRuns, dim='trainId')
-    for k in orderedRuns[0].attrs.keys():
-        result.attrs[k] = [run.attrs[k] for run in orderedRuns]
-    return result
+def find_run_path(proposalNB, runNB, data='raw'):
+    """
+    Return the run path given the specified proposal and run numbers.
+
+    Parameters
+    ----------
+    proposalNB: (str, int)
+        proposal number e.g. 'p002252' or 2252
+    runNB: (str, int)
+        run number as integer
+    data: str
+        'raw', 'proc' (processed) or 'all' (both 'raw' and 'proc') to access
+        data from either or both of those folders. If 'all' is used, sources
+        present in 'proc' overwrite those in 'raw'. The default is 'raw'.
+
+    Returns
+    -------
+    path: str
+        The run path.
+    """
+    if isinstance(runNB, int):
+        runNB = 'r{:04d}'.format(runNB)
+    if isinstance(proposalNB, int):
+        proposalNB = 'p{:06d}'.format(proposalNB)
+    return os.path.join(find_proposal(proposalNB), data, runNB)
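The two `isinstance` branches only normalise integer inputs to the zero-padded directory names; that formatting can be previewed on its own:

    # the zero-padding applied by find_run_path to integer inputs
    print('r{:04d}'.format(235))      # -> r0235
    print('p{:06d}'.format(2252))     # -> p002252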
+
+
+def open_run(proposalNB, runNB, subset=ed.by_index[:], **kwargs):
+    """
+    Get extra_data.DataCollection in a given proposal.
+    Wraps the extra_data open_run routine and adds subset selection, out of
+    convenience for the toolbox user. More information can be found in the
+    extra_data documentation.
+
+    Parameters
+    ----------
+    proposalNB: (str, int)
+        proposal number e.g. 'p002252' or 2252
+    runNB: (str, int)
+        run number e.g. 17 or 'r0017'
+    subset:
+        a subset of train that can be load with by_index[:5] for the first 5
+        trains
+    **kwargs
+    --------
+    data: str
+        default -> 'raw'
+    include: str
+        default -> '*'
+
+    Returns
+    -------
+    run : extra_data.DataCollection
+        DataCollection object containing information about the specified
+        run. Data can be loaded using built-in class methods.
+    """
+    return ed.open_run(proposalNB, runNB, **kwargs).select_trains(subset)
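Since the wrapper forwards everything to extra_data and then applies the train selection, a hedged usage sketch (keyword arguments taken from the docstring above):

    import extra_data as ed
    import toolbox_scs as tb

    # first five trains of the raw data, restricted to aggregator sources
    run = tb.open_run(2212, 235, subset=ed.by_index[:5],
                      data='raw', include='*DA*')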
-def get_array(run, mnemonic_key=None, stepsize=None):
+def get_array(run=None, mnemonic=None, stepsize=None,
+              subset=ed.by_index[:], subFolder='raw',
+              proposalNB=None, runNB=None):
     """
-    Loads the required 1D-data and rounds its values to integer multiples of
-    stepsize for consistent grouping (except for stepsize=None).
-    Returns a dummy array if mnemonic is set to None.
+    Loads one data array for the specified mnemonic and rounds its values to
+    integer multiples of stepsize for consistent grouping (except for
+    stepsize=None).
+    Returns a 1D array of ones if mnemonic is set to None.
 
     Parameters
     ----------
-    run: karabo_data.DataCollection
-        path to the run directory
-    mnemonic_key: str
+    run: extra_data.DataCollection
+        DataCollection containing the data.
+        Used if proposalNB and runNB are None.
+    mnemonic: str
         Identifier of a single item in the mnemonic collection. None creates a
-        dummy file to average over all trains in the run
+        dummy 1D array of ones with length equal to the number of trains.
     stepsize : float
         nominal stepsize of the array data - values will be rounded to integer
-        multiples of this value
+        multiples of this value.
+    subset:
+        a subset of train that can be load with by_index[:5] for the first 5
+        trains
+    subFolder: (str)
+        sub-folder from which to load the data. Use 'raw' for raw data
+        or 'proc' for processed data.
+    proposalNB: (str, int)
+        proposal number e.g. 'p002252' or 2252.
+    runNB: (str, int)
+        run number e.g. 17 or 'r0017'.
 
     Returns
     -------
@@ -297,26 +344,32 @@ def get_array(run, mnemonic_key=None, stepsize=None):
     Example
     -------
     >>> import toolbox_scs as tb
-    >>> run = tb.load_run(2212, 235)
+    >>> run = tb.open_run(2212, 235)
     >>> mnemonic = 'PP800_PhaseShifter'
     >>> data_PhaseShifter = tb.get_array(run, mnemonic, 0.5)
     """
+    if run is None:
+        run = open_run(proposalNB, runNB, subset, data=subFolder)
+    if not isinstance(run, ed.DataCollection):
+        raise TypeError(f'run argument has type {type(run)} but '
+                        'expected type is extra_data.DataCollection')
+    run = run.select_trains(subset)
+    run_mnemonics = mnemonics_for_run(run)
     try:
-        if mnemonic_key is None:
+        if mnemonic is None:
             data = xr.DataArray(
                 np.ones(len(run.train_ids), dtype=np.int16),
                 dims=['trainId'], coords={'trainId': run.train_ids})
-        elif mnemonic_key in _mnemonics:
-            mnem = _mnemonics[mnemonic_key]
-            data = run.get_array(*mnem.values())
+        elif mnemonic in run_mnemonics:
+            mnem = run_mnemonics[mnemonic]
+            data = run.get_array(*mnem.values(), name=mnemonic)
         else:
-            raise ToolBoxValueError("Invalid mnemonic", mnemonic_key)
+            raise ToolBoxValueError("Invalid mnemonic", mnemonic)
         if stepsize is not None:
             data = stepsize * np.round(data / stepsize)
-        data.name = 'data'
-        log.debug(f"Got data for {mnemonic_key}")
+        log.debug(f"Got data for {mnemonic}")
     except ToolBoxValueError as err:
         log.error(f"{err.message}")
         raise
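The rounding step is plain numpy, so its grouping effect on noisy positions can be previewed without any run data:

    import numpy as np

    raw = np.array([0.49, 0.51, 1.02, 0.98])    # e.g. noisy stage positions
    stepsize = 0.5
    print(stepsize * np.round(raw / stepsize))  # -> [0.5 0.5 1.  1. ]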
@@ -324,6 +377,31 @@ def get_array(run, mnemonic_key=None, stepsize=None):
     return data
 
 
+def concatenateRuns(runs):
+    """ Sorts and concatenate a list of runs with identical data variables
+        along the trainId dimension.
+        Input:
+            runs: (list) the xarray Datasets to concatenate
+        Output:
+            a concatenated xarray Dataset
+    """
+    firstTid = {i: int(run.trainId[0].values) for i, run in enumerate(runs)}
+    orderedDict = dict(sorted(firstTid.items(), key=lambda t: t[1]))
+    orderedRuns = [runs[i] for i in orderedDict]
+    keys = orderedRuns[0].keys()
+    for run in orderedRuns[1:]:
+        if run.keys() != keys:
+            print('data fields between different runs are not identical. '
+                  'Cannot combine runs.')
+            return
+    result = xr.concat(orderedRuns, dim='trainId')
+    for k in orderedRuns[0].attrs.keys():
+        result.attrs[k] = [run.attrs[k] for run in orderedRuns]
+    return result
+
+
 def load_bpt(run, merge_with=None, run_mnemonics=None):
     if run_mnemonics is None:
         run_mnemonics = mnemonics_for_run(run)
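A hedged sketch of what `concatenateRuns` expects, using two toy xarray Datasets in place of real toolbox output (variable names are illustrative):

    import numpy as np
    import xarray as xr
    import toolbox_scs as tb

    a = xr.Dataset({'nrj': ('trainId', np.ones(3))},
                   coords={'trainId': [10, 11, 12]}, attrs={'runNB': 1})
    b = xr.Dataset({'nrj': ('trainId', np.zeros(3))},
                   coords={'trainId': [13, 14, 15]}, attrs={'runNB': 2})
    # runs are sorted by their first trainId before concatenation
    combined = tb.concatenateRuns([b, a])
    print(combined.trainId.values)    # -> [10 11 12 13 14 15]
    print(combined.attrs['runNB'])    # -> [1, 2]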