diff --git a/src/toolbox_scs/detectors/dssc.py b/src/toolbox_scs/detectors/dssc.py
index 7aa93a9f4697e2506bb92c117014139ef87ba784..e193f4174c03ef4f2fb298cd923cc8fe92020d1b 100644
--- a/src/toolbox_scs/detectors/dssc.py
+++ b/src/toolbox_scs/detectors/dssc.py
@@ -10,7 +10,7 @@
     - contributions should comply with pep8 code structure guidelines.
     - Plot routines don't fit into objects since they are rather fluent.
       They have been outsourced to dssc_plot.py. They can now be accessed
-      as tbdet member functions.
+      as toolbox_scs member functions.
 """
 import os
 import logging
@@ -56,7 +56,7 @@ class DSSCBinner:
             run number
         binners: dictionary
             dictionary containing binners constructed using the
-            'create_dssc_bins' tbdet-method.
+            'create_dssc_bins' toolbox_scs.detectors-method.
         xgm_name: str
             a valid mnemonic key of the XGM data to be used to mask the dssc
             frames. Since the xgm is used in several methods its name can be
@@ -76,8 +76,8 @@ class DSSCBinner:
         Example
         -------
         1.) quick -> generic bins, no xgm,
-        >>> import toolbox_scs.detectors as tbdet
-        >>> run235 = tbdet.DSSCBinner(proposal_nb=2212, run_nb=235)
+        >>> import toolbox_scs as tb
+        >>> run235 = tb.DSSCBinner(proposal_nb=2212, run_nb=235)

         2.) detailed -> docs
         """
diff --git a/src/toolbox_scs/detectors/dssc_misc.py b/src/toolbox_scs/detectors/dssc_misc.py
index db89aa76db27e22545ab23662a25ee2ec50ec483..96da909761d9084d378220dff407d956f3f1d58f 100644
--- a/src/toolbox_scs/detectors/dssc_misc.py
+++ b/src/toolbox_scs/detectors/dssc_misc.py
@@ -75,19 +75,18 @@ def create_dssc_bins(name, coordinates, bins):
     Examples
     --------
     >>> import toolbox_scs as tb
-    >>> import toolbox_scs.detectors as tbdet
-    >>> run = tb.load_run(2212, 235, include='*DA*')
+    >>> run = tb.open_run(2212, 235, include='*DA*')

     1.) binner along 'pulse' dimension. Group data into two bins.
     >>> bins_pulse = ['pumped', 'unpumped'] * 10
-    >>> binner_pulse = tbdet.create_dssc_bins("pulse",
+    >>> binner_pulse = tb.create_dssc_bins("pulse",
                                               np.linspace(0,19,20, dtype=int),
                                               bins_pulse)

     2.) binner along 'train' dimension. Group data into bins corresponding
     to the positions of a delay stage for instance.
     >>> bins_trainId = tb.get_array(run, 'PP800_PhaseShifter', 0.04)
-    >>> binner_train = tbdet.create_dssc_bins("trainId",
+    >>> binner_train = tb.create_dssc_bins("trainId",
                                               run.trainIds,
                                               bins_trainId.values)
     """
diff --git a/src/toolbox_scs/detectors/dssc_processing.py b/src/toolbox_scs/detectors/dssc_processing.py
index a95607db058883662eb332041f72083fc374b210..f3e2ae1ac13f94923df62a3a6f98eabdf9c6cdae 100644
--- a/src/toolbox_scs/detectors/dssc_processing.py
+++ b/src/toolbox_scs/detectors/dssc_processing.py
@@ -165,7 +165,7 @@ def process_dssc_data(proposal, run_nr, module, chunksize, info, dssc_binners,
         dictionary containing keys 'dims', 'frames_per_train',
         'total_frames', 'trainIds', 'number_of_trains'.
     dssc_binners: dictionary
-        a dictionary containing binner objects created by the tbdet member
+        a dictionary containing binner objects created by the ToolBox member
         function "create_binner()"
     path : str
         location in which the .h5 files, containing the binned data, should
diff --git a/src/toolbox_scs/load.py b/src/toolbox_scs/load.py
index b959361ed7f9938f249da2db6b9415ca1e93b63e..e9c94170e1f4f638fbe9853252d8940469ee2b2e 100644
--- a/src/toolbox_scs/load.py
+++ b/src/toolbox_scs/load.py
@@ -23,8 +23,10 @@ import toolbox_scs.detectors as tbdet

 __all__ = [
     'concatenateRuns',
+    'find_run_path',
     'get_array',
     'load',
+    'open_run',
     'run_by_path',
     ]
@@ -52,9 +54,9 @@
     Parameters
     ----------
-    proposalNB: (str, int)
+    proposalNB: str, int
        proposal number e.g. 'p002252' or 2252
-    runNB: (str, int)
+    runNB: str, int
        run number as integer
    fields: str, list of str, list of dict
        list of mnemonics to load specific data such as "fastccd",
@@ -62,17 +64,18 @@
        {"extra": {'source: 'SCS_CDIFFT_MAG/SUPPLY/CURRENT',
                   'key': 'actual_current.value',
                   'dim': None}}
-    subFolder: (str)
-        sub-folder from which to load the data. Use 'raw' for raw data
-        or 'proc' for processed data.
-    display: (bool)
+    subFolder: str
+        'raw', 'proc' (processed) or 'all' (both 'raw' and 'proc') to access
+        data from either or both of those folders. If 'all' is used, sources
+        present in 'proc' overwrite those in 'raw'. The default is 'raw'.
+    display: bool
        whether to show the run.info or not
-    validate: (bool)
+    validate: bool
        whether to run extra-data-validate or not
    subset:
        a subset of train that can be load with by_index[:5] for the first 5
        trains
-    rois: dictionary
+    rois: dict
        a dictionnary of mnemonics with a list of rois definition and
        the desired names, for example:
        {'fastccd': {'ref': {'roi': by_index[730:890, 535:720],
@@ -112,11 +115,7 @@

     >>> run, data = tb.load(2212, 208, ['SCS_SA3', 'MCP2apd', 'nrj'])
     """
-    if isinstance(runNB, int):
-        runNB = 'r{:04d}'.format(runNB)
-    if isinstance(proposalNB, int):
-        proposalNB = 'p{:06d}'.format(proposalNB)
-    runFolder = os.path.join(find_proposal(proposalNB), subFolder, runNB)
+    runFolder = find_run_path(proposalNB, runNB, subFolder)
     run = ed.RunDirectory(runFolder).select_trains(subset)
     if fields is None:
         return run, xr.Dataset()
@@ -241,47 +240,95 @@
     return ed.RunDirectory(path)


-def concatenateRuns(runs):
-    """ Sorts and concatenate a list of runs with identical data variables
-    along the trainId dimension.
+def find_run_path(proposalNB, runNB, data='raw'):
+    """
+    Return the run path given the specified proposal and run numbers.

-    Input:
-        runs: (list) the xarray Datasets to concatenate
-    Output:
-        a concatenated xarray Dataset
+    Parameters
+    ----------
+    proposalNB: (str, int)
+        proposal number e.g. 'p002252' or 2252
+    runNB: (str, int)
+        run number e.g. 17 or 'r0017'
+    data: str
+        'raw', 'proc' (processed) or 'all' (both 'raw' and 'proc') to access
+        data from either or both of those folders. If 'all' is used, sources
+        present in 'proc' overwrite those in 'raw'. The default is 'raw'.
+    Returns
+    -------
+    path: str
+        The run path.
""" - firstTid = {i: int(run.trainId[0].values) for i, run in enumerate(runs)} - orderedDict = dict(sorted(firstTid.items(), key=lambda t: t[1])) - orderedRuns = [runs[i] for i in orderedDict] - keys = orderedRuns[0].keys() - for run in orderedRuns[1:]: - if run.keys() != keys: - print('data fields between different runs are not identical. ' - 'Cannot combine runs.') - return + if isinstance(runNB, int): + runNB = 'r{:04d}'.format(runNB) + if isinstance(proposalNB, int): + proposalNB = 'p{:06d}'.format(proposalNB) + return os.path.join(find_proposal(proposalNB), data, runNB) - result = xr.concat(orderedRuns, dim='trainId') - for k in orderedRuns[0].attrs.keys(): - result.attrs[k] = [run.attrs[k] for run in orderedRuns] - return result +def open_run(proposalNB, runNB, subset=ed.by_index[:], **kwargs): + """ + Get extra_data.DataCollection in a given proposal. + Wraps the extra_data open_run routine and adds subset selection, out of + convenience for the toolbox user. More information can be found in the + extra_data documentation. -def get_array(run, mnemonic_key=None, stepsize=None): + Parameters + ---------- + proposalNB: (str, int) + proposal number e.g. 'p002252' or 2252 + runNB: (str, int) + run number e.g. 17 or 'r0017' + subset: + a subset of train that can be load with by_index[:5] for the first 5 + trains + + **kwargs + -------- + data: str + default -> 'raw' + include: str + default -> '*' + + Returns + ------- + run : extra_data.DataCollection + DataCollection object containing information about the specified + run. Data can be loaded using built-in class methods. + """ + return ed.open_run(proposalNB, runNB, **kwargs).select_trains(subset) + + +def get_array(run=None, mnemonic=None, stepsize=None, + subset=ed.by_index[:], subFolder='raw', + proposalNB=None, runNB=None): """ - Loads the required 1D-data and rounds its values to integer multiples of - stepsize for consistent grouping (except for stepsize=None). - Returns a dummy array if mnemonic is set to None. + Loads one data array for the specified mnemonic and rounds its values to + integer multiples of stepsize for consistent grouping (except for + stepsize=None). + Returns a 1D array of ones if mnemonic is set to None. Parameters ---------- - run: karabo_data.DataCollection - path to the run directory - mnemonic_key: str + run: extra_data.DataCollection + DataCollection containing the data. + Used if proposalNB and runNB are None. + mnemonic: str Identifier of a single item in the mnemonic collection. None creates a - dummy file to average over all trains in the run + dummy 1D array of ones with length equal to the number of trains. stepsize : float nominal stepsize of the array data - values will be rounded to integer - multiples of this value + multiples of this value. + subset: + a subset of train that can be load with by_index[:5] for the first 5 + trains + subFolder: (str) + sub-folder from which to load the data. Use 'raw' for raw data + or 'proc' for processed data. + proposalNB: (str, int) + proposal number e.g. 'p002252' or 2252. + runNB: (str, int) + run number e.g. 17 or 'r0017'. 
     Returns
     -------
@@ -297,26 +344,32 @@
     Example
     -------
     >>> import toolbox_scs as tb
-    >>> run = tb.load_run(2212, 235)
+    >>> run = tb.open_run(2212, 235)
     >>> mnemonic = 'PP800_PhaseShifter'
     >>> data_PhaseShifter = tb.get_array(run, mnemonic, 0.5)
     """
+    if run is None:
+        run = open_run(proposalNB, runNB, subset, data=subFolder)
+    if not isinstance(run, ed.DataCollection):
+        raise TypeError(f'run argument has type {type(run)} but '
+                        'expected type is extra_data.DataCollection')
+    run = run.select_trains(subset)
+    run_mnemonics = mnemonics_for_run(run)
     try:
-        if mnemonic_key is None:
+        if mnemonic is None:
             data = xr.DataArray(
                 np.ones(len(run.train_ids), dtype=np.int16),
                 dims=['trainId'], coords={'trainId': run.train_ids})
-        elif mnemonic_key in _mnemonics:
-            mnem = _mnemonics[mnemonic_key]
-            data = run.get_array(*mnem.values())
+        elif mnemonic in run_mnemonics:
+            mnem = run_mnemonics[mnemonic]
+            data = run.get_array(*mnem.values(), name=mnemonic)
         else:
-            raise ToolBoxValueError("Invalid mnemonic", mnemonic_key)
+            raise ToolBoxValueError("Invalid mnemonic", mnemonic)
         if stepsize is not None:
             data = stepsize * np.round(data / stepsize)
-        data.name = 'data'
-        log.debug(f"Got data for {mnemonic_key}")
+        log.debug(f"Got data for {mnemonic}")
     except ToolBoxValueError as err:
         log.error(f"{err.message}")
         raise
@@ -324,6 +377,31 @@
     return data


+def concatenateRuns(runs):
+    """ Sorts and concatenate a list of runs with identical data variables
+    along the trainId dimension.
+
+    Input:
+        runs: (list) the xarray Datasets to concatenate
+    Output:
+        a concatenated xarray Dataset
+    """
+    firstTid = {i: int(run.trainId[0].values) for i, run in enumerate(runs)}
+    orderedDict = dict(sorted(firstTid.items(), key=lambda t: t[1]))
+    orderedRuns = [runs[i] for i in orderedDict]
+    keys = orderedRuns[0].keys()
+    for run in orderedRuns[1:]:
+        if run.keys() != keys:
+            print('data fields between different runs are not identical. '
+                  'Cannot combine runs.')
+            return
+
+    result = xr.concat(orderedRuns, dim='trainId')
+    for k in orderedRuns[0].attrs.keys():
+        result.attrs[k] = [run.attrs[k] for run in orderedRuns]
+    return result
+
+
 def load_bpt(run, merge_with=None, run_mnemonics=None):
     if run_mnemonics is None:
         run_mnemonics = mnemonics_for_run(run)
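Reviewer note: taken together, the patch reshapes the top-level loading API (find_run_path, open_run, the reworked get_array) and points the DSSC docstrings at the package namespace. The sketch below is illustrative only, not part of the patch; it assumes toolbox_scs re-exports these names at the top level (as the updated __all__ and docstring examples suggest), reuses proposal 2212 / run 235 and the 'PP800_PhaseShifter' mnemonic from the docstrings above, and the binners= keyword on DSSCBinner is assumed from its documented 'binners' parameter.

# Minimal usage sketch -- names, proposal 2212 and run 235 are taken from the
# docstring examples in this patch; top-level re-exports are assumed.
import numpy as np
import toolbox_scs as tb
from extra_data import by_index

path = tb.find_run_path(2212, 235, data='raw')       # resolve the run directory
run = tb.open_run(2212, 235, subset=by_index[:100],  # open run, first 100 trains
                  include='*DA*')

# 1D array for a mnemonic, rounded to multiples of 0.5 (as in the get_array example).
delay = tb.get_array(run, 'PP800_PhaseShifter', 0.5)

# Binners along the pulse and train dimensions (as in the create_dssc_bins examples).
bins_pulse = ['pumped', 'unpumped'] * 10
binner_pulse = tb.create_dssc_bins('pulse',
                                   np.linspace(0, 19, 20, dtype=int),
                                   bins_pulse)
binner_train = tb.create_dssc_bins('trainId', run.train_ids, delay.values)

# DSSCBinner takes the proposal/run numbers; the binners= keyword is an
# assumption based on its documented 'binners' parameter.
run235 = tb.DSSCBinner(proposal_nb=2212, run_nb=235,
                       binners={'pulse': binner_pulse, 'trainId': binner_train})

# Alternative: let get_array open the run itself via the new keyword arguments.
delay2 = tb.get_array(mnemonic='PP800_PhaseShifter', stepsize=0.5,
                      proposalNB=2212, runNB=235)

Under these assumptions the sketch mirrors the docstring examples end to end; the last call exercises the proposalNB/runNB path added to get_array in this patch.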